[feaLib] Check that glyph names referenced in the feature file are part of the glyph set (#1828)

This checks that glyph names that appear in a feature file are actually in the glyph set provided in glyphNames. If the set is empty, no check is done. This preempts a KeyError later during saving of a TTFont object and makes this case much more easily catchable. Closes #1723.
2020-02-13 14:47:29 +00:00 · 2020-02-13 14:47:29 +00:00 · 5cda8381f9
commit 5cda8381f9
parent e35228603d
4 changed files with 51 additions and 2 deletions
--- a/Lib/fontTools/feaLib/parser.py
+++ b/Lib/fontTools/feaLib/parser.py
@ -19,6 +19,14 @@ class Parser(object):
    def __init__(self, featurefile, glyphNames=(), followIncludes=True,
                 **kwargs):
        """Initializes a Parser object.
        Note: the `glyphNames` iterable serves a double role to help distinguish
        glyph names from ranges in the presence of hyphens and to ensure that glyph
        names referenced in a feature file are actually part of a font's glyph set.
        If the iterable is left empty, no glyph name in glyph set checking takes
        place.
        """
        if "glyphMap" in kwargs:
            from fontTools.misc.loggingTools import deprecateArgument
            deprecateArgument("glyphMap", "use 'glyphNames' (iterable) instead")
@ -268,6 +276,7 @@ class Parser(object):
        if (accept_glyphname and
                self.next_token_type_ in (Lexer.NAME, Lexer.CID)):
            glyph = self.expect_glyph_()
            self.check_glyph_name_in_glyph_set(glyph)
            return self.ast.GlyphName(glyph, location=self.cur_token_location_)
        if self.next_token_type_ is Lexer.GLYPHCLASS:
            self.advance_lexer_()
@ -292,6 +301,7 @@ class Parser(object):
                location = self.cur_token_location_
                if '-' in glyph and glyph not in self.glyphNames_:
                    start, limit = self.split_glyph_range_(glyph, location)
                    self.check_glyph_name_in_glyph_set(start, limit)
                    glyphs.add_range(
                        start, limit,
                        self.make_glyph_range_(location, start, limit))
@ -299,10 +309,12 @@ class Parser(object):
                    start = glyph
                    self.expect_symbol_("-")
                    limit = self.expect_glyph_()
                    self.check_glyph_name_in_glyph_set(start, limit)
                    glyphs.add_range(
                        start, limit,
                        self.make_glyph_range_(location, start, limit))
                else:
                    self.check_glyph_name_in_glyph_set(glyph)
                    glyphs.append(glyph)
            elif self.next_token_type_ is Lexer.CID:
                glyph = self.expect_glyph_()
@ -311,11 +323,17 @@ class Parser(object):
                    range_start = self.cur_token_
                    self.expect_symbol_("-")
                    range_end = self.expect_cid_()
                    self.check_glyph_name_in_glyph_set(
                        f"cid{range_start:05d}",
                        f"cid{range_end:05d}",
                    )
                    glyphs.add_cid_range(range_start, range_end,
                                         self.make_cid_range_(range_location,
                                                              range_start, range_end))
                else:
-                    glyphs.append("cid%05d" % self.cur_token_)
+                    glyph_name = f"cid{self.cur_token_:05d}"
                    self.check_glyph_name_in_glyph_set(glyph_name)
                    glyphs.append(glyph_name)
            elif self.next_token_type_ is Lexer.GLYPHCLASS:
                self.advance_lexer_()
                gc = self.glyphclasses_.resolve(self.cur_token_)
@ -1509,6 +1527,21 @@ class Parser(object):
        raise FeatureLibError("Expected a glyph name or CID",
                              self.cur_token_location_)
    def check_glyph_name_in_glyph_set(self, *names):
        """Raises if glyph name (just `start`) or glyph names of a
        range (`start` and `end`) are not in the glyph set.
        If no glyph set is present, does nothing.
        """
        if self.glyphNames_:
            missing = [name for name in names if name not in self.glyphNames_]
            if missing:
                raise FeatureLibError(
                    "The following glyph names are referenced but are missing from the "
                    f"glyph set: {', '.join(missing)}",
                    self.cur_token_location_
                )
    def expect_markClass_reference_(self):
        name = self.expect_class_name_()
        mc = self.glyphclasses_.resolve(name)
--- a/NEWS.rst
+++ b/NEWS.rst
@ -1,3 +1,5 @@
 - [feaLib] Parsing feature code now ensures that referenced glyph names are part of
  the known glyph set, unless a glyph set was not provided.
 - [varLib] When filling in the default axis value for a missing location of a source or
  instance, correctly map the value forward.
--- a/Tests/feaLib/builder_test.py
+++ b/Tests/feaLib/builder_test.py
@ -41,7 +41,7 @@ def makeTTFont():
        a_n_d T_h T_h.swash germandbls ydieresis yacute breve
        grave acute dieresis macron circumflex cedilla umlaut ogonek caron
        damma hamza sukun kasratan lam_meem_jeem noon.final noon.initial
-        by feature lookup sub table
+        by feature lookup sub table uni0327 uni0328 e.fina
    """.split()
    font = TTFont()
    font.setGlyphOrder(glyphs)
--- a/Tests/feaLib/parser_test.py
+++ b/Tests/feaLib/parser_test.py
@ -39,6 +39,14 @@ GLYPHNAMES = ("""
    n.sc o.sc p.sc q.sc r.sc s.sc t.sc u.sc v.sc w.sc x.sc y.sc z.sc
    a.swash b.swash x.swash y.swash z.swash
    foobar foo.09 foo.1234 foo.9876
    one two five six acute grave dieresis umlaut cedilla ogonek macron
    a_f_f_i o_f_f_i f_i f_f_i one.fitted one.oldstyle a.1 a.2 a.3 c_t
    PRE SUF FIX BACK TRACK LOOK AHEAD ampersand ampersand.1 ampersand.2
    cid00001 cid00002 cid00003 cid00004 cid00005 cid00006 cid00007
    cid12345 cid78987 cid00999 cid01000 cid01001 cid00998 cid00995
    cid00111 cid00222
    comma endash emdash figuredash damma hamza
    c_d d.alt n.end s.end f_f
 """).split() + ["foo.%d" % i for i in range(1, 200)]
@ -260,6 +268,12 @@ class ParserTest(unittest.TestCase):
            FeatureLibError, "Font revision numbers must be positive",
            self.parse, "table head {FontRevision -17.2;} head;")
    def test_strict_glyph_name_check(self):
        self.parse("@bad = [a b ccc];", glyphNames=("a", "b", "ccc"))
        with self.assertRaisesRegex(FeatureLibError, "missing from the glyph set: ccc"):
            self.parse("@bad = [a b ccc];", glyphNames=("a", "b"))
    def test_glyphclass(self):
        [gc] = self.parse("@dash = [endash emdash figuredash];").statements
        self.assertEqual(gc.name, "dash")