[feaLib] Check that glyph names referenced in the feature file are part of the glyph set (#1828)

This checks that glyph names that appear in a feature file are actually
in the glyph set provided in glyphNames. If the set is empty, no check
is done. This preempts a KeyError later during saving of a TTFont object
and makes this case much more easily catchable.

Closes #1723.
This commit is contained in:
Nikolaus Waxweiler 2020-02-13 14:47:29 +00:00 committed by GitHub
parent e35228603d
commit 5cda8381f9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 51 additions and 2 deletions

View File

@ -19,6 +19,14 @@ class Parser(object):
def __init__(self, featurefile, glyphNames=(), followIncludes=True, def __init__(self, featurefile, glyphNames=(), followIncludes=True,
**kwargs): **kwargs):
"""Initializes a Parser object.
Note: the `glyphNames` iterable serves a double role to help distinguish
glyph names from ranges in the presence of hyphens and to ensure that glyph
names referenced in a feature file are actually part of a font's glyph set.
If the iterable is left empty, no glyph name in glyph set checking takes
place.
"""
if "glyphMap" in kwargs: if "glyphMap" in kwargs:
from fontTools.misc.loggingTools import deprecateArgument from fontTools.misc.loggingTools import deprecateArgument
deprecateArgument("glyphMap", "use 'glyphNames' (iterable) instead") deprecateArgument("glyphMap", "use 'glyphNames' (iterable) instead")
@ -268,6 +276,7 @@ class Parser(object):
if (accept_glyphname and if (accept_glyphname and
self.next_token_type_ in (Lexer.NAME, Lexer.CID)): self.next_token_type_ in (Lexer.NAME, Lexer.CID)):
glyph = self.expect_glyph_() glyph = self.expect_glyph_()
self.check_glyph_name_in_glyph_set(glyph)
return self.ast.GlyphName(glyph, location=self.cur_token_location_) return self.ast.GlyphName(glyph, location=self.cur_token_location_)
if self.next_token_type_ is Lexer.GLYPHCLASS: if self.next_token_type_ is Lexer.GLYPHCLASS:
self.advance_lexer_() self.advance_lexer_()
@ -292,6 +301,7 @@ class Parser(object):
location = self.cur_token_location_ location = self.cur_token_location_
if '-' in glyph and glyph not in self.glyphNames_: if '-' in glyph and glyph not in self.glyphNames_:
start, limit = self.split_glyph_range_(glyph, location) start, limit = self.split_glyph_range_(glyph, location)
self.check_glyph_name_in_glyph_set(start, limit)
glyphs.add_range( glyphs.add_range(
start, limit, start, limit,
self.make_glyph_range_(location, start, limit)) self.make_glyph_range_(location, start, limit))
@ -299,10 +309,12 @@ class Parser(object):
start = glyph start = glyph
self.expect_symbol_("-") self.expect_symbol_("-")
limit = self.expect_glyph_() limit = self.expect_glyph_()
self.check_glyph_name_in_glyph_set(start, limit)
glyphs.add_range( glyphs.add_range(
start, limit, start, limit,
self.make_glyph_range_(location, start, limit)) self.make_glyph_range_(location, start, limit))
else: else:
self.check_glyph_name_in_glyph_set(glyph)
glyphs.append(glyph) glyphs.append(glyph)
elif self.next_token_type_ is Lexer.CID: elif self.next_token_type_ is Lexer.CID:
glyph = self.expect_glyph_() glyph = self.expect_glyph_()
@ -311,11 +323,17 @@ class Parser(object):
range_start = self.cur_token_ range_start = self.cur_token_
self.expect_symbol_("-") self.expect_symbol_("-")
range_end = self.expect_cid_() range_end = self.expect_cid_()
self.check_glyph_name_in_glyph_set(
f"cid{range_start:05d}",
f"cid{range_end:05d}",
)
glyphs.add_cid_range(range_start, range_end, glyphs.add_cid_range(range_start, range_end,
self.make_cid_range_(range_location, self.make_cid_range_(range_location,
range_start, range_end)) range_start, range_end))
else: else:
glyphs.append("cid%05d" % self.cur_token_) glyph_name = f"cid{self.cur_token_:05d}"
self.check_glyph_name_in_glyph_set(glyph_name)
glyphs.append(glyph_name)
elif self.next_token_type_ is Lexer.GLYPHCLASS: elif self.next_token_type_ is Lexer.GLYPHCLASS:
self.advance_lexer_() self.advance_lexer_()
gc = self.glyphclasses_.resolve(self.cur_token_) gc = self.glyphclasses_.resolve(self.cur_token_)
@ -1509,6 +1527,21 @@ class Parser(object):
raise FeatureLibError("Expected a glyph name or CID", raise FeatureLibError("Expected a glyph name or CID",
self.cur_token_location_) self.cur_token_location_)
def check_glyph_name_in_glyph_set(self, *names):
"""Raises if glyph name (just `start`) or glyph names of a
range (`start` and `end`) are not in the glyph set.
If no glyph set is present, does nothing.
"""
if self.glyphNames_:
missing = [name for name in names if name not in self.glyphNames_]
if missing:
raise FeatureLibError(
"The following glyph names are referenced but are missing from the "
f"glyph set: {', '.join(missing)}",
self.cur_token_location_
)
def expect_markClass_reference_(self): def expect_markClass_reference_(self):
name = self.expect_class_name_() name = self.expect_class_name_()
mc = self.glyphclasses_.resolve(name) mc = self.glyphclasses_.resolve(name)

View File

@ -1,3 +1,5 @@
- [feaLib] Parsing feature code now ensures that referenced glyph names are part of
the known glyph set, unless a glyph set was not provided.
- [varLib] When filling in the default axis value for a missing location of a source or - [varLib] When filling in the default axis value for a missing location of a source or
instance, correctly map the value forward. instance, correctly map the value forward.

View File

@ -41,7 +41,7 @@ def makeTTFont():
a_n_d T_h T_h.swash germandbls ydieresis yacute breve a_n_d T_h T_h.swash germandbls ydieresis yacute breve
grave acute dieresis macron circumflex cedilla umlaut ogonek caron grave acute dieresis macron circumflex cedilla umlaut ogonek caron
damma hamza sukun kasratan lam_meem_jeem noon.final noon.initial damma hamza sukun kasratan lam_meem_jeem noon.final noon.initial
by feature lookup sub table by feature lookup sub table uni0327 uni0328 e.fina
""".split() """.split()
font = TTFont() font = TTFont()
font.setGlyphOrder(glyphs) font.setGlyphOrder(glyphs)

View File

@ -39,6 +39,14 @@ GLYPHNAMES = ("""
n.sc o.sc p.sc q.sc r.sc s.sc t.sc u.sc v.sc w.sc x.sc y.sc z.sc n.sc o.sc p.sc q.sc r.sc s.sc t.sc u.sc v.sc w.sc x.sc y.sc z.sc
a.swash b.swash x.swash y.swash z.swash a.swash b.swash x.swash y.swash z.swash
foobar foo.09 foo.1234 foo.9876 foobar foo.09 foo.1234 foo.9876
one two five six acute grave dieresis umlaut cedilla ogonek macron
a_f_f_i o_f_f_i f_i f_f_i one.fitted one.oldstyle a.1 a.2 a.3 c_t
PRE SUF FIX BACK TRACK LOOK AHEAD ampersand ampersand.1 ampersand.2
cid00001 cid00002 cid00003 cid00004 cid00005 cid00006 cid00007
cid12345 cid78987 cid00999 cid01000 cid01001 cid00998 cid00995
cid00111 cid00222
comma endash emdash figuredash damma hamza
c_d d.alt n.end s.end f_f
""").split() + ["foo.%d" % i for i in range(1, 200)] """).split() + ["foo.%d" % i for i in range(1, 200)]
@ -260,6 +268,12 @@ class ParserTest(unittest.TestCase):
FeatureLibError, "Font revision numbers must be positive", FeatureLibError, "Font revision numbers must be positive",
self.parse, "table head {FontRevision -17.2;} head;") self.parse, "table head {FontRevision -17.2;} head;")
def test_strict_glyph_name_check(self):
self.parse("@bad = [a b ccc];", glyphNames=("a", "b", "ccc"))
with self.assertRaisesRegex(FeatureLibError, "missing from the glyph set: ccc"):
self.parse("@bad = [a b ccc];", glyphNames=("a", "b"))
def test_glyphclass(self): def test_glyphclass(self):
[gc] = self.parse("@dash = [endash emdash figuredash];").statements [gc] = self.parse("@dash = [endash emdash figuredash];").statements
self.assertEqual(gc.name, "dash") self.assertEqual(gc.name, "dash")