From b31ed09421ff9f44d36678480737a54ad8d68bd9 Mon Sep 17 00:00:00 2001 From: Sascha Brawer Date: Sat, 11 Feb 2017 12:05:01 +0100 Subject: [PATCH] Support glyph names with dashes The OpenType Feature File Syntax has been changed to support dashes: https://github.com/adobe-type-tools/afdko/issues/152 Resolves https://github.com/fonttools/fonttools/issues/559. Needed for https://github.com/googlei18n/fontmake/issues/249. --- Lib/fontTools/feaLib/builder.py | 2 +- Lib/fontTools/feaLib/lexer.py | 2 +- Lib/fontTools/feaLib/parser.py | 61 ++++++++++++++++++++++++++++----- NEWS.rst | 3 ++ Tests/feaLib/builder_test.py | 3 +- Tests/feaLib/lexer_test.py | 3 ++ Tests/feaLib/parser_test.py | 50 +++++++++++++++++++++++++-- 7 files changed, 110 insertions(+), 14 deletions(-) diff --git a/Lib/fontTools/feaLib/builder.py b/Lib/fontTools/feaLib/builder.py index 27480db24..7f7250fd3 100644 --- a/Lib/fontTools/feaLib/builder.py +++ b/Lib/fontTools/feaLib/builder.py @@ -75,7 +75,7 @@ class Builder(object): self.vhea_ = {} def build(self): - self.parseTree = Parser(self.file).parse() + self.parseTree = Parser(self.file, self.glyphMap).parse() self.parseTree.build(self) self.build_feature_aalt_() self.build_head() diff --git a/Lib/fontTools/feaLib/lexer.py b/Lib/fontTools/feaLib/lexer.py index f11c93385..0c29ce895 100644 --- a/Lib/fontTools/feaLib/lexer.py +++ b/Lib/fontTools/feaLib/lexer.py @@ -26,7 +26,7 @@ class Lexer(object): CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef" CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\" - CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/" + CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-" RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.]+$") diff --git a/Lib/fontTools/feaLib/parser.py b/Lib/fontTools/feaLib/parser.py index 7b13008ec..23cabab40 100644 --- a/Lib/fontTools/feaLib/parser.py +++ b/Lib/fontTools/feaLib/parser.py @@ -17,7 +17,8 @@ class Parser(object): extensions = {} ast = ast - def __init__(self, featurefile): + def __init__(self, featurefile, glyphMap): + self.glyphMap_ = glyphMap self.doc_ = self.ast.FeatureFile() self.anchors_ = SymbolTable() self.glyphclasses_ = SymbolTable() @@ -193,6 +194,45 @@ class Parser(object): self.glyphclasses_.define(name, glyphclass) return glyphclass + def split_glyph_range_(self, name, location): + # Since v1.20, the OpenType Feature File specification allows + # for dashes in glyph names. A sequence like "a-b-c-d" could + # therefore mean a single glyph whose name happens to be + # "a-b-c-d", or it could mean a range from glyph "a" to glyph + # "b-c-d", or a range from glyph "a-b" to glyph "c-d", or a + # range from glyph "a-b-c" to glyph "d".Technically, this + # example could be resolved because the (pretty complex) + # definition of glyph ranges renders most of these splits + # invalid. But the specification does not say that a compiler + # should try to apply such fancy heuristics. To encourage + # unambiguous feature files, we therefore try all possible + # splits and reject the feature file if there are multiple + # splits possible. It is intentional that we don't just emit a + # warning; warnings tend to get ignored. To fix the problem, + # font designers can trivially add spaces around the intended + # split point, and we emit a compiler error that suggests + # how exactly the source should be rewritten to make things + # unambiguous. + parts = name.split("-") + solutions = [] + for i in range(len(parts)): + start, limit = "-".join(parts[0:i]), "-".join(parts[i:]) + if start in self.glyphMap_ and limit in self.glyphMap_: + solutions.append((start, limit)) + if len(solutions) == 1: + start, limit = solutions[0] + return start, limit + elif len(solutions) == 0: + raise FeatureLibError( + "\"%s\" is not a glyph in the font, and it can not be split " + "into a range of known glyphs" % name, location) + else: + ranges = " or ".join(["\"%s - %s\"" % (s, l) for s, l in solutions]) + raise FeatureLibError( + "Ambiguous glyph range \"%s\"; " + "please use %s to clarify what you mean" % (name, ranges), + location) + def parse_glyphclass_(self, accept_glyphname): if (accept_glyphname and self.next_token_type_ in (Lexer.NAME, Lexer.CID)): @@ -216,14 +256,19 @@ class Parser(object): while self.next_token_ != "]": if self.next_token_type_ is Lexer.NAME: glyph = self.expect_glyph_() - if self.next_token_ == "-": - range_location = self.cur_token_location_ - range_start = glyph + location = self.cur_token_location_ + if '-' in glyph and glyph not in self.glyphMap_: + start, limit = self.split_glyph_range_(glyph, location) + glyphs.add_range( + start, limit, + self.make_glyph_range_(location, start, limit)) + elif self.next_token_ == "-": + start = glyph self.expect_symbol_("-") - range_end = self.expect_glyph_() - glyphs.add_range(range_start, range_end, - self.make_glyph_range_(range_location, - range_start, range_end)) + limit = self.expect_glyph_() + glyphs.add_range( + start, limit, + self.make_glyph_range_(location, start, limit)) else: glyphs.append(glyph) elif self.next_token_type_ is Lexer.CID: diff --git a/NEWS.rst b/NEWS.rst index 6c8654d0e..403cb3cec 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -1,3 +1,6 @@ +- [feaLib] Glyph names can have dashes, as per new AFDKO syntax v1.20 (#559). +- [feaLib] feaLib.Parser now needs the font's glyph map for parsing. + 3.6.3 (released 2017-02-06) --------------------------- diff --git a/Tests/feaLib/builder_test.py b/Tests/feaLib/builder_test.py index 8f3e3eea7..f54e02bf4 100644 --- a/Tests/feaLib/builder_test.py +++ b/Tests/feaLib/builder_test.py @@ -134,8 +134,9 @@ class BuilderTest(unittest.TestCase): font[tag].compile(font) def check_fea2fea_file(self, name, base=None, parser=Parser): + font = makeTTFont() fname = (name + ".fea") if '.' not in name else name - p = parser(self.getpath(fname)) + p = parser(self.getpath(fname), glyphMap=font.getReverseGlyphMap()) doc = p.parse() actual = self.normal_fea(doc.asFea().split("\n")) diff --git a/Tests/feaLib/lexer_test.py b/Tests/feaLib/lexer_test.py index 7e4c249f9..5626eadb5 100644 --- a/Tests/feaLib/lexer_test.py +++ b/Tests/feaLib/lexer_test.py @@ -29,6 +29,7 @@ class LexerTest(unittest.TestCase): self.assertEqual(lex("_"), [(Lexer.NAME, "_")]) self.assertEqual(lex("\\table"), [(Lexer.NAME, "\\table")]) self.assertEqual(lex("a+*:^~!"), [(Lexer.NAME, "a+*:^~!")]) + self.assertEqual(lex("with-dash"), [(Lexer.NAME, "with-dash")]) def test_cid(self): self.assertEqual(lex("\\0 \\987"), [(Lexer.CID, 0), (Lexer.CID, 987)]) @@ -72,6 +73,8 @@ class LexerTest(unittest.TestCase): def test_symbol(self): self.assertEqual(lex("a'"), [(Lexer.NAME, "a"), (Lexer.SYMBOL, "'")]) + self.assertEqual(lex("-A-B"), + [(Lexer.SYMBOL, "-"), (Lexer.NAME, "A-B")]) self.assertEqual( lex("foo - -2"), [(Lexer.NAME, "foo"), (Lexer.SYMBOL, "-"), (Lexer.NUMBER, -2)]) diff --git a/Tests/feaLib/parser_test.py b/Tests/feaLib/parser_test.py index 4fa22371a..91849bf18 100644 --- a/Tests/feaLib/parser_test.py +++ b/Tests/feaLib/parser_test.py @@ -29,6 +29,23 @@ def mapping(s): return dict(zip(b, c)) +def makeGlyphMap(glyphs): + return {g: i for i, g in enumerate(glyphs)} + + +GLYPHMAP = makeGlyphMap((""" + .notdef space A B C D E F G H I J K L M N O P Q R S T U V W X Y Z + A.sc B.sc C.sc D.sc E.sc F.sc G.sc H.sc I.sc J.sc K.sc L.sc M.sc + N.sc O.sc P.sc Q.sc R.sc S.sc T.sc U.sc V.sc W.sc X.sc Y.sc Z.sc + A.swash B.swash X.swash Y.swash Z.swash + a b c d e f g h i j k l m n o p q r s t u v w x y z + a.sc b.sc c.sc d.sc e.sc f.sc g.sc h.sc i.sc j.sc k.sc l.sc m.sc + n.sc o.sc p.sc q.sc r.sc s.sc t.sc u.sc v.sc w.sc x.sc y.sc z.sc + a.swash b.swash x.swash y.swash z.swash + foobar foo.09 foo.1234 foo.9876 +""").split() + ["foo.%d" % i for i in range(1, 200)]) + + class ParserTest(unittest.TestCase): def __init__(self, methodName): unittest.TestCase.__init__(self, methodName) @@ -237,6 +254,33 @@ class ParserTest(unittest.TestCase): self.assertEqual(gc.name, "defg.sc") self.assertEqual(gc.glyphSet(), ("d.sc", "e.sc", "f.sc", "g.sc")) + def test_glyphclass_range_dash(self): + glyphMap = makeGlyphMap("A-foo.sc B-foo.sc C-foo.sc".split()) + [gc] = self.parse("@range = [A-foo.sc-C-foo.sc];", glyphMap).statements + self.assertEqual(gc.glyphSet(), ("A-foo.sc", "B-foo.sc", "C-foo.sc")) + + def test_glyphclass_range_dash_with_space(self): + g = makeGlyphMap("A-foo.sc B-foo.sc C-foo.sc".split()) + [gc] = self.parse("@range = [A-foo.sc - C-foo.sc];", g).statements + self.assertEqual(gc.glyphSet(), ("A-foo.sc", "B-foo.sc", "C-foo.sc")) + + def test_glyphclass_glyph_name_should_win_over_range(self): + # The OpenType Feature File Specification v1.20 makes it clear + # that if a dashed name could be interpreted either as a glyph name + # or as a range, then the semantics should be the single dashed name. + glyphMap = makeGlyphMap( + "A-foo.sc-C-foo.sc A-foo.sc B-foo.sc C-foo.sc".split()) + [gc] = self.parse("@range = [A-foo.sc-C-foo.sc];", glyphMap).statements + self.assertEqual(gc.glyphSet(), ("A-foo.sc-C-foo.sc",)) + + def test_glyphclass_range_dash_ambiguous(self): + glyphMap = makeGlyphMap("A B C A-B B-C".split()) + self.assertRaisesRegex( + FeatureLibError, + 'Ambiguous glyph range "A-B-C"; ' + 'please use "A - B-C" or "A-B - C" to clarify what you mean', + self.parse, r"@bad = [A-B-C];", glyphMap) + def test_glyphclass_range_digit1(self): [gc] = self.parse("@range = [foo.2-foo.5];").statements self.assertEqual(gc.glyphSet(), ("foo.2", "foo.3", "foo.4", "foo.5")) @@ -1114,7 +1158,7 @@ class ParserTest(unittest.TestCase): self.assertEqual(glyphstr(sub.suffix), "Z") def test_substitute_lookups(self): # GSUB LookupType 6 - doc = Parser(self.getpath("spec5fi1.fea")).parse() + doc = Parser(self.getpath("spec5fi1.fea"), GLYPHMAP).parse() [langsys, ligs, sub, feature] = doc.statements self.assertEqual(feature.statements[0].lookups, [ligs, None, sub]) self.assertEqual(feature.statements[1].lookups, [ligs, None, sub]) @@ -1274,9 +1318,9 @@ class ParserTest(unittest.TestCase): doc = self.parse(";;;") self.assertFalse(doc.statements) - def parse(self, text): + def parse(self, text, glyphMap=GLYPHMAP): featurefile = UnicodeIO(text) - return Parser(featurefile).parse() + return Parser(featurefile, glyphMap).parse() @staticmethod def getpath(testfile):