From b31ed09421ff9f44d36678480737a54ad8d68bd9 Mon Sep 17 00:00:00 2001
From: Sascha Brawer <sascha@brawer.ch>
Date: Sat, 11 Feb 2017 12:05:01 +0100
Subject: [PATCH] Support glyph names with dashes

The OpenType Feature File Syntax has been changed to support dashes:
https://github.com/adobe-type-tools/afdko/issues/152

Resolves https://github.com/fonttools/fonttools/issues/559.
Needed for https://github.com/googlei18n/fontmake/issues/249.
---
 Lib/fontTools/feaLib/builder.py |  2 +-
 Lib/fontTools/feaLib/lexer.py   |  2 +-
 Lib/fontTools/feaLib/parser.py  | 61 ++++++++++++++++++++++++++++-----
 NEWS.rst                        |  3 ++
 Tests/feaLib/builder_test.py    |  3 +-
 Tests/feaLib/lexer_test.py      |  3 ++
 Tests/feaLib/parser_test.py     | 50 +++++++++++++++++++++++++--
 7 files changed, 110 insertions(+), 14 deletions(-)

diff --git a/Lib/fontTools/feaLib/builder.py b/Lib/fontTools/feaLib/builder.py
index 27480db24..7f7250fd3 100644
--- a/Lib/fontTools/feaLib/builder.py
+++ b/Lib/fontTools/feaLib/builder.py
@@ -75,7 +75,7 @@ class Builder(object):
         self.vhea_ = {}
 
     def build(self):
-        self.parseTree = Parser(self.file).parse()
+        self.parseTree = Parser(self.file, self.glyphMap).parse()
         self.parseTree.build(self)
         self.build_feature_aalt_()
         self.build_head()
diff --git a/Lib/fontTools/feaLib/lexer.py b/Lib/fontTools/feaLib/lexer.py
index f11c93385..0c29ce895 100644
--- a/Lib/fontTools/feaLib/lexer.py
+++ b/Lib/fontTools/feaLib/lexer.py
@@ -26,7 +26,7 @@ class Lexer(object):
     CHAR_HEXDIGIT_ = "0123456789ABCDEFabcdef"
     CHAR_LETTER_ = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
     CHAR_NAME_START_ = CHAR_LETTER_ + "_+*:.^~!\\"
-    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/"
+    CHAR_NAME_CONTINUATION_ = CHAR_LETTER_ + CHAR_DIGIT_ + "_.+*:^~!/-"
 
     RE_GLYPHCLASS = re.compile(r"^[A-Za-z_0-9.]+$")
 
diff --git a/Lib/fontTools/feaLib/parser.py b/Lib/fontTools/feaLib/parser.py
index 7b13008ec..23cabab40 100644
--- a/Lib/fontTools/feaLib/parser.py
+++ b/Lib/fontTools/feaLib/parser.py
@@ -17,7 +17,8 @@ class Parser(object):
     extensions = {}
     ast = ast
 
-    def __init__(self, featurefile):
+    def __init__(self, featurefile, glyphMap):
+        self.glyphMap_ = glyphMap
         self.doc_ = self.ast.FeatureFile()
         self.anchors_ = SymbolTable()
         self.glyphclasses_ = SymbolTable()
@@ -193,6 +194,45 @@ class Parser(object):
         self.glyphclasses_.define(name, glyphclass)
         return glyphclass
 
+    def split_glyph_range_(self, name, location):
+        # Since v1.20, the OpenType Feature File specification allows
+        # for dashes in glyph names. A sequence like "a-b-c-d" could
+        # therefore mean a single glyph whose name happens to be
+        # "a-b-c-d", or it could mean a range from glyph "a" to glyph
+        # "b-c-d", or a range from glyph "a-b" to glyph "c-d", or a
+        # range from glyph "a-b-c" to glyph "d".Technically, this
+        # example could be resolved because the (pretty complex)
+        # definition of glyph ranges renders most of these splits
+        # invalid. But the specification does not say that a compiler
+        # should try to apply such fancy heuristics. To encourage
+        # unambiguous feature files, we therefore try all possible
+        # splits and reject the feature file if there are multiple
+        # splits possible. It is intentional that we don't just emit a
+        # warning; warnings tend to get ignored. To fix the problem,
+        # font designers can trivially add spaces around the intended
+        # split point, and we emit a compiler error that suggests
+        # how exactly the source should be rewritten to make things
+        # unambiguous.
+        parts = name.split("-")
+        solutions = []
+        for i in range(len(parts)):
+            start, limit = "-".join(parts[0:i]), "-".join(parts[i:])
+            if start in self.glyphMap_ and limit in self.glyphMap_:
+                solutions.append((start, limit))
+        if len(solutions) == 1:
+            start, limit = solutions[0]
+            return start, limit
+        elif len(solutions) == 0:
+            raise FeatureLibError(
+                "\"%s\" is not a glyph in the font, and it can not be split "
+                "into a range of known glyphs" % name, location)
+        else:
+            ranges = " or ".join(["\"%s - %s\"" % (s, l) for s, l in solutions])
+            raise FeatureLibError(
+                "Ambiguous glyph range \"%s\"; "
+                "please use %s to clarify what you mean" % (name, ranges),
+                location)
+
     def parse_glyphclass_(self, accept_glyphname):
         if (accept_glyphname and
                 self.next_token_type_ in (Lexer.NAME, Lexer.CID)):
@@ -216,14 +256,19 @@ class Parser(object):
         while self.next_token_ != "]":
             if self.next_token_type_ is Lexer.NAME:
                 glyph = self.expect_glyph_()
-                if self.next_token_ == "-":
-                    range_location = self.cur_token_location_
-                    range_start = glyph
+                location = self.cur_token_location_
+                if '-' in glyph and glyph not in self.glyphMap_:
+                    start, limit = self.split_glyph_range_(glyph, location)
+                    glyphs.add_range(
+                        start, limit,
+                        self.make_glyph_range_(location, start, limit))
+                elif self.next_token_ == "-":
+                    start = glyph
                     self.expect_symbol_("-")
-                    range_end = self.expect_glyph_()
-                    glyphs.add_range(range_start, range_end,
-                                     self.make_glyph_range_(range_location,
-                                                            range_start, range_end))
+                    limit = self.expect_glyph_()
+                    glyphs.add_range(
+                        start, limit,
+                        self.make_glyph_range_(location, start, limit))
                 else:
                     glyphs.append(glyph)
             elif self.next_token_type_ is Lexer.CID:
diff --git a/NEWS.rst b/NEWS.rst
index 6c8654d0e..403cb3cec 100644
--- a/NEWS.rst
+++ b/NEWS.rst
@@ -1,3 +1,6 @@
+- [feaLib] Glyph names can have dashes, as per new AFDKO syntax v1.20 (#559).
+- [feaLib] feaLib.Parser now needs the font's glyph map for parsing.
+
 3.6.3 (released 2017-02-06)
 ---------------------------
 
diff --git a/Tests/feaLib/builder_test.py b/Tests/feaLib/builder_test.py
index 8f3e3eea7..f54e02bf4 100644
--- a/Tests/feaLib/builder_test.py
+++ b/Tests/feaLib/builder_test.py
@@ -134,8 +134,9 @@ class BuilderTest(unittest.TestCase):
                 font[tag].compile(font)
 
     def check_fea2fea_file(self, name, base=None, parser=Parser):
+        font = makeTTFont()
         fname = (name + ".fea") if '.' not in name else name
-        p = parser(self.getpath(fname))
+        p = parser(self.getpath(fname), glyphMap=font.getReverseGlyphMap())
         doc = p.parse()
         actual = self.normal_fea(doc.asFea().split("\n"))
 
diff --git a/Tests/feaLib/lexer_test.py b/Tests/feaLib/lexer_test.py
index 7e4c249f9..5626eadb5 100644
--- a/Tests/feaLib/lexer_test.py
+++ b/Tests/feaLib/lexer_test.py
@@ -29,6 +29,7 @@ class LexerTest(unittest.TestCase):
         self.assertEqual(lex("_"), [(Lexer.NAME, "_")])
         self.assertEqual(lex("\\table"), [(Lexer.NAME, "\\table")])
         self.assertEqual(lex("a+*:^~!"), [(Lexer.NAME, "a+*:^~!")])
+        self.assertEqual(lex("with-dash"), [(Lexer.NAME, "with-dash")])
 
     def test_cid(self):
         self.assertEqual(lex("\\0 \\987"), [(Lexer.CID, 0), (Lexer.CID, 987)])
@@ -72,6 +73,8 @@ class LexerTest(unittest.TestCase):
 
     def test_symbol(self):
         self.assertEqual(lex("a'"), [(Lexer.NAME, "a"), (Lexer.SYMBOL, "'")])
+        self.assertEqual(lex("-A-B"),
+                         [(Lexer.SYMBOL, "-"), (Lexer.NAME, "A-B")])
         self.assertEqual(
             lex("foo - -2"),
             [(Lexer.NAME, "foo"), (Lexer.SYMBOL, "-"), (Lexer.NUMBER, -2)])
diff --git a/Tests/feaLib/parser_test.py b/Tests/feaLib/parser_test.py
index 4fa22371a..91849bf18 100644
--- a/Tests/feaLib/parser_test.py
+++ b/Tests/feaLib/parser_test.py
@@ -29,6 +29,23 @@ def mapping(s):
     return dict(zip(b, c))
 
 
+def makeGlyphMap(glyphs):
+    return {g: i for i, g in enumerate(glyphs)}
+
+
+GLYPHMAP = makeGlyphMap(("""
+    .notdef space A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
+    A.sc B.sc C.sc D.sc E.sc F.sc G.sc H.sc I.sc J.sc K.sc L.sc M.sc
+    N.sc O.sc P.sc Q.sc R.sc S.sc T.sc U.sc V.sc W.sc X.sc Y.sc Z.sc
+    A.swash B.swash X.swash Y.swash Z.swash
+    a b c d e f g h i j k l m n o p q r s t u v w x y z
+    a.sc b.sc c.sc d.sc e.sc f.sc g.sc h.sc i.sc j.sc k.sc l.sc m.sc
+    n.sc o.sc p.sc q.sc r.sc s.sc t.sc u.sc v.sc w.sc x.sc y.sc z.sc
+    a.swash b.swash x.swash y.swash z.swash
+    foobar foo.09 foo.1234 foo.9876
+""").split() + ["foo.%d" % i for i in range(1, 200)])
+
+
 class ParserTest(unittest.TestCase):
     def __init__(self, methodName):
         unittest.TestCase.__init__(self, methodName)
@@ -237,6 +254,33 @@ class ParserTest(unittest.TestCase):
         self.assertEqual(gc.name, "defg.sc")
         self.assertEqual(gc.glyphSet(), ("d.sc", "e.sc", "f.sc", "g.sc"))
 
+    def test_glyphclass_range_dash(self):
+        glyphMap = makeGlyphMap("A-foo.sc B-foo.sc C-foo.sc".split())
+        [gc] = self.parse("@range = [A-foo.sc-C-foo.sc];", glyphMap).statements
+        self.assertEqual(gc.glyphSet(), ("A-foo.sc", "B-foo.sc", "C-foo.sc"))
+
+    def test_glyphclass_range_dash_with_space(self):
+        g = makeGlyphMap("A-foo.sc B-foo.sc C-foo.sc".split())
+        [gc] = self.parse("@range = [A-foo.sc - C-foo.sc];", g).statements
+        self.assertEqual(gc.glyphSet(), ("A-foo.sc", "B-foo.sc", "C-foo.sc"))
+
+    def test_glyphclass_glyph_name_should_win_over_range(self):
+        # The OpenType Feature File Specification v1.20 makes it clear
+        # that if a dashed name could be interpreted either as a glyph name
+        # or as a range, then the semantics should be the single dashed name.
+        glyphMap = makeGlyphMap(
+            "A-foo.sc-C-foo.sc A-foo.sc B-foo.sc C-foo.sc".split())
+        [gc] = self.parse("@range = [A-foo.sc-C-foo.sc];", glyphMap).statements
+        self.assertEqual(gc.glyphSet(), ("A-foo.sc-C-foo.sc",))
+
+    def test_glyphclass_range_dash_ambiguous(self):
+        glyphMap = makeGlyphMap("A B C A-B B-C".split())
+        self.assertRaisesRegex(
+            FeatureLibError,
+            'Ambiguous glyph range "A-B-C"; '
+            'please use "A - B-C" or "A-B - C" to clarify what you mean',
+            self.parse, r"@bad = [A-B-C];", glyphMap)
+
     def test_glyphclass_range_digit1(self):
         [gc] = self.parse("@range = [foo.2-foo.5];").statements
         self.assertEqual(gc.glyphSet(), ("foo.2", "foo.3", "foo.4", "foo.5"))
@@ -1114,7 +1158,7 @@ class ParserTest(unittest.TestCase):
         self.assertEqual(glyphstr(sub.suffix), "Z")
 
     def test_substitute_lookups(self):  # GSUB LookupType 6
-        doc = Parser(self.getpath("spec5fi1.fea")).parse()
+        doc = Parser(self.getpath("spec5fi1.fea"), GLYPHMAP).parse()
         [langsys, ligs, sub, feature] = doc.statements
         self.assertEqual(feature.statements[0].lookups, [ligs, None, sub])
         self.assertEqual(feature.statements[1].lookups, [ligs, None, sub])
@@ -1274,9 +1318,9 @@ class ParserTest(unittest.TestCase):
         doc = self.parse(";;;")
         self.assertFalse(doc.statements)
 
-    def parse(self, text):
+    def parse(self, text, glyphMap=GLYPHMAP):
         featurefile = UnicodeIO(text)
-        return Parser(featurefile).parse()
+        return Parser(featurefile, glyphMap).parse()
 
     @staticmethod
     def getpath(testfile):