Merge pull request #879 from silnrsi/parsecomments

Add support for comments to parser
Cosimo Lupo 2017-03-08 20:55:16 +00:00 committed by GitHub
commit 857862b94f
6 changed files with 131 additions and 59 deletions

View File

@@ -69,6 +69,18 @@ class Expression(object):
     def asFea(self, indent=""):
         pass
 
+
+class Comment(object):
+    def __init__(self, location, text):
+        self.location = location
+        self.text = text
+
+    def build(self, builder):
+        pass
+
+    def asFea(self, indent=""):
+        return self.text
+
 class GlyphName(Expression):
     """A single glyph name, such as cedilla."""
     def __init__(self, location, glyph):

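The new Comment node deliberately mirrors the build/asFea interface of the statement classes around it: build() is a no-op, so the OpenType compiler ignores comments, while asFea() echoes the raw text back, which is what lets fea-to-fea round-trips preserve them. A minimal sketch of the node in isolation; the (file, line, column) location tuple follows the lexer's convention:

    from fontTools.feaLib import ast

    # Location tuples are (filename, line, column), as produced by the lexer.
    comment = ast.Comment(("example.fea", 1, 1), "# kerning tuned by hand")
    comment.build(builder=None)   # no-op: comments never affect compilation
    assert comment.asFea() == "# kerning tuned by hand"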
View File

@@ -51,7 +51,7 @@ class Lexer(object):
     def __next__(self):  # Python 3
         while True:
             token_type, token, location = self.next_()
-            if token_type not in {Lexer.COMMENT, Lexer.NEWLINE}:
+            if token_type != Lexer.NEWLINE:
                 return (token_type, token, location)
 
     def location_(self):
@@ -200,7 +200,7 @@ class IncludingLexer(object):
         while self.lexers_:
             lexer = self.lexers_[-1]
             try:
-                token_type, token, location = lexer.next()
+                token_type, token, location = next(lexer)
             except StopIteration:
                 self.lexers_.pop()
                 continue

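With this change the lexer's public iterator filters only NEWLINE tokens, so COMMENT tokens now reach the parser; the second hunk also swaps lexer.next() for the builtin next(lexer), which dispatches correctly on both Python 2 and 3. A quick check of the new behavior, using the two-argument Lexer(text, filename) constructor that the tests below rely on:

    from fontTools.feaLib.lexer import Lexer

    tokens = [(typ, tok) for (typ, tok, _) in Lexer("sub A by B; # swap\n", "demo.fea")]
    # The trailing comment is now a token instead of being silently dropped.
    assert (Lexer.COMMENT, "# swap") in tokens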
View File

@ -17,6 +17,7 @@ class Parser(object):
extensions = {}
ast = ast
ignore_comments = True
def __init__(self, featurefile, glyphMap):
self.glyphMap_ = glyphMap
@@ -29,15 +30,18 @@ class Parser(object):
             self.anchors_, self.valuerecords_
         }
         self.next_token_type_, self.next_token_ = (None, None)
+        self.cur_comments_ = []
         self.next_token_location_ = None
         self.lexer_ = IncludingLexer(featurefile)
-        self.advance_lexer_()
+        self.advance_lexer_(comments=True)
 
     def parse(self):
         statements = self.doc_.statements
         while self.next_token_type_ is not None:
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.GLYPHCLASS:
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.GLYPHCLASS:
                 statements.append(self.parse_glyphclass_definition_())
             elif self.is_cur_keyword_(("anon", "anonymous")):
                 statements.append(self.parse_anonymous_())
@@ -63,7 +67,7 @@ class Parser(object):
             else:
                 raise FeatureLibError(
                     "Expected feature, languagesystem, lookup, markClass, "
-                    "table, or glyph class definition",
+                    "table, or glyph class definition, got {} \"{}\"".format(self.cur_token_type_, self.cur_token_),
                     self.cur_token_location_)
 
         return self.doc_
@@ -787,9 +791,11 @@ class Parser(object):
 
     def parse_table_GDEF_(self, table):
         statements = table.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.is_cur_keyword_("Attach"):
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.is_cur_keyword_("Attach"):
                 statements.append(self.parse_attach_())
             elif self.is_cur_keyword_("GlyphClassDef"):
                 statements.append(self.parse_GlyphClassDef_())
@@ -805,9 +811,11 @@ class Parser(object):
 
     def parse_table_head_(self, table):
         statements = table.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.is_cur_keyword_("FontRevision"):
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.is_cur_keyword_("FontRevision"):
                 statements.append(self.parse_FontRevision_())
             else:
                 raise FeatureLibError("Expected FontRevision",
@@ -816,13 +824,17 @@ class Parser(object):
     def parse_table_hhea_(self, table):
         statements = table.statements
         fields = ("CaretOffset", "Ascender", "Descender", "LineGap")
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
                 key = self.cur_token_.lower()
                 value = self.expect_number_()
                 statements.append(
                     self.ast.HheaField(self.cur_token_location_, key, value))
+                if self.next_token_ != ";":
+                    raise FeatureLibError("Incomplete statement", self.next_token_location_)
             elif self.cur_token_ == ";":
                 continue
             else:
@@ -833,13 +845,17 @@ class Parser(object):
     def parse_table_vhea_(self, table):
         statements = table.statements
         fields = ("VertTypoAscender", "VertTypoDescender", "VertTypoLineGap")
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
                 key = self.cur_token_.lower()
                 value = self.expect_number_()
                 statements.append(
                     self.ast.VheaField(self.cur_token_location_, key, value))
+                if self.next_token_ != ";":
+                    raise FeatureLibError("Incomplete statement", self.next_token_location_)
             elif self.cur_token_ == ";":
                 continue
             else:
@@ -849,9 +865,11 @@ class Parser(object):
 
     def parse_table_name_(self, table):
         statements = table.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.is_cur_keyword_("nameid"):
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.is_cur_keyword_("nameid"):
                 statement = self.parse_nameid_()
                 if statement:
                     statements.append(statement)
@@ -930,9 +948,11 @@ class Parser(object):
 
     def parse_table_BASE_(self, table):
         statements = table.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.is_cur_keyword_("HorizAxis.BaseTagList"):
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.is_cur_keyword_("HorizAxis.BaseTagList"):
                 horiz_bases = self.parse_base_tag_list_()
             elif self.is_cur_keyword_("HorizAxis.BaseScriptList"):
                 horiz_scripts = self.parse_base_script_list_(len(horiz_bases))
@@ -955,9 +975,11 @@ class Parser(object):
             "winAscent", "winDescent", "XHeight", "CapHeight",
             "WeightClass", "WidthClass", "LowerOpSize", "UpperOpSize")
         ranges = ("UnicodeRange", "CodePageRange")
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.NAME:
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.NAME:
                 key = self.cur_token_.lower()
                 value = None
                 if self.cur_token_ in numbers:
@@ -1164,9 +1186,11 @@ class Parser(object):
         symtab.enter_scope()
 
         statements = block.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.GLYPHCLASS:
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.GLYPHCLASS:
                 statements.append(self.parse_glyphclass_definition_())
             elif self.is_cur_keyword_("anchorDef"):
                 statements.append(self.parse_anchordef_())
@@ -1208,7 +1232,7 @@ class Parser(object):
                 continue
             else:
                 raise FeatureLibError(
-                    "Expected glyph class definition or statement",
+                    "Expected glyph class definition or statement: got {} {}".format(self.cur_token_type_, self.cur_token_),
                     self.cur_token_location_)
 
         self.expect_symbol_("}")
@@ -1339,14 +1363,24 @@ class Parser(object):
             return self.cur_token_
         raise FeatureLibError("Expected a string", self.cur_token_location_)
 
-    def advance_lexer_(self):
-        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
-            self.next_token_type_, self.next_token_, self.next_token_location_)
-        try:
-            (self.next_token_type_, self.next_token_,
-             self.next_token_location_) = self.lexer_.next()
-        except StopIteration:
-            self.next_token_type_, self.next_token_ = (None, None)
+    def advance_lexer_(self, comments=False):
+        if not self.ignore_comments and comments and len(self.cur_comments_):
+            self.cur_token_type_ = Lexer.COMMENT
+            self.cur_token_, self.cur_token_location_ = self.cur_comments_.pop(0)
+            return
+        else:
+            self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
+                self.next_token_type_, self.next_token_, self.next_token_location_)
+        self.cur_comments_ = []
+        while True:
+            try:
+                (self.next_token_type_, self.next_token_,
+                 self.next_token_location_) = next(self.lexer_)
+            except StopIteration:
+                self.next_token_type_, self.next_token_ = (None, None)
+            if self.next_token_type_ != Lexer.COMMENT:
+                break
+            self.cur_comments_.append((self.next_token_, self.next_token_location_))
 
     @staticmethod
     def reverse_string_(s):

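The parser keeps one token of lookahead in next_token_, so comments cannot simply be handed through inline: the refill loop above drains any run of COMMENT tokens into cur_comments_ so that next_token_ is always a "real" token, and a later advance_lexer_(comments=True) call replays the buffer one comment at a time before yielding that token. A distilled sketch of the same buffering idea, deliberately separate from the feaLib classes (the names here are illustrative only):

    def refill(tokens, comment_buf):
        # Advance past comments, stashing them; return the next real token.
        for tok_type, tok in tokens:
            if tok_type == "COMMENT":
                comment_buf.append(tok)
            else:
                return (tok_type, tok)
        return (None, None)

    tokens = iter([("COMMENT", "# a"), ("COMMENT", "# b"), ("NAME", "feature")])
    buf = []
    assert refill(tokens, buf) == ("NAME", "feature")
    assert buf == ["# a", "# b"]   # replayed later, one comment per advance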
View File

@@ -138,22 +138,27 @@ class BuilderTest(unittest.TestCase):
     def check_fea2fea_file(self, name, base=None, parser=Parser):
         font = makeTTFont()
         fname = (name + ".fea") if '.' not in name else name
-        p = parser(self.getpath(fname), glyphMap=font.getReverseGlyphMap())
-        doc = p.parse()
-        actual = self.normal_fea(doc.asFea().split("\n"))
+        temp = parser.ignore_comments
+        parser.ignore_comments = False
+        try:
+            p = parser(self.getpath(fname), glyphMap=font.getReverseGlyphMap())
+            doc = p.parse()
+            actual = self.normal_fea(doc.asFea().split("\n"))
 
-        with open(self.getpath(base or fname), "r", encoding="utf-8") as ofile:
-            expected = self.normal_fea(ofile.readlines())
+            with open(self.getpath(base or fname), "r", encoding="utf-8") as ofile:
+                expected = self.normal_fea(ofile.readlines())
 
-        if expected != actual:
-            fname = name.rsplit(".", 1)[0] + ".fea"
-            for line in difflib.unified_diff(
-                    expected, actual,
-                    fromfile=fname + " (expected)",
-                    tofile=fname + " (actual)"):
-                sys.stderr.write(line+"\n")
-            self.fail("Fea2Fea output is different from expected. "
-                      "Generated:\n{}\n".format("\n".join(actual)))
+            if expected != actual:
+                fname = name.rsplit(".", 1)[0] + ".fea"
+                for line in difflib.unified_diff(
+                        expected, actual,
+                        fromfile=fname + " (expected)",
+                        tofile=fname + " (actual)"):
+                    sys.stderr.write(line+"\n")
+                self.fail("Fea2Fea output is different from expected. "
+                          "Generated:\n{}\n".format("\n".join(actual)))
+        finally:
+            parser.ignore_comments = temp
 
     def normal_fea(self, lines):
         output = []

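Because ignore_comments is a class attribute of Parser, the test overrides it for the whole class and restores the saved value in the finally block, so a failing assertion cannot leak the setting into other tests. The same save/override/restore dance can be packaged as a context manager; a hypothetical helper, not part of the test suite:

    from contextlib import contextmanager

    @contextmanager
    def class_attr(cls, name, value):
        # Temporarily override a class-level attribute, restoring it on exit.
        saved = getattr(cls, name)
        setattr(cls, name, value)
        try:
            yield
        finally:
            setattr(cls, name, saved)

    # e.g.: with class_attr(Parser, "ignore_comments", False): ...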
View File

@@ -9,7 +9,6 @@ import unittest
 def lex(s):
     return [(typ, tok) for (typ, tok, _) in Lexer(s, "test.fea")]
 
-
 class LexerTest(unittest.TestCase):
     def __init__(self, methodName):
         unittest.TestCase.__init__(self, methodName)
@ -55,6 +54,7 @@ class LexerTest(unittest.TestCase):
])
self.assertEqual(lex("include # Comment\n (foo) \n;"), [
(Lexer.NAME, "include"),
(Lexer.COMMENT, "# Comment"),
(Lexer.FILENAME, "foo"),
(Lexer.SYMBOL, ";")
])
@@ -79,9 +79,13 @@ class LexerTest(unittest.TestCase):
             lex("foo - -2"),
             [(Lexer.NAME, "foo"), (Lexer.SYMBOL, "-"), (Lexer.NUMBER, -2)])
 
-    def test_comment(self):
-        self.assertEqual(lex("# Comment\n#"), [])
+    #def test_comment(self):
+    #    self.assertEqual(lex("# Comment\n#"), [])
+
+    def test_comment_kept(self):
+        self.assertEqual(lex("# Comment\n#"),
+                         [(Lexer.COMMENT, "# Comment"), (Lexer.COMMENT, "#")])
 
     def test_string(self):
         self.assertEqual(lex('"foo" "bar"'),
@ -112,7 +116,7 @@ class LexerTest(unittest.TestCase):
def locs(s):
return ["%s:%d:%d" % loc for (_, _, loc) in Lexer(s, "test.fea")]
self.assertEqual(locs("a b # Comment\n12 @x"), [
"test.fea:1:1", "test.fea:1:3", "test.fea:2:1",
"test.fea:1:1", "test.fea:1:3", "test.fea:1:5", "test.fea:2:1",
"test.fea:2:4"
])

View File

@@ -55,6 +55,20 @@ class ParserTest(unittest.TestCase):
         if not hasattr(self, "assertRaisesRegex"):
             self.assertRaisesRegex = self.assertRaisesRegexp
 
+    def test_comments(self):
+        doc = self.parse(
+            """ # Initial
+                feature test {
+                    sub A by B; # simple
+                } test;""", comments=True)
+        c1 = doc.statements[0]
+        c2 = doc.statements[1].statements[1]
+        self.assertEqual(type(c1), ast.Comment)
+        self.assertEqual(c1.text, "# Initial")
+        self.assertEqual(type(c2), ast.Comment)
+        self.assertEqual(c2.text, "# simple")
+        self.assertEqual(doc.statements[1].name, "test")
+
     def test_anchor_format_a(self):
         doc = self.parse(
             "feature test {"
@@ -1424,9 +1438,12 @@ class ParserTest(unittest.TestCase):
         doc = self.parse(";;;")
         self.assertFalse(doc.statements)
 
-    def parse(self, text, glyphMap=GLYPHMAP):
+    def parse(self, text, glyphMap=GLYPHMAP, comments=False):
         featurefile = UnicodeIO(text)
-        return Parser(featurefile, glyphMap).parse()
+        p = Parser(featurefile, glyphMap)
+        if comments:
+            p.ignore_comments = False
+        return p.parse()
 
     @staticmethod
     def getpath(testfile):
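Putting the pieces together, comment parsing stays opt-in: ignore_comments defaults to True, and flipping it to False on a Parser instance makes comments surface as ast.Comment statements, exactly as test_comments above exercises. A sketch of that workflow; the tests wrap the source in UnicodeIO from the py23 shim, and io.StringIO is assumed here as its Python 3 equivalent:

    from io import StringIO
    from fontTools.feaLib import ast
    from fontTools.feaLib.parser import Parser

    fea = StringIO("# Initial\n"
                   "feature test {\n"
                   "    sub A by B; # simple\n"
                   "} test;\n")
    parser = Parser(fea, glyphMap={"A": 0, "B": 1})
    parser.ignore_comments = False   # shadow the class-level default
    doc = parser.parse()

    assert isinstance(doc.statements[0], ast.Comment)
    assert doc.statements[0].text == "# Initial"
    print(doc.asFea())               # round-trip output keeps both comments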