Merge pull request #879 from silnrsi/parsecomments

Add support for comments to parser
Cosimo Lupo 2017-03-08 20:55:16 +00:00 committed by GitHub
commit 857862b94f
6 changed files with 131 additions and 59 deletions

View File

@@ -69,6 +69,18 @@ class Expression(object):
     def asFea(self, indent=""):
         pass
 
+
+class Comment(object):
+    def __init__(self, location, text):
+        self.location = location
+        self.text = text
+
+    def build(self, builder):
+        pass
+
+    def asFea(self, indent=""):
+        return self.text
+
 class GlyphName(Expression):
     """A single glyph name, such as cedilla."""
     def __init__(self, location, glyph):

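The new Comment node deliberately mirrors the build/asFea interface of the statement classes around it: build() is a no-op, so the OpenType compiler ignores comments, while asFea() echoes the raw text back, which is what lets fea-to-fea round-trips preserve them. A minimal sketch of the node in isolation; the (file, line, column) location tuple follows the lexer's convention:

    from fontTools.feaLib import ast

    # Location tuples are (filename, line, column), as produced by the lexer.
    comment = ast.Comment(("example.fea", 1, 1), "# kerning tuned by hand")
    comment.build(builder=None)   # no-op: comments never affect compilation
    assert comment.asFea() == "# kerning tuned by hand"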
View File

@@ -51,7 +51,7 @@ class Lexer(object):
     def __next__(self):  # Python 3
         while True:
             token_type, token, location = self.next_()
-            if token_type not in {Lexer.COMMENT, Lexer.NEWLINE}:
+            if token_type != Lexer.NEWLINE:
                 return (token_type, token, location)
 
     def location_(self):
@@ -200,7 +200,7 @@ class IncludingLexer(object):
         while self.lexers_:
             lexer = self.lexers_[-1]
             try:
-                token_type, token, location = lexer.next()
+                token_type, token, location = next(lexer)
             except StopIteration:
                 self.lexers_.pop()
                 continue

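With this change the lexer's public iterator filters only NEWLINE tokens, so COMMENT tokens now reach the parser; the second hunk also swaps lexer.next() for the builtin next(lexer), which dispatches correctly on both Python 2 and 3. A quick check of the new behavior, using the two-argument Lexer(text, filename) constructor that the tests below rely on:

    from fontTools.feaLib.lexer import Lexer

    tokens = [(typ, tok) for (typ, tok, _) in Lexer("sub A by B; # swap\n", "demo.fea")]
    # The trailing comment is now a token instead of being silently dropped.
    assert (Lexer.COMMENT, "# swap") in tokens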
View File

@ -17,6 +17,7 @@ class Parser(object):
extensions = {}
ast = ast
ignore_comments = True
def __init__(self, featurefile, glyphMap):
self.glyphMap_ = glyphMap
@@ -29,15 +30,18 @@ class Parser(object):
             self.anchors_, self.valuerecords_
         }
         self.next_token_type_, self.next_token_ = (None, None)
+        self.cur_comments_ = []
         self.next_token_location_ = None
         self.lexer_ = IncludingLexer(featurefile)
-        self.advance_lexer_()
+        self.advance_lexer_(comments=True)
 
     def parse(self):
         statements = self.doc_.statements
         while self.next_token_type_ is not None:
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.GLYPHCLASS:
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.GLYPHCLASS:
                 statements.append(self.parse_glyphclass_definition_())
             elif self.is_cur_keyword_(("anon", "anonymous")):
                 statements.append(self.parse_anonymous_())
@@ -63,7 +67,7 @@ class Parser(object):
             else:
                 raise FeatureLibError(
                     "Expected feature, languagesystem, lookup, markClass, "
-                    "table, or glyph class definition",
+                    "table, or glyph class definition, got {} \"{}\"".format(self.cur_token_type_, self.cur_token_),
                     self.cur_token_location_)
 
         return self.doc_
@@ -787,9 +791,11 @@ class Parser(object):
 
     def parse_table_GDEF_(self, table):
         statements = table.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.is_cur_keyword_("Attach"):
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.is_cur_keyword_("Attach"):
                 statements.append(self.parse_attach_())
             elif self.is_cur_keyword_("GlyphClassDef"):
                 statements.append(self.parse_GlyphClassDef_())
@@ -805,9 +811,11 @@ class Parser(object):
 
     def parse_table_head_(self, table):
         statements = table.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.is_cur_keyword_("FontRevision"):
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.is_cur_keyword_("FontRevision"):
                 statements.append(self.parse_FontRevision_())
             else:
                 raise FeatureLibError("Expected FontRevision",
@@ -816,13 +824,17 @@ class Parser(object):
     def parse_table_hhea_(self, table):
         statements = table.statements
         fields = ("CaretOffset", "Ascender", "Descender", "LineGap")
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
                 key = self.cur_token_.lower()
                 value = self.expect_number_()
                 statements.append(
                     self.ast.HheaField(self.cur_token_location_, key, value))
+                if self.next_token_ != ";":
+                    raise FeatureLibError("Incomplete statement", self.next_token_location_)
             elif self.cur_token_ == ";":
                 continue
             else:
@@ -833,13 +845,17 @@ class Parser(object):
     def parse_table_vhea_(self, table):
         statements = table.statements
         fields = ("VertTypoAscender", "VertTypoDescender", "VertTypoLineGap")
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
                 key = self.cur_token_.lower()
                 value = self.expect_number_()
                 statements.append(
                     self.ast.VheaField(self.cur_token_location_, key, value))
+                if self.next_token_ != ";":
+                    raise FeatureLibError("Incomplete statement", self.next_token_location_)
             elif self.cur_token_ == ";":
                 continue
             else:
@@ -849,9 +865,11 @@ class Parser(object):
 
     def parse_table_name_(self, table):
         statements = table.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.is_cur_keyword_("nameid"):
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.is_cur_keyword_("nameid"):
                 statement = self.parse_nameid_()
                 if statement:
                     statements.append(statement)
@@ -930,9 +948,11 @@ class Parser(object):
 
     def parse_table_BASE_(self, table):
         statements = table.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.is_cur_keyword_("HorizAxis.BaseTagList"):
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.is_cur_keyword_("HorizAxis.BaseTagList"):
                 horiz_bases = self.parse_base_tag_list_()
             elif self.is_cur_keyword_("HorizAxis.BaseScriptList"):
                 horiz_scripts = self.parse_base_script_list_(len(horiz_bases))
@@ -955,9 +975,11 @@ class Parser(object):
             "winAscent", "winDescent", "XHeight", "CapHeight",
             "WeightClass", "WidthClass", "LowerOpSize", "UpperOpSize")
         ranges = ("UnicodeRange", "CodePageRange")
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.NAME:
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.NAME:
                 key = self.cur_token_.lower()
                 value = None
                 if self.cur_token_ in numbers:
@@ -1164,9 +1186,11 @@ class Parser(object):
         symtab.enter_scope()
 
         statements = block.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.GLYPHCLASS:
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.GLYPHCLASS:
                 statements.append(self.parse_glyphclass_definition_())
             elif self.is_cur_keyword_("anchorDef"):
                 statements.append(self.parse_anchordef_())
@@ -1208,7 +1232,7 @@ class Parser(object):
                 continue
             else:
                 raise FeatureLibError(
-                    "Expected glyph class definition or statement",
+                    "Expected glyph class definition or statement: got {} {}".format(self.cur_token_type_, self.cur_token_),
                     self.cur_token_location_)
 
         self.expect_symbol_("}")
@@ -1339,14 +1363,24 @@ class Parser(object):
             return self.cur_token_
         raise FeatureLibError("Expected a string", self.cur_token_location_)
 
-    def advance_lexer_(self):
-        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
-            self.next_token_type_, self.next_token_, self.next_token_location_)
-        try:
-            (self.next_token_type_, self.next_token_,
-             self.next_token_location_) = self.lexer_.next()
-        except StopIteration:
-            self.next_token_type_, self.next_token_ = (None, None)
+    def advance_lexer_(self, comments=False):
+        if not self.ignore_comments and comments and len(self.cur_comments_):
+            self.cur_token_type_ = Lexer.COMMENT
+            self.cur_token_, self.cur_token_location_ = self.cur_comments_.pop(0)
+            return
+        else:
+            self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
+                self.next_token_type_, self.next_token_, self.next_token_location_)
+        self.cur_comments_ = []
+        while True:
+            try:
+                (self.next_token_type_, self.next_token_,
+                 self.next_token_location_) = next(self.lexer_)
+            except StopIteration:
+                self.next_token_type_, self.next_token_ = (None, None)
+            if self.next_token_type_ != Lexer.COMMENT:
+                break
+            self.cur_comments_.append((self.next_token_, self.next_token_location_))
 
     @staticmethod
     def reverse_string_(s):

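The parser keeps one token of lookahead in next_token_, so comments cannot simply be handed through inline: the refill loop above drains any run of COMMENT tokens into cur_comments_ so that next_token_ is always a "real" token, and a later advance_lexer_(comments=True) call replays the buffer one comment at a time before yielding that token. A distilled sketch of the same buffering idea, deliberately separate from the feaLib classes (the names here are illustrative only):

    def refill(tokens, comment_buf):
        # Advance past comments, stashing them; return the next real token.
        for tok_type, tok in tokens:
            if tok_type == "COMMENT":
                comment_buf.append(tok)
            else:
                return (tok_type, tok)
        return (None, None)

    tokens = iter([("COMMENT", "# a"), ("COMMENT", "# b"), ("NAME", "feature")])
    buf = []
    assert refill(tokens, buf) == ("NAME", "feature")
    assert buf == ["# a", "# b"]   # replayed later, one comment per advance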
View File

@@ -138,22 +138,27 @@ class BuilderTest(unittest.TestCase):
     def check_fea2fea_file(self, name, base=None, parser=Parser):
         font = makeTTFont()
         fname = (name + ".fea") if '.' not in name else name
-        p = parser(self.getpath(fname), glyphMap=font.getReverseGlyphMap())
-        doc = p.parse()
-        actual = self.normal_fea(doc.asFea().split("\n"))
+        temp = parser.ignore_comments
+        parser.ignore_comments = False
+        try:
+            p = parser(self.getpath(fname), glyphMap=font.getReverseGlyphMap())
+            doc = p.parse()
+            actual = self.normal_fea(doc.asFea().split("\n"))
 
-        with open(self.getpath(base or fname), "r", encoding="utf-8") as ofile:
-            expected = self.normal_fea(ofile.readlines())
+            with open(self.getpath(base or fname), "r", encoding="utf-8") as ofile:
+                expected = self.normal_fea(ofile.readlines())
 
-        if expected != actual:
-            fname = name.rsplit(".", 1)[0] + ".fea"
-            for line in difflib.unified_diff(
-                    expected, actual,
-                    fromfile=fname + " (expected)",
-                    tofile=fname + " (actual)"):
-                sys.stderr.write(line+"\n")
-            self.fail("Fea2Fea output is different from expected. "
-                      "Generated:\n{}\n".format("\n".join(actual)))
+            if expected != actual:
+                fname = name.rsplit(".", 1)[0] + ".fea"
+                for line in difflib.unified_diff(
+                        expected, actual,
+                        fromfile=fname + " (expected)",
+                        tofile=fname + " (actual)"):
+                    sys.stderr.write(line+"\n")
+                self.fail("Fea2Fea output is different from expected. "
+                          "Generated:\n{}\n".format("\n".join(actual)))
+        finally:
+            parser.ignore_comments = temp
 
     def normal_fea(self, lines):
         output = []

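Because ignore_comments is a class attribute of Parser, the test overrides it for the whole class and restores the saved value in the finally block, so a failing assertion cannot leak the setting into other tests. The same save/override/restore dance can be packaged as a context manager; a hypothetical helper, not part of the test suite:

    from contextlib import contextmanager

    @contextmanager
    def class_attr(cls, name, value):
        # Temporarily override a class-level attribute, restoring it on exit.
        saved = getattr(cls, name)
        setattr(cls, name, value)
        try:
            yield
        finally:
            setattr(cls, name, saved)

    # e.g.: with class_attr(Parser, "ignore_comments", False): ...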
View File

@@ -9,7 +9,6 @@ import unittest
 def lex(s):
     return [(typ, tok) for (typ, tok, _) in Lexer(s, "test.fea")]
 
-
 class LexerTest(unittest.TestCase):
     def __init__(self, methodName):
         unittest.TestCase.__init__(self, methodName)
@ -55,6 +54,7 @@ class LexerTest(unittest.TestCase):
])
self.assertEqual(lex("include # Comment\n (foo) \n;"), [
(Lexer.NAME, "include"),
(Lexer.COMMENT, "# Comment"),
(Lexer.FILENAME, "foo"),
(Lexer.SYMBOL, ";")
])
@@ -79,9 +79,13 @@ class LexerTest(unittest.TestCase):
             lex("foo - -2"),
             [(Lexer.NAME, "foo"), (Lexer.SYMBOL, "-"), (Lexer.NUMBER, -2)])
 
-    def test_comment(self):
-        self.assertEqual(lex("# Comment\n#"), [])
+    #def test_comment(self):
+    #    self.assertEqual(lex("# Comment\n#"), [])
+
+    def test_comment_kept(self):
+        self.assertEqual(lex("# Comment\n#"),
+                         [(Lexer.COMMENT, "# Comment"), (Lexer.COMMENT, "#")])
 
     def test_string(self):
         self.assertEqual(lex('"foo" "bar"'),
@ -112,7 +116,7 @@ class LexerTest(unittest.TestCase):
def locs(s):
return ["%s:%d:%d" % loc for (_, _, loc) in Lexer(s, "test.fea")]
self.assertEqual(locs("a b # Comment\n12 @x"), [
"test.fea:1:1", "test.fea:1:3", "test.fea:2:1",
"test.fea:1:1", "test.fea:1:3", "test.fea:1:5", "test.fea:2:1",
"test.fea:2:4"
])

View File

@@ -55,6 +55,20 @@ class ParserTest(unittest.TestCase):
         if not hasattr(self, "assertRaisesRegex"):
             self.assertRaisesRegex = self.assertRaisesRegexp
 
+    def test_comments(self):
+        doc = self.parse(
+            """ # Initial
+                feature test {
+                    sub A by B; # simple
+                } test;""", comments=True)
+        c1 = doc.statements[0]
+        c2 = doc.statements[1].statements[1]
+        self.assertEqual(type(c1), ast.Comment)
+        self.assertEqual(c1.text, "# Initial")
+        self.assertEqual(type(c2), ast.Comment)
+        self.assertEqual(c2.text, "# simple")
+        self.assertEqual(doc.statements[1].name, "test")
+
     def test_anchor_format_a(self):
         doc = self.parse(
             "feature test {"
@@ -1424,9 +1438,12 @@ class ParserTest(unittest.TestCase):
         doc = self.parse(";;;")
         self.assertFalse(doc.statements)
 
-    def parse(self, text, glyphMap=GLYPHMAP):
+    def parse(self, text, glyphMap=GLYPHMAP, comments=False):
         featurefile = UnicodeIO(text)
-        return Parser(featurefile, glyphMap).parse()
+        p = Parser(featurefile, glyphMap)
+        if comments:
+            p.ignore_comments = False
+        return p.parse()
 
     @staticmethod
     def getpath(testfile):
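Putting the pieces together, comment parsing stays opt-in: ignore_comments defaults to True, and flipping it to False on a Parser instance makes comments surface as ast.Comment statements, exactly as test_comments above exercises. A sketch of that workflow; the tests wrap the source in UnicodeIO from the py23 shim, and io.StringIO is assumed here as its Python 3 equivalent:

    from io import StringIO
    from fontTools.feaLib import ast
    from fontTools.feaLib.parser import Parser

    fea = StringIO("# Initial\n"
                   "feature test {\n"
                   "    sub A by B; # simple\n"
                   "} test;\n")
    parser = Parser(fea, glyphMap={"A": 0, "B": 1})
    parser.ignore_comments = False   # shadow the class-level default
    doc = parser.parse()

    assert isinstance(doc.statements[0], ast.Comment)
    assert doc.statements[0].text == "# Initial"
    print(doc.asFea())               # round-trip output keeps both comments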