Merge pull request #879 from silnrsi/parsecomments
Add support for comments to parser
Commit: 857862b94f
@@ -69,6 +69,18 @@ class Expression(object):
         pass
 
 
+class Comment(object):
+    def __init__(self, location, text):
+        self.location = location
+        self.text = text
+
+    def build(self, builder):
+        pass
+
+    def asFea(self, indent=""):
+        return self.text
+
+
 class GlyphName(Expression):
     """A single glyph name, such as cedilla."""
     def __init__(self, location, glyph):
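For illustration, a minimal sketch of how the new AST node behaves on its own. This assumes the `fontTools.feaLib.ast` import path and the lexer's `(filename, line, column)` location tuples; the values are invented:

```python
from fontTools.feaLib import ast

# Comment stores a location and the raw text, leading "#" included.
c = ast.Comment(("test.fea", 1, 1), "# Initial")
assert c.asFea() == "# Initial"  # asFea() echoes the text back verbatim
c.build(builder=None)            # build() is a no-op: comments never
                                 # affect the compiled font
```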
@@ -51,7 +51,7 @@ class Lexer(object):
     def __next__(self):  # Python 3
         while True:
             token_type, token, location = self.next_()
-            if token_type not in {Lexer.COMMENT, Lexer.NEWLINE}:
+            if token_type != Lexer.NEWLINE:
                 return (token_type, token, location)
 
     def location_(self):
@@ -200,7 +200,7 @@ class IncludingLexer(object):
         while self.lexers_:
             lexer = self.lexers_[-1]
             try:
-                token_type, token, location = lexer.next()
+                token_type, token, location = next(lexer)
             except StopIteration:
                 self.lexers_.pop()
                 continue
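With this change the lexer only suppresses NEWLINE tokens; COMMENT tokens now reach the caller, and deciding whether to keep them becomes the parser's job. A minimal sketch of the resulting token stream (the input string is an invented example):

```python
from fontTools.feaLib.lexer import Lexer

# Comments now arrive as (Lexer.COMMENT, text, location) triples.
tokens = [(typ, tok) for typ, tok, _ in Lexer("sub A by B; # simple\n", "test.fea")]
assert tokens[-1] == (Lexer.COMMENT, "# simple")
```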
@@ -17,6 +17,7 @@ class Parser(object):
 
     extensions = {}
     ast = ast
+    ignore_comments = True
 
     def __init__(self, featurefile, glyphMap):
         self.glyphMap_ = glyphMap
@@ -29,15 +30,18 @@ class Parser(object):
             self.anchors_, self.valuerecords_
         }
         self.next_token_type_, self.next_token_ = (None, None)
+        self.cur_comments_ = []
         self.next_token_location_ = None
         self.lexer_ = IncludingLexer(featurefile)
-        self.advance_lexer_()
+        self.advance_lexer_(comments=True)
 
     def parse(self):
         statements = self.doc_.statements
         while self.next_token_type_ is not None:
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.GLYPHCLASS:
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.GLYPHCLASS:
                 statements.append(self.parse_glyphclass_definition_())
             elif self.is_cur_keyword_(("anon", "anonymous")):
                 statements.append(self.parse_anonymous_())
@@ -63,7 +67,7 @@ class Parser(object):
             else:
                 raise FeatureLibError(
                     "Expected feature, languagesystem, lookup, markClass, "
-                    "table, or glyph class definition",
+                    "table, or glyph class definition, got {} \"{}\"".format(self.cur_token_type_, self.cur_token_),
                     self.cur_token_location_)
         return self.doc_
 
@@ -787,9 +791,11 @@ class Parser(object):
 
     def parse_table_GDEF_(self, table):
         statements = table.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.is_cur_keyword_("Attach"):
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.is_cur_keyword_("Attach"):
                 statements.append(self.parse_attach_())
             elif self.is_cur_keyword_("GlyphClassDef"):
                 statements.append(self.parse_GlyphClassDef_())
@@ -805,9 +811,11 @@ class Parser(object):
 
     def parse_table_head_(self, table):
         statements = table.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.is_cur_keyword_("FontRevision"):
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.is_cur_keyword_("FontRevision"):
                 statements.append(self.parse_FontRevision_())
             else:
                 raise FeatureLibError("Expected FontRevision",
@@ -816,13 +824,17 @@ class Parser(object):
     def parse_table_hhea_(self, table):
         statements = table.statements
         fields = ("CaretOffset", "Ascender", "Descender", "LineGap")
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
                 key = self.cur_token_.lower()
                 value = self.expect_number_()
                 statements.append(
                     self.ast.HheaField(self.cur_token_location_, key, value))
+                if self.next_token_ != ";":
+                    raise FeatureLibError("Incomplete statement", self.next_token_location_)
             elif self.cur_token_ == ";":
                 continue
             else:
@@ -833,13 +845,17 @@ class Parser(object):
     def parse_table_vhea_(self, table):
         statements = table.statements
         fields = ("VertTypoAscender", "VertTypoDescender", "VertTypoLineGap")
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.NAME and self.cur_token_ in fields:
                 key = self.cur_token_.lower()
                 value = self.expect_number_()
                 statements.append(
                     self.ast.VheaField(self.cur_token_location_, key, value))
+                if self.next_token_ != ";":
+                    raise FeatureLibError("Incomplete statement", self.next_token_location_)
            elif self.cur_token_ == ";":
                continue
            else:
@@ -849,9 +865,11 @@ class Parser(object):
 
     def parse_table_name_(self, table):
         statements = table.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.is_cur_keyword_("nameid"):
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.is_cur_keyword_("nameid"):
                 statement = self.parse_nameid_()
                 if statement:
                     statements.append(statement)
@@ -930,9 +948,11 @@ class Parser(object):
 
     def parse_table_BASE_(self, table):
         statements = table.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.is_cur_keyword_("HorizAxis.BaseTagList"):
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.is_cur_keyword_("HorizAxis.BaseTagList"):
                 horiz_bases = self.parse_base_tag_list_()
             elif self.is_cur_keyword_("HorizAxis.BaseScriptList"):
                 horiz_scripts = self.parse_base_script_list_(len(horiz_bases))
@@ -955,9 +975,11 @@ class Parser(object):
             "winAscent", "winDescent", "XHeight", "CapHeight",
             "WeightClass", "WidthClass", "LowerOpSize", "UpperOpSize")
         ranges = ("UnicodeRange", "CodePageRange")
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.NAME:
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.NAME:
                 key = self.cur_token_.lower()
                 value = None
                 if self.cur_token_ in numbers:
@@ -1164,9 +1186,11 @@ class Parser(object):
         symtab.enter_scope()
 
         statements = block.statements
-        while self.next_token_ != "}":
-            self.advance_lexer_()
-            if self.cur_token_type_ is Lexer.GLYPHCLASS:
+        while self.next_token_ != "}" or (not self.ignore_comments and len(self.cur_comments_)):
+            self.advance_lexer_(comments=True)
+            if self.cur_token_type_ is Lexer.COMMENT:
+                statements.append(self.ast.Comment(self.cur_token_location_, self.cur_token_))
+            elif self.cur_token_type_ is Lexer.GLYPHCLASS:
                 statements.append(self.parse_glyphclass_definition_())
             elif self.is_cur_keyword_("anchorDef"):
                 statements.append(self.parse_anchordef_())
@@ -1208,7 +1232,7 @@ class Parser(object):
                 continue
             else:
                 raise FeatureLibError(
-                    "Expected glyph class definition or statement",
+                    "Expected glyph class definition or statement: got {} {}".format(self.cur_token_type_, self.cur_token_),
                     self.cur_token_location_)
 
         self.expect_symbol_("}")
@@ -1339,14 +1363,24 @@ class Parser(object):
             return self.cur_token_
         raise FeatureLibError("Expected a string", self.cur_token_location_)
 
-    def advance_lexer_(self):
-        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
-            self.next_token_type_, self.next_token_, self.next_token_location_)
-        try:
-            (self.next_token_type_, self.next_token_,
-             self.next_token_location_) = self.lexer_.next()
-        except StopIteration:
-            self.next_token_type_, self.next_token_ = (None, None)
+    def advance_lexer_(self, comments=False):
+        if not self.ignore_comments and comments and len(self.cur_comments_):
+            self.cur_token_type_ = Lexer.COMMENT
+            self.cur_token_, self.cur_token_location_ = self.cur_comments_.pop(0)
+            return
+        else:
+            self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
+                self.next_token_type_, self.next_token_, self.next_token_location_)
+            self.cur_comments_ = []
+        while True:
+            try:
+                (self.next_token_type_, self.next_token_,
+                 self.next_token_location_) = next(self.lexer_)
+            except StopIteration:
+                self.next_token_type_, self.next_token_ = (None, None)
+            if self.next_token_type_ != Lexer.COMMENT:
+                break
+            self.cur_comments_.append((self.next_token_, self.next_token_location_))
 
     @staticmethod
     def reverse_string_(s):
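The reworked `advance_lexer_()` buffers any comments found while fetching the next token into `cur_comments_`, then replays them one per call when invoked with `comments=True` and `ignore_comments` is off. Below is a standalone sketch of the same buffering idea, decoupled from `Parser`; the `TokenStream` class and `keep_comments` flag are invented for illustration, and it assumes `IncludingLexer` can be iterated directly (it implements `__next__`):

```python
from fontTools.feaLib.lexer import IncludingLexer, Lexer

class TokenStream:
    """Buffer comments while looking ahead, then surface them just
    before the token they precede: roughly what
    advance_lexer_(comments=True) does at statement boundaries."""

    def __init__(self, featurefile, keep_comments=False):
        self.lexer_ = IncludingLexer(featurefile)
        self.keep_comments = keep_comments

    def tokens(self):
        pending = []  # buffered (text, location) pairs, like cur_comments_
        for token_type, token, location in self.lexer_:
            if token_type == Lexer.COMMENT:
                pending.append((token, location))
                continue
            if self.keep_comments:
                for text, loc in pending:
                    yield Lexer.COMMENT, text, loc
            pending = []
            yield token_type, token, location
```

One consequence of the buffering: comments only surface at the points where the parser opts in with `comments=True`, which is why each `parse_table_*` loop above gained an explicit `Lexer.COMMENT` branch.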
@@ -138,22 +138,27 @@ class BuilderTest(unittest.TestCase):
     def check_fea2fea_file(self, name, base=None, parser=Parser):
         font = makeTTFont()
         fname = (name + ".fea") if '.' not in name else name
-        p = parser(self.getpath(fname), glyphMap=font.getReverseGlyphMap())
-        doc = p.parse()
-        actual = self.normal_fea(doc.asFea().split("\n"))
+        temp = parser.ignore_comments
+        parser.ignore_comments = False
+        try:
+            p = parser(self.getpath(fname), glyphMap=font.getReverseGlyphMap())
+            doc = p.parse()
+            actual = self.normal_fea(doc.asFea().split("\n"))
 
-        with open(self.getpath(base or fname), "r", encoding="utf-8") as ofile:
-            expected = self.normal_fea(ofile.readlines())
+            with open(self.getpath(base or fname), "r", encoding="utf-8") as ofile:
+                expected = self.normal_fea(ofile.readlines())
 
-        if expected != actual:
-            fname = name.rsplit(".", 1)[0] + ".fea"
-            for line in difflib.unified_diff(
-                    expected, actual,
-                    fromfile=fname + " (expected)",
-                    tofile=fname + " (actual)"):
-                sys.stderr.write(line+"\n")
-            self.fail("Fea2Fea output is different from expected. "
-                      "Generated:\n{}\n".format("\n".join(actual)))
+            if expected != actual:
+                fname = name.rsplit(".", 1)[0] + ".fea"
+                for line in difflib.unified_diff(
+                        expected, actual,
+                        fromfile=fname + " (expected)",
+                        tofile=fname + " (actual)"):
+                    sys.stderr.write(line+"\n")
+                self.fail("Fea2Fea output is different from expected. "
+                          "Generated:\n{}\n".format("\n".join(actual)))
+        finally:
+            parser.ignore_comments = temp
 
     def normal_fea(self, lines):
         output = []
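Note why the test wraps the body in `try`/`finally`: `ignore_comments` is a class attribute, so flipping it on the `parser` class would leak into every later test unless it is restored. The same pattern in isolation (a sketch, body elided):

```python
from fontTools.feaLib.parser import Parser

saved = Parser.ignore_comments  # class attribute, shared process-wide
Parser.ignore_comments = False  # keep comments for this check only
try:
    ...                         # parse and compare, as in the test above
finally:
    Parser.ignore_comments = saved  # restore even if the check fails
```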
@@ -9,7 +9,6 @@ import unittest
 def lex(s):
     return [(typ, tok) for (typ, tok, _) in Lexer(s, "test.fea")]
 
-
 class LexerTest(unittest.TestCase):
     def __init__(self, methodName):
         unittest.TestCase.__init__(self, methodName)
@@ -55,6 +54,7 @@ class LexerTest(unittest.TestCase):
         ])
         self.assertEqual(lex("include # Comment\n (foo) \n;"), [
             (Lexer.NAME, "include"),
+            (Lexer.COMMENT, "# Comment"),
             (Lexer.FILENAME, "foo"),
             (Lexer.SYMBOL, ";")
         ])
@@ -79,9 +79,13 @@ class LexerTest(unittest.TestCase):
             lex("foo - -2"),
             [(Lexer.NAME, "foo"), (Lexer.SYMBOL, "-"), (Lexer.NUMBER, -2)])
 
-    def test_comment(self):
-        self.assertEqual(lex("# Comment\n#"), [])
+    #def test_comment(self):
+    #    self.assertEqual(lex("# Comment\n#"), [])
+
+    def test_comment_kept(self):
+        self.assertEqual(lex("# Comment\n#"),
+                         [(Lexer.COMMENT, "# Comment"), (Lexer.COMMENT, "#")])
 
     def test_string(self):
         self.assertEqual(lex('"foo" "bar"'),
                          [(Lexer.STRING, "foo"), (Lexer.STRING, "bar")])
@@ -112,7 +116,7 @@ class LexerTest(unittest.TestCase):
         def locs(s):
             return ["%s:%d:%d" % loc for (_, _, loc) in Lexer(s, "test.fea")]
         self.assertEqual(locs("a b # Comment\n12 @x"), [
-            "test.fea:1:1", "test.fea:1:3", "test.fea:2:1",
+            "test.fea:1:1", "test.fea:1:3", "test.fea:1:5", "test.fea:2:1",
             "test.fea:2:4"
         ])
 
@@ -55,6 +55,20 @@ class ParserTest(unittest.TestCase):
         if not hasattr(self, "assertRaisesRegex"):
             self.assertRaisesRegex = self.assertRaisesRegexp
 
+    def test_comments(self):
+        doc = self.parse(
+            """ # Initial
+                feature test {
+                    sub A by B; # simple
+                } test;""", comments=True)
+        c1 = doc.statements[0]
+        c2 = doc.statements[1].statements[1]
+        self.assertEqual(type(c1), ast.Comment)
+        self.assertEqual(c1.text, "# Initial")
+        self.assertEqual(type(c2), ast.Comment)
+        self.assertEqual(c2.text, "# simple")
+        self.assertEqual(doc.statements[1].name, "test")
+
     def test_anchor_format_a(self):
         doc = self.parse(
             "feature test {"
@@ -1424,9 +1438,12 @@ class ParserTest(unittest.TestCase):
         doc = self.parse(";;;")
         self.assertFalse(doc.statements)
 
-    def parse(self, text, glyphMap=GLYPHMAP):
+    def parse(self, text, glyphMap=GLYPHMAP, comments=False):
         featurefile = UnicodeIO(text)
-        return Parser(featurefile, glyphMap).parse()
+        p = Parser(featurefile, glyphMap)
+        if comments:
+            p.ignore_comments = False
+        return p.parse()
 
     @staticmethod
     def getpath(testfile):
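Putting the pieces together, the feature can be exercised end to end. A minimal sketch, assuming Python 3's `io.StringIO` in place of the `UnicodeIO` helper the tests use, with a toy `glyphMap` invented for the example:

```python
from io import StringIO
from fontTools.feaLib.parser import Parser

fea = """# Initial
feature test {
    sub A by B; # simple
} test;
"""

p = Parser(StringIO(fea), glyphMap={"A": 0, "B": 1})
p.ignore_comments = False  # instance attribute shadows the class default
doc = p.parse()
print(doc.asFea())  # both comments come back as ast.Comment statements
```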