[voltLib] Add Parser and tests

2015-09-30 09:52:49 +01:00 · 2015-09-30 09:52:49 +01:00 · 1957fd2cd1
commit 1957fd2cd1
parent 34e7a8d904
2 changed files with 179 additions and 0 deletions
--- a/Lib/fontTools/voltLib/parser.py
+++ b/Lib/fontTools/voltLib/parser.py
@ -0,0 +1,108 @@
+from __future__ import print_function, division, absolute_import
+import fontTools.voltLib.ast as ast
+from fontTools.voltLib.lexer import Lexer
+from fontTools.voltLib.error import VoltLibError
+import codecs
+
+class Parser(object):
+    """Parse a voltLib source file into an ``ast.VoltFile``.
+
+    The parser keeps one token of lookahead: the ``cur_token_*`` attributes
+    describe the token being processed, the ``next_token_*`` attributes the
+    one after it.  Both sets are refreshed by ``advance_lexer_``.
+    """
+
+    def __init__(self, path):
+        """Read the file at ``path`` and prime the one-token lookahead.
+
+        Raises:
+            VoltLibError: if the file cannot be opened or read.
+        """
+        self.doc_ = ast.VoltFile()
+        self.next_token_type_, self.next_token_ = (None, None)
+        self.next_token_location_ = None
+        try:
+            with codecs.open(path, "rb", "utf-8") as f:
+                self.lexer_ = Lexer(f.read(), path)
+        except IOError as err:
+            # No token has been read at this point, so there is no source
+            # location to attach to the error.
+            # NOTE(review): assumes VoltLibError accepts None as its
+            # location argument -- confirm against voltLib/error.py.
+            raise VoltLibError(str(err), None)
+        self.advance_lexer_()
+
+    def parse(self):
+        """Parse the whole input and return the populated ``ast.VoltFile``.
+
+        Raises:
+            VoltLibError: if a top-level token is neither DEF_GLYPH nor END,
+                or if any token follows END.
+        """
+        statements = self.doc_.statements
+        while self.next_token_type_ is not None:
+            self.advance_lexer_()
+            if self.is_cur_keyword_("DEF_GLYPH"):
+                statements.append(self.parse_def_glyph_())
+            elif self.is_cur_keyword_("END"):
+                # END must be the very last token of the input.
+                if self.next_token_type_ is not None:
+                    raise VoltLibError("Expected the end of the file",
+                                       self.cur_token_location_)
+                return self.doc_
+            else:
+                raise VoltLibError("Expected DEF_GLYPH",
+                                   self.cur_token_location_)
+        return self.doc_
+
+    def parse_def_glyph_(self):
+        """Parse one ``DEF_GLYPH ... END_GLYPH`` statement.
+
+        The current token must be the DEF_GLYPH keyword.  Returns an
+        ``ast.GlyphDefinition``; its unicode, type and components values
+        are None when the corresponding clause is absent from the input.
+
+        Raises:
+            VoltLibError: on a missing or unexpected token, a negative
+                glyph ID or UNICODE value, or an unknown glyph TYPE.
+        """
+        assert self.is_cur_keyword_("DEF_GLYPH")
+        location = self.cur_token_location_
+        name = self.expect_string_()
+        self.expect_keyword_("ID")
+        gid = self.expect_number_()
+        if gid < 0:
+            raise VoltLibError("Invalid glyph ID", self.cur_token_location_)
+        gunicode = None
+        if self.next_token_ == "UNICODE":
+            self.expect_keyword_("UNICODE")
+            gunicode = [self.expect_number_()]
+            if gunicode[0] < 0:
+                raise VoltLibError("Invalid glyph UNICODE",
+                                   self.cur_token_location_)
+        elif self.next_token_ == "UNICODEVALUES":
+            self.expect_keyword_("UNICODEVALUES")
+            gunicode = self.parse_unicode_values()
+        # Apparently TYPE is optional
+        gtype = None
+        if self.next_token_ == "TYPE":
+            self.expect_keyword_("TYPE")
+            gtype = self.expect_name_()
+            # Bad input has to surface as a parse error; an ``assert``
+            # would be stripped when Python runs with -O.
+            if gtype not in ("BASE", "LIGATURE", "MARK"):
+                raise VoltLibError("Expected BASE, LIGATURE or MARK",
+                                   self.cur_token_location_)
+        components = None
+        if gtype == "LIGATURE":
+            self.expect_keyword_("COMPONENTS")
+            components = self.expect_number_()
+        self.expect_keyword_("END_GLYPH")
+        def_glyph = ast.GlyphDefinition(location, name, gid,
+                                        gunicode, gtype, components)
+        return def_glyph
+
+    def parse_unicode_values(self):
+        """Parse the string that follows UNICODEVALUES into a list of ints.
+
+        The string is split on commas (e.g. ``"U+0009,U+000D"``); the first
+        two characters of every item are dropped and the remainder is read
+        as a hexadecimal number.
+
+        Raises:
+            VoltLibError: if the next token is not a string, or if an item
+                is not valid hexadecimal.
+        """
+        unicode_values = self.expect_string_().split(',')
+        try:
+            return [int(uni[2:], 16) for uni in unicode_values]
+        except ValueError:
+            raise VoltLibError("Invalid glyph UNICODEVALUES",
+                               self.cur_token_location_)
+
+    def is_cur_keyword_(self, k):
+        """Return True if the current token is a NAME token equal to ``k``."""
+        return (self.cur_token_type_ is Lexer.NAME) and (self.cur_token_ == k)
+
+    def expect_string_(self):
+        """Advance one token and return it; it must be a STRING token.
+
+        Raises:
+            VoltLibError: if the token is of any other type.
+        """
+        self.advance_lexer_()
+        if self.cur_token_type_ is not Lexer.STRING:
+            raise VoltLibError("Expected a string", self.cur_token_location_)
+        return self.cur_token_
+
+    def expect_keyword_(self, keyword):
+        """Advance one token and return it; it must be the NAME ``keyword``.
+
+        Raises:
+            VoltLibError: if the token is not a NAME equal to ``keyword``.
+        """
+        self.advance_lexer_()
+        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ == keyword:
+            return self.cur_token_
+        raise VoltLibError("Expected \"%s\"" % keyword,
+                           self.cur_token_location_)
+
+    def expect_name_(self):
+        """Advance one token and return it; it must be a NAME token.
+
+        Raises:
+            VoltLibError: if the token is of any other type.
+        """
+        self.advance_lexer_()
+        if self.cur_token_type_ is Lexer.NAME:
+            return self.cur_token_
+        raise VoltLibError("Expected a name", self.cur_token_location_)
+
+    def expect_number_(self):
+        """Advance one token and return it; it must be a NUMBER token.
+
+        Raises:
+            VoltLibError: if the token is of any other type.
+        """
+        self.advance_lexer_()
+        if self.cur_token_type_ is not Lexer.NUMBER:
+            raise VoltLibError("Expected a number", self.cur_token_location_)
+        return self.cur_token_
+
+    def advance_lexer_(self):
+        """Shift the lookahead token into ``cur_token_*`` and fetch another.
+
+        When the lexer is exhausted (StopIteration) the lookahead type and
+        value become None; ``next_token_location_`` is left unchanged.
+        """
+        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
+            self.next_token_type_, self.next_token_, self.next_token_location_)
+        try:
+            (self.next_token_type_, self.next_token_,
+             self.next_token_location_) = self.lexer_.next()
+        except StopIteration:
+            self.next_token_type_, self.next_token_ = (None, None)
--- a/Lib/fontTools/voltLib/parser_test.py
+++ b/Lib/fontTools/voltLib/parser_test.py
@ -0,0 +1,71 @@
+from __future__ import print_function, division, absolute_import
+from __future__ import unicode_literals
+# from fontTools.voltLib.error import VoltLibError
+from fontTools.voltLib.parser import Parser
+import codecs
+import os
+import shutil
+import tempfile
+import unittest
+
+
+class ParserTest(unittest.TestCase):
+    """Tests that run Parser over snippets written to temporary files."""
+
+    def __init__(self, methodName):
+        unittest.TestCase.__init__(self, methodName)
+        # assertRaisesRegexp was renamed to assertRaisesRegex in Python 3,
+        # where the old spelling emits deprecation warnings.  Provide the
+        # new name when only the old one is available.
+        if not hasattr(self, "assertRaisesRegex"):
+            self.assertRaisesRegex = self.assertRaisesRegexp
+
+    def setUp(self):
+        # The scratch directory is created on demand by parse().
+        self.tempdir = None
+        self.num_tempfiles = 0
+
+    def tearDown(self):
+        if self.tempdir:
+            shutil.rmtree(self.tempdir)
+
+    def parse(self, text):
+        """Write ``text`` to a new temporary file and return its parse."""
+        if not self.tempdir:
+            self.tempdir = tempfile.mkdtemp()
+        self.num_tempfiles += 1
+        filename = "tmp%d.vtp" % self.num_tempfiles
+        path = os.path.join(self.tempdir, filename)
+        with codecs.open(path, "wb", "utf-8") as outfile:
+            outfile.write(text)
+        parser = Parser(path)
+        return parser.parse()
+
+    def test_def_glyph(self):
+        # Each case pairs a source snippet with the expected
+        # (name, id, unicode, type, components) of its single statement.
+        cases = [
+            ('DEF_GLYPH ".notdef" ID 0 TYPE BASE END_GLYPH',
+             (".notdef", 0, None, "BASE", None)),
+            ('DEF_GLYPH "space" ID 3 UNICODE 32 TYPE BASE END_GLYPH',
+             ("space", 3, [0x0020], "BASE", None)),
+            ('DEF_GLYPH "CR" ID 2 UNICODEVALUES "U+0009,U+000D" '
+             'TYPE BASE END_GLYPH',
+             ("CR", 2, [0x0009, 0x000D], "BASE", None)),
+            ('DEF_GLYPH "f_f" ID 320 TYPE LIGATURE COMPONENTS 2 END_GLYPH',
+             ("f_f", 320, None, "LIGATURE", 2)),
+            ('DEF_GLYPH "glyph20" ID 20 END_GLYPH',
+             ("glyph20", 20, None, None, None)),
+        ]
+        for source, expected in cases:
+            [glyph] = self.parse(source).statements
+            actual = (glyph.name, glyph.id, glyph.unicode,
+                      glyph.type, glyph.components)
+            self.assertEqual(actual, expected)
+
+# Run the test suite when this file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()