[feaLib] Parse languagesystem

2015-08-01 12:35:22 +02:00 · 2015-08-01 12:35:22 +02:00 · 05fe077654
commit 05fe077654
parent f4ed6b5a85
4 changed files with 174 additions and 0 deletions
--- a/Lib/fontTools/feaLib/features.py
+++ b/Lib/fontTools/feaLib/features.py
@ -0,0 +1,17 @@
+from __future__ import print_function, division, absolute_import
+from __future__ import unicode_literals
+import os
+
+
+def write(buffer, text):
+    """Encode ``text`` as UTF-8 and write the resulting bytes to ``buffer``."""
+    encoded = text.encode("utf-8")
+    buffer.write(encoded)
+
+
+class Features(object):
+    """In-memory model of a feature file.
+
+    Attributes are documented inline in ``__init__``.  The class derives
+    from ``object`` so that it is a new-style class on Python 2 as well.
+    """
+
+    def __init__(self):
+        # Maps each script tag to the set of language tags declared for it.
+        self.language_system = {}  # script --> {language}
+
+    def write(self, out, linesep=os.linesep):
+        """Write one ``languagesystem`` statement per (script, language) pair.
+
+        Statements are emitted sorted by script and then by language, so the
+        output does not depend on dict or set iteration order.
+
+        Args:
+            out: Object with a ``write`` method; it is handed to the
+                module-level ``write`` helper together with each statement.
+            linesep: String appended after each statement.
+        """
+        for script in sorted(self.language_system):
+            for lang in sorted(self.language_system[script]):
+                write(out, "languagesystem %s %s;%s" % (script, lang, linesep))
--- a/Lib/fontTools/feaLib/parser.py
+++ b/Lib/fontTools/feaLib/parser.py
@ -0,0 +1,75 @@
+from __future__ import print_function, division, absolute_import
+from __future__ import unicode_literals
+from fontTools.feaLib.features import Features
+from fontTools.feaLib.lexer import Lexer, IncludingLexer
+
+
+class ParserError(Exception):
+    """Parse failure, optionally tagged with the place where it happened.
+
+    ``location`` is either a falsy value (no location available) or a
+    ``(path, line, column)`` triple; when present, ``str()`` prefixes the
+    message with ``path:line:column: ``.
+    """
+
+    def __init__(self, message, location):
+        Exception.__init__(self, message)
+        self.location = location
+
+    def __str__(self):
+        text = Exception.__str__(self)
+        if not self.location:
+            return text
+        path, line, column = self.location
+        return "%s:%d:%d: %s" % (path, line, column, text)
+
+
+class Parser(object):
+    """Parser for feature files; currently handles ``languagesystem`` only.
+
+    The parser keeps one token of lookahead: the ``cur_token_*`` attributes
+    describe the token consumed most recently, and the ``next_token_*``
+    attributes describe the token that the next ``advance_lexer_`` call will
+    consume.  A token type of ``None`` means "no token".
+    """
+
+    def __init__(self, path):
+        """Create a parser that reads tokens from the file at ``path``."""
+        self.doc_ = Features()
+
+        # Create every token slot up front so the attributes always exist;
+        # the advance_lexer_() call below then loads the first lookahead.
+        self.cur_token_type_, self.cur_token_ = (None, None)
+        self.cur_token_location_ = None
+        self.next_token_type_, self.next_token_ = (None, None)
+        self.next_token_location_ = None
+        self.lexer_ = IncludingLexer(path)
+        self.advance_lexer_()
+
+    def parse(self):
+        """Parse top-level statements and return the ``Features`` document.
+
+        Parsing stops at the end of input or at the first ``feature``
+        keyword, because feature blocks are not implemented yet.
+
+        Raises:
+            ParserError: if a statement does not start with one of the
+                accepted keywords, or a ``languagesystem`` statement is
+                malformed.
+        """
+        while self.next_token_type_ is not None:
+            keyword = self.expect_keyword_({"feature", "languagesystem"})
+            if keyword == "languagesystem":
+                self.parse_languagesystem_()
+            elif keyword == "feature":
+                break  # TODO: Implement
+        return self.doc_
+
+    def parse_languagesystem_(self):
+        """Parse ``<script> <language> ;`` and record the pair in the document."""
+        script, language = self.expect_tag_(), self.expect_tag_()
+        self.expect_symbol_(";")
+        langsys = self.doc_.language_system.setdefault(script, set())
+        langsys.add(language)
+
+    def expect_keyword_(self, keywords):
+        """Consume the next token and return it if it is one of ``keywords``.
+
+        Raises:
+            ParserError: if the token is not a NAME contained in ``keywords``;
+                the message lists the accepted keywords in sorted order.
+        """
+        self.advance_lexer_()
+        if self.cur_token_type_ is Lexer.NAME and self.cur_token_ in keywords:
+            return self.cur_token_
+        s = ", ".join(sorted(keywords))
+        raise ParserError("Expected one of %s" % s,
+                          self.cur_token_location_)
+
+    def expect_tag_(self):
+        """Consume the next token as a tag and return it padded to 4 characters.
+
+        Raises:
+            ParserError: if the token is not a NAME, or is longer than four
+                characters.
+        """
+        self.advance_lexer_()
+        if self.cur_token_type_ is not Lexer.NAME:
+            raise ParserError("Expected a tag", self.cur_token_location_)
+        if len(self.cur_token_) > 4:
+            raise ParserError("Tags can not be longer than 4 characters",
+                              self.cur_token_location_)
+        # Shorter tags are right-padded with spaces, e.g. "DEU" -> "DEU ".
+        return (self.cur_token_ + "    ")[:4]
+
+    def expect_symbol_(self, symbol):
+        """Consume the next token and return ``symbol`` if the token matches it.
+
+        Raises:
+            ParserError: if the token is not the SYMBOL ``symbol``.
+        """
+        self.advance_lexer_()
+        if self.cur_token_type_ is Lexer.SYMBOL and self.cur_token_ == symbol:
+            return symbol
+        raise ParserError("Expected '%s'" % symbol, self.cur_token_location_)
+
+    def advance_lexer_(self):
+        """Shift the lookahead token into ``cur_token_*`` and fetch a new one."""
+        self.cur_token_type_, self.cur_token_, self.cur_token_location_ = (
+            self.next_token_type_, self.next_token_, self.next_token_location_)
+        try:
+            # NOTE(review): the built-in next(self.lexer_) would be the usual
+            # spelling, but it needs the lexer to define __next__ on
+            # Python 3 -- confirm against the lexer before switching.
+            (self.next_token_type_, self.next_token_,
+             self.next_token_location_) = self.lexer_.next()
+        except StopIteration:
+            # End of input: clear the lookahead type and value.  The lookahead
+            # location is left unchanged, so it still holds the location of
+            # the last token that was read.
+            self.next_token_type_, self.next_token_ = (None, None)
--- a/Lib/fontTools/feaLib/parser_test.py
+++ b/Lib/fontTools/feaLib/parser_test.py
@ -0,0 +1,63 @@
+from __future__ import print_function, division, absolute_import
+from __future__ import unicode_literals
+from fontTools.feaLib.lexer import LexerError
+from fontTools.feaLib.parser import Parser, ParserError
+from fontTools.misc.py23 import *
+import codecs
+import os
+import shutil
+import sys
+import tempfile
+import unittest
+
+
+class ParserTest(unittest.TestCase):
+    """Tests for ``Parser``, feeding it feature text through temporary files."""
+
+    def test_languagesystem(self):
+        doc = self.parse("languagesystem latn DEU;")
+        self.assertEqual(doc.language_system, {"latn": {"DEU "}})
+        # Each malformed input must be rejected with the matching message.
+        failures = [
+            ("Expected ';'", "languagesystem latn DEU"),
+            ("longer than 4 characters", "languagesystem foobar DEU"),
+            ("longer than 4 characters", "languagesystem latn FOOBAR"),
+        ]
+        for pattern, text in failures:
+            self.assertRaisesRegexp(ParserError, pattern, self.parse, text)
+
+    def test_roundtrip(self):
+        self.roundtrip("mini.fea")
+
+    def setUp(self):
+        # The temporary directory is created lazily by parse().
+        self.tempdir = None
+        self.num_tempfiles = 0
+
+    def tearDown(self):
+        if not self.tempdir:
+            return
+        shutil.rmtree(self.tempdir)
+
+    def parse(self, text):
+        """Write ``text`` to a fresh temporary file and return its parse result."""
+        if not self.tempdir:
+            self.tempdir = tempfile.mkdtemp()
+        self.num_tempfiles += 1
+        filename = "tmp%d.fea" % self.num_tempfiles
+        path = os.path.join(self.tempdir, filename)
+        with codecs.open(path, "wb", "utf-8") as outfile:
+            outfile.write(text)
+        parser = Parser(path)
+        return parser.parse()
+
+    def roundtrip(self, testfile):
+        """Check that parsing the written form of ``testfile`` reproduces it."""
+        first, second = StringIO(), StringIO()
+        doc = Parser(ParserTest.getpath(testfile)).parse()
+        doc.write(first)
+        text1 = first.getvalue().decode("utf-8")
+        self.parse(text1).write(second)
+        text2 = second.getvalue().decode("utf-8")
+        self.assertEqual(text1, text2)
+
+    @staticmethod
+    def getpath(testfile):
+        """Return the path of ``testfile`` inside the ``testdata`` directory."""
+        directory = os.path.dirname(__file__)
+        return os.path.join(directory, "testdata", testfile)
+
+
+# Run the test suite when this file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/Lib/fontTools/feaLib/testdata/mini.fea
+++ b/Lib/fontTools/feaLib/testdata/mini.fea
@ -0,0 +1,19 @@
+# Example file from OpenType Feature File specification, section 1.
+# http://www.adobe.com/devnet/opentype/afdko/topic_feature_file_syntax.html
+
+# Script and language coverage
+languagesystem DFLT dflt;
+languagesystem latn dflt;
+
+# Ligature formation
+feature liga {
+    substitute f i by f_i;
+    substitute f l by f_l;
+} liga;
+
+# Kerning
+feature kern {
+    position A Y -100;
+    position a y -80;
+    position s f' <0 0 10 0> t;
+} kern;