diff --git a/Lib/fontTools/feaLib/parser.py b/Lib/fontTools/feaLib/parser.py index 23cabab40..8ea1a5ad2 100644 --- a/Lib/fontTools/feaLib/parser.py +++ b/Lib/fontTools/feaLib/parser.py @@ -2,6 +2,7 @@ from __future__ import print_function, division, absolute_import from __future__ import unicode_literals from fontTools.feaLib.error import FeatureLibError from fontTools.feaLib.lexer import Lexer, IncludingLexer +from fontTools.misc.encodingTools import getEncoding from fontTools.misc.py23 import * import fontTools.feaLib.ast as ast import logging @@ -863,14 +864,15 @@ class Parser(object): langID = None if self.next_token_type_ == Lexer.NUMBER: platformID = self.expect_number_() + location = self.cur_token_location_ if platformID not in (1, 3): - raise FeatureLibError("Expected platform id 1 or 3", - self.cur_token_location_) + raise FeatureLibError("Expected platform id 1 or 3", location) if self.next_token_type_ == Lexer.NUMBER: platEncID = self.expect_number_() langID = self.expect_number_() else: platformID = 3 + location = self.cur_token_location_ if platformID == 1: # Macintosh platEncID = platEncID or 0 # Roman @@ -882,12 +884,11 @@ class Parser(object): string = self.expect_string_() self.expect_symbol_(";") - if platformID == 1 and platEncID == 0: - string = self.unescape_mac_name_string(string) - elif platformID == 3 and platEncID == 1: - string = self.unescape_windows_name_string(string) - - return platformID, platEncID, langID, string + encoding = getEncoding(platformID, platEncID, langID) + if encoding is None: + raise FeatureLibError("Unsupported encoding", location) + unescaped = self.unescape_string_(string, encoding) + return platformID, platEncID, langID, unescaped def parse_nameid_(self): assert self.cur_token_ == "nameid", self.cur_token_ @@ -905,21 +906,27 @@ class Parser(object): return self.ast.NameRecord(location, nameID, platformID, platEncID, langID, string) - def unescape_mac_name_string(self, string): - def unescape(match): - n = match.group(0)[1:] - c = bytechr(int(n, 16)).decode('mac_roman') - return c + def unescape_string_(self, string, encoding): + if encoding == "utf_16_be": + s = re.sub(r"\\[0-9a-fA-F]{4}", self.unescape_unichr_, string) + else: + unescape = lambda m: self.unescape_byte_(m, encoding) + s = re.sub(r"\\[0-9a-fA-F]{2}", unescape, string) + # We now have a Unicode string, but it might contain surrogate pairs. + # We convert surrogates to actual Unicode by round-tripping through + # Python's UTF-16 codec in a special mode. + utf16 = tobytes(s, "utf_16_be", "surrogatepass") + return tounicode(utf16, "utf_16_be") - return re.sub(r'\\[0-9a-fA-F]{2}', unescape, string) + @staticmethod + def unescape_unichr_(match): + n = match.group(0)[1:] + return unichr(int(n, 16)) - def unescape_windows_name_string(self, string): - def unescape(match): - n = match.group(0)[1:] - c = unichr(int(n, 16)) - return c - - return re.sub(r'\\[0-9a-fA-F]{4}', unescape, string) + @staticmethod + def unescape_byte_(match, encoding): + n = match.group(0)[1:] + return bytechr(int(n, 16)).decode(encoding) def parse_table_BASE_(self, table): statements = table.statements diff --git a/NEWS.rst b/NEWS.rst index e686b1d34..191e9ef40 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -1,4 +1,7 @@ - [feaLib] include statements now resolve relative paths like makeotf (#838) +- [feaLib] `table name` now handles Unicode codepoints beyond the Basic + Multilingual Plane, also supports old-style MacOS platform encodings (#842) +- [feaLib] correctly escape string literals when emitting feature syntax (#780) 3.7.0 (released 2017-02-11) --------------------------- diff --git a/Tests/feaLib/data/spec8b.fea b/Tests/feaLib/data/spec8b.fea index af7925691..538ac39e2 100644 --- a/Tests/feaLib/data/spec8b.fea +++ b/Tests/feaLib/data/spec8b.fea @@ -4,5 +4,9 @@ feature size { # 139 - range end (inclusive, decipoints) sizemenuname "Win MinionPro Size Name"; sizemenuname 1 "Mac MinionPro Size Name"; - sizemenuname 1 21 0 "Mac MinionPro Size Name"; + # The specification says: sizemenuname 1 21 0 "Mac MinionPro Size Name"; + # which means Macintosh platform, MacOS Thai encoding, English language. + # Since fonttools currently does not support the MacOS Thai encoding, + # we use instead MacOS Roman encoding (0), Swedish language (5) for our test. + sizemenuname 1 0 5 "Mac MinionPro Size Name"; } size; diff --git a/Tests/feaLib/data/spec8b.ttx b/Tests/feaLib/data/spec8b.ttx index 836d60eab..6e66c16b1 100644 --- a/Tests/feaLib/data/spec8b.ttx +++ b/Tests/feaLib/data/spec8b.ttx @@ -8,7 +8,7 @@ Mac MinionPro Size Name - + Mac MinionPro Size Name diff --git a/Tests/feaLib/parser_test.py b/Tests/feaLib/parser_test.py index c610d37ef..f5343c9e0 100644 --- a/Tests/feaLib/parser_test.py +++ b/Tests/feaLib/parser_test.py @@ -928,12 +928,10 @@ class ParserTest(unittest.TestCase): self.assertEquals(name.asFea(), r'nameid 9 "Quotation \0022Mark\0022";') def test_nameid_windows_utf16_surroates(self): - pass - # TODO: https://github.com/fonttools/fonttools/issues/842 - # doc = self.parse(r'table name { nameid 9 "Carrot \D83E\DD55"; } name;') - # name = doc.statements[0].statements[0] - # self.assertEquals(name.string, r"Carrot 🥕") - # self.assertEquals(name.asFea(), r'nameid 9 "Carrot \d83e\dd55";') + doc = self.parse(r'table name { nameid 9 "Carrot \D83E\DD55"; } name;') + name = doc.statements[0].statements[0] + self.assertEquals(name.string, r"Carrot 🥕") + self.assertEquals(name.asFea(), r'nameid 9 "Carrot \d83e\dd55";') def test_nameid_mac_roman(self): doc = self.parse( @@ -956,9 +954,8 @@ class ParserTest(unittest.TestCase): self.assertEquals(name.platformID, 1) self.assertEquals(name.platEncID, 0) self.assertEquals(name.langID, 18) - # TODO: https://github.com/fonttools/fonttools/issues/842 - # self.assertEquals(name.string, "Jovica Veljović") - # self.assertEquals(name.asFea(), r'nameid 9 1 0 18 "Jovica Veljovi\e6";') + self.assertEquals(name.string, "Jovica Veljović") + self.assertEquals(name.asFea(), r'nameid 9 1 0 18 "Jovica Veljovi\e6";') def test_nameid_unsupported_platform(self): self.assertRaisesRegex(