diff --git a/Lib/fontTools/feaLib/parser.py b/Lib/fontTools/feaLib/parser.py
index 23cabab40..8ea1a5ad2 100644
--- a/Lib/fontTools/feaLib/parser.py
+++ b/Lib/fontTools/feaLib/parser.py
@@ -2,6 +2,7 @@ from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals
from fontTools.feaLib.error import FeatureLibError
from fontTools.feaLib.lexer import Lexer, IncludingLexer
+from fontTools.misc.encodingTools import getEncoding
from fontTools.misc.py23 import *
import fontTools.feaLib.ast as ast
import logging
@@ -863,14 +864,15 @@ class Parser(object):
langID = None
if self.next_token_type_ == Lexer.NUMBER:
platformID = self.expect_number_()
+ location = self.cur_token_location_
if platformID not in (1, 3):
- raise FeatureLibError("Expected platform id 1 or 3",
- self.cur_token_location_)
+ raise FeatureLibError("Expected platform id 1 or 3", location)
if self.next_token_type_ == Lexer.NUMBER:
platEncID = self.expect_number_()
langID = self.expect_number_()
else:
platformID = 3
+ location = self.cur_token_location_
if platformID == 1: # Macintosh
platEncID = platEncID or 0 # Roman
@@ -882,12 +884,11 @@ class Parser(object):
string = self.expect_string_()
self.expect_symbol_(";")
- if platformID == 1 and platEncID == 0:
- string = self.unescape_mac_name_string(string)
- elif platformID == 3 and platEncID == 1:
- string = self.unescape_windows_name_string(string)
-
- return platformID, platEncID, langID, string
+ encoding = getEncoding(platformID, platEncID, langID)
+ if encoding is None:
+ raise FeatureLibError("Unsupported encoding", location)
+ unescaped = self.unescape_string_(string, encoding)
+ return platformID, platEncID, langID, unescaped
def parse_nameid_(self):
assert self.cur_token_ == "nameid", self.cur_token_
@@ -905,21 +906,27 @@ class Parser(object):
return self.ast.NameRecord(location, nameID, platformID, platEncID,
langID, string)
- def unescape_mac_name_string(self, string):
- def unescape(match):
- n = match.group(0)[1:]
- c = bytechr(int(n, 16)).decode('mac_roman')
- return c
+ def unescape_string_(self, string, encoding):
+ if encoding == "utf_16_be":
+ s = re.sub(r"\\[0-9a-fA-F]{4}", self.unescape_unichr_, string)
+ else:
+ unescape = lambda m: self.unescape_byte_(m, encoding)
+ s = re.sub(r"\\[0-9a-fA-F]{2}", unescape, string)
+ # We now have a Unicode string, but it might contain surrogate pairs.
+ # We convert surrogates to actual Unicode by round-tripping through
+ # Python's UTF-16 codec in a special mode.
+ utf16 = tobytes(s, "utf_16_be", "surrogatepass")
+ return tounicode(utf16, "utf_16_be")
- return re.sub(r'\\[0-9a-fA-F]{2}', unescape, string)
+ @staticmethod
+ def unescape_unichr_(match):
+ n = match.group(0)[1:]
+ return unichr(int(n, 16))
- def unescape_windows_name_string(self, string):
- def unescape(match):
- n = match.group(0)[1:]
- c = unichr(int(n, 16))
- return c
-
- return re.sub(r'\\[0-9a-fA-F]{4}', unescape, string)
+ @staticmethod
+ def unescape_byte_(match, encoding):
+ n = match.group(0)[1:]
+ return bytechr(int(n, 16)).decode(encoding)
def parse_table_BASE_(self, table):
statements = table.statements
diff --git a/NEWS.rst b/NEWS.rst
index e686b1d34..191e9ef40 100644
--- a/NEWS.rst
+++ b/NEWS.rst
@@ -1,4 +1,7 @@
- [feaLib] include statements now resolve relative paths like makeotf (#838)
+- [feaLib] `table name` now handles Unicode codepoints beyond the Basic
+ Multilingual Plane, and supports old-style MacOS platform encodings (#842)
+- [feaLib] correctly escape string literals when emitting feature syntax (#780)
3.7.0 (released 2017-02-11)
---------------------------
diff --git a/Tests/feaLib/data/spec8b.fea b/Tests/feaLib/data/spec8b.fea
index af7925691..538ac39e2 100644
--- a/Tests/feaLib/data/spec8b.fea
+++ b/Tests/feaLib/data/spec8b.fea
@@ -4,5 +4,9 @@ feature size {
# 139 - range end (inclusive, decipoints)
sizemenuname "Win MinionPro Size Name";
sizemenuname 1 "Mac MinionPro Size Name";
- sizemenuname 1 21 0 "Mac MinionPro Size Name";
+ # The specification says: sizemenuname 1 21 0 "Mac MinionPro Size Name";
+ # which means Macintosh platform, MacOS Thai encoding, English language.
+ # Since fonttools currently does not support the MacOS Thai encoding,
+ # we instead use MacOS Roman encoding (0) and Swedish language (5) in our test.
+ sizemenuname 1 0 5 "Mac MinionPro Size Name";
} size;
diff --git a/Tests/feaLib/data/spec8b.ttx b/Tests/feaLib/data/spec8b.ttx
index 836d60eab..6e66c16b1 100644
--- a/Tests/feaLib/data/spec8b.ttx
+++ b/Tests/feaLib/data/spec8b.ttx
@@ -8,7 +8,7 @@
Mac MinionPro Size Name
-
+
Mac MinionPro Size Name
diff --git a/Tests/feaLib/parser_test.py b/Tests/feaLib/parser_test.py
index c610d37ef..f5343c9e0 100644
--- a/Tests/feaLib/parser_test.py
+++ b/Tests/feaLib/parser_test.py
@@ -928,12 +928,10 @@ class ParserTest(unittest.TestCase):
self.assertEquals(name.asFea(), r'nameid 9 "Quotation \0022Mark\0022";')
def test_nameid_windows_utf16_surroates(self):
- pass
- # TODO: https://github.com/fonttools/fonttools/issues/842
- # doc = self.parse(r'table name { nameid 9 "Carrot \D83E\DD55"; } name;')
- # name = doc.statements[0].statements[0]
- # self.assertEquals(name.string, r"Carrot 🥕")
- # self.assertEquals(name.asFea(), r'nameid 9 "Carrot \d83e\dd55";')
+ doc = self.parse(r'table name { nameid 9 "Carrot \D83E\DD55"; } name;')
+ name = doc.statements[0].statements[0]
+ self.assertEquals(name.string, r"Carrot 🥕")
+ self.assertEquals(name.asFea(), r'nameid 9 "Carrot \d83e\dd55";')
def test_nameid_mac_roman(self):
doc = self.parse(
@@ -956,9 +954,8 @@ class ParserTest(unittest.TestCase):
self.assertEquals(name.platformID, 1)
self.assertEquals(name.platEncID, 0)
self.assertEquals(name.langID, 18)
- # TODO: https://github.com/fonttools/fonttools/issues/842
- # self.assertEquals(name.string, "Jovica Veljović")
- # self.assertEquals(name.asFea(), r'nameid 9 1 0 18 "Jovica Veljovi\e6";')
+ self.assertEquals(name.string, "Jovica Veljović")
+ self.assertEquals(name.asFea(), r'nameid 9 1 0 18 "Jovica Veljovi\e6";')
def test_nameid_unsupported_platform(self):
self.assertRaisesRegex(