[feaLib] Make nameid parsing more robust

We now correctly handle nameid statements with surrogate pairs and
old-style macOS-encoded names (provided that fonttools supports the
specified encoding).

Resolves https://github.com/fonttools/fonttools/issues/842.
This commit is contained in:
Sascha Brawer 2017-02-14 12:39:41 +01:00
parent b22df7ff48
commit eac7ef89c0
5 changed files with 43 additions and 32 deletions

View File

@ -2,6 +2,7 @@ from __future__ import print_function, division, absolute_import
from __future__ import unicode_literals from __future__ import unicode_literals
from fontTools.feaLib.error import FeatureLibError from fontTools.feaLib.error import FeatureLibError
from fontTools.feaLib.lexer import Lexer, IncludingLexer from fontTools.feaLib.lexer import Lexer, IncludingLexer
from fontTools.misc.encodingTools import getEncoding
from fontTools.misc.py23 import * from fontTools.misc.py23 import *
import fontTools.feaLib.ast as ast import fontTools.feaLib.ast as ast
import logging import logging
@ -863,14 +864,15 @@ class Parser(object):
langID = None langID = None
if self.next_token_type_ == Lexer.NUMBER: if self.next_token_type_ == Lexer.NUMBER:
platformID = self.expect_number_() platformID = self.expect_number_()
location = self.cur_token_location_
if platformID not in (1, 3): if platformID not in (1, 3):
raise FeatureLibError("Expected platform id 1 or 3", raise FeatureLibError("Expected platform id 1 or 3", location)
self.cur_token_location_)
if self.next_token_type_ == Lexer.NUMBER: if self.next_token_type_ == Lexer.NUMBER:
platEncID = self.expect_number_() platEncID = self.expect_number_()
langID = self.expect_number_() langID = self.expect_number_()
else: else:
platformID = 3 platformID = 3
location = self.cur_token_location_
if platformID == 1: # Macintosh if platformID == 1: # Macintosh
platEncID = platEncID or 0 # Roman platEncID = platEncID or 0 # Roman
@ -882,12 +884,11 @@ class Parser(object):
string = self.expect_string_() string = self.expect_string_()
self.expect_symbol_(";") self.expect_symbol_(";")
if platformID == 1 and platEncID == 0: encoding = getEncoding(platformID, platEncID, langID)
string = self.unescape_mac_name_string(string) if encoding is None:
elif platformID == 3 and platEncID == 1: raise FeatureLibError("Unsupported encoding", location)
string = self.unescape_windows_name_string(string) unescaped = self.unescape_string_(string, encoding)
return platformID, platEncID, langID, unescaped
return platformID, platEncID, langID, string
def parse_nameid_(self): def parse_nameid_(self):
assert self.cur_token_ == "nameid", self.cur_token_ assert self.cur_token_ == "nameid", self.cur_token_
@ -905,21 +906,27 @@ class Parser(object):
return self.ast.NameRecord(location, nameID, platformID, platEncID, return self.ast.NameRecord(location, nameID, platformID, platEncID,
langID, string) langID, string)
def unescape_mac_name_string(self, string): def unescape_string_(self, string, encoding):
def unescape(match): if encoding == "utf_16_be":
s = re.sub(r"\\[0-9a-fA-F]{4}", self.unescape_unichr_, string)
else:
unescape = lambda m: self.unescape_byte_(m, encoding)
s = re.sub(r"\\[0-9a-fA-F]{2}", unescape, string)
# We now have a Unicode string, but it might contain surrogate pairs.
# We convert surrogates to actual Unicode by round-tripping through
# Python's UTF-16 codec in a special mode.
utf16 = tobytes(s, "utf_16_be", "surrogatepass")
return tounicode(utf16, "utf_16_be")
@staticmethod
def unescape_unichr_(match):
n = match.group(0)[1:] n = match.group(0)[1:]
c = bytechr(int(n, 16)).decode('mac_roman') return unichr(int(n, 16))
return c
return re.sub(r'\\[0-9a-fA-F]{2}', unescape, string) @staticmethod
def unescape_byte_(match, encoding):
def unescape_windows_name_string(self, string):
def unescape(match):
n = match.group(0)[1:] n = match.group(0)[1:]
c = unichr(int(n, 16)) return bytechr(int(n, 16)).decode(encoding)
return c
return re.sub(r'\\[0-9a-fA-F]{4}', unescape, string)
def parse_table_BASE_(self, table): def parse_table_BASE_(self, table):
statements = table.statements statements = table.statements

View File

@ -1,4 +1,7 @@
- [feaLib] include statements now resolve relative paths like makeotf (#838) - [feaLib] include statements now resolve relative paths like makeotf (#838)
- [feaLib] `table name` now handles Unicode codepoints beyond the Basic
Multilingual Plane, and also supports old-style MacOS platform encodings (#842)
- [feaLib] correctly escape string literals when emitting feature syntax (#780)
3.7.0 (released 2017-02-11) 3.7.0 (released 2017-02-11)
--------------------------- ---------------------------

View File

@ -4,5 +4,9 @@ feature size {
# 139 - range end (inclusive, decipoints) # 139 - range end (inclusive, decipoints)
sizemenuname "Win MinionPro Size Name"; sizemenuname "Win MinionPro Size Name";
sizemenuname 1 "Mac MinionPro Size Name"; sizemenuname 1 "Mac MinionPro Size Name";
sizemenuname 1 21 0 "Mac MinionPro Size Name"; # The specification says: sizemenuname 1 21 0 "Mac MinionPro Size Name";
# which means Macintosh platform, MacOS Thai encoding, English language.
# Since fonttools currently does not support the MacOS Thai encoding,
# we instead use MacOS Roman encoding (0), Swedish language (5) for our test.
sizemenuname 1 0 5 "Mac MinionPro Size Name";
} size; } size;

View File

@ -8,7 +8,7 @@
<namerecord nameID="256" platformID="1" platEncID="0" langID="0x0" unicode="True"> <namerecord nameID="256" platformID="1" platEncID="0" langID="0x0" unicode="True">
Mac MinionPro Size Name Mac MinionPro Size Name
</namerecord> </namerecord>
<namerecord nameID="256" platformID="1" platEncID="21" langID="0x0" unicode="True"> <namerecord nameID="256" platformID="1" platEncID="0" langID="0x5" unicode="True">
Mac MinionPro Size Name Mac MinionPro Size Name
</namerecord> </namerecord>
</name> </name>

View File

@ -928,12 +928,10 @@ class ParserTest(unittest.TestCase):
self.assertEquals(name.asFea(), r'nameid 9 "Quotation \0022Mark\0022";') self.assertEquals(name.asFea(), r'nameid 9 "Quotation \0022Mark\0022";')
def test_nameid_windows_utf16_surroates(self): def test_nameid_windows_utf16_surroates(self):
pass doc = self.parse(r'table name { nameid 9 "Carrot \D83E\DD55"; } name;')
# TODO: https://github.com/fonttools/fonttools/issues/842 name = doc.statements[0].statements[0]
# doc = self.parse(r'table name { nameid 9 "Carrot \D83E\DD55"; } name;') self.assertEquals(name.string, r"Carrot 🥕")
# name = doc.statements[0].statements[0] self.assertEquals(name.asFea(), r'nameid 9 "Carrot \d83e\dd55";')
# self.assertEquals(name.string, r"Carrot 🥕")
# self.assertEquals(name.asFea(), r'nameid 9 "Carrot \d83e\dd55";')
def test_nameid_mac_roman(self): def test_nameid_mac_roman(self):
doc = self.parse( doc = self.parse(
@ -956,9 +954,8 @@ class ParserTest(unittest.TestCase):
self.assertEquals(name.platformID, 1) self.assertEquals(name.platformID, 1)
self.assertEquals(name.platEncID, 0) self.assertEquals(name.platEncID, 0)
self.assertEquals(name.langID, 18) self.assertEquals(name.langID, 18)
# TODO: https://github.com/fonttools/fonttools/issues/842 self.assertEquals(name.string, "Jovica Veljović")
# self.assertEquals(name.string, "Jovica Veljović") self.assertEquals(name.asFea(), r'nameid 9 1 0 18 "Jovica Veljovi\e6";')
# self.assertEquals(name.asFea(), r'nameid 9 1 0 18 "Jovica Veljovi\e6";')
def test_nameid_unsupported_platform(self): def test_nameid_unsupported_platform(self):
self.assertRaisesRegex( self.assertRaisesRegex(