[feaLib] Make nameid parsing more robust
We now correctly handle nameid statements with surrogate pairs and old-style macOS-encoded names (provided that fonttools supports the specified encoding). Resolves https://github.com/fonttools/fonttools/issues/842.
This commit is contained in:
parent
b22df7ff48
commit
eac7ef89c0
@ -2,6 +2,7 @@ from __future__ import print_function, division, absolute_import
|
||||
from __future__ import unicode_literals
|
||||
from fontTools.feaLib.error import FeatureLibError
|
||||
from fontTools.feaLib.lexer import Lexer, IncludingLexer
|
||||
from fontTools.misc.encodingTools import getEncoding
|
||||
from fontTools.misc.py23 import *
|
||||
import fontTools.feaLib.ast as ast
|
||||
import logging
|
||||
@ -863,14 +864,15 @@ class Parser(object):
|
||||
langID = None
|
||||
if self.next_token_type_ == Lexer.NUMBER:
|
||||
platformID = self.expect_number_()
|
||||
location = self.cur_token_location_
|
||||
if platformID not in (1, 3):
|
||||
raise FeatureLibError("Expected platform id 1 or 3",
|
||||
self.cur_token_location_)
|
||||
raise FeatureLibError("Expected platform id 1 or 3", location)
|
||||
if self.next_token_type_ == Lexer.NUMBER:
|
||||
platEncID = self.expect_number_()
|
||||
langID = self.expect_number_()
|
||||
else:
|
||||
platformID = 3
|
||||
location = self.cur_token_location_
|
||||
|
||||
if platformID == 1: # Macintosh
|
||||
platEncID = platEncID or 0 # Roman
|
||||
@ -882,12 +884,11 @@ class Parser(object):
|
||||
string = self.expect_string_()
|
||||
self.expect_symbol_(";")
|
||||
|
||||
if platformID == 1 and platEncID == 0:
|
||||
string = self.unescape_mac_name_string(string)
|
||||
elif platformID == 3 and platEncID == 1:
|
||||
string = self.unescape_windows_name_string(string)
|
||||
|
||||
return platformID, platEncID, langID, string
|
||||
encoding = getEncoding(platformID, platEncID, langID)
|
||||
if encoding is None:
|
||||
raise FeatureLibError("Unsupported encoding", location)
|
||||
unescaped = self.unescape_string_(string, encoding)
|
||||
return platformID, platEncID, langID, unescaped
|
||||
|
||||
def parse_nameid_(self):
|
||||
assert self.cur_token_ == "nameid", self.cur_token_
|
||||
@ -905,21 +906,27 @@ class Parser(object):
|
||||
return self.ast.NameRecord(location, nameID, platformID, platEncID,
|
||||
langID, string)
|
||||
|
||||
def unescape_mac_name_string(self, string):
|
||||
def unescape(match):
|
||||
n = match.group(0)[1:]
|
||||
c = bytechr(int(n, 16)).decode('mac_roman')
|
||||
return c
|
||||
def unescape_string_(self, string, encoding):
|
||||
if encoding == "utf_16_be":
|
||||
s = re.sub(r"\\[0-9a-fA-F]{4}", self.unescape_unichr_, string)
|
||||
else:
|
||||
unescape = lambda m: self.unescape_byte_(m, encoding)
|
||||
s = re.sub(r"\\[0-9a-fA-F]{2}", unescape, string)
|
||||
# We now have a Unicode string, but it might contain surrogate pairs.
|
||||
# We convert surrogates to actual Unicode by round-tripping through
|
||||
# Python's UTF-16 codec in a special mode.
|
||||
utf16 = tobytes(s, "utf_16_be", "surrogatepass")
|
||||
return tounicode(utf16, "utf_16_be")
|
||||
|
||||
return re.sub(r'\\[0-9a-fA-F]{2}', unescape, string)
|
||||
@staticmethod
|
||||
def unescape_unichr_(match):
|
||||
n = match.group(0)[1:]
|
||||
return unichr(int(n, 16))
|
||||
|
||||
def unescape_windows_name_string(self, string):
|
||||
def unescape(match):
|
||||
n = match.group(0)[1:]
|
||||
c = unichr(int(n, 16))
|
||||
return c
|
||||
|
||||
return re.sub(r'\\[0-9a-fA-F]{4}', unescape, string)
|
||||
@staticmethod
|
||||
def unescape_byte_(match, encoding):
|
||||
n = match.group(0)[1:]
|
||||
return bytechr(int(n, 16)).decode(encoding)
|
||||
|
||||
def parse_table_BASE_(self, table):
|
||||
statements = table.statements
|
||||
|
3
NEWS.rst
3
NEWS.rst
@ -1,4 +1,7 @@
|
||||
- [feaLib] include statements now resolve relative paths like makeotf (#838)
|
||||
- [feaLib] `table name` now handles Unicode codepoints beyond the Basic
|
||||
Multilingual Plane, and also supports old-style MacOS platform encodings (#842)
|
||||
- [feaLib] correctly escape string literals when emitting feature syntax (#780)
|
||||
|
||||
3.7.0 (released 2017-02-11)
|
||||
---------------------------
|
||||
|
@ -4,5 +4,9 @@ feature size {
|
||||
# 139 - range end (inclusive, decipoints)
|
||||
sizemenuname "Win MinionPro Size Name";
|
||||
sizemenuname 1 "Mac MinionPro Size Name";
|
||||
sizemenuname 1 21 0 "Mac MinionPro Size Name";
|
||||
# The specification says: sizemenuname 1 21 0 "Mac MinionPro Size Name";
|
||||
# which means Macintosh platform, MacOS Thai encoding, English language.
|
||||
# Since fonttools currently does not support the MacOS Thai encoding,
|
||||
# we instead use MacOS Roman encoding (0) and Swedish language (5) for our test.
|
||||
sizemenuname 1 0 5 "Mac MinionPro Size Name";
|
||||
} size;
|
||||
|
@ -8,7 +8,7 @@
|
||||
<namerecord nameID="256" platformID="1" platEncID="0" langID="0x0" unicode="True">
|
||||
Mac MinionPro Size Name
|
||||
</namerecord>
|
||||
<namerecord nameID="256" platformID="1" platEncID="21" langID="0x0" unicode="True">
|
||||
<namerecord nameID="256" platformID="1" platEncID="0" langID="0x5" unicode="True">
|
||||
Mac MinionPro Size Name
|
||||
</namerecord>
|
||||
</name>
|
||||
|
@ -928,12 +928,10 @@ class ParserTest(unittest.TestCase):
|
||||
self.assertEquals(name.asFea(), r'nameid 9 "Quotation \0022Mark\0022";')
|
||||
|
||||
def test_nameid_windows_utf16_surroates(self):
|
||||
pass
|
||||
# TODO: https://github.com/fonttools/fonttools/issues/842
|
||||
# doc = self.parse(r'table name { nameid 9 "Carrot \D83E\DD55"; } name;')
|
||||
# name = doc.statements[0].statements[0]
|
||||
# self.assertEquals(name.string, r"Carrot 🥕")
|
||||
# self.assertEquals(name.asFea(), r'nameid 9 "Carrot \d83e\dd55";')
|
||||
doc = self.parse(r'table name { nameid 9 "Carrot \D83E\DD55"; } name;')
|
||||
name = doc.statements[0].statements[0]
|
||||
self.assertEquals(name.string, r"Carrot 🥕")
|
||||
self.assertEquals(name.asFea(), r'nameid 9 "Carrot \d83e\dd55";')
|
||||
|
||||
def test_nameid_mac_roman(self):
|
||||
doc = self.parse(
|
||||
@ -956,9 +954,8 @@ class ParserTest(unittest.TestCase):
|
||||
self.assertEquals(name.platformID, 1)
|
||||
self.assertEquals(name.platEncID, 0)
|
||||
self.assertEquals(name.langID, 18)
|
||||
# TODO: https://github.com/fonttools/fonttools/issues/842
|
||||
# self.assertEquals(name.string, "Jovica Veljović")
|
||||
# self.assertEquals(name.asFea(), r'nameid 9 1 0 18 "Jovica Veljovi\e6";')
|
||||
self.assertEquals(name.string, "Jovica Veljović")
|
||||
self.assertEquals(name.asFea(), r'nameid 9 1 0 18 "Jovica Veljovi\e6";')
|
||||
|
||||
def test_nameid_unsupported_platform(self):
|
||||
self.assertRaisesRegex(
|
||||
|
Loading…
x
Reference in New Issue
Block a user