2014-01-14 15:07:50 +08:00
|
|
|
from __future__ import print_function, division, absolute_import
|
2013-11-27 17:27:45 -05:00
|
|
|
from fontTools.misc.py23 import *
|
2013-09-17 16:59:39 -04:00
|
|
|
from fontTools.misc import sstruct
|
1999-12-16 21:34:53 +00:00
|
|
|
from fontTools.misc.textTools import safeEval
|
2015-04-19 03:36:20 -07:00
|
|
|
from fontTools.misc.encodingTools import getEncoding
|
2013-11-27 17:27:45 -05:00
|
|
|
from . import DefaultTable
|
|
|
|
import struct
|
1999-12-16 21:34:53 +00:00
|
|
|
|
|
|
|
# sstruct description of one entry in the 'name' table's record array.
# Every field uses the 'H' format character and the layout is big endian;
# the field names become attributes on the object being packed/unpacked.
# Only "name: formatchar" lines (plus the byte-order marker and comments)
# belong inside this literal.
nameRecordFormat = """
    > # big endian
    platformID: H
    platEncID: H
    langID: H
    nameID: H
    length: H
    offset: H
"""

# Size in bytes of one packed name record, derived from the format above so
# the two can never drift apart.
nameRecordSize = sstruct.calcsize(nameRecordFormat)
|
|
|
|
|
|
|
|
|
1999-12-16 21:34:53 +00:00
|
|
|
class table__n_a_m_e(DefaultTable.DefaultTable):
    """The 'name' table: a list of NameRecord objects kept in ``self.names``.

    ``self.names`` is created by decompile(), compile(), fromXML() or
    setName(); a freshly constructed table may not have the attribute yet,
    so every method here tolerates its absence.
    """

    def decompile(self, data, ttFont):
        """Parse the binary table ``data`` into ``self.names``.

        The first six bytes hold three big-endian 16-bit values: the table
        format, the number of records and the offset of the string storage.
        Each record is then unpacked with ``nameRecordFormat`` and its string
        is sliced out of the string storage.

        Malformed input is handled on a best-effort basis: a wrong string
        offset only produces a warning, a truncated record array stops the
        loop, and a record whose string lies outside the string storage is
        skipped with a warning. ``ttFont`` is not used.
        """
        tableFormat, n, stringOffset = struct.unpack(">HHH", data[:6])
        expectedStringOffset = 6 + n * nameRecordSize
        if stringOffset != expectedStringOffset:
            # XXX we need a warn function
            print("Warning: 'name' table stringOffset incorrect. Expected: %s; Actual: %s" % (expectedStringOffset, stringOffset))
        stringData = data[stringOffset:]
        data = data[6:]
        self.names = []
        for i in range(n):
            if len(data) < nameRecordSize:
                # compensate for buggy font: the record array is shorter
                # than the header claims
                break
            name, data = sstruct.unpack2(nameRecordFormat, data, NameRecord())
            name.string = stringData[name.offset:name.offset+name.length]
            if len(name.string) != name.length:
                # The slice came up short, i.e. the record points (partly)
                # outside the string storage. An assert would vanish under
                # -O and crash otherwise, so skip the record explicitly.
                print("Warning: 'name' table record #%d points outside the string storage; skipped." % i)
                continue
            # offset/length only make sense for the binary layout; they are
            # recomputed by compile().
            del name.offset, name.length
            self.names.append(name)

    def compile(self, ttFont):
        """Return the binary representation of the table as bytes.

        Side effects: ``self.names`` is created if missing and sorted in
        place (see NameRecord.__lt__()), and every record is left with
        ``offset`` and ``length`` attributes describing where its string
        was stored. Records whose encoded strings are identical share one
        copy in the string storage. ``ttFont`` is not used.
        """
        if not hasattr(self, "names"):
            # only happens when there are NO name table entries read
            # from the TTX file
            self.names = []
        names = self.names
        names.sort()  # sort according to the spec; see NameRecord.__lt__()
        n = len(names)
        stringOffset = 6 + n * nameRecordSize
        # Collect the pieces and join once at the end instead of growing two
        # byte strings by repeated concatenation.
        recordChunks = [struct.pack(">HHH", 0, n, stringOffset)]
        stringChunks = []
        nextOffset = 0
        done = {}  # remember the data so we can reuse the "pointers"
        for name in names:
            string = name.toBytes()
            if string in done:
                name.offset, name.length = done[string]
            else:
                name.offset, name.length = done[string] = nextOffset, len(string)
                stringChunks.append(string)
                nextOffset += len(string)
            recordChunks.append(sstruct.pack(nameRecordFormat, name))
        return bytesjoin(recordChunks + stringChunks)

    def toXML(self, writer, ttFont):
        """Write every name record to ``writer``; writes nothing when the
        table has no ``names`` attribute."""
        for name in getattr(self, "names", []):
            name.toXML(writer, ttFont)

    def fromXML(self, name, attrs, content, ttFont):
        """Append a NameRecord built from one ``namerecord`` XML element.

        Elements with any other tag ``name`` are ignored.
        """
        if name != "namerecord":
            return  # ignore unknown tags
        if not hasattr(self, "names"):
            self.names = []
        # Use a separate local so the tag name is not clobbered by the new
        # record and the record receives the real tag name.
        record = NameRecord()
        self.names.append(record)
        record.fromXML(name, attrs, content, ttFont)

    def getName(self, nameID, platformID, platEncID, langID=None):
        """Return the first record matching the given IDs, or None.

        ``langID=None`` acts as a wildcard and matches any language.
        """
        for namerecord in getattr(self, "names", []):
            if (namerecord.nameID == nameID and
                    namerecord.platformID == platformID and
                    namerecord.platEncID == platEncID):
                if langID is None or namerecord.langID == langID:
                    return namerecord
        return None  # not found

    def getDebugName(self, nameID):
        """Return a decoded string for ``nameID`` suitable for display.

        Records that cannot be decoded are skipped. The first record whose
        (platformID, langID) is (1, 0) or (3, 0x409) -- the pairs this method
        treats as English -- wins; otherwise the last decodable record is
        used. Returns None when nothing usable (or only empty text) exists.
        """
        englishName = someName = None
        for name in getattr(self, "names", []):
            if name.nameID != nameID:
                continue
            try:
                unistr = name.toUnicode()
            except UnicodeDecodeError:
                continue

            someName = unistr
            if (name.platformID, name.langID) in ((1, 0), (3, 0x409)):
                englishName = unistr
                break
        # Empty strings are deliberately treated like "not found".
        return englishName or someName or None

    def setName(self, string, nameID, platformID, platEncID, langID):
        """Set the string of the matching record, adding one if needed.

        ``string`` must be a text string; it is encoded with the encoding
        the record reports through getEncoding(). The record is looked up
        with getName(), so a ``langID`` of None updates the first record
        matching the other three IDs regardless of its language.
        """
        if not hasattr(self, 'names'):
            self.names = []
        namerecord = self.getName(nameID, platformID, platEncID, langID)
        if namerecord is None:
            namerecord = NameRecord()
            namerecord.nameID = nameID
            namerecord.platformID = platformID
            namerecord.platEncID = platEncID
            namerecord.langID = langID
            self.names.append(namerecord)
        namerecord.string = string.encode(namerecord.getEncoding())
|
2015-10-01 10:11:48 +01:00
|
|
|
|
|
|
|
|
2013-11-28 14:26:58 -05:00
|
|
|
class NameRecord(object):
    """A single entry of the 'name' table.

    Instances carry the attributes ``platformID``, ``platEncID``, ``langID``,
    ``nameID`` and ``string``; they are assigned by the owning table or by
    fromXML(), not by a constructor. ``string`` may hold either bytes or a
    Unicode string (see toUnicode() and toBytes()).
    """

    def getEncoding(self, default='ascii'):
        """Returns the Python encoding name for this name entry based on its platformID,
        platEncID, and langID.  If encoding for these values is not known, by default
        'ascii' is returned.  That can be overridden by passing a value to the default
        argument.
        """
        return getEncoding(self.platformID, self.platEncID, self.langID, default)

    def encodingIsUnicodeCompatible(self):
        """Return True if this record's encoding is one of 'utf_16_be',
        'ucs2be', 'ascii' or 'latin1'; unknown encodings yield False."""
        return self.getEncoding(None) in ['utf_16_be', 'ucs2be', 'ascii', 'latin1']

    def __str__(self):
        """Return toStr() with undecodable/unencodable data backslash-escaped."""
        return self.toStr(errors='backslashreplace')

    def isUnicode(self):
        """Return True if platformID is 0, or platformID is 3 and platEncID
        is 0, 1 or 10."""
        return (self.platformID == 0 or
                (self.platformID == 3 and self.platEncID in [0, 1, 10]))

    def toUnicode(self, errors='strict'):
        """
        If self.string is a Unicode string, return it; otherwise try decoding the
        bytes in self.string to a Unicode string using the encoding of this
        entry as returned by self.getEncoding(); Note that self.getEncoding()
        returns 'ascii' if the encoding is unknown to the library.

        Certain heuristics are performed to recover data from bytes that are
        ill-formed in the chosen encoding, or that otherwise look misencoded
        (mostly around bad UTF-16BE encoded bytes, or bytes that look like UTF-16BE
        but marked otherwise).  If the bytes are ill-formed and the heuristics fail,
        the error is handled according to the errors parameter to this function, which is
        passed to the underlying decode() function; by default it throws a
        UnicodeDecodeError exception.

        Note: The mentioned heuristics mean that roundtripping a font to XML and back
        to binary might recover some misencoded data whereas just loading the font
        and saving it back will not change them.
        """
        def isascii(b):
            # printable ASCII plus tab, line feed and carriage return
            return (b >= 0x20 and b <= 0x7E) or b in [0x09, 0x0A, 0x0D]

        encoding = self.getEncoding()
        string = self.string

        if encoding == 'utf_16_be' and len(string) % 2 == 1:
            # Recover badly encoded UTF-16 strings that have an odd number of bytes:
            # - If the last byte is zero, drop it.  Otherwise,
            # - If all the odd bytes are zero and all the even bytes are ASCII,
            #   prepend one zero byte.  Otherwise,
            # - If first byte is zero and all other bytes are ASCII, insert zero
            #   bytes between consecutive ASCII bytes.
            #
            # (Yes, I've seen all of these in the wild... sigh)
            if byteord(string[-1]) == 0:
                string = string[:-1]
            elif all(byteord(b) == 0 if i % 2 else isascii(byteord(b)) for i,b in enumerate(string)):
                string = b'\0' + string
            elif byteord(string[0]) == 0 and all(isascii(byteord(b)) for b in string[1:]):
                string = bytesjoin(b'\0'+bytechr(byteord(b)) for b in string[1:])

        string = tounicode(string, encoding=encoding, errors=errors)

        # If decoded strings still looks like UTF-16BE, it suggests a double-encoding.
        # Fix it up.
        if all(ord(c) == 0 if i % 2 == 0 else isascii(ord(c)) for i,c in enumerate(string)):
            # If string claims to be Mac encoding, but looks like UTF-16BE with ASCII text,
            # narrow it down.
            string = ''.join(c for c in string[1::2])

        return string

    def toBytes(self, errors='strict'):
        """ If self.string is a bytes object, return it; otherwise try encoding
        the Unicode string in self.string to bytes using the encoding of this
        entry as returned by self.getEncoding(); Note that self.getEncoding()
        returns 'ascii' if the encoding is unknown to the library.

        If the Unicode string cannot be encoded to bytes in the chosen encoding,
        the error is handled according to the errors parameter to this function,
        which is passed to the underlying encode() function; by default it throws a
        UnicodeEncodeError exception.
        """
        return tobytes(self.string, encoding=self.getEncoding(), errors=errors)

    def toStr(self, errors='strict'):
        """Return the string as the native ``str`` type: toBytes() where
        ``str`` is ``bytes`` (Python 2), toUnicode() otherwise (Python 3)."""
        if str == bytes:
            # python 2
            return self.toBytes(errors)
        else:
            # python 3
            return self.toUnicode(errors)

    def toXML(self, writer, ttFont):
        """Write this record as a ``namerecord`` element.

        The text content is the decoded string when decoding succeeds, and
        the raw string written with write8bit() otherwise. A ``unicode``
        attribute records which of the two was written whenever that cannot
        be inferred from the encoding alone. ``ttFont`` is not used.
        """
        try:
            unistr = self.toUnicode()
        except UnicodeDecodeError:
            unistr = None
        attrs = [
            ("nameID", self.nameID),
            ("platformID", self.platformID),
            ("platEncID", self.platEncID),
            ("langID", hex(self.langID)),
        ]

        if unistr is None or not self.encodingIsUnicodeCompatible():
            attrs.append(("unicode", unistr is not None))

        writer.begintag("namerecord", attrs)
        writer.newline()
        if unistr is not None:
            writer.write(unistr)
        else:
            writer.write8bit(self.string)
        writer.newline()
        writer.endtag("namerecord")
        writer.newline()

    def fromXML(self, name, attrs, content, ttFont):
        """Populate this record from a ``namerecord`` element.

        The four IDs are evaluated from ``attrs``. The stripped text content
        is encoded with the record's own encoding when that encoding is
        Unicode compatible or the element carries a true ``unicode``
        attribute; otherwise it is encoded as latin1, undoing write8bit().
        ``name`` and ``ttFont`` are not used.
        """
        self.nameID = safeEval(attrs["nameID"])
        self.platformID = safeEval(attrs["platformID"])
        self.platEncID = safeEval(attrs["platEncID"])
        self.langID = safeEval(attrs["langID"])
        s = strjoin(content).strip()
        encoding = self.getEncoding()
        if self.encodingIsUnicodeCompatible() or safeEval(attrs.get("unicode", "False")):
            self.string = s.encode(encoding)
        else:
            # This is the inverse of write8bit...
            self.string = s.encode("latin1")

    def _sortKey(self):
        """Return the tuple this record is ordered by.

        The string component is always bytes, so records holding a mix of
        bytes and Unicode strings stay comparable and the order matches the
        data that gets written. Raises AttributeError for an incomplete
        record and UnicodeEncodeError when the string cannot be encoded.
        """
        string = self.string
        if not isinstance(string, bytes):
            string = self.toBytes()
        return (self.platformID, self.platEncID, self.langID, self.nameID, string)

    def __lt__(self, other):
        """Order records by platformID, platEncID, langID, nameID and
        finally the encoded string.

        Returns NotImplemented when ``other`` is not of the same type, when
        either record lacks one of the compared attributes, or when a string
        cannot be encoded.
        """
        if type(self) != type(other):
            return NotImplemented

        # implemented so that list.sort() sorts according to the spec.
        try:
            return self._sortKey() < other._sortKey()
        except (UnicodeEncodeError, AttributeError):
            # Incomplete record or unencodable string: let Python report the
            # comparison as unsupported rather than fail inside the method.
            return NotImplemented

    def __repr__(self):
        return "<NameRecord NameID=%d; PlatformID=%d; LanguageID=%d>" % (
                self.nameID, self.platformID, self.langID)
|