2015-04-16 03:10:18 -07:00
|
|
|
"""Extend the Python codecs module with a few encodings that are used in OpenType (name table)
|
2019-03-06 16:01:28 +01:00
|
|
|
but missing from Python. See https://github.com/fonttools/fonttools/issues/236 for details."""
|
2015-04-16 03:10:18 -07:00
|
|
|
|
|
|
|
from fontTools.misc.py23 import *
|
|
|
|
import codecs
|
2015-04-19 04:28:22 -07:00
|
|
|
import encodings
|
2015-04-16 03:10:18 -07:00
|
|
|
|
|
|
|
class ExtendCodec(codecs.Codec):
|
|
|
|
|
|
|
|
def __init__(self, name, base_encoding, mapping):
|
|
|
|
self.name = name
|
|
|
|
self.base_encoding = base_encoding
|
|
|
|
self.mapping = mapping
|
2015-06-11 17:05:15 -07:00
|
|
|
self.reverse = {v:k for k,v in mapping.items()}
|
2015-04-16 03:10:18 -07:00
|
|
|
self.max_len = max(len(v) for v in mapping.values())
|
2015-04-20 10:00:18 -07:00
|
|
|
self.info = codecs.CodecInfo(name=self.name, encode=self.encode, decode=self.decode)
|
2015-04-16 03:10:18 -07:00
|
|
|
codecs.register_error(name, self.error)
|
|
|
|
|
2020-12-24 20:37:52 +01:00
|
|
|
def _map(self, mapper, output_type, exc_type, input, errors):
|
|
|
|
base_error_handler = codecs.lookup_error(errors)
|
2015-04-16 03:10:18 -07:00
|
|
|
length = len(input)
|
2020-12-24 20:37:52 +01:00
|
|
|
out = output_type()
|
2015-04-16 03:10:18 -07:00
|
|
|
while input:
|
2020-12-24 20:37:52 +01:00
|
|
|
# first try to use self.error as the error handler
|
2015-04-16 03:10:18 -07:00
|
|
|
try:
|
2020-12-24 20:37:52 +01:00
|
|
|
part = mapper(input, self.base_encoding, errors=self.name)
|
2015-04-16 03:10:18 -07:00
|
|
|
out += part
|
2020-12-24 20:37:52 +01:00
|
|
|
break # All converted
|
|
|
|
except exc_type as e:
|
|
|
|
# else convert the correct part, handle error as requested and continue
|
|
|
|
out += mapper(input[:e.start], self.base_encoding, self.name)
|
|
|
|
replacement, pos = base_error_handler(e)
|
2015-04-16 03:10:18 -07:00
|
|
|
out += replacement
|
|
|
|
input = input[pos:]
|
|
|
|
return out, length
|
|
|
|
|
2020-12-24 20:37:52 +01:00
|
|
|
def encode(self, input, errors='strict'):
|
|
|
|
return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)
|
|
|
|
|
2015-04-16 03:10:18 -07:00
|
|
|
def decode(self, input, errors='strict'):
|
2020-12-24 20:37:52 +01:00
|
|
|
return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)
|
2015-04-16 03:10:18 -07:00
|
|
|
|
|
|
|
def error(self, e):
|
|
|
|
if isinstance(e, UnicodeDecodeError):
|
2015-04-16 13:31:08 -07:00
|
|
|
for end in range(e.start + 1, e.end + 1):
|
2015-04-16 03:10:18 -07:00
|
|
|
s = e.object[e.start:end]
|
|
|
|
if s in self.mapping:
|
|
|
|
return self.mapping[s], end
|
|
|
|
elif isinstance(e, UnicodeEncodeError):
|
|
|
|
for end in range(e.start + 1, e.start + self.max_len + 1):
|
|
|
|
s = e.object[e.start:end]
|
|
|
|
if s in self.reverse:
|
|
|
|
return self.reverse[s], end
|
|
|
|
e.encoding = self.name
|
|
|
|
raise e
|
|
|
|
|
2015-04-16 18:24:07 -07:00
|
|
|
|
2015-04-16 03:10:18 -07:00
|
|
|
_extended_encodings = {
|
2015-04-19 04:24:55 -07:00
|
|
|
"x_mac_japanese_ttx": ("shift_jis", {
|
2015-04-26 00:15:26 -04:00
|
|
|
b"\xFC": unichr(0x007C),
|
|
|
|
b"\x7E": unichr(0x007E),
|
|
|
|
b"\x80": unichr(0x005C),
|
|
|
|
b"\xA0": unichr(0x00A0),
|
|
|
|
b"\xFD": unichr(0x00A9),
|
|
|
|
b"\xFE": unichr(0x2122),
|
|
|
|
b"\xFF": unichr(0x2026),
|
2015-04-16 03:10:18 -07:00
|
|
|
}),
|
2015-04-19 04:46:12 -07:00
|
|
|
"x_mac_trad_chinese_ttx": ("big5", {
|
2015-04-26 00:51:22 -04:00
|
|
|
b"\x80": unichr(0x005C),
|
|
|
|
b"\xA0": unichr(0x00A0),
|
|
|
|
b"\xFD": unichr(0x00A9),
|
|
|
|
b"\xFE": unichr(0x2122),
|
|
|
|
b"\xFF": unichr(0x2026),
|
2015-04-16 03:10:18 -07:00
|
|
|
}),
|
2015-04-19 04:24:55 -07:00
|
|
|
"x_mac_korean_ttx": ("euc_kr", {
|
2015-04-26 00:51:22 -04:00
|
|
|
b"\x80": unichr(0x00A0),
|
|
|
|
b"\x81": unichr(0x20A9),
|
|
|
|
b"\x82": unichr(0x2014),
|
|
|
|
b"\x83": unichr(0x00A9),
|
|
|
|
b"\xFE": unichr(0x2122),
|
|
|
|
b"\xFF": unichr(0x2026),
|
2015-04-16 03:10:18 -07:00
|
|
|
}),
|
2015-04-19 04:46:12 -07:00
|
|
|
"x_mac_simp_chinese_ttx": ("gb2312", {
|
2015-04-26 00:51:22 -04:00
|
|
|
b"\x80": unichr(0x00FC),
|
|
|
|
b"\xA0": unichr(0x00A0),
|
|
|
|
b"\xFD": unichr(0x00A9),
|
|
|
|
b"\xFE": unichr(0x2122),
|
|
|
|
b"\xFF": unichr(0x2026),
|
2015-04-16 03:10:18 -07:00
|
|
|
}),
|
|
|
|
}
|
|
|
|
|
2015-04-19 04:28:22 -07:00
|
|
|
_cache = {}
|
2015-04-16 03:10:18 -07:00
|
|
|
|
|
|
|
def search_function(name):
    """Codec search function for the ``*_ttx`` encodings defined in this module.

    Args:
        name: Encoding name being looked up.  It is normalized with
            ``encodings.normalize_encoding`` before being matched.

    Returns:
        The ``codecs.CodecInfo`` of the matching codec, or ``None`` when the
        name is not one of the extended encodings or when none of its
        candidate base encodings is available.
    """
    name = encodings.normalize_encoding(name)  # Rather undocumented...
    if name not in _extended_encodings:
        return None

    codec = _cache.get(name)
    if codec is None:
        fallback_encoding, mapping = _extended_encodings[name]
        assert name[-4:] == "_ttx"
        # Python 2 didn't have any of the encodings that we are implementing
        # in this file.  Python 3 added aliases for the East Asian ones, mapping
        # them "temporarily" to the same base encoding as us, with a comment
        # suggesting that full implementation will appear some time later.
        # As such, try the Python version of the x_mac_... first, if that is found,
        # use *that* as our base encoding.  This would make our encoding upgrade
        # to the full encoding when and if Python finally implements that.
        # http://bugs.python.org/issue24041
        for candidate in (name[:-4], fallback_encoding):
            try:
                codecs.lookup(candidate)
            except LookupError:
                continue
            codec = _cache[name] = ExtendCodec(name, candidate, mapping)
            break
        else:
            # No usable base encoding: report "not found" the way search
            # functions are expected to, instead of failing on the cache lookup.
            return None
    return codec.info
|
|
|
|
|
|
|
|
# Register the search function with the codecs module when this module is imported.
codecs.register(search_function)
|