fonttools/Lib/fontTools/encodings/codecs.py

"""Extend the Python codecs module with a few encodings that are used in OpenType (name table)
but missing from Python.  See https://github.com/fonttools/fonttools/issues/236 for details."""

from fontTools.misc.py23 import *
import codecs
import encodings

class ExtendCodec(codecs.Codec):
	"""Codec that layers a small table of extra mappings on top of a base codec.

	Conversion is delegated to ``base_encoding``.  Whenever the base codec
	reports an error, the handler registered by this class (``self.error``)
	gets a chance to resolve it from ``mapping`` (bytes to text, used when
	decoding) or from its inverse (text to bytes, used when encoding).  Only
	if the table has no answer is the error handler requested by the caller
	consulted.
	"""

	def __init__(self, name, base_encoding, mapping):
		"""Set up the codec and register its error handler.

		name: name of the extended encoding; also used as the name under
			which ``self.error`` is registered as a codecs error handler.
		base_encoding: name of the codec that does the bulk of the work.
		mapping: dict mapping byte strings to the text they decode to.
		"""
		self.name = name
		self.base_encoding = base_encoding
		self.mapping = mapping
		self.reverse = {v:k for k,v in mapping.items()}
		# Longest text value in the table; bounds the look-ahead in error()
		# when encoding.  default=0 keeps an empty table from raising.
		self.max_len = max((len(v) for v in mapping.values()), default=0)
		self.info = codecs.CodecInfo(name=self.name, encode=self.encode, decode=self.decode)
		codecs.register_error(name, self.error)

	def _map(self, mapper, output_type, exc_type, input, errors):
		"""Shared implementation of encode() and decode().

		mapper: ``codecs.encode`` or ``codecs.decode``.
		output_type: type of the result (``bytes`` or ``str``).
		exc_type: exception type the base codec raises for this direction.
		input: data to convert.
		errors: name of the error handler requested by the caller.

		Returns ``(converted, length)`` where ``length`` is the length of the
		original input.
		"""
		base_error_handler = codecs.lookup_error(errors)
		length = len(input)
		out = output_type()
		while input:
			# first try to use self.error as the error handler
			try:
				part = mapper(input, self.base_encoding, errors=self.name)
				out += part
				break  # All converted
			except exc_type as e:
				# else convert the correct part, handle error as requested and continue
				out += mapper(input[:e.start], self.base_encoding, self.name)
				replacement, pos = base_error_handler(e)
				if not isinstance(replacement, output_type):
					# Encoding error handlers such as 'replace' or
					# 'xmlcharrefreplace' hand back text, which cannot be
					# appended to the bytes output as is; encode it first.
					replacement = mapper(replacement, self.base_encoding, self.name)
				out += replacement
				# e.start and pos are relative to the current (already
				# shortened) input, so slicing keeps them consistent.
				input = input[pos:]
		return out, length

	def encode(self, input, errors='strict'):
		"""Encode text to bytes; returns ``(bytes, length of input)``."""
		return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)

	def decode(self, input, errors='strict'):
		"""Decode bytes to text; returns ``(str, length of input)``."""
		return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)

	def error(self, e):
		"""Codecs error handler that resolves failures from the extension table.

		Returns ``(replacement, position to resume at)`` when a prefix of the
		offending data is found in the table.  Otherwise the exception is
		re-raised with its ``encoding`` attribute set to this codec's name.
		"""
		if isinstance(e, UnicodeDecodeError):
			# Try successively longer byte slices of the failing range.
			for end in range(e.start + 1, e.end + 1):
				s = e.object[e.start:end]
				if s in self.mapping:
					return self.mapping[s], end
		elif isinstance(e, UnicodeEncodeError):
			# Try text slices up to the longest value present in the table.
			for end in range(e.start + 1, e.start + self.max_len + 1):
				s = e.object[e.start:end]
				if s in self.reverse:
					return self.reverse[s], end
		e.encoding = self.name
		raise e


_extended_encodings = {
	"x_mac_japanese_ttx": ("shift_jis", {
					b"\xFC": unichr(0x007C),
					b"\x7E": unichr(0x007E),
					b"\x80": unichr(0x005C),
					b"\xA0": unichr(0x00A0),
					b"\xFD": unichr(0x00A9),
					b"\xFE": unichr(0x2122),
					b"\xFF": unichr(0x2026),
				}),
	"x_mac_trad_chinese_ttx": ("big5", {
					b"\x80": unichr(0x005C),
					b"\xA0": unichr(0x00A0),
					b"\xFD": unichr(0x00A9),
					b"\xFE": unichr(0x2122),
					b"\xFF": unichr(0x2026),
				}),
	"x_mac_korean_ttx": ("euc_kr", {
					b"\x80": unichr(0x00A0),
					b"\x81": unichr(0x20A9),
					b"\x82": unichr(0x2014),
					b"\x83": unichr(0x00A9),
					b"\xFE": unichr(0x2122),
					b"\xFF": unichr(0x2026),
				}),
	"x_mac_simp_chinese_ttx": ("gb2312", {
					b"\x80": unichr(0x00FC),
					b"\xA0": unichr(0x00A0),
					b"\xFD": unichr(0x00A9),
					b"\xFE": unichr(0x2122),
					b"\xFF": unichr(0x2026),
				}),
}

# Normalized extended-encoding name -> ExtendCodec instance; filled in lazily
# by search_function() the first time each name is looked up.
_cache = {}

def search_function(name):
	"""Codec search function for the extended encodings defined in this module.

	Returns the ``codecs.CodecInfo`` for ``name`` if it is one of the keys of
	``_extended_encodings`` and a usable base codec exists; returns None
	otherwise, which is how a search function reports "not mine".
	"""
	name = encodings.normalize_encoding(name) # Rather undocumented...
	if name not in _extended_encodings:
		return None

	if name not in _cache:
		base_encoding, mapping = _extended_encodings[name]
		assert(name[-4:] == "_ttx")
		# Python 2 didn't have any of the encodings that we are implementing
		# in this file.  Python 3 added aliases for the East Asian ones, mapping
		# them "temporarily" to the same base encoding as us, with a comment
		# suggesting that full implementation will appear some time later.
		# As such, try the Python version of the x_mac_... first, if that is found,
		# use *that* as our base encoding.  This would make our encoding upgrade
		# to the full encoding when and if Python finally implements that.
		# http://bugs.python.org/issue24041
		for candidate in (name[:-4], base_encoding):
			try:
				codecs.lookup(candidate)
			except LookupError:
				continue
			_cache[name] = ExtendCodec(name, candidate, mapping)
			break
		else:
			# Neither base codec is available: report the encoding as unknown
			# instead of failing with a KeyError on the cache lookup below.
			return None

	return _cache[name].info

# Runs at import time: hook search_function into the codec registry so the
# extended encoding names can be resolved through the codecs machinery.
codecs.register(search_function)
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`"""Extend the Python codecs module with a few encodings that are used in OpenType (name table)`
updated inline github issue URLs 2019-03-06 16:01:28 +01:00			`but missing from Python. See https://github.com/fonttools/fonttools/issues/236 for details."""`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00
			`from fontTools.misc.py23 import *`
			`import codecs`
Normalize encoding name for lookup 2015-04-19 04:28:22 -07:00			`import encodings`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00
			`class ExtendCodec(codecs.Codec):`

			`def __init__(self, name, base_encoding, mapping):`
			`self.name = name`
			`self.base_encoding = base_encoding`
			`self.mapping = mapping`
Use dict comprehension now that we don't support Python 2.6 2015-06-11 17:05:15 -07:00			`self.reverse = {v:k for k,v in mapping.items()}`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`self.max_len = max(len(v) for v in mapping.values())`
Minor 2015-04-20 10:00:18 -07:00			`self.info = codecs.CodecInfo(name=self.name, encode=self.encode, decode=self.decode)`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`codecs.register_error(name, self.error)`

codecs: handle errors different from 'strict' for extended mac encodings Fixes #2132 2020-12-24 20:37:52 +01:00			`def _map(self, mapper, output_type, exc_type, input, errors):`
			`base_error_handler = codecs.lookup_error(errors)`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`length = len(input)`
codecs: handle errors different from 'strict' for extended mac encodings Fixes #2132 2020-12-24 20:37:52 +01:00			`out = output_type()`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`while input:`
codecs: handle errors different from 'strict' for extended mac encodings Fixes #2132 2020-12-24 20:37:52 +01:00			`# first try to use self.error as the error handler`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`try:`
codecs: handle errors different from 'strict' for extended mac encodings Fixes #2132 2020-12-24 20:37:52 +01:00			`part = mapper(input, self.base_encoding, errors=self.name)`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`out += part`
codecs: handle errors different from 'strict' for extended mac encodings Fixes #2132 2020-12-24 20:37:52 +01:00			`break # All converted`
			`except exc_type as e:`
			`# else convert the correct part, handle error as requested and continue`
			`out += mapper(input[:e.start], self.base_encoding, self.name)`
			`replacement, pos = base_error_handler(e)`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`out += replacement`
			`input = input[pos:]`
			`return out, length`

codecs: handle errors different from 'strict' for extended mac encodings Fixes #2132 2020-12-24 20:37:52 +01:00			`def encode(self, input, errors='strict'):`
			`return self._map(codecs.encode, bytes, UnicodeEncodeError, input, errors)`

Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`def decode(self, input, errors='strict'):`
codecs: handle errors different from 'strict' for extended mac encodings Fixes #2132 2020-12-24 20:37:52 +01:00			`return self._map(codecs.decode, str, UnicodeDecodeError, input, errors)`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00
			`def error(self, e):`
			`if isinstance(e, UnicodeDecodeError):`
Fix codecs end check There was a bug before Python 3.4 where an extra byte was included in e.end when the error callback was called. That hided a bug in the code. Fixes build with Python 3.4+ 2015-04-16 13:31:08 -07:00			`for end in range(e.start + 1, e.end + 1):`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`s = e.object[e.start:end]`
			`if s in self.mapping:`
			`return self.mapping[s], end`
			`elif isinstance(e, UnicodeEncodeError):`
			`for end in range(e.start + 1, e.start + self.max_len + 1):`
			`s = e.object[e.start:end]`
			`if s in self.reverse:`
			`return self.reverse[s], end`
			`e.encoding = self.name`
			`raise e`

Add Roman Croatian and Romanian encodings Concludes https://github.com/behdad/fonttools/issues/236 2015-04-16 18:24:07 -07:00
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`_extended_encodings = {`
Use canonical Python encoding names 2015-04-19 04:24:55 -07:00			`"x_mac_japanese_ttx": ("shift_jis", {`
Whitespace 2015-04-26 00:15:26 -04:00			`b"\xFC": unichr(0x007C),`
			`b"\x7E": unichr(0x007E),`
			`b"\x80": unichr(0x005C),`
			`b"\xA0": unichr(0x00A0),`
			`b"\xFD": unichr(0x00A9),`
			`b"\xFE": unichr(0x2122),`
			`b"\xFF": unichr(0x2026),`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`}),`
Fix x_mac Chinese names 2015-04-19 04:46:12 -07:00			`"x_mac_trad_chinese_ttx": ("big5", {`
Fix Mac East Asian encodings Ouch! 2015-04-26 00:51:22 -04:00			`b"\x80": unichr(0x005C),`
			`b"\xA0": unichr(0x00A0),`
			`b"\xFD": unichr(0x00A9),`
			`b"\xFE": unichr(0x2122),`
			`b"\xFF": unichr(0x2026),`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`}),`
Use canonical Python encoding names 2015-04-19 04:24:55 -07:00			`"x_mac_korean_ttx": ("euc_kr", {`
Fix Mac East Asian encodings Ouch! 2015-04-26 00:51:22 -04:00			`b"\x80": unichr(0x00A0),`
			`b"\x81": unichr(0x20A9),`
			`b"\x82": unichr(0x2014),`
			`b"\x83": unichr(0x00A9),`
			`b"\xFE": unichr(0x2122),`
			`b"\xFF": unichr(0x2026),`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`}),`
Fix x_mac Chinese names 2015-04-19 04:46:12 -07:00			`"x_mac_simp_chinese_ttx": ("gb2312", {`
Fix Mac East Asian encodings Ouch! 2015-04-26 00:51:22 -04:00			`b"\x80": unichr(0x00FC),`
			`b"\xA0": unichr(0x00A0),`
			`b"\xFD": unichr(0x00A9),`
			`b"\xFE": unichr(0x2122),`
			`b"\xFF": unichr(0x2026),`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`}),`
			`}`

Normalize encoding name for lookup 2015-04-19 04:28:22 -07:00			`_cache = {}`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00
			`def search_function(name):`
Normalize encoding name for lookup 2015-04-19 04:28:22 -07:00			`name = encodings.normalize_encoding(name) # Rather undocumented...`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`if name in _extended_encodings:`
Normalize encoding name for lookup 2015-04-19 04:28:22 -07:00			`if name not in _cache:`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00			`base_encoding, mapping = _extended_encodings[name]`
Upgrade Mac East Asian encodings to native implementation if available Fixes https://github.com/behdad/fonttools/issues/236 2015-04-19 04:52:25 -07:00			`assert(name[-4:] == "_ttx")`
			`# Python 2 didn't have any of the encodings that we are implementing`
			`# in this file. Python 3 added aliases for the East Asian ones, mapping`
			`# them "temporarily" to the same base encoding as us, with a comment`
			`# suggesting that full implementation will appear some time later.`
			`# As such, try the Python version of the x_mac_... first, if that is found,`
			`# use that as our base encoding. This would make our encoding upgrade`
			`# to the full encoding when and if Python finally implements that.`
Use native mac_romanian and mac_croatian encodings Apparently they are implemented in Python, just miss aliases. https://github.com/behdad/fonttools/issues/236 http://bugs.python.org/issue24043 2015-04-24 12:32:20 -07:00			`# http://bugs.python.org/issue24041`
Upgrade Mac East Asian encodings to native implementation if available Fixes https://github.com/behdad/fonttools/issues/236 2015-04-19 04:52:25 -07:00			`base_encodings = [name[:-4], base_encoding]`
			`for base_encoding in base_encodings:`
			`try:`
			`codecs.lookup(base_encoding)`
			`except LookupError:`
			`continue`
			`_cache[name] = ExtendCodec(name, base_encoding, mapping)`
			`break`
Minor 2015-04-20 10:00:18 -07:00			`return _cache[name].info`
Add codecs for mac-extended East Asian encodings Part of https://github.com/behdad/fonttools/issues/236 To be used in name table soon. 2015-04-16 03:10:18 -07:00
			`return None`

			`codecs.register(search_function)`