From e561b758c1a445acc73f4577fb91df19fd9f269b Mon Sep 17 00:00:00 2001 From: Behdad Esfahbod Date: Sun, 19 Apr 2015 03:36:20 -0700 Subject: [PATCH] Move getEncoding into fontTools.misc.encodingTools Logic developed as part of: https://github.com/behdad/fonttools/pull/208 --- Lib/fontTools/misc/encodingTools.py | 73 ++++++++++++++++++++++++ Lib/fontTools/misc/encodingTools_test.py | 31 ++++++++++ Lib/fontTools/ttLib/tables/_n_a_m_e.py | 64 +-------------------- 3 files changed, 106 insertions(+), 62 deletions(-) create mode 100644 Lib/fontTools/misc/encodingTools.py create mode 100644 Lib/fontTools/misc/encodingTools_test.py diff --git a/Lib/fontTools/misc/encodingTools.py b/Lib/fontTools/misc/encodingTools.py new file mode 100644 index 000000000..f076cb637 --- /dev/null +++ b/Lib/fontTools/misc/encodingTools.py @@ -0,0 +1,73 @@ +"""fontTools.misc.encodingTools.py -- tools for working with OpenType encodings. +""" + +from __future__ import print_function, division, absolute_import +from fontTools.misc.py23 import * +import fontTools.encodings.codecs + +# Map keyed by platformID, then platEncID, then possibly langID +_encodingMap = { + 0: { # Unicode + 0: 'utf-16be', + 1: 'utf-16be', + 2: 'utf-16be', + 3: 'utf-16be', + 4: 'utf-16be', + 5: 'utf-16be', + 6: 'utf-16be', + }, + 1: { # Macintosh + # See + # https://github.com/behdad/fonttools/issues/236 + 0: { # Macintosh, platEncID==0, keyed by langID + 15: "mac-iceland", + 17: "mac-turkish", + 18: "x-mac-croatian-ttx", + 24: "mac-latin2", + 25: "mac-latin2", + 26: "mac-latin2", + 27: "mac-latin2", + 28: "mac-latin2", + 36: "mac-latin2", + 37: "x-mac-romanian-ttx", + 38: "mac-latin2", + 39: "mac-latin2", + 40: "mac-latin2", + Ellipsis: 'mac-roman', # Other + }, + 1: 'x-mac-japanese-ttx', + 2: 'x-mac-chinesetrad-ttx', + 3: 'x-mac-korean-ttx', + 6: 'mac-greek', + 7: 'mac-cyrillic', + 25: 'x-mac-chinesesimp-ttx', + 29: 'mac-latin2', + 35: 'mac-turkish', + 37: 'mac-iceland', + }, + 2: { # ISO + 0: 'ascii', + 1: 'utf-16be', + 2:
'latin1', + }, + 3: { # Microsoft + 0: 'utf-16be', + 1: 'utf-16be', + 2: 'shift-jis', + 3: 'gb2312', + 4: 'big5', + 5: 'wansung', + 6: 'johab', + 10: 'utf-16be', + }, +} + +def getEncoding(platformID, platEncID, langID, default=None): + """Returns the Python encoding name for OpenType platformID/encodingID/langID + triplet. If encoding for these values is not known, by default None is + returned. That can be overridden by passing a value to the default argument. + """ + encoding = _encodingMap.get(platformID, {}).get(platEncID, default) + if isinstance(encoding, dict): + encoding = encoding.get(langID, encoding[Ellipsis]) + return encoding diff --git a/Lib/fontTools/misc/encodingTools_test.py b/Lib/fontTools/misc/encodingTools_test.py new file mode 100644 index 000000000..176a30938 --- /dev/null +++ b/Lib/fontTools/misc/encodingTools_test.py @@ -0,0 +1,31 @@ +from __future__ import print_function, division, absolute_import, unicode_literals +from fontTools.misc.py23 import * +import unittest +from .encodingTools import getEncoding + +class EncodingTest(unittest.TestCase): + + def test_encoding_unicode(self): + + self.assertEqual(getEncoding(3, 0, None), "utf-16be") # MS Symbol is Unicode as well + self.assertEqual(getEncoding(3, 1, None), "utf-16be") + self.assertEqual(getEncoding(3, 10, None), "utf-16be") + self.assertEqual(getEncoding(0, 3, None), "utf-16be") + + def test_encoding_macroman_misc(self): + self.assertEqual(getEncoding(1, 0, 17), "mac-turkish") + self.assertEqual(getEncoding(1, 0, 37), "x-mac-romanian-ttx") + self.assertEqual(getEncoding(1, 0, 45), "mac-roman") + + def test_extended_mac_encodings(self): + encoding = getEncoding(1, 1, 0) # Mac Japanese + decoded = b'\xfe'.decode(encoding) + self.assertEqual(decoded, unichr(0x2122)) + + def test_extended_unknown(self): + self.assertEqual(getEncoding(10, 11, 12), None) + self.assertEqual(getEncoding(10, 11, 12, "ascii"), "ascii") + self.assertEqual(getEncoding(10, 11, 12, default="ascii"), "ascii") + +if
__name__ == "__main__": + unittest.main() diff --git a/Lib/fontTools/ttLib/tables/_n_a_m_e.py b/Lib/fontTools/ttLib/tables/_n_a_m_e.py index a4e255822..3b2d562ab 100644 --- a/Lib/fontTools/ttLib/tables/_n_a_m_e.py +++ b/Lib/fontTools/ttLib/tables/_n_a_m_e.py @@ -2,7 +2,7 @@ from __future__ import print_function, division, absolute_import from fontTools.misc.py23 import * from fontTools.misc import sstruct from fontTools.misc.textTools import safeEval -import fontTools.encodings.codecs +from fontTools.misc.encodingTools import getEncoding from . import DefaultTable import struct @@ -93,73 +93,13 @@ class table__n_a_m_e(DefaultTable.DefaultTable): class NameRecord(object): - # Map keyed by platformID, then platEncID, then possibly langID - _encodingMap = { - 0: { # Unicode - 0: 'utf-16be', - 1: 'utf-16be', - 2: 'utf-16be', - 3: 'utf-16be', - 4: 'utf-16be', - 5: 'utf-16be', - 6: 'utf-16be', - }, - 1: { # Macintosh - # See - # https://github.com/behdad/fonttools/issues/236 - 0: { # Macintosh, platEncID==0, keyed by langID - 15: "mac-iceland", - 17: "mac-turkish", - 18: "x-mac-croatian-ttx", - 24: "mac-latin2", - 25: "mac-latin2", - 26: "mac-latin2", - 27: "mac-latin2", - 28: "mac-latin2", - 36: "mac-latin2", - 37: "x-mac-romanian-ttx", - 38: "mac-latin2", - 39: "mac-latin2", - 40: "mac-latin2", - Ellipsis: 'mac-roman', # Other - }, - 1: 'x-mac-japanese-ttx', - 2: 'x-mac-chinesetrad-ttx', - 3: 'x-mac-korean-ttx', - 6: 'mac-greek', - 7: 'mac-cyrillic', - 25: 'x-mac-chinesesimp-ttx', - 29: 'mac-latin2', - 35: 'mac-turkish', - 37: 'mac-iceland', - }, - 2: { # ISO - 0: 'ascii', - 1: 'utf-16be', - 2: 'latin1', - }, - 3: { # Microsoft - 0: 'utf-16be', - 1: 'utf-16be', - 2: 'shift-jis', - 3: 'gb2312', - 4: 'big5', - 5: 'wansung', - 6: 'johab', - 10: 'utf-16be', - }, - } - def getEncoding(self, default='ascii'): """Returns the Python encoding name for this name entry based on its platformID, platEncID, and langID. 
If encoding for these values is not known, by default 'ascii' is returned. That can be overriden by passing a value to the default argument. """ - encoding = self._encodingMap.get(self.platformID, {}).get(self.platEncID, default) - if isinstance(encoding, dict): - encoding = encoding.get(self.langID, encoding[Ellipsis]) - return encoding + return getEncoding(self.platformID, self.platEncID, self.langID, default) def encodingIsUnicodeCompatible(self): return self.getEncoding(None) in ['utf-16be', 'ucs2be', 'ascii', 'latin1']