Support non-BMP characters for synthetic glyph names
When a font supplies no glyph names in its 'post' table, fontTools builds synthetic glyph names by reversing the 'cmap' table. After this change, the library looks at all Unicode 'cmap' subtables, irrespective of format or platform. For example, glyph #4 in NotoSansOldItalic-Regular.ttf is now named "u10300" instead of "glyph00004". The code for building a reversed 'cmap' table has been moved into the cmap class, for easier testing.
This commit is contained in:
parent
58f86f318a
commit
07458f62dd
@ -511,47 +511,44 @@ class TTFont(object):
|
||||
# Set the glyph order, so the cmap parser has something
|
||||
# to work with (so we don't get called recursively).
|
||||
self.glyphOrder = glyphOrder
|
||||
# Get a (new) temporary cmap (based on the just invented names)
|
||||
try:
|
||||
tempcmap = self['cmap'].getcmap(3, 1)
|
||||
except KeyError:
|
||||
tempcmap = None
|
||||
if tempcmap is not None:
|
||||
# we have a unicode cmap
|
||||
from fontTools import agl
|
||||
cmap = tempcmap.cmap
|
||||
# create a reverse cmap dict
|
||||
reversecmap = {}
|
||||
for unicode, name in list(cmap.items()):
|
||||
reversecmap[name] = unicode
|
||||
allNames = {}
|
||||
for i in range(numGlyphs):
|
||||
tempName = glyphOrder[i]
|
||||
if tempName in reversecmap:
|
||||
unicode = reversecmap[tempName]
|
||||
if unicode in agl.UV2AGL:
|
||||
# get name from the Adobe Glyph List
|
||||
glyphName = agl.UV2AGL[unicode]
|
||||
else:
|
||||
# create uni<CODE> name
|
||||
glyphName = "uni%04X" % unicode
|
||||
tempName = glyphName
|
||||
n = allNames.get(tempName, 0)
|
||||
if n:
|
||||
tempName = glyphName + "#" + str(n)
|
||||
glyphOrder[i] = tempName
|
||||
allNames[tempName] = n + 1
|
||||
# Delete the temporary cmap table from the cache, so it can
|
||||
# be parsed again with the right names.
|
||||
del self.tables['cmap']
|
||||
else:
|
||||
pass # no unicode cmap available, stick with the invented names
|
||||
|
||||
# Make up glyph names based on the reversed cmap table. Because some
|
||||
# glyphs (eg. ligatures or alternates) may not be reachable via cmap,
|
||||
# this naming table will usually not cover all glyphs in the font.
|
||||
# If the font has no Unicode cmap table, reversecmap will be empty.
|
||||
reversecmap = self['cmap'].buildReversed()
|
||||
useCount = {}
|
||||
for i in range(numGlyphs):
|
||||
tempName = glyphOrder[i]
|
||||
if tempName in reversecmap:
|
||||
# If a font maps both U+0041 LATIN CAPITAL LETTER A and
|
||||
# U+0391 GREEK CAPITAL LETTER ALPHA to the same glyph,
|
||||
# we prefer naming the glyph as "A".
|
||||
glyphName = self._makeGlyphName(min(reversecmap[tempName]))
|
||||
numUses = useCount[glyphName] = useCount.get(glyphName, 0) + 1
|
||||
if numUses > 1:
|
||||
glyphName = "%s.alt%d" % (glyphName, numUses - 1)
|
||||
glyphOrder[i] = glyphName
|
||||
|
||||
# Delete the temporary cmap table from the cache, so it can
|
||||
# be parsed again with the right names.
|
||||
del self.tables['cmap']
|
||||
self.glyphOrder = glyphOrder
|
||||
if cmapLoading:
|
||||
# restore partially loaded cmap, so it can continue loading
|
||||
# using the proper names.
|
||||
self.tables['cmap'] = cmapLoading
|
||||
|
||||
@staticmethod
def _makeGlyphName(codepoint):
    """Return a synthetic glyph name for the given Unicode codepoint.

    If the codepoint has an entry in ``agl.UV2AGL`` (Adobe Glyph List),
    that entry is returned. Otherwise the name is built from the
    codepoint's uppercase hexadecimal value: ``uni`` followed by the
    value zero-padded to four digits for codepoints up to 0xFFFF, or
    ``u`` followed by the unpadded value for larger codepoints.
    """
    from fontTools import agl  # Adobe Glyph List

    aglNames = agl.UV2AGL
    if codepoint in aglNames:
        return aglNames[codepoint]
    # Codepoints above 0xFFFF do not fit the four-digit "uni" form.
    template = "uni%04X" if codepoint <= 0xFFFF else "u%X"
    return template % codepoint
|
||||
|
||||
def getGlyphNames(self):
|
||||
"""Get a list of glyph names, sorted alphabetically."""
|
||||
glyphNames = sorted(self.getGlyphOrder()[:])
|
||||
|
@ -20,6 +20,21 @@ class table__c_m_a_p(DefaultTable.DefaultTable):
|
||||
return subtable
|
||||
return None # not found
|
||||
|
||||
def buildReversed(self):
    """Build a glyph-name -> codepoints mapping from the Unicode subtables.

    The result looks like {'one':{0x31}, 'A':{0x41,0x391}}: each key is
    a glyph name and each value is the set of all codepoints that any
    subtable reporting ``isUnicode()`` maps to that glyph. Sets are
    used because one glyph may be reachable from several codepoints,
    e.g. U+0041 LATIN CAPITAL LETTER A and U+0391 GREEK CAPITAL LETTER
    ALPHA sometimes share a glyph. Subtables that are not Unicode are
    ignored; if there are none that are, the result is empty.
    """
    reverse = {}
    unicodeSubtables = (t for t in self.tables if t.isUnicode())
    for table in unicodeSubtables:
        for code, glyphName in table.cmap.items():
            if glyphName not in reverse:
                reverse[glyphName] = set()
            reverse[glyphName].add(code)
    return reverse
|
||||
|
||||
def decompile(self, data, ttFont):
|
||||
tableVersion, numSubTables = struct.unpack(">HH", data[:4])
|
||||
self.tableVersion = int(tableVersion)
|
||||
|
@ -2,37 +2,37 @@ from __future__ import print_function, division, absolute_import, unicode_litera
|
||||
from fontTools.misc.py23 import *
|
||||
from fontTools import ttLib
|
||||
import unittest
|
||||
from ._c_m_a_p import CmapSubtable
|
||||
from ._c_m_a_p import CmapSubtable, table__c_m_a_p
|
||||
|
||||
class CmapSubtableTest(unittest.TestCase):
|
||||
|
||||
def makeSubtable(self, platformID, platEncID, langID):
|
||||
subtable = CmapSubtable(None)
|
||||
def makeSubtable(self, cmapFormat, platformID, platEncID, langID):
|
||||
subtable = CmapSubtable.newSubtable(cmapFormat)
|
||||
subtable.platformID, subtable.platEncID, subtable.language = (platformID, platEncID, langID)
|
||||
return subtable
|
||||
|
||||
def test_toUnicode_utf16be(self):
|
||||
subtable = self.makeSubtable(0, 2, 7)
|
||||
subtable = self.makeSubtable(4, 0, 2, 7)
|
||||
self.assertEqual("utf_16_be", subtable.getEncoding())
|
||||
self.assertEqual(True, subtable.isUnicode())
|
||||
|
||||
def test_toUnicode_macroman(self):
|
||||
subtable = self.makeSubtable(1, 0, 7) # MacRoman
|
||||
subtable = self.makeSubtable(4, 1, 0, 7) # MacRoman
|
||||
self.assertEqual("mac_roman", subtable.getEncoding())
|
||||
self.assertEqual(False, subtable.isUnicode())
|
||||
|
||||
def test_toUnicode_macromanian(self):
|
||||
subtable = self.makeSubtable(1, 0, 37) # Mac Romanian
|
||||
subtable = self.makeSubtable(4, 1, 0, 37) # Mac Romanian
|
||||
self.assertNotEqual(None, subtable.getEncoding())
|
||||
self.assertEqual(False, subtable.isUnicode())
|
||||
|
||||
def test_extended_mac_encodings(self):
|
||||
subtable = self.makeSubtable(1, 1, 0) # Mac Japanese
|
||||
subtable = self.makeSubtable(4, 1, 1, 0) # Mac Japanese
|
||||
self.assertNotEqual(None, subtable.getEncoding())
|
||||
self.assertEqual(False, subtable.isUnicode())
|
||||
|
||||
def test_extended_unknown(self):
|
||||
subtable = self.makeSubtable(10, 11, 12)
|
||||
subtable = self.makeSubtable(4, 10, 11, 12)
|
||||
self.assertEqual(subtable.getEncoding(), None)
|
||||
self.assertEqual(subtable.getEncoding("ascii"), "ascii")
|
||||
self.assertEqual(subtable.getEncoding(default="xyz"), "xyz")
|
||||
@ -49,5 +49,15 @@ class CmapSubtableTest(unittest.TestCase):
|
||||
font.setGlyphOrder([])
|
||||
subtable.decompile(b'\0' * 7 + b'\x10' + b'\0' * 8, font)
|
||||
|
||||
def test_buildReversed(self):
    # A format 4 subtable in which two codepoints share the glyph 'A'.
    bmpSubtable = self.makeSubtable(4, 3, 1, 0)
    bmpSubtable.cmap = {0x0041: 'A', 0x0391: 'A'}
    # A format 12 subtable holding a codepoint above 0xFFFF.
    astralSubtable = self.makeSubtable(12, 3, 10, 0)
    astralSubtable.cmap = {0x10314: 'u10314'}
    table = table__c_m_a_p()
    table.tables = [bmpSubtable, astralSubtable]
    # Entries from both subtables are expected in the reversed mapping.
    self.assertEqual(
        table.buildReversed(),
        {'A': {0x0041, 0x0391}, 'u10314': {0x10314}},
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
Loading…
x
Reference in New Issue
Block a user