Support non-BMP characters for synthetic glyph names

When a font supplies no glyph names in its 'post' table, fontTools
builds synthetic glyph names by reversing the 'cmap' table.
After this change, the library looks at all Unicode 'cmap' subtables,
irrespective of format or platform. For example, glyph #4
in NotoSansOldItalic-Regular.ttf is now named "u10300" instead of
"glyph00004".

Moved the code for building a reversed 'cmap' table into the cmap class,
for easier testing.
This commit is contained in:
Sascha Brawer 2015-09-04 10:21:55 +02:00
parent 58f86f318a
commit 07458f62dd
3 changed files with 65 additions and 43 deletions

View File

@ -511,47 +511,44 @@ class TTFont(object):
# Set the glyph order, so the cmap parser has something
# to work with (so we don't get called recursively).
self.glyphOrder = glyphOrder
# Get a (new) temporary cmap (based on the just invented names)
try:
tempcmap = self['cmap'].getcmap(3, 1)
except KeyError:
tempcmap = None
if tempcmap is not None:
# we have a unicode cmap
from fontTools import agl
cmap = tempcmap.cmap
# create a reverse cmap dict
reversecmap = {}
for unicode, name in list(cmap.items()):
reversecmap[name] = unicode
allNames = {}
for i in range(numGlyphs):
tempName = glyphOrder[i]
if tempName in reversecmap:
unicode = reversecmap[tempName]
if unicode in agl.UV2AGL:
# get name from the Adobe Glyph List
glyphName = agl.UV2AGL[unicode]
else:
# create uni<CODE> name
glyphName = "uni%04X" % unicode
tempName = glyphName
n = allNames.get(tempName, 0)
if n:
tempName = glyphName + "#" + str(n)
glyphOrder[i] = tempName
allNames[tempName] = n + 1
# Delete the temporary cmap table from the cache, so it can
# be parsed again with the right names.
del self.tables['cmap']
else:
pass # no unicode cmap available, stick with the invented names
# Make up glyph names based on the reversed cmap table. Because some
# glyphs (eg. ligatures or alternates) may not be reachable via cmap,
# this naming table will usually not cover all glyphs in the font.
# If the font has no Unicode cmap table, reversecmap will be empty.
reversecmap = self['cmap'].buildReversed()
useCount = {}
for i in range(numGlyphs):
tempName = glyphOrder[i]
if tempName in reversecmap:
# If a font maps both U+0041 LATIN CAPITAL LETTER A and
# U+0391 GREEK CAPITAL LETTER ALPHA to the same glyph,
# we prefer naming the glyph as "A".
glyphName = self._makeGlyphName(min(reversecmap[tempName]))
numUses = useCount[glyphName] = useCount.get(glyphName, 0) + 1
if numUses > 1:
glyphName = "%s.alt%d" % (glyphName, numUses - 1)
glyphOrder[i] = glyphName
# Delete the temporary cmap table from the cache, so it can
# be parsed again with the right names.
del self.tables['cmap']
self.glyphOrder = glyphOrder
if cmapLoading:
# restore partially loaded cmap, so it can continue loading
# using the proper names.
self.tables['cmap'] = cmapLoading
@staticmethod
def _makeGlyphName(codepoint):
    """Return a synthetic glyph name for a Unicode codepoint.

    The name listed for the codepoint in agl.UV2AGL is used when there
    is one. Otherwise the name is "uni" followed by at least four
    uppercase hex digits for codepoints up to 0xFFFF, or "u" followed
    by the uppercase hex value for anything above that.
    """
    from fontTools import agl  # Adobe Glyph List
    try:
        # Prefer the name from the Adobe Glyph List.
        return agl.UV2AGL[codepoint]
    except KeyError:
        pass
    # No listed name: fall back to a hex-based name.
    template = "uni%04X" if codepoint <= 0xFFFF else "u%X"
    return template % codepoint
def getGlyphNames(self):
"""Get a list of glyph names, sorted alphabetically."""
glyphNames = sorted(self.getGlyphOrder()[:])

View File

@ -20,6 +20,21 @@ class table__c_m_a_p(DefaultTable.DefaultTable):
return subtable
return None # not found
def buildReversed(self):
    """Build a glyph-name -> codepoints mapping from the Unicode subtables.

    Returns a dict such as {'one':{0x31}, 'A':{0x41,0x391}}. Each value
    is a set of codepoints, since several codepoints may map to the
    same glyph (for example U+0041 LATIN CAPITAL LETTER A and U+0391
    GREEK CAPITAL LETTER ALPHA). Subtables whose isUnicode() is false
    are ignored; if none qualify, the result is an empty dict.
    """
    reverse = {}
    unicodeSubtables = (t for t in self.tables if t.isUnicode())
    for table in unicodeSubtables:
        for code, glyphName in table.cmap.items():
            if glyphName not in reverse:
                reverse[glyphName] = set()
            reverse[glyphName].add(code)
    return reverse
def decompile(self, data, ttFont):
tableVersion, numSubTables = struct.unpack(">HH", data[:4])
self.tableVersion = int(tableVersion)

View File

@ -2,37 +2,37 @@ from __future__ import print_function, division, absolute_import, unicode_litera
from fontTools.misc.py23 import *
from fontTools import ttLib
import unittest
from ._c_m_a_p import CmapSubtable
from ._c_m_a_p import CmapSubtable, table__c_m_a_p
class CmapSubtableTest(unittest.TestCase):
def makeSubtable(self, platformID, platEncID, langID):
subtable = CmapSubtable(None)
def makeSubtable(self, cmapFormat, platformID, platEncID, langID):
subtable = CmapSubtable.newSubtable(cmapFormat)
subtable.platformID, subtable.platEncID, subtable.language = (platformID, platEncID, langID)
return subtable
def test_toUnicode_utf16be(self):
subtable = self.makeSubtable(0, 2, 7)
subtable = self.makeSubtable(4, 0, 2, 7)
self.assertEqual("utf_16_be", subtable.getEncoding())
self.assertEqual(True, subtable.isUnicode())
def test_toUnicode_macroman(self):
subtable = self.makeSubtable(1, 0, 7) # MacRoman
subtable = self.makeSubtable(4, 1, 0, 7) # MacRoman
self.assertEqual("mac_roman", subtable.getEncoding())
self.assertEqual(False, subtable.isUnicode())
def test_toUnicode_macromanian(self):
subtable = self.makeSubtable(1, 0, 37) # Mac Romanian
subtable = self.makeSubtable(4, 1, 0, 37) # Mac Romanian
self.assertNotEqual(None, subtable.getEncoding())
self.assertEqual(False, subtable.isUnicode())
def test_extended_mac_encodings(self):
subtable = self.makeSubtable(1, 1, 0) # Mac Japanese
subtable = self.makeSubtable(4, 1, 1, 0) # Mac Japanese
self.assertNotEqual(None, subtable.getEncoding())
self.assertEqual(False, subtable.isUnicode())
def test_extended_unknown(self):
subtable = self.makeSubtable(10, 11, 12)
subtable = self.makeSubtable(4, 10, 11, 12)
self.assertEqual(subtable.getEncoding(), None)
self.assertEqual(subtable.getEncoding("ascii"), "ascii")
self.assertEqual(subtable.getEncoding(default="xyz"), "xyz")
@ -49,5 +49,15 @@ class CmapSubtableTest(unittest.TestCase):
font.setGlyphOrder([])
subtable.decompile(b'\0' * 7 + b'\x10' + b'\0' * 8, font)
def test_buildReversed(self):
    """buildReversed() merges codepoints from every Unicode subtable."""
    # Format 4 subtable in which two codepoints share the glyph 'A'.
    bmpSubtable = self.makeSubtable(4, 3, 1, 0)
    bmpSubtable.cmap = {0x0041:'A', 0x0391:'A'}
    # Format 12 subtable holding a codepoint above U+FFFF.
    fullSubtable = self.makeSubtable(12, 3, 10, 0)
    fullSubtable.cmap = {0x10314: 'u10314'}
    table = table__c_m_a_p()
    table.tables = [bmpSubtable, fullSubtable]
    expected = {'A':{0x0041, 0x0391}, 'u10314':{0x10314}}
    self.assertEqual(table.buildReversed(), expected)
if __name__ == "__main__":
unittest.main()