Merge pull request #349 from brawer/glyphnames-beyond-bmp

Support non-BMP characters for synthetic glyph names
This commit is contained in:
Sascha Brawer 2015-09-09 15:03:06 +02:00
commit 5f5b07b8d7
5 changed files with 75 additions and 44 deletions

View File

@ -511,47 +511,44 @@ class TTFont(object):
# Set the glyph order, so the cmap parser has something
# to work with (so we don't get called recursively).
self.glyphOrder = glyphOrder
# Get a (new) temporary cmap (based on the just invented names)
try:
tempcmap = self['cmap'].getcmap(3, 1)
except KeyError:
tempcmap = None
if tempcmap is not None:
# we have a unicode cmap
from fontTools import agl
cmap = tempcmap.cmap
# create a reverse cmap dict
reversecmap = {}
for unicode, name in list(cmap.items()):
reversecmap[name] = unicode
allNames = {}
for i in range(numGlyphs):
tempName = glyphOrder[i]
if tempName in reversecmap:
unicode = reversecmap[tempName]
if unicode in agl.UV2AGL:
# get name from the Adobe Glyph List
glyphName = agl.UV2AGL[unicode]
else:
# create uni<CODE> name
glyphName = "uni%04X" % unicode
tempName = glyphName
n = allNames.get(tempName, 0)
if n:
tempName = glyphName + "#" + str(n)
glyphOrder[i] = tempName
allNames[tempName] = n + 1
# Delete the temporary cmap table from the cache, so it can
# be parsed again with the right names.
del self.tables['cmap']
else:
pass # no unicode cmap available, stick with the invented names
# Make up glyph names based on the reversed cmap table. Because some
# glyphs (eg. ligatures or alternates) may not be reachable via cmap,
# this naming table will usually not cover all glyphs in the font.
# If the font has no Unicode cmap table, reversecmap will be empty.
reversecmap = self['cmap'].buildReversed()
useCount = {}
for i in range(numGlyphs):
tempName = glyphOrder[i]
if tempName in reversecmap:
# If a font maps both U+0041 LATIN CAPITAL LETTER A and
# U+0391 GREEK CAPITAL LETTER ALPHA to the same glyph,
# we prefer naming the glyph as "A".
glyphName = self._makeGlyphName(min(reversecmap[tempName]))
numUses = useCount[glyphName] = useCount.get(glyphName, 0) + 1
if numUses > 1:
glyphName = "%s.alt%d" % (glyphName, numUses - 1)
glyphOrder[i] = glyphName
# Delete the temporary cmap table from the cache, so it can
# be parsed again with the right names.
del self.tables['cmap']
self.glyphOrder = glyphOrder
if cmapLoading:
# restore partially loaded cmap, so it can continue loading
# using the proper names.
self.tables['cmap'] = cmapLoading
@staticmethod
def _makeGlyphName(codepoint):
    """Return a synthetic glyph name for the Unicode value *codepoint*.

    A name listed in the Adobe Glyph List (``agl.UV2AGL``) wins when one
    exists for the codepoint. Otherwise the name is built from the
    codepoint in uppercase hexadecimal: ``uni`` plus at least four digits
    for values up to 0xFFFF, and ``u`` plus the unpadded digits for
    anything above that.
    """
    from fontTools import agl  # Adobe Glyph List
    if codepoint in agl.UV2AGL:
        return agl.UV2AGL[codepoint]
    # Values above 0xFFFF do not fit the four-digit "uni" form, so they
    # get the shorter "u" prefix instead.
    template = "uni%04X" if codepoint <= 0xFFFF else "u%X"
    return template % codepoint
def getGlyphNames(self):
"""Get a list of glyph names, sorted alphabetically."""
glyphNames = sorted(self.getGlyphOrder()[:])

View File

@ -20,6 +20,21 @@ class table__c_m_a_p(DefaultTable.DefaultTable):
return subtable
return None # not found
def buildReversed(self):
    """Return a reverse cmap, e.g. {'one':{0x31}, 'A':{0x41,0x391}}.

    Keys are glyph names and values are sets of codepoints. Sets are
    needed because a font may map several codepoints to a single glyph:
    U+0041 LATIN CAPITAL LETTER A and U+0391 GREEK CAPITAL LETTER ALPHA,
    for instance, are sometimes the same glyph.

    Only subtables whose isUnicode() is true contribute; when there are
    none, the result is an empty dict.
    """
    reverse = {}
    unicodeSubtables = (st for st in self.tables if st.isUnicode())
    for st in unicodeSubtables:
        for codepoint, glyphName in st.cmap.items():
            # First sighting of this glyph name: start its codepoint set.
            if glyphName not in reverse:
                reverse[glyphName] = set()
            reverse[glyphName].add(codepoint)
    return reverse
def decompile(self, data, ttFont):
tableVersion, numSubTables = struct.unpack(">HH", data[:4])
self.tableVersion = int(tableVersion)

View File

@ -2,37 +2,37 @@ from __future__ import print_function, division, absolute_import, unicode_litera
from fontTools.misc.py23 import *
from fontTools import ttLib
import unittest
from ._c_m_a_p import CmapSubtable
from ._c_m_a_p import CmapSubtable, table__c_m_a_p
class CmapSubtableTest(unittest.TestCase):
def makeSubtable(self, platformID, platEncID, langID):
subtable = CmapSubtable(None)
def makeSubtable(self, cmapFormat, platformID, platEncID, langID):
subtable = CmapSubtable.newSubtable(cmapFormat)
subtable.platformID, subtable.platEncID, subtable.language = (platformID, platEncID, langID)
return subtable
def test_toUnicode_utf16be(self):
subtable = self.makeSubtable(0, 2, 7)
subtable = self.makeSubtable(4, 0, 2, 7)
self.assertEqual("utf_16_be", subtable.getEncoding())
self.assertEqual(True, subtable.isUnicode())
def test_toUnicode_macroman(self):
subtable = self.makeSubtable(1, 0, 7) # MacRoman
subtable = self.makeSubtable(4, 1, 0, 7) # MacRoman
self.assertEqual("mac_roman", subtable.getEncoding())
self.assertEqual(False, subtable.isUnicode())
def test_toUnicode_macromanian(self):
subtable = self.makeSubtable(1, 0, 37) # Mac Romanian
subtable = self.makeSubtable(4, 1, 0, 37) # Mac Romanian
self.assertNotEqual(None, subtable.getEncoding())
self.assertEqual(False, subtable.isUnicode())
def test_extended_mac_encodings(self):
subtable = self.makeSubtable(1, 1, 0) # Mac Japanese
subtable = self.makeSubtable(4, 1, 1, 0) # Mac Japanese
self.assertNotEqual(None, subtable.getEncoding())
self.assertEqual(False, subtable.isUnicode())
def test_extended_unknown(self):
subtable = self.makeSubtable(10, 11, 12)
subtable = self.makeSubtable(4, 10, 11, 12)
self.assertEqual(subtable.getEncoding(), None)
self.assertEqual(subtable.getEncoding("ascii"), "ascii")
self.assertEqual(subtable.getEncoding(default="xyz"), "xyz")
@ -49,5 +49,15 @@ class CmapSubtableTest(unittest.TestCase):
font.setGlyphOrder([])
subtable.decompile(b'\0' * 7 + b'\x10' + b'\0' * 8, font)
def test_buildReversed(self):
    """buildReversed() merges the subtables into one name -> codepoints dict."""
    # Format 4 subtable in which two codepoints share the glyph 'A'.
    sharedGlyph = self.makeSubtable(4, 3, 1, 0)
    sharedGlyph.cmap = {0x0041:'A', 0x0391:'A'}
    # Format 12 subtable holding a codepoint above 0xFFFF.
    beyondBmp = self.makeSubtable(12, 3, 10, 0)
    beyondBmp.cmap = {0x10314: 'u10314'}
    table = table__c_m_a_p()
    table.tables = [sharedGlyph, beyondBmp]
    expected = {'A':{0x0041, 0x0391}, 'u10314':{0x10314}}
    self.assertEqual(table.buildReversed(), expected)
if __name__ == "__main__":
unittest.main()

View File

@ -227,7 +227,14 @@ class WOFF2Writer(SFNTWriter):
"""
if self.sfntVersion == "OTTO":
return
for tag in ('maxp', 'head', 'loca', 'glyf'):
# make up glyph names required to decompile glyf table
self._decompileTable('maxp')
numGlyphs = self.ttFont['maxp'].numGlyphs
glyphOrder = ['.notdef'] + ["glyph%.5d" % i for i in range(1, numGlyphs)]
self.ttFont.setGlyphOrder(glyphOrder)
for tag in ('head', 'loca', 'glyf'):
self._decompileTable(tag)
self.ttFont['glyf'].padding = padding
for tag in ('glyf', 'loca'):

View File

@ -611,9 +611,11 @@ class WOFF2GlyfTableTest(unittest.TestCase):
reader = WOFF2Reader(infile)
cls.transformedGlyfData = reader.tables['glyf'].loadData(
reader.transformBuffer)
cls.glyphOrder = ['.notdef'] + ["glyph%.5d" % i for i in range(1, font['maxp'].numGlyphs)]
def setUp(self):
self.font = font = ttLib.TTFont(recalcBBoxes=False, recalcTimestamp=False)
font.setGlyphOrder(self.glyphOrder)
font['head'] = ttLib.getTableClass('head')()
font['maxp'] = ttLib.getTableClass('maxp')()
font['loca'] = WOFF2LocaTable()