2008-03-01 11:43:01 +00:00
import sys
2013-11-27 02:34:11 -05:00
from . import DefaultTable
1999-12-16 21:34:53 +00:00
import struct
import array
2006-10-21 13:54:30 +00:00
import operator
1999-12-16 21:34:53 +00:00
from fontTools import ttLib
from fontTools . misc . textTools import safeEval , readHex
2002-05-10 19:03:34 +00:00
from types import TupleType
1999-12-16 21:34:53 +00:00
class table__c_m_a_p(DefaultTable.DefaultTable):
    """The 'cmap' table: a directory of character-code-to-glyph subtables.

    After decompile() or fromXML(), ``self.tableVersion`` holds the table
    version and ``self.tables`` holds one subtable object per directory entry.
    """

    def getcmap(self, platformID, platEncID):
        """Return the first subtable with the given IDs, or None if absent."""
        for subtable in self.tables:
            if (subtable.platformID == platformID and
                    subtable.platEncID == platEncID):
                return subtable
        return None  # not found

    def decompile(self, data, ttFont):
        """Parse the table header and the header of every subtable.

        Only the subtable headers are decoded here; each subtable keeps its
        raw data and decodes it when one of its attributes is first
        referenced.  Subtables reported with a zero length are skipped with
        a printed error message.
        """
        tableVersion, numSubTables = struct.unpack(">HH", data[:4])
        self.tableVersion = int(tableVersion)
        self.tables = tables = []
        seenOffsets = {}  # subtable offset -> index into 'tables'
        for i in range(numSubTables):
            platformID, platEncID, offset = struct.unpack(
                ">HHl", data[4 + i * 8:4 + (i + 1) * 8])
            platformID, platEncID = int(platformID), int(platEncID)
            format, length = struct.unpack(">HH", data[offset:offset + 4])
            if format in [8, 10, 12, 13]:
                # these formats have a reserved word and a 32-bit length
                format, reserved, length = struct.unpack(
                    ">HHL", data[offset:offset + 8])
            elif format in [14]:
                # format 14 has a 32-bit length directly after the format
                format, length = struct.unpack(">HL", data[offset:offset + 6])
            if not length:
                print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s, format %s offset %s. Skipping table." % (platformID, platEncID, format, offset))
                continue
            if format not in cmap_classes:
                table = cmap_format_unknown(format)
            else:
                table = cmap_classes[format](format)
            table.platformID = platformID
            table.platEncID = platEncID
            # Note that by default we decompile only the subtable header info;
            # any other data gets decompiled only when an attribute of the
            # subtable is referenced.
            table.decompileHeader(data[offset:offset + int(length)], ttFont)
            if offset in seenOffsets:
                # Several directory entries point at the same data: share
                # the mapping of the subtable that was read first.
                table.cmap = tables[seenOffsets[offset]].cmap
            else:
                # Record the position in 'tables', not the directory index
                # 'i': the two differ once a zero-length subtable has been
                # skipped above.
                seenOffsets[offset] = len(tables)
            tables.append(table)

    def compile(self, ttFont):
        """Return the binary table: header, directory, then subtable data.

        Subtables that share one cmap object, or that compile to identical
        data, are written once and referenced from several directory entries.
        """
        self.tables.sort()  # sort according to the spec; see CmapSubtable.__cmp__()
        numSubTables = len(self.tables)
        headerParts = [struct.pack(">HH", self.tableVersion, numSubTables)]
        chunks = []
        nextOffset = 4 + 8 * numSubTables  # offset where the next new chunk goes
        seen = {}  # Some tables are the same object reference. Don't compile them twice.
        done = {}  # Some tables are different objects, but compile to the same data chunk
        for table in self.tables:
            try:
                offset = seen[id(table.cmap)]
            except KeyError:
                chunk = table.compile(ttFont)
                if chunk in done:
                    offset = done[chunk]
                else:
                    offset = seen[id(table.cmap)] = done[chunk] = nextOffset
                    chunks.append(chunk)
                    nextOffset = nextOffset + len(chunk)
            headerParts.append(
                struct.pack(">HHl", table.platformID, table.platEncID, offset))
        return "".join(headerParts + chunks)

    def toXML(self, writer, ttFont):
        """Write the table version, then every subtable, to the XML writer."""
        writer.simpletag("tableVersion", version=self.tableVersion)
        writer.newline()
        for table in self.tables:
            table.toXML(writer, ttFont)

    def fromXML(self, element, ttFont):
        """Read one child element, given as a (name, attrs, content) tuple.

        A "tableVersion" element sets the version; a "cmap_format_<N>"
        element appends a new subtable; any other element is ignored.
        """
        name, attrs, content = element
        if name == "tableVersion":
            self.tableVersion = safeEval(attrs["version"])
            return
        if not name.startswith("cmap_format_"):
            return
        if not hasattr(self, "tables"):
            self.tables = []
        format = safeEval(name[12:])
        if format not in cmap_classes:
            table = cmap_format_unknown(format)
        else:
            table = cmap_classes[format](format)
        table.platformID = safeEval(attrs["platformID"])
        table.platEncID = safeEval(attrs["platEncID"])
        table.fromXML((name, attrs, content), ttFont)
        self.tables.append(table)
class CmapSubtable :
def __init__ ( self , format ) :
self . format = format
2006-10-21 13:54:30 +00:00
self . data = None
self . ttFont = None
def __getattr__ ( self , attr ) :
# allow lazy decompilation of subtables.
if attr [ : 2 ] == ' __ ' : # don't handle requests for member functions like '__lt__'
raise AttributeError , attr
if self . data == None :
raise AttributeError , attr
self . decompile ( None , None ) # use saved data.
self . data = None # Once this table has been decompiled, make sure we don't
# just return the original data. Also avoids recursion when
# called with an attribute that the cmap subtable doesn't have.
return getattr ( self , attr )
1999-12-16 21:34:53 +00:00
2006-10-21 13:54:30 +00:00
def decompileHeader ( self , data , ttFont ) :
format , length , language = struct . unpack ( " >HHH " , data [ : 6 ] )
assert len ( data ) == length , " corrupt cmap table format %d (data length: %d , header length: %d ) " % ( format , len ( data ) , length )
self . format = int ( format )
self . length = int ( length )
self . language = int ( language )
self . data = data [ 6 : ]
self . ttFont = ttFont
1999-12-16 21:34:53 +00:00
def toXML ( self , writer , ttFont ) :
writer . begintag ( self . __class__ . __name__ , [
( " platformID " , self . platformID ) ,
( " platEncID " , self . platEncID ) ,
2004-09-25 09:06:58 +00:00
( " language " , self . language ) ,
1999-12-16 21:34:53 +00:00
] )
writer . newline ( )
2004-09-25 09:06:58 +00:00
codes = self . cmap . items ( )
codes . sort ( )
self . _writeCodes ( codes , writer )
1999-12-16 21:34:53 +00:00
writer . endtag ( self . __class__ . __name__ )
writer . newline ( )
2004-09-25 09:06:58 +00:00
def _writeCodes ( self , codes , writer ) :
2006-10-21 13:54:30 +00:00
if ( self . platformID , self . platEncID ) == ( 3 , 1 ) or ( self . platformID , self . platEncID ) == ( 3 , 10 ) or self . platformID == 0 :
2004-09-25 09:06:58 +00:00
from fontTools . unicode import Unicode
isUnicode = 1
else :
isUnicode = 0
for code , name in codes :
writer . simpletag ( " map " , code = hex ( code ) , name = name )
if isUnicode :
writer . comment ( Unicode [ code ] )
writer . newline ( )
1999-12-16 21:34:53 +00:00
def __cmp__ ( self , other ) :
2013-10-28 12:07:15 +01:00
if type ( self ) != type ( other ) : return cmp ( type ( self ) , type ( other ) )
2013-08-17 11:11:22 -04:00
1999-12-16 21:34:53 +00:00
# implemented so that list.sort() sorts according to the cmap spec.
selfTuple = (
2013-10-28 12:16:41 +01:00
getattr ( self , " platformID " , None ) ,
getattr ( self , " platEncID " , None ) ,
getattr ( self , " language " , None ) ,
self . __dict__ )
1999-12-16 21:34:53 +00:00
otherTuple = (
2013-10-28 12:16:41 +01:00
getattr ( other , " platformID " , None ) ,
getattr ( other , " platEncID " , None ) ,
getattr ( other , " language " , None ) ,
other . __dict__ )
1999-12-16 21:34:53 +00:00
return cmp ( selfTuple , otherTuple )
class cmap_format_0(CmapSubtable):
    """cmap subtable format 0: one glyph index byte per char code 0..255."""

    def decompile(self, data, ttFont):
        """Build self.cmap (char code -> glyph name) from the subtable data.

        Call with both arguments None to decode the data saved by
        decompileHeader() (the lazy path used by __getattr__), or with both
        'data' (the complete subtable, header included) and 'ttFont'.
        """
        # we usually get here indirectly from the subtable __getattr__
        # function, in which case both args must be None. If not, someone is
        # calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
        glyphIdArray = array.array("B")
        glyphIdArray.fromstring(data)
        self.cmap = cmap = {}
        getGlyphName = self.ttFont.getGlyphName
        for charCode, glyphID in enumerate(glyphIdArray):
            cmap[charCode] = getGlyphName(glyphID)

    def compile(self, ttFont):
        """Return the 262-byte binary subtable.

        If the raw data was never decompiled it is written back unchanged.
        Otherwise self.cmap must map exactly the char codes 0..255.
        """
        if self.data:
            return struct.pack(">HHH", 0, 262, self.language) + self.data

        charCodeList = sorted(self.cmap.items())
        charCodes = [entry[0] for entry in charCodeList]
        assert charCodes == list(range(256))
        valueList = [ttFont.getGlyphID(entry[1]) for entry in charCodeList]

        glyphIdArray = array.array("B", valueList)
        data = struct.pack(">HHH", 0, 262, self.language) + glyphIdArray.tostring()
        assert len(data) == 262
        return data

    def fromXML(self, element, ttFont):
        """Read language and <map> children from a (name, attrs, content) tuple."""
        name, attrs, content = element
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap
        for child in content:
            if not isinstance(child, tuple):
                continue  # skip text between the child elements
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
1999-12-16 21:34:53 +00:00
2003-08-28 18:04:23 +00:00
# struct layout of one format 2 subheader, big-endian:
# firstCode (H), entryCount (H), idDelta (h, signed), idRangeOffset (H).
subHeaderFormat = ">HHhH"


class SubHeader:
    """One cmap format 2 subheader plus the glyph index subrange it uses."""

    def __init__(self):
        self.firstCode = None
        self.entryCount = None
        self.idDelta = None
        self.idRangeOffset = None
        # per-instance list; filled in by the format 2 decompile/compile code
        self.glyphIndexArray = []
1999-12-16 21:34:53 +00:00
class cmap_format_2(CmapSubtable):
    """cmap subtable format 2: mixed one- and two-byte char codes.

    The first byte of a char code selects a subheader through a 256-entry
    key array; each subheader maps a range of (second) byte values into a
    slice of a shared glyph index array.
    """

    def setIDDelta(self, subHeader):
        """Choose subHeader.idDelta and rebase its glyph index array in place.

        The smallest non-zero glyph ID in the array becomes 1 after the
        delta is subtracted; zero entries (notdef) are left alone.
        """
        subHeader.idDelta = 0
        # find the minGI which is not zero.
        minGI = subHeader.glyphIndexArray[0]
        for gid in subHeader.glyphIndexArray:
            if (gid != 0) and (gid < minGI):
                minGI = gid
        # The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
        # idDelta is a short, and must be between -32K and 32K. minGI can be
        # between 1 and 64K.
        # We would like to pick an idDelta such that the first glyphArray GID
        # is 1, so that we are more likely to be able to combine glypharray
        # GID subranges.
        # This means that we have a problem when minGI is > 32K.
        # Since the final gi is reconstructed from the glyphArray GID by:
        #    (short)finalGID = (gid + idDelta) % 0x10000),
        # we can get from a glypharray GID of 1 to a final GID of 65K by
        # subtracting 2, and casting the negative number to an unsigned short.
        if minGI > 1:
            if minGI > 0x7FFF:
                subHeader.idDelta = -(0x10000 - minGI) - 1
            else:
                subHeader.idDelta = minGI - 1
            idDelta = subHeader.idDelta
            for i in range(subHeader.entryCount):
                gid = subHeader.glyphIndexArray[i]
                if gid > 0:
                    subHeader.glyphIndexArray[i] = gid - idDelta

    def decompile(self, data, ttFont):
        """Build self.cmap (char code -> glyph name) from the subtable data.

        Call with both arguments None to decode the data saved by
        decompileHeader() (the lazy path used by __getattr__), or with both
        'data' (the complete subtable, header included) and 'ttFont'.
        Char codes that map to glyph index 0 are left out of the mapping.
        """
        # we usually get here indirectly from the subtable __getattr__
        # function, in which case both args must be None. If not, someone is
        # calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data

        # get the key array, and determine the number of subHeaders.
        allKeys = array.array("H")
        allKeys.fromstring(data[:512])
        data = data[512:]
        if sys.byteorder != "big":
            allKeys.byteswap()
        # keys are stored as byte offsets (subheader index * 8)
        subHeaderKeys = [key // 8 for key in allKeys]
        maxSubHeaderindex = max(subHeaderKeys)

        # Load subHeaders
        subHeaderList = []
        pos = 0
        for i in range(maxSubHeaderindex + 1):
            subHeader = SubHeader()
            (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta,
                subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
            pos += 8
            # idRangeOffset counts from the idRangeOffset word itself, which
            # is the last two bytes of the subheader just read.
            giDataPos = pos + subHeader.idRangeOffset - 2
            giList = array.array("H")
            giList.fromstring(data[giDataPos:giDataPos + subHeader.entryCount * 2])
            if sys.byteorder != "big":
                giList.byteswap()
            subHeader.glyphIndexArray = giList
            subHeaderList.append(subHeader)

        # How this gets processed.
        # Charcodes may be one or two bytes.
        # The first byte of a charcode is mapped through the subHeaderKeys, to
        # select a subHeader. For any subheader but 0, the next byte is then
        # mapped through the selected subheader. If subheader Index 0 is
        # selected, then the byte itself is mapped through the subheader, and
        # there is no second byte.
        # Then assume that the subsequent byte is the first byte of the next
        # charcode, and repeat.
        #
        # Each subheader references a range in the glyphIndexArray whose
        # length is entryCount. The range in glyphIndexArray referenced by a
        # subheader may overlap with the range in glyphIndexArray referenced
        # by another subheader.
        # The only subheader that will be referenced by more than one
        # first-byte value is the subheader that maps the entire range of
        # glyphID values to glyphIndex 0, e.g notdef:
        #    {firstChar 0, EntryCount 0, idDelta 0, idRangeOffset xx}
        # A byte being mapped through a subheader is treated as an index into
        # a mapping of array index to font glyphIndex.
        # A subheader specifies a subrange within (0...256) by the firstChar
        # and EntryCount values. If the byte value is outside the subrange,
        # then the glyphIndex is zero (e.g. glyph not in font).
        # If the byte index is in the subrange, then an offset index is
        # calculated as (byteIndex - firstChar).
        # The index to glyphIndex mapping is a subrange of the
        # glyphIndexArray. You find the start of the subrange by counting
        # idRangeOffset bytes from the idRangeOffset word. The first value in
        # this subrange is the glyphIndex for the index firstChar. The offset
        # index should then be used in this array to get the glyphIndex.
        # Example for Logocut-Medium
        # first byte of charcode = 129; selects subheader 1.
        # subheader 1 = {firstChar 64, EntryCount 108, idDelta 42, idRangeOffset 0252}
        # second byte of charCode = 66
        # the index offset = 66-64 = 2.
        # The subrange of the glyphIndexArray starting at 0x0252 bytes from
        # the idRangeOffset word is:
        # [glyphIndexArray index], [subrange array index] = glyphIndex
        # [256], [0]=1    from charcode [129, 64]
        # [257], [1]=2    from charcode [129, 65]
        # [258], [2]=3    from charcode [129, 66]
        # [259], [3]=4    from charcode [129, 67]
        # So, the glyphIndex = 3 from the array. Then if idDelta is not zero
        # and the glyph ID is not zero, add it to the glyphID to get the
        # final glyphIndex value. In this case the final glyph index =
        # 3 + 42 -> 45 for the final glyphIndex. Whew!

        self.data = ""
        self.cmap = cmap = {}
        for firstByte in range(256):
            subHeadindex = subHeaderKeys[firstByte]
            subHeader = subHeaderList[subHeadindex]
            if subHeadindex == 0:
                if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
                    continue  # gi is notdef.
                else:
                    charCode = firstByte
                    offsetIndex = firstByte - subHeader.firstCode
                    gi = subHeader.glyphIndexArray[offsetIndex]
                    if gi != 0:
                        gi = (gi + subHeader.idDelta) % 0x10000
                    else:
                        continue  # gi is notdef.
                cmap[charCode] = gi
            else:
                if subHeader.entryCount:
                    charCodeOffset = firstByte * 256 + subHeader.firstCode
                    for offsetIndex in range(subHeader.entryCount):
                        charCode = charCodeOffset + offsetIndex
                        gi = subHeader.glyphIndexArray[offsetIndex]
                        if gi != 0:
                            gi = (gi + subHeader.idDelta) % 0x10000
                        else:
                            continue
                        cmap[charCode] = gi
                # If not subHeader.entryCount, then all char codes with this
                # first byte are mapped to .notdef. We can skip this subtable,
                # and leave the glyphs un-encoded, which is the same as
                # mapping it to .notdef.

        # cmap values are GID's; replace them with glyph names.
        glyphOrder = self.ttFont.getGlyphOrder()
        charCodes = list(cmap.keys())
        gids = [cmap[charCode] for charCode in charCodes]
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            # some GID lies beyond the glyph order; ask the font instead
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        for charCode, glyphName in zip(charCodes, names):
            cmap[charCode] = glyphName

    def compile(self, ttFont):
        """Return the binary subtable built from self.cmap.

        If the raw data was never decompiled it is written back unchanged.
        Glyph names not known to the font may use the form 'gid<N>' to name
        a virtual glyph ID; any other unknown name raises KeyError.
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        kEmptyTwoCharCodeRange = -1
        notdefGI = 0

        items = sorted(self.cmap.items())
        charCodes = [item[0] for item in items]
        names = [item[1] for item in items]
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[glyphName] for glyphName in names]
        except KeyError:
            nameMap = ttFont.getReverseGlyphMap(rebuild=1)
            try:
                gids = [nameMap[glyphName] for glyphName in names]
            except KeyError:
                # allow virtual GIDs in format 2 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if name[:3] == 'gid':
                                # NOTE(review): eval() runs on text taken from
                                # a glyph name; if names can come from
                                # untrusted input this should become a strict
                                # integer parse.
                                gid = eval(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except Exception:
                            raise KeyError(name)

                    gids.append(gid)

        # Process the (char code to gid) item list in char code order.
        # By definition, all one byte char codes map to subheader 0.
        # For all the two byte char codes, we assume that the first byte maps
        # to the empty subhead (with an entry count of 0, which defines all
        # char codes in its range to map to notdef) unless proven otherwise.
        # Note that since the char code items are processed in char code
        # order, all the char codes with the same first byte are in
        # sequential order.

        subHeaderKeys = [kEmptyTwoCharCodeRange for x in range(256)]  # list of indices into subHeaderList.
        subHeaderList = []

        # We force this subheader entry 0 to exist in the subHeaderList in
        # the case where some one comes up with a cmap where all the one byte
        # char codes map to notdef, with the result that the subhead 0 would
        # not get created just by processing the item list.
        charCode = charCodes[0]
        if charCode > 255:
            subHeader = SubHeader()
            subHeader.firstCode = 0
            subHeader.entryCount = 0
            subHeader.idDelta = 0
            subHeader.idRangeOffset = 0
            subHeaderList.append(subHeader)

        lastFirstByte = -1
        items = zip(charCodes, gids)
        for charCode, gid in items:
            if gid == 0:
                continue
            firstbyte = charCode >> 8
            secondByte = charCode & 0x00FF

            if firstbyte != lastFirstByte:  # Need to update the current subhead, and start a new one.
                if lastFirstByte > -1:
                    # fix GI's and iDelta of current subheader.
                    self.setIDDelta(subHeader)

                    # If it was subheader 0 for one-byte charCodes, then we
                    # need to set the subHeaderKeys value to zero for the
                    # indices matching the char codes.
                    if lastFirstByte == 0:
                        for index in range(subHeader.entryCount):
                            charCode = subHeader.firstCode + index
                            subHeaderKeys[charCode] = 0

                    assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
                # init new subheader
                subHeader = SubHeader()
                subHeader.firstCode = secondByte
                subHeader.entryCount = 1
                subHeader.glyphIndexArray.append(gid)
                subHeaderList.append(subHeader)
                subHeaderKeys[firstbyte] = len(subHeaderList) - 1
                lastFirstByte = firstbyte
            else:
                # need to fill in with notdefs all the code points between
                # the last charCode and the current charCode.
                codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
                for i in range(codeDiff):
                    subHeader.glyphIndexArray.append(notdefGI)
                subHeader.glyphIndexArray.append(gid)
                subHeader.entryCount = subHeader.entryCount + codeDiff + 1

        # fix GI's and iDelta of last subheader that we added to the subheader array.
        self.setIDDelta(subHeader)

        # The loop above only zeroes the one-byte keys when a later first
        # byte shows up. When every char code is one byte, subheader 0 is
        # still the current subheader here, so do it now; otherwise those
        # codes would be routed to the empty subheader below.
        if lastFirstByte == 0:
            for index in range(subHeader.entryCount):
                subHeaderKeys[subHeader.firstCode + index] = 0

        # Now we add a final subheader for the subHeaderKeys which maps to
        # empty two byte charcode ranges.
        subHeader = SubHeader()
        subHeader.firstCode = 0
        subHeader.entryCount = 0
        subHeader.idDelta = 0
        subHeader.idRangeOffset = 2
        subHeaderList.append(subHeader)
        emptySubheadIndex = len(subHeaderList) - 1
        for index in range(256):
            if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
                subHeaderKeys[index] = emptySubheadIndex
        # Since this is the last subheader, the GlyphIndex Array starts two
        # bytes after the start of the idRangeOffset word of this subHeader.
        # We can safely point to the first entry in the GlyphIndexArray,
        # since the first subrange of the GlyphIndexArray is for subHeader 0,
        # which always starts with charcode 0 and GID 0.

        idRangeOffset = (len(subHeaderList) - 1) * 8 + 2  # offset to beginning of glyphIDArray from first subheader idRangeOffset.
        subheadRangeLen = len(subHeaderList) - 1  # skip last special empty-set subheader; we've already hardcoded its idRangeOffset to 2.
        for index in range(subheadRangeLen):
            subHeader = subHeaderList[index]
            subHeader.idRangeOffset = 0
            for j in range(index):
                prevSubhead = subHeaderList[j]
                if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray:  # use the glyphIndexArray subarray
                    subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index - j) * 8
                    subHeader.glyphIndexArray = []
                    break
            if subHeader.idRangeOffset == 0:  # didn't find one.
                subHeader.idRangeOffset = idRangeOffset
                idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount * 2  # one less subheader, one more subArray.
            else:
                idRangeOffset = idRangeOffset - 8  # one less subheader

        # Now we can write out the data!
        length = 6 + 512 + 8 * len(subHeaderList)  # header, 256 subHeaderKeys, and subheader array.
        for subhead in subHeaderList[:-1]:
            length = length + len(subhead.glyphIndexArray) * 2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
        dataList = [struct.pack(">HHH", 2, length, self.language)]
        for index in subHeaderKeys:
            dataList.append(struct.pack(">H", index * 8))
        for subhead in subHeaderList:
            dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
        for subhead in subHeaderList[:-1]:
            for gi in subhead.glyphIndexArray:
                dataList.append(struct.pack(">H", gi))
        data = "".join(dataList)
        assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data)) + " calc : " + str(length)
        return data

    def fromXML(self, element, ttFont):
        """Read language and <map> children from a (name, attrs, content) tuple."""
        name, attrs, content = element
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for child in content:
            if not isinstance(child, tuple):
                continue  # skip text between the child elements
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
1999-12-16 21:34:53 +00:00
# struct layout of the fixed part of a format 4 subtable: seven big-endian
# unsigned 16-bit values.
cmap_format_4_format = ">7H"

# The fixed part is followed by these variable-length arrays:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
1999-12-16 21:34:53 +00:00
2002-07-20 21:57:26 +00:00
def splitRange(startCode, endCode, cmap):
    """Split startCode..endCode into cheaper cmap format 4 segments.

    'cmap' maps every char code in the inclusive range to a glyph ID.  Runs
    of codes whose glyph IDs are consecutive are split off as segments of
    their own when that is judged to make the stored subtable smaller.

    Returns (start, end): 'end' lists the last code of every resulting
    segment and 'start' lists the first code of every segment except the
    first one, so len(start) + 1 == len(end).  When no split is worthwhile
    the result is ([], [endCode]).
    """
    if startCode == endCode:
        return [], [endCode]

    # Pass 1: gather the runs in which the glyph IDs are consecutive.
    runs = []
    runStart = None
    inRun = None
    prevID = cmap[startCode]
    prevCode = startCode
    for code in range(startCode + 1, endCode + 1):
        glyphID = cmap[code]
        if glyphID - 1 == prevID:
            if not inRun:
                inRun = 1
                runStart = prevCode
        elif inRun:
            inRun = 0
            runs.append((runStart, prevCode))
            runStart = None
        prevID = glyphID
        prevCode = code
    if inRun:
        runs.append((runStart, prevCode))
    assert prevCode == endCode

    # Pass 2: drop the runs that would only make the data bigger. A new
    # segment costs 8 bytes; not using a new segment costs 2 bytes per
    # character.
    worthwhile = []
    for first, last in runs:
        if first == startCode and last == endCode:
            break  # the whole range, we're fine
        if first == startCode or last == endCode:
            threshold = 4  # split costs one more segment
        else:
            threshold = 8  # split costs two more segments
        if (last - first + 1) > threshold:
            worthwhile.append((first, last))
    runs = worthwhile

    if not runs:
        return [], [endCode]

    # Pass 3: make the runs cover the whole range by adding the stretches
    # before, after and between them -- those are the segments in which the
    # glyph IDs are _not_ consecutive.
    if runs[0][0] != startCode:
        runs.insert(0, (startCode, runs[0][0] - 1))
    if runs[-1][1] != endCode:
        runs.append((runs[-1][1] + 1, endCode))
    i = 1
    while i < len(runs):
        gapStart = runs[i - 1][1] + 1
        if gapStart != runs[i][0]:
            runs.insert(i, (gapStart, runs[i][0] - 1))
            i = i + 1  # step over the segment just inserted
        i = i + 1

    # Pass 4: turn the (first, last) pairs into the start/end lists.
    start = [first for first, last in runs]
    end = [last for first, last in runs]
    start.pop(0)

    assert len(start) + 1 == len(end)
    return start, end
1999-12-16 21:34:53 +00:00
class cmap_format_4 ( CmapSubtable ) :
def decompile(self, data, ttFont):
    """Build self.cmap (char code -> glyph name) from the subtable data.

    Call with both arguments None to decode the data saved by
    decompileHeader() (the lazy path used by __getattr__), or with both
    'data' (the complete subtable, header included) and 'ttFont'.
    The last segment is not expanded into the mapping.
    """
    # we usually get here indirectly from the subtable __getattr__ function,
    # in which case both args must be None. If not, someone is calling the
    # subtable decompile() directly, and must provide both args.
    if data is not None and ttFont is not None:
        self.decompileHeader(data, ttFont)
    else:
        assert (data is None and ttFont is None), "Need both data and ttFont arguments"

    data = self.data  # decompileHeader assigns the data after the header to self.data
    (segCountX2, searchRange, entrySelector, rangeShift) = \
        struct.unpack(">4H", data[:8])
    data = data[8:]
    segCount = segCountX2 // 2

    allCodes = array.array("H")
    allCodes.fromstring(data)
    self.data = data = None  # the raw bytes are not needed any more

    if sys.byteorder != "big":
        allCodes.byteswap()

    # divide the data
    endCode = allCodes[:segCount]
    allCodes = allCodes[segCount + 1:]  # the +1 is skipping the reservedPad field
    startCode = allCodes[:segCount]
    allCodes = allCodes[segCount:]
    idDelta = allCodes[:segCount]
    allCodes = allCodes[segCount:]
    idRangeOffset = allCodes[:segCount]
    glyphIndexArray = allCodes[segCount:]
    lenGIArray = len(glyphIndexArray)

    # build 2-byte character mapping
    charCodes = []
    gids = []
    for i in range(len(startCode) - 1):  # don't do 0xffff!
        rangeCharCodes = range(startCode[i], endCode[i] + 1)
        charCodes.extend(rangeCharCodes)
        for charCode in rangeCharCodes:
            rangeOffset = idRangeOffset[i]
            if rangeOffset == 0:
                glyphID = charCode + idDelta[i]
            else:
                # idRangeOffset is a byte offset counted from its own slot
                # in the idRangeOffset array; turn it into an index into
                # glyphIndexArray, which follows that array.
                index = idRangeOffset[i] // 2 + (charCode - startCode[i]) + i - len(idRangeOffset)
                assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array is not less than the length of the array (%d) !" % (i, index, lenGIArray)
                if glyphIndexArray[index] != 0:  # if not missing glyph
                    glyphID = glyphIndexArray[index] + idDelta[i]
                else:
                    glyphID = 0  # missing glyph
            gids.append(glyphID % 0x10000)

    self.cmap = cmap = {}
    glyphOrder = self.ttFont.getGlyphOrder()
    try:
        names = [glyphOrder[gid] for gid in gids]
    except IndexError:
        # some GID lies beyond the glyph order; ask the font instead
        getGlyphName = self.ttFont.getGlyphName
        names = [getGlyphName(gid) for gid in gids]
    for charCode, glyphName in zip(charCodes, names):
        cmap[charCode] = glyphName
def setIDDelta(self, idDelta):
    """Return *idDelta* wrapped into the signed 16-bit range.

    The result is always between -0x8000 and 0x7FFF inclusive and is
    congruent to *idDelta* modulo 0x10000, so it can be stored in a
    signed-short ("h") array.
    """
    # The idDelta field is a signed short, so only values between -32768
    # and 32767 can be stored.
    # startCode can be anywhere between 0 and 64K-1 and the first glyph index
    # anywhere between 1 and 64K-1, so the raw difference passed in here can
    # range from -(64K-2) up to 64K-1, which does not fit in a short.
    # The final glyph ID is reconstructed from the stored values as
    #     finalGID = (gid + idDelta) % 0x10000
    # so any value congruent to the raw difference modulo 0x10000 yields the
    # same glyph ID.  For example a start code of 0 reaches glyph 64K-1 with
    # an idDelta of -1, and a start code of 64K-1 reaches glyph 1 with an
    # idDelta of 2.
    # Wrapping with modular arithmetic also keeps -0x8000 (a valid short)
    # unchanged instead of turning it into +0x8000, which would overflow.
    return (idDelta + 0x8000) % 0x10000 - 0x8000
1999-12-16 21:34:53 +00:00
def compile(self, ttFont):
    """Serialize this format 4 subtable and return the packed data.

    If the subtable still holds undecompiled data (``self.data`` is truthy)
    that data is re-emitted behind a freshly packed format/length/language
    header.  Otherwise ``self.cmap`` (character code -> glyph name) is
    converted to glyph IDs through *ttFont* and encoded as segments.

    Raises KeyError (carrying the glyph name) when a glyph name cannot be
    resolved to a glyph ID.
    """
    if self.data:
        return struct.pack(">HHH", self.format, self.length, self.language) + self.data

    from fontTools.ttLib.sfnt import maxPowerOfTwo

    charCodes = sorted(self.cmap.keys())
    if not charCodes:
        # Empty mapping: only the closing 0xffff segment is written.
        startCode = [0xffff]
        endCode = [0xffff]
    else:
        names = [self.cmap[code] for code in charCodes]
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            # The reverse map may be stale; rebuild it once and retry.
            nameMap = ttFont.getReverseGlyphMap(rebuild=1)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 4 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if name[:3] == 'gid':
                                # int() rather than eval(): the suffix is
                                # external text, and eval() would read
                                # zero-padded digits as octal.
                                gid = int(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except Exception:
                            raise KeyError(name)
                    gids.append(gid)
        cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

        # Build startCode and endCode lists.
        # Split the char codes in ranges of consecutive char codes, then split
        # each range in more ranges of consecutive/not consecutive glyph IDs.
        # See splitRange().
        lastCode = charCodes[0]
        endCode = []
        startCode = [lastCode]
        for charCode in charCodes[1:]:  # skip the first code, it's the first start code
            if charCode == lastCode + 1:
                lastCode = charCode
                continue
            start, end = splitRange(startCode[-1], lastCode, cmap)
            startCode.extend(start)
            endCode.extend(end)
            startCode.append(charCode)
            lastCode = charCode
        endCode.append(lastCode)
        startCode.append(0xffff)
        endCode.append(0xffff)

    # Build idDelta, idRangeOffset and glyphIndexArray, one entry per segment.
    idDelta = []
    idRangeOffset = []
    glyphIndexArray = []
    for i in range(len(endCode) - 1):  # skip the closing codes (0xffff)
        indices = [cmap[charCode] for charCode in range(startCode[i], endCode[i] + 1)]
        if indices == list(range(indices[0], indices[0] + len(indices))):
            # Glyph IDs are consecutive: a single delta describes the segment.
            idDelta.append(self.setIDDelta(indices[0] - startCode[i]))
            idRangeOffset.append(0)
        else:
            # Non-consecutive glyph IDs: store them in glyphIndexArray and
            # record the byte offset from this idRangeOffset slot to the
            # segment's first entry there.
            idDelta.append(0)
            idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
            glyphIndexArray.extend(indices)
    idDelta.append(1)  # 0xffff + 1 == 0. So this end code maps to .notdef
    idRangeOffset.append(0)

    # Binary-search helper fields of the header.
    segCount = len(endCode)
    segCountX2 = segCount * 2
    maxExponent = maxPowerOfTwo(segCount)
    searchRange = 2 * (2 ** maxExponent)
    entrySelector = maxExponent
    rangeShift = 2 * segCount - searchRange

    # The single 0 between endCode and startCode is the reservedPad field.
    charCodeArray = array.array("H", endCode + [0] + startCode)
    idDeltaeArray = array.array("h", idDelta)
    restArray = array.array("H", idRangeOffset + glyphIndexArray)
    if sys.byteorder != "big":
        # Arrays use host byte order; the packed table is big-endian.
        charCodeArray.byteswap()
        idDeltaeArray.byteswap()
        restArray.byteswap()
    data = charCodeArray.tostring() + idDeltaeArray.tostring() + restArray.tostring()

    length = struct.calcsize(cmap_format_4_format) + len(data)
    header = struct.pack(cmap_format_4_format, self.format, length, self.language,
            segCountX2, searchRange, entrySelector, rangeShift)
    return header + data
1999-12-16 21:34:53 +00:00
def fromXML(self, element, ttFont):
    """Populate this subtable from a parsed XML element.

    *element* is the ``(name, attrs, content)`` tuple of the subtable
    element.  The ``language`` attribute is stored on the subtable and every
    child ``map`` element adds a ``code -> name`` entry to ``self.cmap``
    (created if it does not exist yet).

    Raises AssertionError when a child element is not a ``map`` element.
    """
    # Unpacked here instead of in the signature; callers still pass one tuple.
    name, attrs, content = element
    self.language = safeEval(attrs["language"])
    if not hasattr(self, "cmap"):
        self.cmap = {}
    cmap = self.cmap

    for child in content:
        if not isinstance(child, tuple):
            # Non-tuple items in the content list are skipped.
            continue
        childName, childAttrs, dummyContent = child
        if childName != "map":
            # Raised explicitly so the check is not stripped under -O.
            raise AssertionError("Unrecognized keyword in cmap subtable")
        cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
1999-12-16 21:34:53 +00:00
class cmap_format_6(CmapSubtable):
    """Format 6 cmap subtable.

    The data after the common header holds a first character code, an entry
    count and an array of 16-bit glyph indices; entry *i* of the array is the
    glyph for character code ``firstCode + i``.
    """

    def decompile(self, data, ttFont):
        """Fill ``self.cmap`` (character code -> glyph name) from the subtable data.

        Call with both arguments ``None`` to decode the data previously saved
        by decompileHeader(), or with both *data* (the complete subtable) and
        *ttFont* to parse the header first.
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        firstCode, entryCount = struct.unpack(">HH", data[:4])
        firstCode = int(firstCode)
        data = data[4:]
        #assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
        glyphIndexArray = array.array("H")
        glyphIndexArray.fromstring(data[:2 * int(entryCount)])
        if sys.byteorder != "big":
            # The stored indices are big-endian; the array uses host order.
            glyphIndexArray.byteswap()
        self.data = data = None

        self.cmap = cmap = {}
        lenArray = len(glyphIndexArray)
        charCodes = range(firstCode, firstCode + lenArray)
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in glyphIndexArray]
        except IndexError:
            # Some glyph ID lies outside the glyph order; ask the font for names.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in glyphIndexArray]
        cmap.update(zip(charCodes, names))

    def compile(self, ttFont):
        """Return the packed subtable.

        Undecompiled data is re-emitted behind a rebuilt header.  Otherwise
        every code between the lowest and highest key of ``self.cmap`` is
        written; codes missing from the mapping are encoded as the glyph ID
        of ".notdef".
        """
        if self.data:
            return struct.pack(">HHH", self.format, self.length, self.language) + self.data
        cmap = self.cmap
        # Sorted so that codes[0] / codes[-1] really are the lowest and
        # highest code; dict key order is not guaranteed.
        codes = sorted(cmap.keys())
        if codes:  # yes, there are empty cmap tables.
            codes = list(range(codes[0], codes[-1] + 1))
            firstCode = codes[0]
            valueList = [ttFont.getGlyphID(cmap.get(code, ".notdef")) for code in codes]
            glyphIndexArray = array.array("H", valueList)
            if sys.byteorder != "big":
                glyphIndexArray.byteswap()
            data = glyphIndexArray.tostring()
        else:
            data = ""
            firstCode = 0
        # 10 == size of the five 16-bit header fields packed below.
        header = struct.pack(">HHHHH",
                6, len(data) + 10, self.language, firstCode, len(codes))
        return header + data

    def fromXML(self, element, ttFont):
        """Populate the subtable from a parsed ``(name, attrs, content)`` XML element.

        Stores the ``language`` attribute and adds one ``code -> name`` entry
        to ``self.cmap`` per child ``map`` element; other children are ignored.
        """
        # Unpacked here instead of in the signature; callers still pass one tuple.
        name, attrs, content = element
        self.language = safeEval(attrs["language"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for child in content:
            if not isinstance(child, tuple):
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
1999-12-16 21:34:53 +00:00
2013-10-09 15:55:07 -07:00
class cmap_format_12_or_13(CmapSubtable):
    """Shared implementation of the format 12 and format 13 cmap subtables.

    Both formats store groups of ``(startCharCode, endCharCode, glyphID)``
    packed as three 32-bit values.  Subclasses supply ``_format_step``,
    ``_computeGIDs()`` and ``_IsInSameRun()`` to define how glyph IDs evolve
    inside one group.
    """

    def __init__(self, format):
        self.format = format
        self.reserved = 0
        self.data = None
        self.ttFont = None

    def decompileHeader(self, data, ttFont):
        """Parse the 16-byte header of *data* and keep the rest for later decoding.

        *data* must be the complete subtable: its length has to match both
        the header's length field and the size implied by the group count.
        """
        format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
        assert len(data) == (16 + nGroups * 12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
        self.format = format
        self.reserved = reserved
        self.length = length
        self.language = language
        self.nGroups = nGroups
        self.data = data[16:]
        self.ttFont = ttFont

    def decompile(self, data, ttFont):
        """Fill ``self.cmap`` (character code -> glyph name) from the group records.

        Call with both arguments ``None`` to decode the data previously saved
        by decompileHeader(), or with both *data* (the complete subtable) and
        *ttFont* to parse the header first.
        """
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

        data = self.data  # decompileHeader assigns the data after the header to self.data
        charCodes = []
        gids = []
        pos = 0
        for _ in range(self.nGroups):
            startCharCode, endCharCode, glyphID = struct.unpack(">LLL", data[pos:pos + 12])
            pos += 12
            lenGroup = 1 + endCharCode - startCharCode
            charCodes.extend(range(startCharCode, endCharCode + 1))
            gids.extend(self._computeGIDs(glyphID, lenGroup))
        self.data = data = None

        self.cmap = cmap = {}
        glyphOrder = self.ttFont.getGlyphOrder()
        try:
            names = [glyphOrder[gid] for gid in gids]
        except IndexError:
            # Some glyph ID lies outside the glyph order; ask the font for names.
            getGlyphName = self.ttFont.getGlyphName
            names = [getGlyphName(gid) for gid in gids]
        cmap.update(zip(charCodes, names))

    def compile(self, ttFont):
        """Return the packed subtable.

        Undecompiled data is re-emitted behind a rebuilt header.  Otherwise
        ``self.cmap`` is converted to glyph IDs through *ttFont* and encoded
        as groups; a new group starts whenever ``_IsInSameRun()`` reports
        that a code does not continue the current run.

        Raises KeyError (carrying the glyph name) when a glyph name cannot be
        resolved to a glyph ID.
        """
        if self.data:
            return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data

        charCodes = sorted(self.cmap.keys())
        names = [self.cmap[code] for code in charCodes]
        nameMap = ttFont.getReverseGlyphMap()
        try:
            gids = [nameMap[name] for name in names]
        except KeyError:
            # The reverse map may be stale; rebuild it once and retry.
            nameMap = ttFont.getReverseGlyphMap(rebuild=1)
            try:
                gids = [nameMap[name] for name in names]
            except KeyError:
                # allow virtual GIDs in format 12 tables
                gids = []
                for name in names:
                    try:
                        gid = nameMap[name]
                    except KeyError:
                        try:
                            if name[:3] == 'gid':
                                # int() rather than eval(): the suffix is
                                # external text, and eval() would read
                                # zero-padded digits as octal.
                                gid = int(name[3:])
                            else:
                                gid = ttFont.getGlyphID(name)
                        except Exception:
                            raise KeyError(name)
                    gids.append(gid)

        cmap = dict(zip(charCodes, gids))  # code:glyphID mapping

        startCharCode = charCodes[0]
        startGlyphID = cmap[startCharCode]
        # Seed the "previous" values so that the first code continues the run.
        lastGlyphID = startGlyphID - self._format_step
        lastCharCode = startCharCode - 1
        nGroups = 0
        dataList = []
        for charCode in charCodes:
            glyphID = cmap[charCode]
            if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
                # Close the current group and open a new one at this code.
                dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
                startCharCode = charCode
                startGlyphID = glyphID
                nGroups = nGroups + 1
            lastGlyphID = glyphID
            lastCharCode = charCode
        # Flush the group that was still open when the loop ended.
        dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
        nGroups = nGroups + 1

        data = "".join(dataList)
        lengthSubtable = len(data) + 16
        assert len(data) == (nGroups * 12) == (lengthSubtable - 16)
        return struct.pack(">HHLLL", self.format, self.reserved, lengthSubtable, self.language, nGroups) + data

    def toXML(self, writer, ttFont):
        """Write the subtable header attributes and its code mappings to *writer*."""
        writer.begintag(self.__class__.__name__, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("format", self.format),
                ("reserved", self.reserved),
                ("length", self.length),
                ("language", self.language),
                ("nGroups", self.nGroups),
                ])
        writer.newline()
        codes = sorted(self.cmap.items())
        self._writeCodes(codes, writer)
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Populate the subtable from a parsed ``(name, attrs, content)`` XML element.

        Stores the header attributes and adds one ``code -> name`` entry to
        ``self.cmap`` per child ``map`` element; other children are ignored.
        """
        # Unpacked here instead of in the signature; callers still pass one tuple.
        name, attrs, content = element
        self.format = safeEval(attrs["format"])
        self.reserved = safeEval(attrs["reserved"])
        self.length = safeEval(attrs["length"])
        self.language = safeEval(attrs["language"])
        self.nGroups = safeEval(attrs["nGroups"])
        if not hasattr(self, "cmap"):
            self.cmap = {}
        cmap = self.cmap

        for child in content:
            if not isinstance(child, tuple):
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
2003-02-08 10:45:23 +00:00
2013-10-09 15:55:07 -07:00
class cmap_format_12(cmap_format_12_or_13):
    """Format 12 flavour: glyph IDs increase by one along with the character codes of a group."""

    def __init__(self, format):
        cmap_format_12_or_13.__init__(self, format)
        # Glyph ID difference between two neighbouring codes of one run.
        self._format_step = 1

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        """Return the *numberOfGlyphs* consecutive glyph IDs beginning at *startingGlyph*."""
        stop = startingGlyph + numberOfGlyphs
        return range(startingGlyph, stop)

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        """Tell whether both the glyph ID and the character code advanced by exactly one."""
        if glyphID != 1 + lastGlyphID:
            return False
        return charCode == 1 + lastCharCode
class cmap_format_13(cmap_format_12_or_13):
    """Format 13 flavour: every character code of a group maps to the same glyph ID."""

    def __init__(self, format):
        cmap_format_12_or_13.__init__(self, format)
        # Glyph ID difference between two neighbouring codes of one run.
        self._format_step = 0

    def _computeGIDs(self, startingGlyph, numberOfGlyphs):
        """Return *startingGlyph* repeated *numberOfGlyphs* times."""
        return numberOfGlyphs * [startingGlyph]

    def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
        """Tell whether the glyph ID is unchanged while the character code advanced by one."""
        if glyphID != lastGlyphID:
            return False
        return charCode == 1 + lastCharCode
2008-05-16 15:07:09 +00:00
def cvtToUVS(threeByteString):
    """Convert a 3-byte big-endian string to its integer value."""
    # The value is unpacked as explicit big-endian (">L"), so the zero pad
    # byte always goes in front, whatever the host byte order is.
    # The pad is produced by struct so it has the same string type as the input.
    pad = struct.pack(">B", 0)
    val, = struct.unpack(">L", pad + threeByteString)
    return val
def cvtFromUVS(val):
    """Convert an integer to its 3-byte big-endian string (inverse of cvtToUVS)."""
    # The value is packed as explicit big-endian (">L"), so the unused high
    # byte is always the first one, whatever the host byte order is.
    return struct.pack(">L", val)[1:]
def cmpUVSListEntry(first, second):
    """cmp-style ordering for ``(uv, glyphName)`` entries.

    Entries whose glyph name is None sort before entries with a name;
    otherwise entries are ordered by uv and then by glyph name.
    Returns a negative, zero or positive number.
    """
    uv1, glyphName1 = first
    uv2, glyphName2 = second

    firstUnnamed = glyphName1 is None
    secondUnnamed = glyphName2 is None
    if firstUnnamed != secondUnnamed:
        # Exactly one of the two entries has no glyph name: it goes first.
        if firstUnnamed:
            return -1
        return 1

    # Compare by uv; fall back to the glyph name only on a tie.
    return cmp(uv1, uv2) or cmp(glyphName1, glyphName2)
class cmap_format_14(CmapSubtable):
    """Format 14 cmap subtable (variation selector records).

    The decoded content lives in ``self.uvsDict``: variation selector ->
    list of ``(uv, glyphName)`` entries, where a glyph name of None marks an
    entry read from the selector's default list.  ``self.cmap`` is kept as an
    empty dict for clients that expect the attribute.
    """

    def decompileHeader(self, data, ttFont):
        """Parse the 10-byte header of *data* and keep the rest for later decoding."""
        # The format field is read but not stored here.
        format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
        self.data = data[10:]
        self.length = length
        self.numVarSelectorRecords = numVarSelectorRecords
        self.ttFont = ttFont
        self.language = 0xFF  # has no language.

    def decompile(self, data, ttFont):
        """Fill ``self.uvsDict`` from the variation selector records.

        Call with both arguments ``None`` to decode the data previously saved
        by decompileHeader(), or with both *data* (the complete subtable) and
        *ttFont* to parse the header first.
        """
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"
        data = self.data

        self.cmap = {}  # so that clients that expect this to exist in a cmap table won't fail.
        uvsDict = {}
        recOffset = 0
        for _ in range(self.numVarSelectorRecords):
            uvs, defOVSOffset, nonDefUVSOffset = struct.unpack(">3sLL", data[recOffset:recOffset + 11])
            recOffset += 11
            varUVS = cvtToUVS(uvs)

            if defOVSOffset:
                # Stored offsets count from the subtable start; `data` begins
                # after the 10-byte header.
                startOffset = defOVSOffset - 10
                numValues, = struct.unpack(">L", data[startOffset:startOffset + 4])
                startOffset += 4
                for _ in range(numValues):
                    uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset + 4])
                    startOffset += 4
                    firstBaseUV = cvtToUVS(uv)
                    cnt = addtlCnt + 1
                    # Entries from this list carry no glyph name.
                    localUVList = [(baseUV, None) for baseUV in range(firstBaseUV, firstBaseUV + cnt)]
                    uvsDict.setdefault(varUVS, []).extend(localUVList)

            if nonDefUVSOffset:
                startOffset = nonDefUVSOffset - 10
                numRecs, = struct.unpack(">L", data[startOffset:startOffset + 4])
                startOffset += 4
                localUVList = []
                for _ in range(numRecs):
                    uv, gid = struct.unpack(">3sH", data[startOffset:startOffset + 5])
                    startOffset += 5
                    uv = cvtToUVS(uv)
                    glyphName = self.ttFont.getGlyphName(gid)
                    localUVList.append([uv, glyphName])
                uvsDict.setdefault(varUVS, []).extend(localUVList)

        self.uvsDict = uvsDict

    def toXML(self, writer, ttFont):
        """Write the subtable attributes and one ``map`` element per entry to *writer*.

        Each selector's entry list is sorted in place before it is written.
        """
        writer.begintag(self.__class__.__name__, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("format", self.format),
                ("length", self.length),
                ("numVarSelectorRecords", self.numVarSelectorRecords),
                ])
        writer.newline()
        uvsDict = self.uvsDict
        for uvs in sorted(uvsDict.keys()):
            uvList = uvsDict[uvs]
            uvList.sort(cmpUVSListEntry)
            for uv, gname in uvList:
                if gname is None:
                    gname = "None"
                # I use the arg rather than the keyword syntax in order to preserve the attribute order.
                writer.simpletag("map", [("uvs", hex(uvs)), ("uv", hex(uv)), ("name", gname)])
                writer.newline()
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Populate the subtable from a parsed ``(name, attrs, content)`` XML element.

        Stores the header attributes and appends one ``[uv, name]`` entry to
        ``self.uvsDict[uvs]`` per child ``map`` element; the literal name
        "None" is turned back into None.  Other children are ignored.
        """
        # Unpacked here instead of in the signature; callers still pass one tuple.
        name, attrs, content = element
        self.format = safeEval(attrs["format"])
        self.length = safeEval(attrs["length"])
        self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
        self.language = 0xFF  # provide a value so that CmapSubtable.__cmp__() won't fail
        if not hasattr(self, "cmap"):
            self.cmap = {}  # so that clients that expect this to exist in a cmap table won't fail.
        if not hasattr(self, "uvsDict"):
            self.uvsDict = {}
        uvsDict = self.uvsDict

        for child in content:
            if not isinstance(child, tuple):
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            uvs = safeEval(childAttrs["uvs"])
            uv = safeEval(childAttrs["uv"])
            gname = childAttrs["name"]
            if gname == "None":
                gname = None
            uvsDict.setdefault(uvs, []).append([uv, gname])

    def compile(self, ttFont):
        """Return the packed subtable.

        Undecompiled data is re-emitted behind a rebuilt header.  Otherwise
        ``self.uvsDict`` is encoded: per selector, entries without a glyph
        name become run-length records and entries with a glyph name become
        ``(uv, glyphID)`` records.  ``self.length`` and
        ``self.numVarSelectorRecords`` are updated as a side effect.
        """
        if self.data:
            return struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords) + self.data

        uvsDict = self.uvsDict
        uvsList = sorted(uvsDict.keys())
        self.numVarSelectorRecords = len(uvsList)
        offset = 10 + self.numVarSelectorRecords * 11  # current value is end of VarSelectorRecords block.
        data = []
        varSelectorRecords = []
        for uvs in uvsList:
            entryList = uvsDict[uvs]

            defList = [entry[0] for entry in entryList if entry[1] is None]
            if defList:
                defOVSOffset = offset
                defList.sort()
                lastUV = defList[0]
                cnt = -1
                defRecs = []
                for defEntry in defList:
                    cnt += 1
                    # Close the run when the sequence breaks, or when the
                    # one-byte additional count (cnt - 1) would exceed 255.
                    if (lastUV + cnt) != defEntry or cnt > 255:
                        rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt - 1)
                        lastUV = defEntry
                        defRecs.append(rec)
                        cnt = 0
                # Flush the run that was still open when the loop ended.
                rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt)
                defRecs.append(rec)

                numDefRecs = len(defRecs)
                data.append(struct.pack(">L", numDefRecs))
                data.extend(defRecs)
                offset += 4 + numDefRecs * 4
            else:
                defOVSOffset = 0

            ndefList = [entry for entry in entryList if entry[1] is not None]
            if ndefList:
                nonDefUVSOffset = offset
                ndefList.sort()
                numNonDefRecs = len(ndefList)
                data.append(struct.pack(">L", numNonDefRecs))
                offset += 4 + numNonDefRecs * 5

                for uv, gname in ndefList:
                    gid = ttFont.getGlyphID(gname)
                    ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
                    data.append(ndrec)
            else:
                nonDefUVSOffset = 0

            vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
            varSelectorRecords.append(vrec)

        data = "".join(varSelectorRecords) + "".join(data)
        self.length = 10 + len(data)
        headerdata = struct.pack(">HLL", self.format, self.length, self.numVarSelectorRecords)
        # The result is deliberately not stored in self.data: the early-return
        # path above prepends a header to self.data, so caching header + body
        # there would emit the header twice on the next call.
        return headerdata + data
1999-12-16 21:34:53 +00:00
class cmap_format_unknown(CmapSubtable):
    """Fallback for subtable formats without a dedicated class.

    The subtable is kept as raw data: dumped as hex to XML, read back from
    hex, and returned unchanged on compile.
    """

    def toXML(self, writer, ttFont):
        """Write the raw data as a hex dump inside a tag named after the format."""
        # The first 12 characters of the class name are "cmap_format_".
        cmapName = self.__class__.__name__[:12] + str(self.format)
        writer.begintag(cmapName, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ])
        writer.newline()
        writer.dumphex(self.data)
        writer.endtag(cmapName)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Restore the raw data from the hex content of a ``(name, attrs, content)`` element."""
        # Unpacked here instead of in the signature; callers still pass one tuple.
        name, attrs, content = element
        self.data = readHex(content)
        self.cmap = {}

    def decompileHeader(self, data, ttFont):
        """Keep *data* untouched; no header fields are parsed for unknown formats."""
        self.language = 0  # dummy value
        self.data = data

    def decompile(self, data, ttFont):
        """Store *data* when called directly; nothing is decoded for unknown formats."""
        # we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
        # If not, someone is calling the subtable decompile() directly, and must provide both args.
        if data is not None and ttFont is not None:
            self.decompileHeader(data, ttFont)
        else:
            assert (data is None and ttFont is None), "Need both data and ttFont arguments"

    def compile(self, ttFont):
        """Return the stored raw data, or None when there is none."""
        if self.data:
            return self.data
        else:
            return None
1999-12-16 21:34:53 +00:00
# Registry of subtable classes keyed by the numeric cmap subtable format.
# Formats that are not listed here are handled by cmap_format_unknown.
cmap_classes = dict([
    (0, cmap_format_0),
    (2, cmap_format_2),
    (4, cmap_format_4),
    (6, cmap_format_6),
    (12, cmap_format_12),
    (13, cmap_format_13),
    (14, cmap_format_14),
])