Another patch from rroberts. He writes:

"""It adds full support for cmap format 2, which is what
the Adobe CJK fonts use for the Mac cmap subtable."""


git-svn-id: svn://svn.code.sf.net/p/fonttools/code/trunk@434 4cde692c-a291-49d1-8350-778aa11640f8
This commit is contained in:
jvr 2003-08-28 18:04:23 +00:00
parent ceb3449e2b
commit bafa66e665

View File

@ -163,15 +163,240 @@ class cmap_format_0(CmapSubtable):
self.cmap[safeEval(attrs["code"])] = attrs["name"]
# struct layout of one cmap format 2 subheader, big-endian, in field order:
# firstCode (H), entryCount (H), idDelta (h, the only signed field) and
# idRangeOffset (H).  Used by cmap_format_2 to unpack and pack subheaders.
subHeaderFormat = ">HHhH"
class SubHeader:
    """Plain record for one subheader of a cmap format 2 subtable.

    firstCode, entryCount, idDelta and idRangeOffset are the four fields
    described by subHeaderFormat; they stay None until the subheader is
    parsed or built.  glyphIndexArray is the list of glyph indices that
    belongs to this subheader.
    """

    def __init__(self):
        # Header fields: not known yet.
        self.firstCode = self.entryCount = None
        self.idDelta = self.idRangeOffset = None
        # A fresh list per instance; entries are appended by the owner.
        self.glyphIndexArray = []
class cmap_format_2(CmapSubtable):
    """cmap subtable format 2: mixed one-byte and two-byte character codes.

    decompile() converts the binary subtable into self.cmap, a dict mapping
    character codes to glyph names, and compile() rebuilds the binary
    subtable from self.cmap.  A one-byte code is stored as its byte value, a
    two-byte code as firstByte * 256 + secondByte.  A first byte that starts
    two-byte codes but has no glyphs at all is recorded as firstByte * 256
    mapped to glyph 0, so that compile() can still tell it apart from a
    one-byte code.
    """

    def decompile(self, data, ttFont):
        """Parse the binary subtable in data; set self.version and self.cmap.

        ttFont.getGlyphName() is used to turn glyph indices into glyph names.
        struct.error is raised when data is too short for the header, the key
        array, a subheader or a glyph index subrange.
        """
        version = struct.unpack(">HHH", data[:6])[2]
        self.version = int(version)
        # The raw bytes stay on the instance while parsing; they are cleared
        # below once the subheaders have been read.
        self.data = data
        data = data[6:]

        # Get the key array, and determine the number of subheaders.  Each key
        # is the byte offset (index * 8) of the subheader for that first byte;
        # floor division keeps the indices integers.
        subHeaderKeys = [key // 8 for key in struct.unpack(">256H", data[:512])]
        maxSubHeaderindex = max(subHeaderKeys)
        data = data[512:]

        # Load the subheaders.
        subHeaderList = []
        for i in range(maxSubHeaderindex + 1):
            subHeader = SubHeader()
            (subHeader.firstCode, subHeader.entryCount, subHeader.idDelta,
                    subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[:8])
            data = data[8:]
            # idRangeOffset is counted from the idRangeOffset word itself,
            # which lies 2 bytes before the end of the subheader just consumed.
            giData = data[subHeader.idRangeOffset - 2:]
            entryCount = subHeader.entryCount
            subHeader.glyphIndexArray = list(
                    struct.unpack(">%dH" % entryCount, giData[:2 * entryCount]))
            subHeaderList.append(subHeader)

        # How this gets processed.
        # Charcodes may be one or two bytes.
        # The first byte of a charcode is mapped through the subHeaderKeys, to
        # select a subheader.  For any subheader but 0, the next byte is then
        # mapped through the selected subheader.  If subheader index 0 is
        # selected, then the byte itself is mapped through the subheader, and
        # there is no second byte.  Then assume that the subsequent byte is the
        # first byte of the next charcode, and repeat.
        #
        # Each subheader references a range in the glyphIndexArray whose length
        # is entryCount.  The range referenced by one subheader may overlap
        # with the range referenced by another subheader.
        # The only subheader that will be referenced by more than one
        # first-byte value is the subheader that maps the entire range of
        # values to glyph index 0, e.g. notdef:
        #     {firstCode 0, entryCount 0, idDelta 0, idRangeOffset xx}
        # A byte being mapped through a subheader is treated as an index into a
        # mapping of array index to font glyph index.  A subheader specifies a
        # subrange within (0...256) by the firstCode and entryCount values.  If
        # the byte value is outside the subrange, then the glyph index is zero
        # (e.g. glyph not in font).
        # If the byte is in the subrange, then an offset index is calculated as
        # (byte - firstCode).  The index to glyph index mapping is a subrange
        # of the glyphIndexArray.  You find the start of the subrange by
        # counting idRangeOffset bytes from the idRangeOffset word.  The first
        # value in this subrange is the glyph index for the byte firstCode.
        # The offset index is then used in this subrange to get the glyph index.
        # Example for Logocut-Medium:
        #   first byte of charcode = 129; selects subheader 1.
        #   subheader 1 = {firstCode 64, entryCount 108, idDelta 42, idRangeOffset 0252}
        #   second byte of charcode = 66
        #   the offset index = 66 - 64 = 2.
        #   The subrange of the glyphIndexArray starting at 0x0252 bytes from
        #   the idRangeOffset word is:
        #     [glyphIndexArray index], [subrange array index] = glyph index
        #     [256], [0] = 1   from charcode [129, 64]
        #     [257], [1] = 2   from charcode [129, 65]
        #     [258], [2] = 3   from charcode [129, 66]
        #     [259], [3] = 4   from charcode [129, 67]
        #   So the glyph index from the array is 3.  If that value is not
        #   zero, idDelta is added to it (modulo 65536) to get the final glyph
        #   index: 3 + 42 -> 45.  Whew!
        # Has anyone ever really tried to overlap the subheader subranges in
        # the glyphIndexArray?  I doubt it!

        self.data = ""
        self.cmap = cmap = {}
        for firstByte in range(256):
            subHeadindex = subHeaderKeys[firstByte]
            subHeader = subHeaderList[subHeadindex]
            if subHeadindex == 0:
                # One-byte code: the byte itself goes through subheader 0.
                # Bytes outside its subrange are not in the font and are not
                # recorded.
                offsetIndex = firstByte - subHeader.firstCode
                if 0 <= offsetIndex < subHeader.entryCount:
                    cmap[firstByte] = self._glyphNameAt(subHeader, offsetIndex, ttFont)
            elif subHeader.entryCount:
                firstCharCode = firstByte * 256 + subHeader.firstCode
                for offsetIndex in range(subHeader.entryCount):
                    cmap[firstCharCode + offsetIndex] = self._glyphNameAt(
                            subHeader, offsetIndex, ttFont)
            else:
                # A subheader that maps everything to .notdef.  We do need to
                # record it, so we can later know that this firstByte value is
                # the initial byte of a two-byte charcode, as opposed to a
                # single-byte charcode.
                cmap[firstByte * 256] = ttFont.getGlyphName(0)

    def _glyphNameAt(self, subHeader, offsetIndex, ttFont):
        """Return the glyph name for entry offsetIndex of subHeader's subrange."""
        gi = subHeader.glyphIndexArray[offsetIndex]
        if gi != 0:
            # idDelta arithmetic is modulo 65536; a stored 0 always means the
            # missing glyph and is never shifted.
            gi = (gi + subHeader.idDelta) % 0x10000
        return ttFont.getGlyphName(gi)

    def _setIdDelta(self, subHeader):
        """Choose idDelta for a finished subheader and rebase its glyph indices.

        When the subrange contains no 0 entries, the glyph indices are shifted
        down so that the smallest becomes 1 (or as close to that as a signed
        16-bit idDelta allows), and the shift is stored in idDelta.
        """
        subHeader.idDelta = 0
        if subHeader.entryCount > 0:
            minGI = min(subHeader.glyphIndexArray) - 1
            if minGI > 0:
                # idDelta is packed as a signed short, so cap the shift; the
                # rebased values stay >= 1 and therefore are never mistaken
                # for the "missing glyph" value 0.
                delta = min(minGI, 0x7FFF)
                subHeader.idDelta = delta
                subHeader.glyphIndexArray = [
                        gi - delta for gi in subHeader.glyphIndexArray]
        assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."

    def compile(self, ttFont):
        """Build and return the binary subtable from self.cmap and self.version.

        ttFont.getGlyphID() is used to turn glyph names into glyph indices.
        An AssertionError is raised when self.cmap marks a first byte as an
        empty two-byte range (firstByte * 256 mapped to glyph 0) and also
        holds further codes with that first byte.
        """
        kEmptyTwoCharCodeRange = -1
        items = list(self.cmap.items())
        items.sort()

        # All one-byte code values map through the subHeaderKeys table to
        # subheader 0.  Assume that all entries in the subHeaderKeys table are
        # one-byte codes unless proven otherwise.
        subHeaderKeys = [0] * 256
        subHeaderList = []
        if not items or items[0][0] > 0xFF:
            # There are no one-byte codes.  Index 0 must still belong to the
            # one-byte subheader, so reserve it with an empty subheader rather
            # than letting the first two-byte range take it.
            subHeader = SubHeader()
            subHeader.firstCode = 0
            subHeader.entryCount = 0
            subHeader.idDelta = 0
            subHeaderList.append(subHeader)

        subHeader = None  # the subheader currently being filled, if any
        lastFirstByte = -1
        for charCode, glyphName in items:
            firstbyte = charCode >> 8
            secondByte = charCode & 0x00FF
            gi = ttFont.getGlyphID(glyphName)
            if firstbyte != lastFirstByte:
                if subHeader is not None:
                    # Fix the glyph indices and idDelta of the previous subheader.
                    self._setIdDelta(subHeader)
                # Init a new subheader.
                subHeader = SubHeader()
                subHeader.firstCode = secondByte
                if (secondByte == 0) and (gi == 0) and (lastFirstByte > -1):
                    # Happens only when the font has no glyphs in this
                    # charcode range.  Such ranges all share one empty
                    # subheader, which is added after the loop.
                    subHeader.entryCount = 0
                    subHeaderKeys[firstbyte] = kEmptyTwoCharCodeRange
                else:
                    subHeader.entryCount = 1
                    subHeader.glyphIndexArray.append(gi)
                    subHeaderList.append(subHeader)
                    subHeaderKeys[firstbyte] = len(subHeaderList) - 1
                lastFirstByte = firstbyte
            else:
                assert (subHeader.entryCount != 0), "Error: we should never see another entry for an empty 2 byte charcode range."
                # Pad any gap between the previous code and this one with 0
                # (glyph not in font), then add this code's glyph index.
                codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
                subHeader.glyphIndexArray.extend([0] * codeDiff)
                subHeader.glyphIndexArray.append(gi)
                subHeader.entryCount = subHeader.entryCount + codeDiff + 1
        if subHeader is not None:
            # Fix the glyph indices and idDelta of the last subheader.
            self._setIdDelta(subHeader)

        # Now we add a last subheader for the subHeaderKeys which mapped to
        # empty two-byte charcode ranges.
        # Since this is the last subheader, the glyph index array starts two
        # bytes after the start of its idRangeOffset word.  Its entryCount is
        # 0, so no entry is ever read through this offset.
        subHeader = SubHeader()
        subHeader.firstCode = 0
        subHeader.entryCount = 0
        subHeader.idDelta = 0
        subHeader.idRangeOffset = 2
        subHeaderList.append(subHeader)
        emptySubheadIndex = len(subHeaderList) - 1
        for index in range(256):
            if subHeaderKeys[index] < 0:
                subHeaderKeys[index] = emptySubheadIndex

        # I am not going to try and optimise by trying to overlap the glyph
        # index subranges of the subheaders - I will just write them out
        # sequentially.
        # Offset to the beginning of the glyph index array from the
        # idRangeOffset word of the first subheader:
        idRangeOffset = (len(subHeaderList) - 1) * 8 + 2
        for subHeader in subHeaderList[:-1]:  # skip last special empty-set subheader
            subHeader.idRangeOffset = idRangeOffset
            # One less subheader to skip, one more subrange to skip.
            idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount * 2

        # Now we can write out the data!
        # Header, 256 subHeaderKeys, subheader array, then the subranges.
        length = 6 + 512 + 8 * len(subHeaderList)
        for subHeader in subHeaderList[:-1]:
            length = length + subHeader.entryCount * 2

        data = struct.pack(">HHH", 2, length, self.version)
        data = data + struct.pack(">256H", *[index * 8 for index in subHeaderKeys])
        for subHeader in subHeaderList:
            data = data + struct.pack(subHeaderFormat, subHeader.firstCode,
                    subHeader.entryCount, subHeader.idDelta, subHeader.idRangeOffset)
        for subHeader in subHeaderList[:-1]:
            glyphIndices = subHeader.glyphIndexArray
            data = data + struct.pack(">%dH" % len(glyphIndices), *glyphIndices)
        assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
        return data

    def toXML(self, writer, ttFont):
        """Write the subtable to writer as one element named after this class.

        The element carries platformID, platEncID and version attributes and
        holds one <map code=... name=.../> child per character code, in
        ascending code order.
        """
        writer.begintag(self.__class__.__name__, [
                ("platformID", self.platformID),
                ("platEncID", self.platEncID),
                ("version", self.version),
                ])
        writer.newline()
        items = list(self.cmap.items())
        items.sort()
        for code, name in items:
            writer.simpletag("map", code=hex(code), name=name)
            writer.newline()
        writer.endtag(self.__class__.__name__)
        writer.newline()

    def fromXML(self, element, ttFont):
        """Rebuild self.version and self.cmap from a parsed XML element.

        element is the (name, attrs, content) tuple of the subtable element.
        Children that are not tuples, or that are not <map> elements, are
        ignored.
        """
        name, attrs, content = element
        self.version = safeEval(attrs["version"])
        self.cmap = {}
        for child in content:
            if not isinstance(child, tuple):
                continue
            childName, childAttrs, childContent = child
            if childName != "map":
                continue
            self.cmap[safeEval(childAttrs["code"])] = childAttrs["name"]
# struct format string: seven big-endian unsigned 16-bit values.
# NOTE(review): presumably the fixed-size header of a cmap format 4 subtable;
# the code that uses it lies outside this view - confirm there.
cmap_format_4_format = ">7H"