Another patch from rroberts. He writes:

"""It adds full support for cmap format 2, which is what the Adobe CJK fonts use for the Mac cmap subtable.""" git-svn-id: svn://svn.code.sf.net/p/fonttools/code/trunk@434 4cde692c-a291-49d1-8350-778aa11640f8
2003-08-28 18:04:23 +00:00 · 2003-08-28 18:04:23 +00:00 · bafa66e665
commit bafa66e665
parent ceb3449e2b
1 changed files with 227 additions and 2 deletions
--- a/Lib/fontTools/ttLib/tables/_c_m_a_p.py
+++ b/Lib/fontTools/ttLib/tables/_c_m_a_p.py
@ -163,15 +163,240 @@ class cmap_format_0(CmapSubtable):
 			self.cmap[safeEval(attrs["code"])] = attrs["name"]


+subHeaderFormat = ">HHhH"
+class SubHeader:
+	def __init__(self):
+		self.firstCode = None
+		self.entryCount = None
+		self.idDelta = None
+		self.idRangeOffset = None
+		self.glyphIndexArray = []
+		
 class cmap_format_2(CmapSubtable):
 	
 	def decompile(self, data, ttFont):
 		format, length, version = struct.unpack(">HHH", data[:6])
 		self.version = int(version)
-		self.data = data
+		data = data[6:]
+		subHeaderKeys = []
+		maxSubHeaderindex = 0
+		
+		# get the key array, and determine the number of subHeaders.
+		for i in range(256):
+			key = struct.unpack(">H", data[:2])[0]
+			value = int(key)/8
+			if value > maxSubHeaderindex:
+				maxSubHeaderindex  = value
+			data = data[2:]
+			subHeaderKeys.append(value)
 	
+		#Load subHeaders
+		subHeaderList = []
+		for i in range(maxSubHeaderindex + 1):
+			subHeader = SubHeader()
+			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
+				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[:8])
+			data = data[8:]
+			giData = data[subHeader.idRangeOffset-2:]
+			for j in range(subHeader.entryCount):
+				gi = struct.unpack(">H", giData[:2])[0]
+				giData = giData[2:]
+				subHeader.glyphIndexArray.append(int(gi))
+			 		
+			subHeaderList.append(subHeader)
+		
+		# How this gets processed. 
+		# Charcodes may be one or two bytes.
+		# The first byte of a charcode is mapped through the  subHeaderKeys, to select
+		# a subHeader. For any subheader but 0, the next byte is then mapped through the
+		# selected subheader. If subheader Index 0 is selected, then the byte itself is 
+		# mapped through the subheader, and there is no second byte.
+		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
+		# 
+		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
+		# The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray
+		# referenced by another subheader.
+		# The only subheader that will be referenced by more than one first-byte value is the subheader
+		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
+		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
+		# A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
+		# A subheader specifies a subrange within (0...256) by the
+		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
+		# (e.g. glyph not in font).
+		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
+		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by 
+		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
+		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
+		# Example for Logocut-Medium
+		# first byte of charcode = 129; selects subheader 1.
+		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
+		# second byte of charCode = 66
+		# the index offset = 66-64 = 2.
+		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
+		# [glyphIndexArray index], [subrange array index] = glyphIndex
+		# [256], [0]=1 	from charcode [129, 64]
+		# [257], [1]=2  	from charcode [129, 65]
+		# [258], [2]=3  	from charcode [129, 66]
+		# [259], [3]=4  	from charcode [129, 67]
+		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero, add it to the glyphInex to get the final glyphIndex
+		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!
+		# Has anyone ever really tried to overlap the subHeader subranges in the glyphIndexArray? I doubt it!
+		
+		self.data = ""
+		self.cmap = {}
+		for firstByte in range(256):
+			subHeadindex = subHeaderKeys[firstByte]
+			subHeader = subHeaderList[subHeadindex]
+			if subHeadindex == 0:
+				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
+					gi = 0
+				else:
+					charCode = firstByte
+					offsetIndex = firstByte - subHeader.firstCode
+					gi = subHeader.glyphIndexArray[offsetIndex]
+					if gi != 0:
+						gi = gi + subHeader.idDelta
+				gName = ttFont.getGlyphName(gi)
+				self.cmap[charCode] = gName
+			else:
+				if subHeader.entryCount:
+					for offsetIndex in range(subHeader.entryCount):
+						charCode = firstByte * 256 + offsetIndex + subHeader.firstCode
+						gi = subHeader.glyphIndexArray[offsetIndex]
+						if gi != 0:
+							gi = gi + subHeader.idDelta
+						gName = ttFont.getGlyphName(gi)
+						self.cmap[charCode] = gName
+				else:
+					# Is a subHead that maps to .notdef. We do need to record it, so we can later
+					# know that this firstByte value is the initial byte of a two byte charcode,
+					# as opposed to a sing byte charcode.
+					charCode = firstByte * 256
+					gName = ttFont.getGlyphName(0)
+					self.cmap[charCode] = gName
+		
+		
 	def compile(self, ttFont):
-		return self.data
+		kEmptyTwoCharCodeRange = -1
+		items = self.cmap.items()
+		items.sort()
+
+		# All one-byte code values map through the subHeaderKeys table to subheader 0.
+		# Assume that all entries in the subHeaderKeys table are one-byte codes unless proven otherwise.
+		subHeaderKeys = [ 0 for x in  range(256)] 
+		subHeaderList = []
+		
+		lastFirstByte = -1
+		for item in items:
+			charCode = item[0]
+			firstbyte = charCode >> 8
+			secondByte = charCode & 0x00FF
+			gi = ttFont.getGlyphID(item[1])
+			if firstbyte != lastFirstByte:
+				if lastFirstByte > -1:
+					# fix GI's and iDelta of last subheader.
+					subHeader.idDelta = 0
+					if subHeader.entryCount > 0:
+						minGI = min(subHeader.glyphIndexArray) -1
+						if minGI > 0:
+							subHeader.idDelta = minGI
+							for i in range(subHeader.entryCount):
+								subHeader.glyphIndexArray[i] = subHeader.glyphIndexArray[i] - minGI
+					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
+				# init new subheader
+				subHeader = SubHeader()
+				subHeader.firstCode = secondByte
+				if (secondByte == 0) and ( gi==0 ) and (lastFirstByte > -1): # happens only when the font has no glyphs in the this charcpde range.
+					subHeader.entryCount = 0
+					subHeaderKeys[firstbyte] = kEmptyTwoCharCodeRange
+				else:
+					subHeader.entryCount = 1
+					subHeader.glyphIndexArray.append(gi)
+					subHeaderList.append(subHeader)
+					subHeaderKeys[firstbyte] = len(subHeaderList) -1
+				lastFirstByte = firstbyte
+			else:
+				assert (subHeader.entryCount != 0), "Error: we should never see another entry for an empty 2 byte charcode range."
+				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
+				for i in range(codeDiff):
+					subHeader.glyphIndexArray.append(0)
+				subHeader.glyphIndexArray.append(gi)
+				subHeader.entryCount = subHeader.entryCount + codeDiff + 1
+		# fix GI's and iDelta of last subheader.
+		subHeader.idDelta = 0
+		if subHeader.entryCount > 0:
+			minGI = min(subHeader.glyphIndexArray) -1
+			if minGI > 0:
+				subHeader.idDelta = minGI
+				for i in range(subHeader.entryCount):
+					subHeaderList[i] = subHeaderList[i] - minGI
+
+		# Now we add a last subheader for the subHeaderKeys which mapped to empty two byte charcode ranges.
+		subHeader = SubHeader()
+		subHeader.firstCode = 0
+		subHeader.entryCount = 0
+		subHeader.idDelta = 0
+		subHeader.idRangeOffset = 2
+		subHeaderList.append(subHeader)
+		emptySubheadIndex = len(subHeaderList) - 1
+		for index in range(256):
+			if subHeaderKeys[index] < 0:
+				subHeaderKeys[index] = emptySubheadIndex
+		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
+		# idRangeOffset word of this subHeader. we can safely point to the first entry in the GlyphIndexArray,
+		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with 
+		# charcode 0 and GID 0.
+		
+		# I am not going to try and optimise by trying to overlap the glyphIDArray subranges of the subheaders -
+		# I will just write them out sequentially.
+		idRangeOffset = (len(subHeaderList)-1)*8  + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
+		for subHeader in subHeaderList[:-1]: # skip last special empty-set subheader
+			subHeader.idRangeOffset = idRangeOffset
+			idRangeOffset = (idRangeOffset -8) + subHeader.entryCount*2 # one less subheader, one more subRange.
+		
+		# Now we can write out the data!
+		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
+		for subhead in 	subHeaderList[:-1]:
+			length = length + subhead.entryCount*2
+		data = struct.pack(">HHH", 2, length, self.version)
+		for index in subHeaderKeys:
+			data = data + struct.pack(">H", index*8)
+		for subhead in 	subHeaderList:
+			data = data + struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset)
+		for subhead in 	subHeaderList[:-1]:
+			for gi in subhead.glyphIndexArray:
+				data = data + struct.pack(">H", gi)
+			
+		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
+		return data
+		
+
+
+	def toXML(self, writer, ttFont):
+		writer.begintag(self.__class__.__name__, [
+				("platformID", self.platformID),
+				("platEncID", self.platEncID),
+				("version", self.version),
+				])
+		writer.newline()
+		items = self.cmap.items()
+		items.sort()
+		for code, name in items:
+			writer.simpletag("map", code=hex(code), name=name)
+			writer.newline()
+		writer.endtag(self.__class__.__name__)
+		writer.newline()
+	
+	def fromXML(self, (name, attrs, content), ttFont):
+		self.version = safeEval(attrs["version"])
+		self.cmap = {}
+		for element in content:
+			if type(element) <> TupleType:
+				continue
+			name, attrs, content = element
+			if name <> "map":
+				continue
+			self.cmap[safeEval(attrs["code"])] = attrs["name"]


 cmap_format_4_format = ">7H"