From ca4c45681ef2ea9290c6845f8bf61dff281fc5c4 Mon Sep 17 00:00:00 2001
From: jvr <jvr@4cde692c-a291-49d1-8350-778aa11640f8>
Date: Wed, 1 May 2002 21:06:11 +0000
Subject: [PATCH] Completely revised the XML import code: - use expat instead of
 xmlproc - minor fixes here and there

Fixed bug in hmtx/vmtx code that only occurred if all advances were equal.

FontTools now officially requires Python 2.0 or up, due to expat and unicode.


git-svn-id: svn://svn.code.sf.net/p/fonttools/code/trunk@179 4cde692c-a291-49d1-8350-778aa11640f8
---
 Lib/fontTools/ttLib/__init__.py        |  27 +---
 Lib/fontTools/ttLib/tables/_g_l_y_f.py |   5 +-
 Lib/fontTools/ttLib/tables/_h_d_m_x.py |   2 +-
 Lib/fontTools/ttLib/tables/_h_m_t_x.py |   3 +
 Lib/fontTools/ttLib/tables/_n_a_m_e.py |  12 +-
 Lib/fontTools/ttLib/xmlImport.py       | 179 +++++++------------------
 6 files changed, 64 insertions(+), 164 deletions(-)

diff --git a/Lib/fontTools/ttLib/__init__.py b/Lib/fontTools/ttLib/__init__.py
index d46092dd0..902416e5b 100644
--- a/Lib/fontTools/ttLib/__init__.py
+++ b/Lib/fontTools/ttLib/__init__.py
@@ -42,7 +42,7 @@ Dumping 'prep' table...
 """
 
 #
-# $Id: __init__.py,v 1.17 2001-02-23 21:58:57 Just Exp $
+# $Id: __init__.py,v 1.18 2002-05-01 21:06:11 jvr Exp $
 #
 
 __version__ = "1.0a6"
@@ -244,30 +244,11 @@ class TTFont:
 			debugmsg("Done dumping TTX")
 	
 	def importXML(self, file, progress=None):
-		"""Import an TTX file (an XML-based text format), so as to recreate
+		"""Import a TTX file (an XML-based text format), so as to recreate
 		a font object.
 		"""
-		import xmlImport, stat
-		from xml.parsers.xmlproc import xmlproc
-		builder = xmlImport.XMLApplication(self, progress)
-		if progress:
-			progress.set(0, os.stat(file)[stat.ST_SIZE] / 100 or 1)
-		proc = xmlImport.UnicodeProcessor()
-		proc.set_application(builder)
-		proc.set_error_handler(xmlImport.XMLErrorHandler(proc))
-		dir, filename = os.path.split(file)
-		if dir:
-			olddir = os.getcwd()
-			os.chdir(dir)
-		try:
-			proc.parse_resource(filename)
-			root = builder.root
-		finally:
-			if dir:
-				os.chdir(olddir)
-			# remove circular references
-			proc.deref()
-			del builder.progress
+		import xmlImport
+		xmlImport.importXML(self, file, progress)
 	
 	def isLoaded(self, tag):
 		"""Return true if the table identified by 'tag' has been 
diff --git a/Lib/fontTools/ttLib/tables/_g_l_y_f.py b/Lib/fontTools/ttLib/tables/_g_l_y_f.py
index e5a8f257d..daa4e2638 100644
--- a/Lib/fontTools/ttLib/tables/_g_l_y_f.py
+++ b/Lib/fontTools/ttLib/tables/_g_l_y_f.py
@@ -40,8 +40,9 @@ class table__g_l_y_f(DefaultTable.DefaultTable):
 			glyph = Glyph(glyphdata)
 			self.glyphs[glyphName] = glyph
 			last = next
-		if len(data) > next:
-			raise ttLib.TTLibError, "too much 'glyf' table data"
+		# this should become a warning:
+		#if len(data) > next:
+		#	raise ttLib.TTLibError, "too much 'glyf' table data"
 	
 	def compile(self, ttFont):
 		import string
diff --git a/Lib/fontTools/ttLib/tables/_h_d_m_x.py b/Lib/fontTools/ttLib/tables/_h_d_m_x.py
index acfb84f84..48a5f7b2d 100644
--- a/Lib/fontTools/ttLib/tables/_h_d_m_x.py
+++ b/Lib/fontTools/ttLib/tables/_h_d_m_x.py
@@ -75,7 +75,7 @@ class table__h_d_m_x(DefaultTable.DefaultTable):
 	def fromXML(self, (name, attrs, content), ttFont):
 		if name <> "hdmxData":
 			return
-		content = string.join(content, " ")
+		content = string.join(content, "")
 		lines = string.split(content, ";")
 		topRow = string.split(lines[0])
 		assert topRow[0] == "ppem:", "illegal hdmx format"
diff --git a/Lib/fontTools/ttLib/tables/_h_m_t_x.py b/Lib/fontTools/ttLib/tables/_h_m_t_x.py
index 0116b1626..2d4d8ed46 100644
--- a/Lib/fontTools/ttLib/tables/_h_m_t_x.py
+++ b/Lib/fontTools/ttLib/tables/_h_m_t_x.py
@@ -52,6 +52,9 @@ class table__h_m_t_x(DefaultTable.DefaultTable):
 		lastIndex = len(metrics)
 		while metrics[lastIndex-2][0] == lastAdvance:
 			lastIndex = lastIndex - 1
+			if lastIndex == 0:
+				# all advances are equal
+				break
 		additionalMetrics = metrics[lastIndex:]
 		additionalMetrics = map(lambda (advance, sb): sb, additionalMetrics)
 		metrics = metrics[:lastIndex]
diff --git a/Lib/fontTools/ttLib/tables/_n_a_m_e.py b/Lib/fontTools/ttLib/tables/_n_a_m_e.py
index 8bcf69cf4..dcdf907d8 100644
--- a/Lib/fontTools/ttLib/tables/_n_a_m_e.py
+++ b/Lib/fontTools/ttLib/tables/_n_a_m_e.py
@@ -107,13 +107,15 @@ class NameRecord:
 		self.platEncID = safeEval(attrs["platEncID"])
 		self.langID =  safeEval(attrs["langID"])
 		if self.platformID == 0 or (self.platformID == 3 and self.platEncID in (0, 1)):
-			from fontTools.ttLib.xmlImport import UnicodeString
-			str = UnicodeString("")
+			s = ""
 			for element in content:
-				str = str + element
-			self.string = str.stripped().tostring()
+				s = s + element
+			s = unicode(s, "utf8")
+			s = s.strip()
+			self.string = s.encode("utf_16_be")
 		else:
-			self.string = string.strip(string.join(content, ""))
+			s = string.strip(string.join(content, ""))
+			self.string = unicode(s, "utf8").encode("latin1")
 	
 	def __cmp__(self, other):
 		"""Compare method, so a list of NameRecords can be sorted
diff --git a/Lib/fontTools/ttLib/xmlImport.py b/Lib/fontTools/ttLib/xmlImport.py
index 8ff2a8aa1..1adfc2bf5 100644
--- a/Lib/fontTools/ttLib/xmlImport.py
+++ b/Lib/fontTools/ttLib/xmlImport.py
@@ -7,140 +7,45 @@ import Numeric, array
 from xml.parsers.xmlproc import xmlproc
 
 
-xmlerror = "xmlerror"
-xml_parse_error = "XML parse error"
+class TTXParseError(Exception): pass
 
 
-class UnicodeString:
-	
-	def __init__(self, value):
-		if isinstance(value, UnicodeString):
-			self.value = value.value
-		else:
-			if type(value) == types.StringType:
-				# Since Numeric interprets char codes as *signed*,
-				# we feed it through the array module.
-				value = array.array("B", value)
-			self.value = Numeric.array(value, Numeric.Int16)
-	
-	def __len__(self):
-		return len(self.value)
-	
-	#def __hash__(self):
-	#	return hash(self.value.tostring())
-	#
-	#def __cmp__(self, other):
-	#	if not isinstance(other, UnicodeString):
-	#		return 1
-	#	else:
-	#		return not Numeric.alltrue(
-	#				Numeric.equal(self.value, other.value))
-	
-	def __add__(self, other):
-		if not isinstance(other, UnicodeString):
-			other = self.__class__(other)
-		return self.__class__(Numeric.concatenate((self.value, other.value)))
-	
-	def __radd__(self, other):
-		if not isinstance(other, UnicodeString):
-			other = self.__class__(other)
-		return self.__class__(Numeric.concatenate((other.value, self.value)))
-	
-	def __getslice__(self, i, j):
-		return self.__class__(self.value[i:j])
-	
-	def __getitem__(self, i):
-		return self.__class__(self.value[i:i+1])
-	
-	def tostring(self):
-		value = self.value
-		if ttLib.endian <> "big":
-			value = value.byteswapped()
-		return value.tostring()
-	
-	def stripped(self):
-		value = self.value
-		i = 0
-		for i in range(len(value)):
-			if value[i] not in (0xa, 0xd, 0x9, 0x20):
-				break
-		value = value[i:]
-		i = 0
-		for i in range(len(value)-1, -1, -1):
-			if value[i] not in (0xa, 0xd, 0x9, 0x20):
-				break
-		value = value[:i+1]
-		return self.__class__(value)
-	
-	def __repr__(self):
-		return "<%s %s at %x>" % (self.__class__.__name__, `self.value.tostring()`, id(self))
-
-
-class UnicodeProcessor(xmlproc.XMLProcessor):
-	
-	def parse_charref(self):
-		"Parses a character reference."
-		
-		if self.now_at("x"):
-			digs=unhex(self.get_match(xmlproc.reg_hex_digits))
-		else:
-			try:
-				digs=string.atoi(self.get_match(xmlproc.reg_digits))
-			except ValueError,e:
-				self.report_error(3027)
-				digs=None
-		if digs == 169:
-			pass
-		if not self.now_at(";"): self.report_error(3005,";")
-		if digs==None: return
-		
-		if not (digs==9 or digs==10 or digs==13 or \
-				(digs>=32 and digs<=255)):
-			if digs>255:
-				self.app.handle_data(UnicodeString([digs]),0,1)
-			else:
-				# hrm, I need to let some null bytes go through...
-				self.app.handle_data(chr(digs),0,1)
-				#self.report_error(3018,digs)
-		else:
-			if self.stack==[]:
-				self.report_error(3028)
-			self.app.handle_data(chr(digs),0,1)
-
-
-class XMLErrorHandler(xmlproc.ErrorHandler):
-	
-	def fatal(self, msg):
-		"Handles a fatal error message."
-		# we don't want no stinkin' sys.exit(1)
-		raise xml_parse_error, msg
-
-
-class XMLApplication(xmlproc.Application):
+class ExpatParser:
 	
 	def __init__(self, ttFont, progress=None):
+		from xml.parsers.expat import ParserCreate
 		self.ttFont = ttFont
 		self.progress = progress
 		self.root = None
-		self.content_stack = []
+		self.contentStack = []
 		self.lastpos = 0
+		self.stackSize = 0
+		self.parser = ParserCreate()
+		self.parser.returns_unicode = 0
+		self.parser.StartElementHandler = self.StartElementHandler
+		self.parser.EndElementHandler = self.EndElementHandler
+		self.parser.CharacterDataHandler = self.CharacterDataHandler
 	
-	def handle_start_tag(self, name, attrs):
-		if self.progress:
+	def ParseFile(self, file):
+		self.parser.ParseFile(file)
+	
+	def StartElementHandler(self, name, attrs):
+		if 0 and self.progress:
+			# XXX
 			pos = self.locator.pos + self.locator.block_offset
 			if (pos - self.lastpos) > 4000:
 				self.progress.set(pos / 100)
 				self.lastpos = pos
-		stack = self.locator.stack
-		stackSize = len(stack)
+		stackSize = self.stackSize
+		self.stackSize = self.stackSize + 1
 		if not stackSize:
 			if name <> "ttFont":
-				raise xml_parse_error, "illegal root tag: %s" % name
+				raise TTXParseError, "illegal root tag: %s" % name
 			sfntVersion = attrs.get("sfntVersion", "\000\001\000\000")
 			if len(sfntVersion) <> 4:
 				sfntVersion = safeEval('"' + sfntVersion + '"')
 			self.ttFont.sfntVersion = sfntVersion
-			self.content_stack.append([])
+			self.contentStack.append([])
 		elif stackSize == 1:
 			msg = "Parsing '%s' table..." % ttLib.xmltag2tag(name)
 			if self.progress:
@@ -157,31 +62,30 @@ class XMLApplication(xmlproc.Application):
 				if tableClass is None:
 					tableClass = DefaultTable
 			if self.ttFont.has_key(tag):
-				self.current_table = self.ttFont[tag]
+				self.currentTable = self.ttFont[tag]
 			else:
-				self.current_table = tableClass(tag)
-				self.ttFont[tag] = self.current_table
-			self.content_stack.append([])
+				self.currentTable = tableClass(tag)
+				self.ttFont[tag] = self.currentTable
+			self.contentStack.append([])
 		elif stackSize == 2:
-			self.content_stack.append([])
-			self.root = (name, attrs, self.content_stack[-1])
+			self.contentStack.append([])
+			self.root = (name, attrs, self.contentStack[-1])
 		else:
 			list = []
-			self.content_stack[-1].append((name, attrs, list))
-			self.content_stack.append(list)
+			self.contentStack[-1].append((name, attrs, list))
+			self.contentStack.append(list)
 	
-	def handle_data(self, data, start, end):
-		if len(self.locator.stack) > 1:
-			self.content_stack[-1].append(data[start:end])
+	def CharacterDataHandler(self, data):
+		if self.stackSize > 1:
+			self.contentStack[-1].append(data)
 	
-	def handle_end_tag(self, name):
-		del self.content_stack[-1]
-		stack = self.locator.stack
-		stackSize = len(stack)
-		if stackSize == 1:
+	def EndElementHandler(self, name):
+		self.stackSize = self.stackSize - 1
+		del self.contentStack[-1]
+		if self.stackSize == 1:
 			self.root = None
-		elif stackSize == 2:
-			self.current_table.fromXML(self.root, self.ttFont)
+		elif self.stackSize == 2:
+			self.currentTable.fromXML(self.root, self.ttFont)
 			self.root = None
 
 
@@ -200,3 +104,12 @@ class ProgressPrinter:
 		print text
 
 
+def importXML(ttFont, fileName, progress=None):
+	"""Import a TTX file (an XML-based text format), so as to recreate
+	a font object.
+	"""
+	p = ExpatParser(ttFont, progress)
+	file = open(fileName)
+	p.ParseFile(file)
+	file.close()
+