git-svn-id: http://svn.robofab.com/branches/ufo3k@287 b5fa9d6c-a76f-4ffd-b3cb-f825fc41095c
117 lines
2.5 KiB
Python
117 lines
2.5 KiB
Python
import os
|
|
try:
|
|
from xml.parsers.expat import ParserCreate
|
|
except ImportError:
|
|
_haveExpat = 0
|
|
from xml.parsers.xmlproc.xmlproc import XMLProcessor
|
|
else:
|
|
_haveExpat = 1
|
|
|
|
|
|
class XMLParser:

    """Build a nested tuple/list tree from an XML document.

    Every element becomes a ``(name, attrs, children)`` tuple, where
    ``children`` is a list of child element tuples and character data
    strings in document order.  ``attrs`` is stored exactly as the
    underlying parser hands it to ``startElementHandler``.

    A parser instance accumulates into ``self.root`` and is meant to be
    used for a single document: ``getRoot()`` insists on exactly one
    top-level element.
    """

    # Types that the parse methods treat as a filesystem path rather than
    # an open file object.  ``unicode`` only exists on Python 2; when the
    # name is missing, ``str`` alone covers text paths.
    try:
        _pathTypes = (str, unicode)
    except NameError:
        _pathTypes = (str,)

    def __init__(self):
        # Top-level nodes collected so far.
        self.root = []
        # Linked stack of open elements.  Each open element is a
        # (children, name, attrs, previous) tuple; the bottom entry is
        # this shorter sentinel whose first item is the root list.
        self.current = (self.root, None)

    def getRoot(self):
        """Return the single top-level ``(name, attrs, children)`` tuple.

        Raises AssertionError when nothing has been parsed yet or when
        more than one top-level node was collected.
        """
        assert len(self.root) == 1, "expected exactly one root element"
        return self.root[0]

    def startElementHandler(self, name, attrs):
        """Push a new open element with an empty child list."""
        self.current = ([], name, attrs, self.current)

    def endElementHandler(self, name):
        """Pop the open element and append it to its parent's children.

        The recorded start-tag name is what ends up in the tree; the
        *name* argument is overwritten by the unpacking below.
        """
        children, name, attrs, previous = self.current
        previous[0].append((name, attrs, children))
        self.current = previous

    def characterDataHandler(self, data):
        """Append *data* to the current element's children.

        Parsers may deliver one run of text in several pieces, so a piece
        that directly follows another text node of the same type is
        merged into it instead of creating a new node.
        """
        nodes = self.current[0]
        if nodes and type(nodes[-1]) is type(data):
            nodes[-1] = nodes[-1] + data
        else:
            nodes.append(data)

    def _expatParseFile(self, pathOrFile):
        """Parse *pathOrFile* with expat and return the root tuple.

        *pathOrFile* is either a path string, which is opened here and
        always closed again, or an already open file object, which is
        left open for the caller.  Errors raised by expat propagate.
        """
        parser = ParserCreate()
        try:
            # Python 2 only: ask for byte strings instead of unicode
            # objects (the original author's note: "Don't remember why.
            # It sucks, though.").
            parser.returns_unicode = 0
        except AttributeError:
            # Parsers without this attribute reject the assignment; the
            # handlers then receive whatever string type expat produces.
            pass
        parser.StartElementHandler = self.startElementHandler
        parser.EndElementHandler = self.endElementHandler
        parser.CharacterDataHandler = self.characterDataHandler
        if isinstance(pathOrFile, self._pathTypes):
            # Binary mode: expat consumes raw bytes and does its own
            # decoding and newline normalisation.
            f = open(pathOrFile, "rb")
            try:
                parser.ParseFile(f)
            finally:
                # Close our own handle even when parsing fails.
                f.close()
        else:
            parser.ParseFile(pathOrFile)
        return self.getRoot()

    def _xmlprocDataHandler(self, data, begin, end):
        """Adapt xmlproc's (buffer, begin, end) data callback."""
        self.characterDataHandler(data[begin:end])

    def _xmlprocParseFile(self, pathOrFile):
        """Parse *pathOrFile* with xmlproc and return the root tuple.

        Accepts the same path-or-open-file argument as
        ``_expatParseFile``.  A file opened here is closed even when
        parsing raises; a file object passed in is left open.
        """
        proc = XMLProcessor()
        proc.app.handle_start_tag = self.startElementHandler
        proc.app.handle_end_tag = self.endElementHandler
        proc.app.handle_data = self._xmlprocDataHandler
        if isinstance(pathOrFile, self._pathTypes):
            # NOTE(review): kept in the original default (text) mode;
            # confirm what XMLProcessor.read_from expects before changing.
            f = open(pathOrFile)
            didOpen = 1
        else:
            didOpen = 0
            f = pathOrFile
        try:
            proc.parseStart()
            proc.read_from(f)
            proc.flush()
            proc.parseEnd()
            proc.deref()
        finally:
            if didOpen:
                f.close()
        return self.getRoot()

    def parseFile(self, pathOrFile):
        """Parse *pathOrFile* and return the root ``(name, attrs, children)``.

        Uses expat when the module-level import succeeded and xmlproc
        otherwise.  The choice is made per call, so subclasses may
        override either backend method.
        """
        if _haveExpat:
            return self._expatParseFile(pathOrFile)
        return self._xmlprocParseFile(pathOrFile)
|
|
|
|
|
|
def stripCharacterData(nodes, recursive=True):
    """Strip surrounding whitespace from the text nodes in *nodes*, in place.

    *nodes* is a list mixing element tuples, whose third item is their
    own child list, with character data strings.  Each string is replaced
    by its stripped form, and strings that become empty are removed.
    Element tuples are kept; when *recursive* is true their child lists
    are processed the same way, all the way down.

    The list object passed in is modified; nothing is returned.
    """
    kept = []
    for node in nodes:
        if isinstance(node, tuple):
            if recursive:
                stripCharacterData(node[2])
            kept.append(node)
        else:
            text = node.strip()
            if text:
                kept.append(text)
    # One slice assignment keeps the caller's list object while avoiding
    # a separate O(n) ``del`` for every whitespace-only node.
    nodes[:] = kept
|
|
|
|
|
|
def buildTree(pathOrFile, stripData=1):
    """Parse *pathOrFile* and return its root ``(name, attrs, children)``.

    *pathOrFile* is handed unchanged to ``XMLParser.parseFile``.  Unless
    *stripData* is false, the root's child list is passed through
    ``stripCharacterData`` before the tree is returned.
    """
    root = XMLParser().parseFile(pathOrFile)
    if not stripData:
        return root
    stripCharacterData(root[2])
    return root
|
|
|
|
|
|
if __name__ == "__main__":
    # Debug entry point: pretty-print the tree built from the file named
    # by the first command-line argument.
    import sys
    from pprint import pprint

    # Any further argument after the path turns whitespace stripping on.
    strip = len(sys.argv) > 2
    tree = buildTree(sys.argv[1], strip)
    pprint(tree)
|