By default (newlinestr=None), the XMLWriter will still use the `os.linesep` as the newline string. Otherwise, it will use the specified `newlinestr`. This is useful when TTX files under version control are being written from multiple platforms; in which case, one usually wants to always use one specific line ending (most likely LF, which is what the XML spec itself normalizes it to).
185 lines
4.7 KiB
Python
185 lines
4.7 KiB
Python
"""xmlWriter.py -- Simple XML authoring class"""
|
|
|
|
from __future__ import print_function, division, absolute_import
|
|
from fontTools.misc.py23 import *
|
|
import sys
|
|
import os
|
|
import string
|
|
|
|
INDENT = " "
|
|
|
|
|
|
class XMLWriter(object):
|
|
|
|
def __init__(self, fileOrPath, indentwhite=INDENT, idlefunc=None, encoding="utf_8",
|
|
newlinestr=None):
|
|
if encoding.lower().replace('-','').replace('_','') != 'utf8':
|
|
raise Exception('Only UTF-8 encoding is supported.')
|
|
if fileOrPath == '-':
|
|
fileOrPath = sys.stdout
|
|
if not hasattr(fileOrPath, "write"):
|
|
self.file = open(fileOrPath, "wb")
|
|
else:
|
|
# assume writable file object
|
|
self.file = fileOrPath
|
|
|
|
# Figure out if writer expects bytes or unicodes
|
|
try:
|
|
# The bytes check should be first. See:
|
|
# https://github.com/behdad/fonttools/pull/233
|
|
self.file.write(b'')
|
|
self.totype = tobytes
|
|
except TypeError:
|
|
# This better not fail.
|
|
self.file.write(tounicode(''))
|
|
self.totype = tounicode
|
|
self.indentwhite = self.totype(indentwhite)
|
|
if newlinestr is None:
|
|
self.newlinestr = self.totype(os.linesep)
|
|
else:
|
|
self.newlinestr = self.totype(newlinestr)
|
|
self.indentlevel = 0
|
|
self.stack = []
|
|
self.needindent = 1
|
|
self.idlefunc = idlefunc
|
|
self.idlecounter = 0
|
|
self._writeraw('<?xml version="1.0" encoding="UTF-8"?>')
|
|
self.newline()
|
|
|
|
def close(self):
|
|
self.file.close()
|
|
|
|
def write(self, string, indent=True):
|
|
"""Writes text."""
|
|
self._writeraw(escape(string), indent=indent)
|
|
|
|
def writecdata(self, string):
|
|
"""Writes text in a CDATA section."""
|
|
self._writeraw("<![CDATA[" + string + "]]>")
|
|
|
|
def write8bit(self, data, strip=False):
|
|
"""Writes a bytes() sequence into the XML, escaping
|
|
non-ASCII bytes. When this is read in xmlReader,
|
|
the original bytes can be recovered by encoding to
|
|
'latin-1'."""
|
|
self._writeraw(escape8bit(data), strip=strip)
|
|
|
|
def write_noindent(self, string):
|
|
"""Writes text without indentation."""
|
|
self._writeraw(escape(string), indent=False)
|
|
|
|
def _writeraw(self, data, indent=True, strip=False):
|
|
"""Writes bytes, possibly indented."""
|
|
if indent and self.needindent:
|
|
self.file.write(self.indentlevel * self.indentwhite)
|
|
self.needindent = 0
|
|
s = self.totype(data, encoding="utf_8")
|
|
if (strip):
|
|
s = s.strip()
|
|
self.file.write(s)
|
|
|
|
def newline(self):
|
|
self.file.write(self.newlinestr)
|
|
self.needindent = 1
|
|
idlecounter = self.idlecounter
|
|
if not idlecounter % 100 and self.idlefunc is not None:
|
|
self.idlefunc()
|
|
self.idlecounter = idlecounter + 1
|
|
|
|
def comment(self, data):
|
|
data = escape(data)
|
|
lines = data.split("\n")
|
|
self._writeraw("<!-- " + lines[0])
|
|
for line in lines[1:]:
|
|
self.newline()
|
|
self._writeraw(" " + line)
|
|
self._writeraw(" -->")
|
|
|
|
def simpletag(self, _TAG_, *args, **kwargs):
|
|
attrdata = self.stringifyattrs(*args, **kwargs)
|
|
data = "<%s%s/>" % (_TAG_, attrdata)
|
|
self._writeraw(data)
|
|
|
|
def begintag(self, _TAG_, *args, **kwargs):
|
|
attrdata = self.stringifyattrs(*args, **kwargs)
|
|
data = "<%s%s>" % (_TAG_, attrdata)
|
|
self._writeraw(data)
|
|
self.stack.append(_TAG_)
|
|
self.indent()
|
|
|
|
def endtag(self, _TAG_):
|
|
assert self.stack and self.stack[-1] == _TAG_, "nonmatching endtag"
|
|
del self.stack[-1]
|
|
self.dedent()
|
|
data = "</%s>" % _TAG_
|
|
self._writeraw(data)
|
|
|
|
def dumphex(self, data):
|
|
linelength = 16
|
|
hexlinelength = linelength * 2
|
|
chunksize = 8
|
|
for i in range(0, len(data), linelength):
|
|
hexline = hexStr(data[i:i+linelength])
|
|
line = ""
|
|
white = ""
|
|
for j in range(0, hexlinelength, chunksize):
|
|
line = line + white + hexline[j:j+chunksize]
|
|
white = " "
|
|
self._writeraw(line)
|
|
self.newline()
|
|
|
|
def indent(self):
|
|
self.indentlevel = self.indentlevel + 1
|
|
|
|
def dedent(self):
|
|
assert self.indentlevel > 0
|
|
self.indentlevel = self.indentlevel - 1
|
|
|
|
def stringifyattrs(self, *args, **kwargs):
|
|
if kwargs:
|
|
assert not args
|
|
attributes = sorted(kwargs.items())
|
|
elif args:
|
|
assert len(args) == 1
|
|
attributes = args[0]
|
|
else:
|
|
return ""
|
|
data = ""
|
|
for attr, value in attributes:
|
|
if not isinstance(value, (bytes, unicode)):
|
|
value = str(value)
|
|
data = data + ' %s="%s"' % (attr, escapeattr(value))
|
|
return data
|
|
|
|
|
|
def escape(data):
|
|
data = tostr(data, 'utf_8')
|
|
data = data.replace("&", "&")
|
|
data = data.replace("<", "<")
|
|
data = data.replace(">", ">")
|
|
data = data.replace("\r", " ")
|
|
return data
|
|
|
|
def escapeattr(data):
|
|
data = escape(data)
|
|
data = data.replace('"', """)
|
|
return data
|
|
|
|
def escape8bit(data):
|
|
"""Input is Unicode string."""
|
|
def escapechar(c):
|
|
n = ord(c)
|
|
if 32 <= n <= 127 and c not in "<&>":
|
|
return c
|
|
else:
|
|
return "&#" + repr(n) + ";"
|
|
return strjoin(map(escapechar, data.decode('latin-1')))
|
|
|
|
def hexStr(s):
|
|
h = string.hexdigits
|
|
r = ''
|
|
for c in s:
|
|
i = byteord(c)
|
|
r = r + h[(i >> 4) & 0xF] + h[i & 0xF]
|
|
return r
|