# -*- coding: utf-8 -*- from __future__ import print_function, division, absolute_import, unicode_literals from fontTools.misc.py23 import * import os import unittest from fontTools.ttLib import TTFont from .xmlReader import XMLReader import tempfile class TestXMLReader(unittest.TestCase): def test_decode_utf8(self): class DebugXMLReader(XMLReader): def __init__(self, fileName, ttFont, progress=None, quiet=False): super(DebugXMLReader, self).__init__( fileName, ttFont, progress, quiet) self.contents = [] def _endElementHandler(self, name): if self.stackSize == 3: name, attrs, content = self.root self.contents.append(content) super(DebugXMLReader, self)._endElementHandler(name) expected = 'fôôbär' data = '''\ %s ''' % expected with tempfile.NamedTemporaryFile(delete=False) as tmp: tmp.write(data.encode('utf-8')) reader = DebugXMLReader(tmp.name, TTFont(), quiet=True) reader.read() os.remove(tmp.name) content = strjoin(reader.contents[0]).strip() self.assertEqual(expected, content) def test_normalise_newlines(self): class DebugXMLReader(XMLReader): def __init__(self, fileName, ttFont, progress=None, quiet=False): super(DebugXMLReader, self).__init__( fileName, ttFont, progress, quiet) self.newlines = [] def _characterDataHandler(self, data): self.newlines.extend([c for c in data if c in ('\r', '\n')]) # notice how when CR is escaped, it is not normalised by the XML parser data = ( '\r' # \r -> \n ' \r\n' # \r\n -> \n ' a line of text\n' # \n ' escaped CR and unix newline \n' # \n -> \r\n ' escaped CR and macintosh newline \r' # \r -> \r\n ' escaped CR and windows newline \r\n' # \r\n -> \r\n ' \n' # \n '') with tempfile.NamedTemporaryFile(delete=False) as tmp: tmp.write(data.encode('utf-8')) reader = DebugXMLReader(tmp.name, TTFont(), quiet=True) reader.read() os.remove(tmp.name) expected = ['\n'] * 3 + ['\r', '\n'] * 3 + ['\n'] self.assertEqual(expected, reader.newlines) if __name__ == '__main__': unittest.main()