From 9f846d36870c5d9bd97355ea2f8b927cd6a2d514 Mon Sep 17 00:00:00 2001
From: Cosimo Lupo
Date: Mon, 27 Jul 2015 19:24:02 +0100
Subject: [PATCH] [xmlReader_test] test newlines gets normalised by expat parser, unless escaped

---
Reviewer note (not part of the commit message): this copy of the patch had
all of its line breaks collapsed and everything that looked like markup
removed. The line structure has been restored, and the XML tags and the
"&#13;" character references in the test data have been put back so that
the document is well-formed and agrees with the "CR is escaped" comment and
with the expected list (three bare LF, three CR+LF pairs, one LF).
The element names and the leading whitespace (tabs assumed) are
reconstructions - check them against the target tree before applying.
The address in the From: header was lost as well and has not been guessed.

 Lib/fontTools/misc/xmlReader_test.py | 30 ++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/Lib/fontTools/misc/xmlReader_test.py b/Lib/fontTools/misc/xmlReader_test.py
index 2081ba099..e468f8e70 100644
--- a/Lib/fontTools/misc/xmlReader_test.py
+++ b/Lib/fontTools/misc/xmlReader_test.py
@@ -46,6 +46,36 @@ class TestXMLReader(unittest.TestCase):
 		content = strjoin(reader.contents[0]).strip()
 		self.assertEqual(expected, content)
 
+	def test_normalise_newlines(self):
+
+		class DebugXMLReader(XMLReader):
+
+			def __init__(self, fileName, ttFont, progress=None, quiet=False):
+				super(DebugXMLReader, self).__init__(
+					fileName, ttFont, progress, quiet)
+				self.newlines = []
+
+			def _characterDataHandler(self, data):
+				self.newlines.extend([c for c in data if c in ('\r', '\n')])
+
+		# notice how when CR is escaped, it is not normalised by the XML parser
+		data = (
+			'<ttFont>\r'  # \r -> \n
+			'  <test>\r\n'  # \r\n -> \n
+			'    a line of text\n'  # \n
+			'    escaped CR and unix newline &#13;\n'  # &#13;\n -> \r\n
+			'    escaped CR and macintosh newline &#13;\r'  # &#13;\r -> \r\n
+			'    escaped CR and windows newline &#13;\r\n'  # &#13;\r\n -> \r\n
+			'  </test>\n'  # \n
+			'</ttFont>')
+		with tempfile.NamedTemporaryFile(delete=False) as tmp:
+			tmp.write(data.encode('utf-8'))
+		reader = DebugXMLReader(tmp.name, TTFont(), quiet=True)
+		reader.read()
+		os.remove(tmp.name)
+		expected = ['\n'] * 3 + ['\r', '\n'] * 3 + ['\n']
+		self.assertEqual(expected, reader.newlines)
+
 
 if __name__ == '__main__':
 	unittest.main()