From 1f3380300bd99797fec0f5b6769af22ff38c84f7 Mon Sep 17 00:00:00 2001 From: Harry Dalton Date: Thu, 9 Mar 2023 16:11:32 +0000 Subject: [PATCH 1/2] Wrap XML library exceptions with glifLib types when parsing glifs This allows dependent projects to catch errors parsing glifs without requiring logic to account for which XML library fonttools is using internally (e.g. for implementing fonttools/ufoLib2#264). This commit also adds tests to ensure that the exception we expose when glifs have invalid syntax remains stable across future releases. --- Lib/fontTools/ufoLib/glifLib.py | 12 ++++++++---- Tests/ufoLib/glifLib_test.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/Lib/fontTools/ufoLib/glifLib.py b/Lib/fontTools/ufoLib/glifLib.py index 0461d85e8..49e74bfcd 100755 --- a/Lib/fontTools/ufoLib/glifLib.py +++ b/Lib/fontTools/ufoLib/glifLib.py @@ -1082,10 +1082,14 @@ def _glifTreeFromFile(aFile): def _glifTreeFromString(aString): data = tobytes(aString, encoding="utf-8") - if etree._have_lxml: - root = etree.fromstring(data, parser=etree.XMLParser(remove_comments=True)) - else: - root = etree.fromstring(data) + try: + if etree._have_lxml: + root = etree.fromstring(data, parser=etree.XMLParser(remove_comments=True)) + else: + root = etree.fromstring(data) + except Exception as etree_exception: + raise GlifLibError("GLIF contains invalid XML.") from etree_exception + if root.tag != "glyph": raise GlifLibError("The GLIF is not properly formatted.") if root.text and root.text.strip() != "": diff --git a/Tests/ufoLib/glifLib_test.py b/Tests/ufoLib/glifLib_test.py index 7c5c9fd1c..ba5b1e7a4 100644 --- a/Tests/ufoLib/glifLib_test.py +++ b/Tests/ufoLib/glifLib_test.py @@ -3,6 +3,7 @@ import os import tempfile import shutil import unittest +from pathlib import Path from io import open from .testSupport import getDemoFontGlyphSetPath from fontTools.ufoLib.glifLib import ( @@ -134,6 +135,27 @@ class GlyphSetTests(unittest.TestCase): else: self.assertEqual(g.unicodes, unicodes[glyphName]) + def testReadGlyphInvalidXml(self): + """Test that calling readGlyph() to read a .glif with invalid XML raises + a library error, instead of an exception from the XML dependency that is + used internally.""" + + # Create a glyph set with three empty glyphs. + glyph_set = GlyphSet(self.dstDir) + glyph_set.writeGlyph("a", _Glyph()) + glyph_set.writeGlyph("b", _Glyph()) + glyph_set.writeGlyph("c", _Glyph()) + + # Corrupt the XML of /c. + invalid_xml = b"" + Path(self.dstDir, glyph_set.contents["c"]).write_bytes(invalid_xml) + + # Confirm that reading /c raises an appropriate error. + glyph_set.readGlyph("a", _Glyph()) + glyph_set.readGlyph("b", _Glyph()) + with pytest.raises(GlifLibError, match="GLIF contains invalid XML"): + glyph_set.readGlyph("c", _Glyph()) + class FileNameTest: def test_default_file_name_scheme(self): @@ -228,6 +250,17 @@ class ReadWriteFuncTest: assert g.width == 1290 assert g.unicodes == [0x0041] + def test_read_invalid_xml(self): + """Test that calling readGlyphFromString() with invalid XML raises a + library error, instead of an exception from the XML dependency that is + used internally.""" + + invalid_xml = b"" + empty_glyph = _Glyph() + + with pytest.raises(GlifLibError, match="GLIF contains invalid XML"): + readGlyphFromString(invalid_xml, empty_glyph) + def test_read_unsupported_format_version(self, caplog): s = """ From 70ca6dec9b19f55410c3a32f15748fd2cc6e95c0 Mon Sep 17 00:00:00 2001 From: Harry Dalton Date: Tue, 14 Mar 2023 15:58:18 +0000 Subject: [PATCH 2/2] Identify the culprit glif in read errors with a loose backport of PEP678 This commit annotates errors from GlyphSet.readGlyph() with the details of the glyph that originated them (e.g. name, path to glif). This is implemented with a loose backport of PEP678, to avoid adding a wrapper error that would be less specific and would break API compatibility. In addition, this commit adds a test to ensure that the new details are present (specifically, in the case of parsing invalid XML). --- Lib/fontTools/ufoLib/errors.py | 10 ++++++++- Lib/fontTools/ufoLib/glifLib.py | 38 +++++++++++++++++++++++---------- Tests/ufoLib/glifLib_test.py | 13 ++++++++--- 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/Lib/fontTools/ufoLib/errors.py b/Lib/fontTools/ufoLib/errors.py index f4a181c41..e05dd438b 100644 --- a/Lib/fontTools/ufoLib/errors.py +++ b/Lib/fontTools/ufoLib/errors.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + class UFOLibError(Exception): pass @@ -7,7 +10,12 @@ class UnsupportedUFOFormat(UFOLibError): class GlifLibError(UFOLibError): - pass + def _add_note(self, note: str) -> None: + # Loose backport of PEP 678 until we only support Python 3.11+, used for + # adding additional context to errors. + # TODO: Replace with https://docs.python.org/3.11/library/exceptions.html#BaseException.add_note + (message, *rest) = self.args + self.args = ((message + "\n" + note), *rest) class UnsupportedGLIFFormat(GlifLibError): diff --git a/Lib/fontTools/ufoLib/glifLib.py b/Lib/fontTools/ufoLib/glifLib.py index 49e74bfcd..6dee9db30 100755 --- a/Lib/fontTools/ufoLib/glifLib.py +++ b/Lib/fontTools/ufoLib/glifLib.py @@ -416,17 +416,33 @@ class GlyphSet(_UFOBaseIO): if validate is None: validate = self._validateRead text = self.getGLIF(glyphName) - tree = _glifTreeFromString(text) - formatVersions = GLIFFormatVersion.supported_versions( - self.ufoFormatVersionTuple - ) - _readGlyphFromTree( - tree, - glyphObject, - pointPen, - formatVersions=formatVersions, - validate=validate, - ) + try: + tree = _glifTreeFromString(text) + formatVersions = GLIFFormatVersion.supported_versions( + self.ufoFormatVersionTuple + ) + _readGlyphFromTree( + tree, + glyphObject, + pointPen, + formatVersions=formatVersions, + validate=validate, + ) + except GlifLibError as glifLibError: + # Re-raise with a note that gives extra context, describing where + # the error occurred. + fileName = self.contents[glyphName] + try: + glifLocation = f"'{self.fs.getsyspath(fileName)}'" + except fs.errors.NoSysPath: + # Network or in-memory FS may not map to a local path, so use + # the best string representation we have. + glifLocation = f"'{fileName}' from '{str(self.fs)}'" + + glifLibError._add_note( + f"The issue is in glyph '{glyphName}', located in {glifLocation}." + ) + raise def writeGlyph( self, diff --git a/Tests/ufoLib/glifLib_test.py b/Tests/ufoLib/glifLib_test.py index ba5b1e7a4..8f48168d8 100644 --- a/Tests/ufoLib/glifLib_test.py +++ b/Tests/ufoLib/glifLib_test.py @@ -138,7 +138,8 @@ class GlyphSetTests(unittest.TestCase): def testReadGlyphInvalidXml(self): """Test that calling readGlyph() to read a .glif with invalid XML raises a library error, instead of an exception from the XML dependency that is - used internally.""" + used internally. In addition, check that the raised exception describes + the glyph by name and gives the location of the broken .glif file.""" # Create a glyph set with three empty glyphs. glyph_set = GlyphSet(self.dstDir) @@ -150,10 +151,16 @@ class GlyphSetTests(unittest.TestCase): invalid_xml = b"" Path(self.dstDir, glyph_set.contents["c"]).write_bytes(invalid_xml) - # Confirm that reading /c raises an appropriate error. + # Confirm that reading /a and /b is fine... glyph_set.readGlyph("a", _Glyph()) glyph_set.readGlyph("b", _Glyph()) - with pytest.raises(GlifLibError, match="GLIF contains invalid XML"): + + # ...but that reading /c raises a descriptive library error. + expected_message = ( + r"GLIF contains invalid XML\.\n" + r"The issue is in glyph 'c', located in '.*c\.glif.*\." + ) + with pytest.raises(GlifLibError, match=expected_message): glyph_set.readGlyph("c", _Glyph())