diff --git a/Lib/fontTools/ufoLib/errors.py b/Lib/fontTools/ufoLib/errors.py index f4a181c41..e05dd438b 100644 --- a/Lib/fontTools/ufoLib/errors.py +++ b/Lib/fontTools/ufoLib/errors.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + class UFOLibError(Exception): pass @@ -7,7 +10,12 @@ class UnsupportedUFOFormat(UFOLibError): class GlifLibError(UFOLibError): - pass + def _add_note(self, note: str) -> None: + # Loose backport of PEP 678 until we only support Python 3.11+, used for + # adding additional context to errors. + # TODO: Replace with https://docs.python.org/3.11/library/exceptions.html#BaseException.add_note + (message, *rest) = self.args + self.args = ((message + "\n" + note), *rest) class UnsupportedGLIFFormat(GlifLibError): diff --git a/Lib/fontTools/ufoLib/glifLib.py b/Lib/fontTools/ufoLib/glifLib.py index 0461d85e8..6dee9db30 100755 --- a/Lib/fontTools/ufoLib/glifLib.py +++ b/Lib/fontTools/ufoLib/glifLib.py @@ -416,17 +416,33 @@ class GlyphSet(_UFOBaseIO): if validate is None: validate = self._validateRead text = self.getGLIF(glyphName) - tree = _glifTreeFromString(text) - formatVersions = GLIFFormatVersion.supported_versions( - self.ufoFormatVersionTuple - ) - _readGlyphFromTree( - tree, - glyphObject, - pointPen, - formatVersions=formatVersions, - validate=validate, - ) + try: + tree = _glifTreeFromString(text) + formatVersions = GLIFFormatVersion.supported_versions( + self.ufoFormatVersionTuple + ) + _readGlyphFromTree( + tree, + glyphObject, + pointPen, + formatVersions=formatVersions, + validate=validate, + ) + except GlifLibError as glifLibError: + # Re-raise with a note that gives extra context, describing where + # the error occurred. + fileName = self.contents[glyphName] + try: + glifLocation = f"'{self.fs.getsyspath(fileName)}'" + except fs.errors.NoSysPath: + # Network or in-memory FS may not map to a local path, so use + # the best string representation we have. + glifLocation = f"'{fileName}' from '{str(self.fs)}'" + + glifLibError._add_note( + f"The issue is in glyph '{glyphName}', located in {glifLocation}." + ) + raise def writeGlyph( self, @@ -1082,10 +1098,14 @@ def _glifTreeFromFile(aFile): def _glifTreeFromString(aString): data = tobytes(aString, encoding="utf-8") - if etree._have_lxml: - root = etree.fromstring(data, parser=etree.XMLParser(remove_comments=True)) - else: - root = etree.fromstring(data) + try: + if etree._have_lxml: + root = etree.fromstring(data, parser=etree.XMLParser(remove_comments=True)) + else: + root = etree.fromstring(data) + except Exception as etree_exception: + raise GlifLibError("GLIF contains invalid XML.") from etree_exception + if root.tag != "glyph": raise GlifLibError("The GLIF is not properly formatted.") if root.text and root.text.strip() != "": diff --git a/Tests/ufoLib/glifLib_test.py b/Tests/ufoLib/glifLib_test.py index 7c5c9fd1c..8f48168d8 100644 --- a/Tests/ufoLib/glifLib_test.py +++ b/Tests/ufoLib/glifLib_test.py @@ -3,6 +3,7 @@ import os import tempfile import shutil import unittest +from pathlib import Path from io import open from .testSupport import getDemoFontGlyphSetPath from fontTools.ufoLib.glifLib import ( @@ -134,6 +135,34 @@ class GlyphSetTests(unittest.TestCase): else: self.assertEqual(g.unicodes, unicodes[glyphName]) + def testReadGlyphInvalidXml(self): + """Test that calling readGlyph() to read a .glif with invalid XML raises + a library error, instead of an exception from the XML dependency that is + used internally. In addition, check that the raised exception describes + the glyph by name and gives the location of the broken .glif file.""" + + # Create a glyph set with three empty glyphs. + glyph_set = GlyphSet(self.dstDir) + glyph_set.writeGlyph("a", _Glyph()) + glyph_set.writeGlyph("b", _Glyph()) + glyph_set.writeGlyph("c", _Glyph()) + + # Corrupt the XML of /c. + invalid_xml = b"" + Path(self.dstDir, glyph_set.contents["c"]).write_bytes(invalid_xml) + + # Confirm that reading /a and /b is fine... + glyph_set.readGlyph("a", _Glyph()) + glyph_set.readGlyph("b", _Glyph()) + + # ...but that reading /c raises a descriptive library error. + expected_message = ( + r"GLIF contains invalid XML\.\n" + r"The issue is in glyph 'c', located in '.*c\.glif.*\." + ) + with pytest.raises(GlifLibError, match=expected_message): + glyph_set.readGlyph("c", _Glyph()) + class FileNameTest: def test_default_file_name_scheme(self): @@ -228,6 +257,17 @@ class ReadWriteFuncTest: assert g.width == 1290 assert g.unicodes == [0x0041] + def test_read_invalid_xml(self): + """Test that calling readGlyphFromString() with invalid XML raises a + library error, instead of an exception from the XML dependency that is + used internally.""" + + invalid_xml = b"" + empty_glyph = _Glyph() + + with pytest.raises(GlifLibError, match="GLIF contains invalid XML"): + readGlyphFromString(invalid_xml, empty_glyph) + def test_read_unsupported_format_version(self, caplog): s = """