Harry Dalton 70ca6dec9b Identify the culprit glif in read errors with a loose backport of PEP678
This commit annotates errors from GlyphSet.readGlyph() with the details
of the glyph that originated them (e.g. name, path to glif). This is
implemented with a loose backport of PEP678, to avoid adding a wrapper
error that would be less specific and would break API compatibility.

In addition, this commit adds a test to ensure that the new details are
present (specifically, in the case of parsing invalid XML).
2023-03-14 15:58:18 +00:00

368 lines
13 KiB

import logging
import os
import shutil
import tempfile
import unittest
from io import open
from pathlib import Path

import pytest

from fontTools.misc.etree import XML_DECLARATION
from fontTools.pens.recordingPen import RecordingPointPen
from fontTools.ufoLib.errors import (
    GlifLibError,
    UnsupportedGLIFFormat,
    UnsupportedUFOFormat,
)
from fontTools.ufoLib.glifLib import (
    GlyphSet,
    glyphNameToFileName,
    readGlyphFromString,
    writeGlyphToString,
)

from .testSupport import getDemoFontGlyphSetPath
# Path returned by getDemoFontGlyphSetPath(); the tests in this file open it
# as their source GlyphSet.
GLYPHSETDIR = getDemoFontGlyphSetPath()
class GlyphSetTests(unittest.TestCase):
    """Tests for GlyphSet that write into a per-test scratch directory.

    ``setUp`` creates a fresh temporary directory in ``self.dstDir`` and
    ``tearDown`` removes it again, so tests never share output files.
    """

    def setUp(self):
        # mkdtemp() creates the directory itself; mktemp() only returns an
        # unused *name*, which is racy and deprecated.
        self.dstDir = tempfile.mkdtemp()

    def tearDown(self):
        # NOTE(review): body reconstructed; removes what setUp created.
        shutil.rmtree(self.dstDir)

    def testRoundTrip(self):
        """Copy every demo glyph to a new glyph set and diff the raw files."""
        import difflib

        # NOTE(review): srcDir assignment reconstructed from its uses below.
        srcDir = GLYPHSETDIR
        dstDir = self.dstDir
        src = GlyphSet(
            srcDir, ufoFormatVersion=2, validateRead=True, validateWrite=True
        )
        dst = GlyphSet(
            dstDir, ufoFormatVersion=2, validateRead=True, validateWrite=True
        )
        for glyphName in src.keys():
            g = src[glyphName]
            g.drawPoints(None)  # load attrs
            dst.writeGlyph(glyphName, g, g.drawPoints)
        # compare raw file data:
        for glyphName in sorted(src.keys()):
            fileName = src.contents[glyphName]
            # Explicit encoding so the comparison does not depend on the
            # platform's default locale.
            with open(os.path.join(srcDir, fileName), "r", encoding="utf-8") as f:
                org = f.read()
            with open(os.path.join(dstDir, fileName), "r", encoding="utf-8") as f:
                new = f.read()
            added = []
            removed = []
            for line in difflib.unified_diff(org.split("\n"), new.split("\n")):
                # "+ " / "- " (with the space) skips the "+++" / "---" headers.
                # NOTE(review): the append calls were reconstructed.
                if line.startswith("+ "):
                    added.append(line[1:])
                elif line.startswith("- "):
                    removed.append(line[1:])
            self.assertEqual(
                added, removed, "%s.glif file differs after round tripping" % glyphName
            )

    def testContentsExist(self):
        """Opening an empty directory must fail when contents are required."""
        with self.assertRaises(GlifLibError):
            # NOTE(review): this call was reconstructed from GlyphSet's
            # documented ``expectContentsFile`` flag -- confirm against upstream.
            GlyphSet(
                self.dstDir,
                ufoFormatVersion=2,
                validateRead=True,
                validateWrite=True,
                expectContentsFile=True,
            )

    def testRebuildContents(self):
        """Rebuilding the contents mapping must reproduce the loaded one."""
        gset = GlyphSet(GLYPHSETDIR, validateRead=True, validateWrite=True)
        contents = gset.contents
        # Without this call the assertion would compare the mapping to itself.
        # NOTE(review): call reconstructed from the test name.
        gset.rebuildContents()
        self.assertEqual(contents, gset.contents)

    def testReverseContents(self):
        """The reverse map must pair each lower-cased file name with its glyph."""
        gset = GlyphSet(GLYPHSETDIR, validateRead=True, validateWrite=True)
        d = {v: k for k, v in gset.getReverseContents().items()}
        org = {k: v.lower() for k, v in gset.contents.items()}
        self.assertEqual(d, org)

    def testReverseContents2(self):
        """The reverse map must track glyphs written to and deleted from the set."""
        src = GlyphSet(GLYPHSETDIR, validateRead=True, validateWrite=True)
        dst = GlyphSet(self.dstDir, validateRead=True, validateWrite=True)
        dstMap = dst.getReverseContents()
        self.assertEqual(dstMap, {})
        for glyphName in src.keys():
            g = src[glyphName]
            g.drawPoints(None)  # load attrs
            dst.writeGlyph(glyphName, g, g.drawPoints)
        # dstMap is deliberately not re-fetched: the test expects the object
        # obtained above to reflect the writes.
        self.assertNotEqual(dstMap, {})
        srcMap = dict(src.getReverseContents())  # copy
        self.assertEqual(dstMap, srcMap)
        del srcMap["a.glif"]
        # NOTE(review): deleteGlyph call reconstructed; without it the final
        # assertion cannot hold after the entry was removed from srcMap.
        dst.deleteGlyph("a")
        self.assertEqual(dstMap, srcMap)

    def testCustomFileNamingScheme(self):
        """A custom glyph-name-to-file-name function must be honoured on write."""

        def myGlyphNameToFileName(glyphName, glyphSet):
            return "prefix" + glyphNameToFileName(glyphName, glyphSet)

        src = GlyphSet(GLYPHSETDIR, validateRead=True, validateWrite=True)
        dst = GlyphSet(
            self.dstDir, myGlyphNameToFileName, validateRead=True, validateWrite=True
        )
        for glyphName in src.keys():
            g = src[glyphName]
            g.drawPoints(None)  # load attrs
            dst.writeGlyph(glyphName, g, g.drawPoints)
        d = {k: "prefix" + v for k, v in src.contents.items()}
        self.assertEqual(d, dst.contents)

    def testGetUnicodes(self):
        """getUnicodes() must agree with each glyph's own ``unicodes`` attribute."""
        src = GlyphSet(GLYPHSETDIR, validateRead=True, validateWrite=True)
        unicodes = src.getUnicodes()
        for glyphName in src.keys():
            g = src[glyphName]
            g.drawPoints(None)  # load attrs
            if not hasattr(g, "unicodes"):
                self.assertEqual(unicodes[glyphName], [])
            else:
                # Only reached when the attribute exists; reading g.unicodes
                # unconditionally would raise AttributeError.
                self.assertEqual(g.unicodes, unicodes[glyphName])

    def testReadGlyphInvalidXml(self):
        """Test that calling readGlyph() to read a .glif with invalid XML raises
        a library error, instead of an exception from the XML dependency that is
        used internally. In addition, check that the raised exception describes
        the glyph by name and gives the location of the broken .glif file."""

        # Create a glyph set with three empty glyphs.
        glyph_set = GlyphSet(self.dstDir)
        glyph_set.writeGlyph("a", _Glyph())
        glyph_set.writeGlyph("b", _Glyph())
        glyph_set.writeGlyph("c", _Glyph())

        # Corrupt the XML of /c.
        invalid_xml = b"<abc></def>"
        Path(self.dstDir, glyph_set.contents["c"]).write_bytes(invalid_xml)

        # Confirm that reading /a and /b is fine...
        glyph_set.readGlyph("a", _Glyph())
        glyph_set.readGlyph("b", _Glyph())

        # ...but that reading /c raises a descriptive library error.
        expected_message = (
            r"GLIF contains invalid XML\.\n"
            r"The issue is in glyph 'c', located in '.*c\.glif.*\."
        )
        with pytest.raises(GlifLibError, match=expected_message):
            glyph_set.readGlyph("c", _Glyph())
class FileNameTest:
    """Tests for the default glyph-name to file-name mapping."""

    def test_default_file_name_scheme(self):
        """Pin the file name produced for a range of representative glyph names."""
        # Upper-case letters get a trailing underscore; a leading period
        # becomes an underscore.
        assert glyphNameToFileName("a", None) == "a.glif"
        assert glyphNameToFileName("A", None) == "A_.glif"
        assert glyphNameToFileName("Aring", None) == "A_ring.glif"
        assert glyphNameToFileName("F_A_B", None) == "F__A__B_.glif"
        assert glyphNameToFileName("A.alt", None) == "A_.alt.glif"
        assert glyphNameToFileName("A.Alt", None) == "A_.A_lt.glif"
        assert glyphNameToFileName(".notdef", None) == "_notdef.glif"
        assert glyphNameToFileName("T_H", None) == "T__H_.glif"
        assert glyphNameToFileName("T_h", None) == "T__h.glif"
        assert glyphNameToFileName("t_h", None) == "t_h.glif"
        assert glyphNameToFileName("F_F_I", None) == "F__F__I_.glif"
        assert glyphNameToFileName("f_f_i", None) == "f_f_i.glif"
        assert glyphNameToFileName("AE", None) == "A_E_.glif"
        assert glyphNameToFileName("Ae", None) == "A_e.glif"
        assert glyphNameToFileName("ae", None) == "ae.glif"
        assert glyphNameToFileName("aE", None) == "aE_.glif"
        assert glyphNameToFileName("a.alt", None) == "a.alt.glif"
        assert glyphNameToFileName("A.aLt", None) == "A_.aL_t.glif"
        assert glyphNameToFileName("A.alT", None) == "A_.alT_.glif"
        assert glyphNameToFileName("Aacute_V.swash", None) == "A_acute_V_.swash.glif"
        assert glyphNameToFileName(".notdef", None) == "_notdef.glif"
        # A lower-case "con" part gets a leading underscore, also after a period.
        assert glyphNameToFileName("con", None) == "_con.glif"
        assert glyphNameToFileName("CON", None) == "C_O_N_.glif"
        assert glyphNameToFileName("con.alt", None) == "_con.alt.glif"
        assert glyphNameToFileName("alt.con", None) == "alt._con.glif"

    def test_conflicting_case_insensitive_file_names(self, tmp_path):
        """Names differing only by case must still map to distinct files."""
        src = GlyphSet(GLYPHSETDIR)
        dst = GlyphSet(tmp_path)
        glyph = src["a"]

        dst.writeGlyph("a", glyph)
        dst.writeGlyph("A", glyph)
        # NOTE(review): "a_" is written twice in the reviewed text; a call
        # between the two writes may have been lost -- confirm against upstream.
        dst.writeGlyph("a_", glyph)
        dst.writeGlyph("a_", glyph)
        dst.writeGlyph("A_", glyph)
        dst.writeGlyph("i_j", glyph)

        assert dst.contents == {
            "a": "a.glif",
            "A": "A_.glif",
            "a_": "a_000000000000001.glif",
            "A_": "A__.glif",
            "i_j": "i_j.glif",
        }

        # make sure filenames are unique even on case-insensitive filesystems
        assert len({fileName.lower() for fileName in dst.contents.values()}) == 5
class _Glyph:
    """Bare attribute container used as the glyph object in these tests.

    It defines nothing itself: tests assign attributes such as ``unicodes``
    directly, and the glifLib read functions are handed instances to fill in.
    """
class ReadWriteFuncTest:
    """Tests for the string-level readGlyphFromString / writeGlyphToString API.

    NOTE(review): the ``.name`` expressions and the closing lines of the GLIF
    string literals were missing from the reviewed text and are reconstructed
    here -- confirm against upstream.
    """

    def test_roundtrip(self):
        """Write a glyph, read it back, and write again: both outputs must match."""
        glyph = _Glyph()
        glyph.name = "a"
        glyph.unicodes = [0x0061]

        s1 = writeGlyphToString(glyph.name, glyph)

        glyph2 = _Glyph()
        readGlyphFromString(s1, glyph2)
        assert glyph.__dict__ == glyph2.__dict__

        s2 = writeGlyphToString(glyph2.name, glyph2)
        assert s1 == s2

    def test_xml_declaration(self):
        """The serialized GLIF must start with a UTF-8 XML declaration."""
        s = writeGlyphToString("a", _Glyph())
        assert s.startswith(XML_DECLARATION % "UTF-8")

    def test_parse_xml_remove_comments(self):
        """XML comments in the input must not disturb parsing."""
        s = b"""<?xml version='1.0' encoding='UTF-8'?>
<!-- a comment -->
<glyph name="A" format="2">
<advance width="1290"/>
<unicode hex="0041"/>
<!-- another comment -->
</glyph>
"""

        g = _Glyph()
        readGlyphFromString(s, g)

        assert g.name == "A"
        assert g.width == 1290
        assert g.unicodes == [0x0041]

    def test_read_invalid_xml(self):
        """Test that calling readGlyphFromString() with invalid XML raises a
        library error, instead of an exception from the XML dependency that is
        used internally."""

        invalid_xml = b"<abc></def>"
        empty_glyph = _Glyph()

        with pytest.raises(GlifLibError, match="GLIF contains invalid XML"):
            readGlyphFromString(invalid_xml, empty_glyph)

    def test_read_unsupported_format_version(self, caplog):
        """An unknown GLIF format raises when validating, else only warns."""
        s = """<?xml version='1.0' encoding='utf-8'?>
<glyph name="A" format="0" formatMinor="0">
<advance width="500"/>
<unicode hex="0041"/>
</glyph>
"""

        with pytest.raises(UnsupportedGLIFFormat):
            readGlyphFromString(s, _Glyph())  # validate=True by default

        with pytest.raises(UnsupportedGLIFFormat):
            readGlyphFromString(s, _Glyph(), validate=True)

        caplog.clear()
        with caplog.at_level(logging.WARNING, logger="fontTools.ufoLib.glifLib"):
            readGlyphFromString(s, _Glyph(), validate=False)

        assert len(caplog.records) == 1
        assert "Unsupported GLIF format" in caplog.text
        assert "Assuming the latest supported version" in caplog.text

    def test_read_allow_format_versions(self):
        """``formatVersions`` restricts which GLIF versions may be read."""
        s = """<?xml version='1.0' encoding='utf-8'?>
<glyph name="A" format="2">
<advance width="500"/>
<unicode hex="0041"/>
</glyph>
"""

        # these two calls are equivalent
        readGlyphFromString(s, _Glyph(), formatVersions=[1, 2])
        readGlyphFromString(s, _Glyph(), formatVersions=[(1, 0), (2, 0)])

        # if at least one supported formatVersion, unsupported ones are ignored
        readGlyphFromString(s, _Glyph(), formatVersions=[(2, 0), (123, 456)])

        with pytest.raises(
            ValueError, match="None of the requested GLIF formatVersions are supported"
        ):
            readGlyphFromString(s, _Glyph(), formatVersions=[0, 2001])

        with pytest.raises(GlifLibError, match="Forbidden GLIF format version"):
            readGlyphFromString(s, _Glyph(), formatVersions=[1])

    def test_read_ensure_x_y(self):
        """Ensure that a proper GlifLibError is raised when point coordinates are
        missing, regardless of validation setting."""

        # NOTE(review): the <outline>/<contour> wrappers are reconstructed;
        # the reviewed text showed only the two <point> elements.
        s = """<?xml version='1.0' encoding='utf-8'?>
<glyph name="A" format="2">
<outline>
<contour>
<point x="545" y="0" type="line"/>
<point x="638" type="line"/>
</contour>
</outline>
</glyph>
"""
        pen = RecordingPointPen()

        with pytest.raises(GlifLibError, match="Required y attribute"):
            readGlyphFromString(s, _Glyph(), pen)

        with pytest.raises(GlifLibError, match="Required y attribute"):
            readGlyphFromString(s, _Glyph(), pen, validate=False)
def test_GlyphSet_unsupported_ufoFormatVersion(tmp_path, caplog):
    """GlyphSet must refuse to open with an unsupported UFO format version."""
    # Checked once as a bare integer and once as a (major, minor) tuple.
    for rejected_version in (0, (0, 1)):
        with pytest.raises(UnsupportedUFOFormat):
            GlyphSet(tmp_path, ufoFormatVersion=rejected_version)
def test_GlyphSet_writeGlyph_formatVersion(tmp_path):
    """writeGlyph must pick, and enforce, a GLIF version valid for the UFO version."""
    src = GlyphSet(GLYPHSETDIR)
    dst = GlyphSet(tmp_path, ufoFormatVersion=(2, 0))
    glyph = src["A"]

    # no explicit formatVersion passed: use the more recent GLIF formatVersion
    # that is supported by given ufoFormatVersion (GLIF 1 for UFO 2)
    dst.writeGlyph("A", glyph)
    glif = dst.getGLIF("A")
    assert b'format="1"' in glif
    assert b"formatMinor" not in glif  # omitted when 0

    # explicit, unknown formatVersion
    with pytest.raises(UnsupportedGLIFFormat):
        dst.writeGlyph("A", glyph, formatVersion=(0, 0))

    # explicit, known formatVersion but unsupported by given ufoFormatVersion
    # NOTE(review): the expected exception type was missing from the reviewed
    # text; UnsupportedGLIFFormat is assumed from the matched message.
    with pytest.raises(
        UnsupportedGLIFFormat,
        match="Unsupported GLIF format version .*for UFO format version",
    ):
        dst.writeGlyph("A", glyph, formatVersion=(2, 0))