diff --git a/Lib/fontTools/subset/svg.py b/Lib/fontTools/subset/svg.py index e25fb3e65..053db6a45 100644 --- a/Lib/fontTools/subset/svg.py +++ b/Lib/fontTools/subset/svg.py @@ -14,6 +14,7 @@ except ModuleNotFoundError: from fontTools import ttLib from fontTools.subset.util import _add_method +from fontTools.ttLib.tables.S_V_G_ import SVGDocument __all__ = ["subset_glyphs"] @@ -201,10 +202,12 @@ def subset_glyphs(self, s) -> bool: # map from original to new glyph indices (after subsetting) glyph_index_map: Dict[int, int] = s.glyph_index_map - new_docs: List[Tuple[bytes, int, int]] = [] - for doc, start, end in self.docList: + new_docs: List[SVGDocument] = [] + for doc in self.docList: - glyphs = {glyph_order[i] for i in range(start, end + 1)}.intersection(s.glyphs) + glyphs = { + glyph_order[i] for i in range(doc.startGlyphID, doc.endGlyphID + 1) + }.intersection(s.glyphs) if not glyphs: # no intersection: we can drop the whole record continue @@ -212,7 +215,7 @@ def subset_glyphs(self, s) -> bool: svg = etree.fromstring( # encode because fromstring dislikes xml encoding decl if input is str. # SVG xml encoding must be utf-8 as per OT spec. - doc.encode("utf-8"), + doc.data.encode("utf-8"), parser=etree.XMLParser( # Disable libxml2 security restrictions to support very deep trees. 
# Without this we would get an error like this: @@ -241,7 +244,7 @@ def subset_glyphs(self, s) -> bool: new_gids = (glyph_index_map[i] for i in gids) for start, end in ranges(new_gids): - new_docs.append((new_doc, start, end)) + new_docs.append(SVGDocument(new_doc, start, end, doc.compressed)) self.docList = new_docs diff --git a/Lib/fontTools/ttLib/tables/S_V_G_.py b/Lib/fontTools/ttLib/tables/S_V_G_.py index bc0e533d6..d49afdca0 100644 --- a/Lib/fontTools/ttLib/tables/S_V_G_.py +++ b/Lib/fontTools/ttLib/tables/S_V_G_.py @@ -17,9 +17,11 @@ The XML format is: """ -from fontTools.misc.textTools import bytesjoin, strjoin, tobytes, tostr +from fontTools.misc.textTools import bytesjoin, safeEval, strjoin, tobytes, tostr from fontTools.misc import sstruct from . import DefaultTable +from collections.abc import Sequence +from dataclasses import dataclass, astuple from io import BytesIO import struct import logging @@ -75,15 +77,18 @@ class table_S_V_G_(DefaultTable.DefaultTable): start = entry.svgDocOffset + subTableStart end = start + entry.svgDocLength doc = data[start:end] + compressed = False if doc.startswith(b"\x1f\x8b"): import gzip bytesIO = BytesIO(doc) with gzip.GzipFile(None, "r", fileobj=bytesIO) as gunzipper: doc = gunzipper.read() - self.compressed = True del bytesIO + compressed = True doc = tostr(doc, "utf_8") - self.docList.append( [doc, entry.startGlyphID, entry.endGlyphID] ) + self.docList.append( + SVGDocument(doc, entry.startGlyphID, entry.endGlyphID, compressed) + ) def compile(self, ttFont): version = 0 @@ -96,9 +101,13 @@ class table_S_V_G_(DefaultTable.DefaultTable): entryList.append(datum) curOffset = len(datum) + doc_index_entry_format_0Size*numEntries seenDocs = {} - for doc, startGlyphID, endGlyphID in self.docList: - docBytes = tobytes(doc, encoding="utf_8") - if getattr(self, "compressed", False) and not docBytes.startswith(b"\x1f\x8b"): + allCompressed = getattr(self, "compressed", False) + for i, doc in enumerate(self.docList): + if 
isinstance(doc, (list, tuple)): + doc = SVGDocument(*doc) + self.docList[i] = doc + docBytes = tobytes(doc.data, encoding="utf_8") + if (allCompressed or doc.compressed) and not docBytes.startswith(b"\x1f\x8b"): import gzip bytesIO = BytesIO() with gzip.GzipFile(None, "w", fileobj=bytesIO) as gzipper: @@ -115,7 +124,7 @@ class table_S_V_G_(DefaultTable.DefaultTable): curOffset += docLength seenDocs[docBytes] = docOffset docList.append(docBytes) - entry = struct.pack(">HHLL", startGlyphID, endGlyphID, docOffset, docLength) + entry = struct.pack(">HHLL", doc.startGlyphID, doc.endGlyphID, docOffset, docLength) entryList.append(entry) entryList.extend(docList) svgDocData = bytesjoin(entryList) @@ -127,10 +136,16 @@ class table_S_V_G_(DefaultTable.DefaultTable): return data def toXML(self, writer, ttFont): - for doc, startGID, endGID in self.docList: - writer.begintag("svgDoc", startGlyphID=startGID, endGlyphID=endGID) + for i, doc in enumerate(self.docList): + if isinstance(doc, (list, tuple)): + doc = SVGDocument(*doc) + self.docList[i] = doc + attrs = {"startGlyphID": doc.startGlyphID, "endGlyphID": doc.endGlyphID} + if doc.compressed: + attrs["compressed"] = 1 + writer.begintag("svgDoc", **attrs) writer.newline() - writer.writecdata(doc) + writer.writecdata(doc.data) writer.newline() writer.endtag("svgDoc") writer.newline() @@ -143,7 +158,8 @@ class table_S_V_G_(DefaultTable.DefaultTable): doc = doc.strip() startGID = int(attrs["startGlyphID"]) endGID = int(attrs["endGlyphID"]) - self.docList.append( [doc, startGID, endGID] ) + compressed = bool(safeEval(attrs.get("compressed", "0"))) + self.docList.append(SVGDocument(doc, startGID, endGID, compressed)) else: log.warning("Unknown %s %s", name, content) @@ -157,3 +173,23 @@ class DocumentIndexEntry(object): def __repr__(self): return "startGlyphID: %s, endGlyphID: %s, svgDocOffset: %s, svgDocLength: %s" % (self.startGlyphID, self.endGlyphID, self.svgDocOffset, self.svgDocLength) + + +@dataclass +class 
SVGDocument(Sequence): + data: str + startGlyphID: int + endGlyphID: int + compressed: bool = False + + # Previously, the SVG table's docList attribute contained lists of 3 items: + # [doc, startGlyphID, endGlyphID]; later, we added a `compressed` attribute. + # For backward compatibility with code that depends on them being sequences of + # fixed length=3, we subclass the Sequence abstract base class and pretend only + # the first three items are present. 'compressed' is only accessible via named + # attribute lookup like regular dataclasses: i.e. `doc.compressed`, not `doc[3]` + def __getitem__(self, index): + return astuple(self)[:3][index] + + def __len__(self): + return 3