Clean up svgDoc compression
This is based on bungeman's https://github.com/fonttools/fonttools/pull/2627. Previously, an entire `SVG ` table would be marked as compressed if any of the decoded SVG documents in it were compressed. Then, on encoding, all SVG documents would be considered for compression. The XML format had no means to indicate whether compression was desired. Instead, mark each svgDoc with its own compression status. When decoding, mark the svgDoc as compressed if the data was compressed. When encoding, try to compress the svgDoc if it is marked as compressed. In the XML format, the data itself is always uncompressed, but an optional `compressed` boolean attribute (defaults to false) is allowed to indicate that the svgDoc should be compressed when encoded. We also try to make sure that older code that relies on docList containing sequences of three items (doc, startGID, endGID) will continue to work without modification.
This commit is contained in:
parent
a367e8acf5
commit
be623e257f
@ -14,6 +14,7 @@ except ModuleNotFoundError:
|
||||
|
||||
from fontTools import ttLib
|
||||
from fontTools.subset.util import _add_method
|
||||
from fontTools.ttLib.tables.S_V_G_ import SVGDocument
|
||||
|
||||
|
||||
__all__ = ["subset_glyphs"]
|
||||
@ -201,10 +202,12 @@ def subset_glyphs(self, s) -> bool:
|
||||
# map from original to new glyph indices (after subsetting)
|
||||
glyph_index_map: Dict[int, int] = s.glyph_index_map
|
||||
|
||||
new_docs: List[Tuple[bytes, int, int]] = []
|
||||
for doc, start, end in self.docList:
|
||||
new_docs: List[SVGDocument] = []
|
||||
for doc in self.docList:
|
||||
|
||||
glyphs = {glyph_order[i] for i in range(start, end + 1)}.intersection(s.glyphs)
|
||||
glyphs = {
|
||||
glyph_order[i] for i in range(doc.startGlyphID, doc.endGlyphID + 1)
|
||||
}.intersection(s.glyphs)
|
||||
if not glyphs:
|
||||
# no intersection: we can drop the whole record
|
||||
continue
|
||||
@ -212,7 +215,7 @@ def subset_glyphs(self, s) -> bool:
|
||||
svg = etree.fromstring(
|
||||
# encode because fromstring dislikes xml encoding decl if input is str.
|
||||
# SVG xml encoding must be utf-8 as per OT spec.
|
||||
doc.encode("utf-8"),
|
||||
doc.data.encode("utf-8"),
|
||||
parser=etree.XMLParser(
|
||||
# Disable libxml2 security restrictions to support very deep trees.
|
||||
# Without this we would get an error like this:
|
||||
@ -241,7 +244,7 @@ def subset_glyphs(self, s) -> bool:
|
||||
|
||||
new_gids = (glyph_index_map[i] for i in gids)
|
||||
for start, end in ranges(new_gids):
|
||||
new_docs.append((new_doc, start, end))
|
||||
new_docs.append(SVGDocument(new_doc, start, end, doc.compressed))
|
||||
|
||||
self.docList = new_docs
|
||||
|
||||
|
@ -17,9 +17,11 @@ The XML format is:
|
||||
</SVG>
|
||||
"""
|
||||
|
||||
from fontTools.misc.textTools import bytesjoin, strjoin, tobytes, tostr
|
||||
from fontTools.misc.textTools import bytesjoin, safeEval, strjoin, tobytes, tostr
|
||||
from fontTools.misc import sstruct
|
||||
from . import DefaultTable
|
||||
from collections.abc import Sequence
|
||||
from dataclasses import dataclass, astuple
|
||||
from io import BytesIO
|
||||
import struct
|
||||
import logging
|
||||
@ -75,15 +77,18 @@ class table_S_V_G_(DefaultTable.DefaultTable):
|
||||
start = entry.svgDocOffset + subTableStart
|
||||
end = start + entry.svgDocLength
|
||||
doc = data[start:end]
|
||||
compressed = False
|
||||
if doc.startswith(b"\x1f\x8b"):
|
||||
import gzip
|
||||
bytesIO = BytesIO(doc)
|
||||
with gzip.GzipFile(None, "r", fileobj=bytesIO) as gunzipper:
|
||||
doc = gunzipper.read()
|
||||
self.compressed = True
|
||||
del bytesIO
|
||||
compressed = True
|
||||
doc = tostr(doc, "utf_8")
|
||||
self.docList.append( [doc, entry.startGlyphID, entry.endGlyphID] )
|
||||
self.docList.append(
|
||||
SVGDocument(doc, entry.startGlyphID, entry.endGlyphID, compressed)
|
||||
)
|
||||
|
||||
def compile(self, ttFont):
|
||||
version = 0
|
||||
@ -96,9 +101,13 @@ class table_S_V_G_(DefaultTable.DefaultTable):
|
||||
entryList.append(datum)
|
||||
curOffset = len(datum) + doc_index_entry_format_0Size*numEntries
|
||||
seenDocs = {}
|
||||
for doc, startGlyphID, endGlyphID in self.docList:
|
||||
docBytes = tobytes(doc, encoding="utf_8")
|
||||
if getattr(self, "compressed", False) and not docBytes.startswith(b"\x1f\x8b"):
|
||||
allCompressed = getattr(self, "compressed", False)
|
||||
for i, doc in enumerate(self.docList):
|
||||
if isinstance(doc, (list, tuple)):
|
||||
doc = SVGDocument(*doc)
|
||||
self.docList[i] = doc
|
||||
docBytes = tobytes(doc.data, encoding="utf_8")
|
||||
if (allCompressed or doc.compressed) and not docBytes.startswith(b"\x1f\x8b"):
|
||||
import gzip
|
||||
bytesIO = BytesIO()
|
||||
with gzip.GzipFile(None, "w", fileobj=bytesIO) as gzipper:
|
||||
@ -115,7 +124,7 @@ class table_S_V_G_(DefaultTable.DefaultTable):
|
||||
curOffset += docLength
|
||||
seenDocs[docBytes] = docOffset
|
||||
docList.append(docBytes)
|
||||
entry = struct.pack(">HHLL", startGlyphID, endGlyphID, docOffset, docLength)
|
||||
entry = struct.pack(">HHLL", doc.startGlyphID, doc.endGlyphID, docOffset, docLength)
|
||||
entryList.append(entry)
|
||||
entryList.extend(docList)
|
||||
svgDocData = bytesjoin(entryList)
|
||||
@ -127,10 +136,16 @@ class table_S_V_G_(DefaultTable.DefaultTable):
|
||||
return data
|
||||
|
||||
def toXML(self, writer, ttFont):
|
||||
for doc, startGID, endGID in self.docList:
|
||||
writer.begintag("svgDoc", startGlyphID=startGID, endGlyphID=endGID)
|
||||
for i, doc in enumerate(self.docList):
|
||||
if isinstance(doc, (list, tuple)):
|
||||
doc = SVGDocument(*doc)
|
||||
self.docList[i] = doc
|
||||
attrs = {"startGlyphID": doc.startGlyphID, "endGlyphID": doc.endGlyphID}
|
||||
if doc.compressed:
|
||||
attrs["compressed"] = 1
|
||||
writer.begintag("svgDoc", **attrs)
|
||||
writer.newline()
|
||||
writer.writecdata(doc)
|
||||
writer.writecdata(doc.data)
|
||||
writer.newline()
|
||||
writer.endtag("svgDoc")
|
||||
writer.newline()
|
||||
@ -143,7 +158,8 @@ class table_S_V_G_(DefaultTable.DefaultTable):
|
||||
doc = doc.strip()
|
||||
startGID = int(attrs["startGlyphID"])
|
||||
endGID = int(attrs["endGlyphID"])
|
||||
self.docList.append( [doc, startGID, endGID] )
|
||||
compressed = bool(safeEval(attrs.get("compressed", "0")))
|
||||
self.docList.append(SVGDocument(doc, startGID, endGID, compressed))
|
||||
else:
|
||||
log.warning("Unknown %s %s", name, content)
|
||||
|
||||
@ -157,3 +173,23 @@ class DocumentIndexEntry(object):
|
||||
|
||||
def __repr__(self):
|
||||
return "startGlyphID: %s, endGlyphID: %s, svgDocOffset: %s, svgDocLength: %s" % (self.startGlyphID, self.endGlyphID, self.svgDocOffset, self.svgDocLength)
|
||||
|
||||
|
||||
@dataclass
class SVGDocument(Sequence):
    """A single SVG document entry stored in the table's ``docList``.

    Attributes are accessed by name (``doc.data``, ``doc.startGlyphID``,
    ``doc.endGlyphID``, ``doc.compressed``). For backward compatibility the
    object also behaves like a read-only sequence of exactly three items,
    ``(data, startGlyphID, endGlyphID)``, so that existing code such as
    ``for doc, start, end in table.docList`` keeps working unchanged.
    """

    # The SVG document text.
    data: str
    # First glyph ID of the range covered by this document.
    startGlyphID: int
    # Last glyph ID (inclusive) of the range covered by this document.
    endGlyphID: int
    # Whether the document should be compressed when the table is compiled.
    compressed: bool = False

    # Previously, the SVG table's docList attribute contained lists of 3 items:
    # [doc, startGlyphID, endGlyphID]; later, we added a `compressed` attribute.
    # For backward compatibility with code that depends on them being sequences
    # of fixed length=3, we subclass the Sequence abstract base class and pretend
    # only the first three items are present. 'compressed' is only accessible via
    # named attribute lookup like regular dataclasses: i.e. `doc.compressed`,
    # not `doc[3]`.
    def __getitem__(self, index):
        """Return item(s) of the legacy ``(data, startGlyphID, endGlyphID)`` view.

        Supports the same integer and slice indexing as a 3-tuple and raises
        ``IndexError`` for out-of-range integer indices.
        """
        # Build the 3-item view directly from the fields instead of calling
        # dataclasses.astuple(), which would recursively copy every field
        # (including `compressed`) on each access.
        return (self.data, self.startGlyphID, self.endGlyphID)[index]

    def __len__(self):
        """Return 3: only the legacy fields are exposed through the sequence API."""
        return 3
|
||||
|
Loading…
x
Reference in New Issue
Block a user