import re
from itertools import groupby

try:
    from lxml import etree
except ModuleNotFoundError:
    etree = None

from fontTools import ttLib
from fontTools.subset.util import _add_method


__all__ = ["subset_glyphs"]


# Matches element ids of the form "glyph<N>" and captures the decimal index.
# The pattern must be a raw string: "\d" inside a plain string literal is an
# invalid escape sequence, which newer Python versions warn about.
GID_RE = re.compile(r"^glyph(\d+)$")

# Fully-qualified (Clark notation) name of the xlink:href attribute.
XLINK_HREF = "{http://www.w3.org/1999/xlink}href"


def remap_glyph_ids(svg, glyph_index_map):
    """Rename 'glyph<N>' element ids in an SVG tree to match new glyph indices.

    The tree is modified in place. Elements whose id matches ``GID_RE`` are
    renamed to ``glyph<new index>`` when their old index is a key of
    ``glyph_index_map``; otherwise the id gets a ``~`` prefix so that it cannot
    collide with a remapped id. Local ``xlink:href="#..."`` references to any
    renamed id are updated to follow the rename.

    Args:
        svg: root element of the parsed SVG document (must support ``iter``
            and expose ``attrib`` mappings on its elements).
        glyph_index_map: mapping from old glyph index to new glyph index.

    Returns:
        None.
    """
    id_map = {}
    href_elements = []
    for el in svg.iter("*"):
        # we'll rename these later
        if XLINK_HREF in el.attrib and el.attrib[XLINK_HREF].startswith("#"):
            href_elements.append(el)

        el_id = el.attrib.get("id")
        if el_id is None:
            continue
        m = GID_RE.match(el_id)
        if not m:
            continue

        old_index = int(m.group(1))
        new_index = glyph_index_map.get(old_index)
        if new_index is not None:
            if old_index == new_index:
                # id is already correct, nothing to rename
                continue
            new_id = f"glyph{new_index}"
        else:
            # If the old index is missing, the element corresponds to a glyph
            # that was excluded from the font's subset.
            # For now we keep it around, renamed to avoid clashes with the new
            # GID (though technically there could still be clashes even after
            # we insert a tilde at the beginning, e.g. '~glyphXXX' is still a
            # valid id...).
            # TODO Figure out how to best prune the SVG document of unused
            # elements.
            # https://github.com/fonttools/fonttools/issues/534
            new_id = f"~{el_id}"

        id_map[el_id] = new_id
        el.attrib["id"] = new_id

    if not id_map:
        return

    # update xlink:href="#..." that refer to the old id to point to the new one
    for el in href_elements:
        ref = el.attrib[XLINK_HREF]
        # we only care about local #fragment identifiers
        assert ref.startswith("#")
        old_id = ref[1:]
        if old_id in id_map:
            new_id = id_map[old_id]
            el.attrib[XLINK_HREF] = f"#{new_id}"


def ranges(ints):
    """Yield (min, max) ranges of consecutive integers from the input set"""
    sorted_ints = sorted(set(ints))
    # to group together consecutive ints, we use as 'key' the difference
    # between their index in the (sorted) list and themselves, which stays
    # the same for consecutive numbers
    for _key, group in groupby(enumerate(sorted_ints), lambda i: i[0] - i[1]):
        consecutive_ints = [v for _i, v in group]
        yield (consecutive_ints[0], consecutive_ints[-1])


@_add_method(ttLib.getTableClass("SVG "))
def subset_glyphs(self, s):
    """Subset the table's SVG documents to the glyphs retained in ``s.glyphs``.

    Records in ``self.docList`` that cover none of the retained glyphs are
    dropped. Every other document is parsed, has its glyph ids remapped with
    ``remap_glyph_ids``, and is re-serialized; one record is then emitted per
    run of consecutive new glyph indices. ``self.docList`` is replaced with
    the resulting list.

    Args:
        s: the subsetter object; this method reads its ``orig_glyph_order``,
            ``reverseOrigGlyphMap``, ``glyph_index_map`` and ``glyphs``
            attributes.

    Returns:
        True if at least one document record remains, False otherwise.

    Raises:
        ModuleNotFoundError: if lxml could not be imported.
    """
    if etree is None:
        raise ModuleNotFoundError("No module named 'lxml', required to subset SVG")

    # ordered list of glyph names (before subsetting)
    glyph_order = s.orig_glyph_order
    # map from glyph names to original glyph indices
    rev_orig_glyph_map = s.reverseOrigGlyphMap
    # map from original to new glyph indices (after subsetting)
    glyph_index_map = s.glyph_index_map

    new_docs = []
    for doc, start_gid, end_gid in self.docList:
        old_glyphs = {glyph_order[i] for i in range(start_gid, end_gid + 1)}
        new_glyphs = old_glyphs.intersection(s.glyphs)
        if not new_glyphs:
            # no intersection: we can drop the whole record
            continue

        # NOTE If new_glyphs != old_glyphs, there's only a partial intersection:
        # i.e. we'll likely end up with unused garbage until we figure out how
        # to prune the unused references from the SVG doc.

        # NOTE huge_tree=True disables security restrictions and supports very
        # deep trees and very long text content. Without it I would get an
        # error like this:
        # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup`
        # when parsing noto-emoji-picosvg.svg from googlefonts/color-fonts.
        # This is lxml-only API, won't work with built-in ElementTree...
        svg = etree.fromstring(doc, parser=etree.XMLParser(huge_tree=True))

        remap_glyph_ids(svg, glyph_index_map)

        new_doc = etree.tostring(svg)

        new_gids = {glyph_index_map[rev_orig_glyph_map[g]] for g in new_glyphs}
        # Distinct names for the new ranges, so the record's own
        # start_gid/end_gid from the outer loop are not shadowed.
        for new_start_gid, new_end_gid in ranges(new_gids):
            new_docs.append((new_doc, new_start_gid, new_end_gid))

    self.docList = new_docs

    return bool(self.docList)