Merge pull request #3115 from fonttools/xml-reader-join-content-chunks

xmlReader: join consecutive text data that had been cut by buffered xml parser
This commit is contained in:
Cosimo Lupo 2023-05-22 12:37:23 +01:00 committed by GitHub
commit c2c2cb0949
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 270 additions and 1 deletions

View File

@ -148,7 +148,19 @@ class XMLReader(object):
def _characterDataHandler(self, data):
    """Record character data reported by the XML parser.

    Data is only recorded while ``self.stackSize`` is greater than 1;
    otherwise the call is a no-op.

    The parser parses in chunks, so a single text node may arrive as
    several consecutive calls. A chunk is therefore concatenated onto the
    previous item of the innermost content list when that item is also
    text, instead of being stored as a separate item. See
    https://github.com/fonttools/fonttools/issues/2614

    Args:
        data: the text chunk passed in by the parser.
    """
    if self.stackSize <= 1:
        return
    content = self.contentStack[-1]
    # A lone "\n" is never merged, neither as the incoming chunk nor as
    # the previous item; such items always stay separate entries.
    # NOTE(review): presumably consumers rely on newline-only items staying
    # distinct -- confirm against the callers of the content list.
    if (
        data != "\n"
        and content
        and isinstance(content[-1], str)
        and content[-1] != "\n"
    ):
        # Continuation of the same text node: extend the previous string.
        content[-1] += data
    else:
        # First chunk, a newline, or the previous item is not text.
        content.append(data)
def _endElementHandler(self, name):
self.stackSize = self.stackSize - 1

View File

@ -0,0 +1,224 @@
<?xml version="1.0" encoding="UTF-8"?>
<ttFont sfntVersion="\x00\x01\x00\x00" ttLibVersion="4.32">
<GlyphOrder>
<!-- The 'id' attribute is only for humans; it is ignored when parsed. -->
<GlyphID id="0" name=".notdef"/>
<GlyphID id="1" name=".null"/>
<GlyphID id="2" name="A"/>
</GlyphOrder>
<head>
<!-- Most of this table will be recalculated by the compiler -->
<tableVersion value="1.0"/>
<fontRevision value="1.0"/>
<checkSumAdjustment value="0x5c9585c9"/>
<magicNumber value="0x5f0f3cf5"/>
<flags value="00000000 00000011"/>
<unitsPerEm value="1024"/>
<created value="Fri May 6 19:55:13 2022"/>
<modified value="Fri May 6 19:55:13 2022"/>
<xMin value="0"/>
<yMin value="0"/>
<xMax value="0"/>
<yMax value="0"/>
<macStyle value="00000000 00000000"/>
<lowestRecPPEM value="3"/>
<fontDirectionHint value="2"/>
<indexToLocFormat value="0"/>
<glyphDataFormat value="0"/>
</head>
<hhea>
<tableVersion value="0x00010000"/>
<ascent value="824"/>
<descent value="200"/>
<lineGap value="0"/>
<advanceWidthMax value="600"/>
<minLeftSideBearing value="0"/>
<minRightSideBearing value="0"/>
<xMaxExtent value="0"/>
<caretSlopeRise value="1"/>
<caretSlopeRun value="0"/>
<caretOffset value="0"/>
<reserved0 value="0"/>
<reserved1 value="0"/>
<reserved2 value="0"/>
<reserved3 value="0"/>
<metricDataFormat value="0"/>
<numberOfHMetrics value="1"/>
</hhea>
<maxp>
<!-- Most of this table will be recalculated by the compiler -->
<tableVersion value="0x10000"/>
<numGlyphs value="3"/>
<maxPoints value="0"/>
<maxContours value="0"/>
<maxCompositePoints value="0"/>
<maxCompositeContours value="0"/>
<maxZones value="2"/>
<maxTwilightPoints value="0"/>
<maxStorage value="0"/>
<maxFunctionDefs value="0"/>
<maxInstructionDefs value="0"/>
<maxStackElements value="0"/>
<maxSizeOfInstructions value="0"/>
<maxComponentElements value="0"/>
<maxComponentDepth value="0"/>
</maxp>
<OS_2>
<!-- The fields 'usFirstCharIndex' and 'usLastCharIndex'
will be recalculated by the compiler -->
<version value="3"/>
<xAvgCharWidth value="600"/>
<usWeightClass value="400"/>
<usWidthClass value="5"/>
<fsType value="00000000 00000100"/>
<ySubscriptXSize value="0"/>
<ySubscriptYSize value="0"/>
<ySubscriptXOffset value="0"/>
<ySubscriptYOffset value="0"/>
<ySuperscriptXSize value="0"/>
<ySuperscriptYSize value="0"/>
<ySuperscriptXOffset value="0"/>
<ySuperscriptYOffset value="0"/>
<yStrikeoutSize value="0"/>
<yStrikeoutPosition value="0"/>
<sFamilyClass value="0"/>
<panose>
<bFamilyType value="0"/>
<bSerifStyle value="0"/>
<bWeight value="0"/>
<bProportion value="0"/>
<bContrast value="0"/>
<bStrokeVariation value="0"/>
<bArmStyle value="0"/>
<bLetterForm value="0"/>
<bMidline value="0"/>
<bXHeight value="0"/>
</panose>
<ulUnicodeRange1 value="00000000 00000000 00000000 00000001"/>
<ulUnicodeRange2 value="00000000 00000000 00000000 00000000"/>
<ulUnicodeRange3 value="00000000 00000000 00000000 00000000"/>
<ulUnicodeRange4 value="00000000 00000000 00000000 00000000"/>
<achVendID value="????"/>
<fsSelection value="00000000 00000000"/>
<usFirstCharIndex value="65"/>
<usLastCharIndex value="65"/>
<sTypoAscender value="0"/>
<sTypoDescender value="0"/>
<sTypoLineGap value="0"/>
<usWinAscent value="0"/>
<usWinDescent value="0"/>
<ulCodePageRange1 value="00000000 00000000 00000000 00000000"/>
<ulCodePageRange2 value="00000000 00000000 00000000 00000000"/>
<sxHeight value="0"/>
<sCapHeight value="0"/>
<usDefaultChar value="0"/>
<usBreakChar value="32"/>
<usMaxContext value="0"/>
</OS_2>
<hmtx>
<mtx name=".notdef" width="600" lsb="0"/>
<mtx name=".null" width="600" lsb="0"/>
<mtx name="A" width="600" lsb="0"/>
</hmtx>
<cmap>
<tableVersion version="0"/>
<cmap_format_4 platformID="0" platEncID="3" language="0">
<map code="0x41" name="A"/><!-- LATIN CAPITAL LETTER A -->
</cmap_format_4>
<cmap_format_4 platformID="3" platEncID="1" language="0">
<map code="0x41" name="A"/><!-- LATIN CAPITAL LETTER A -->
</cmap_format_4>
</cmap>
<loca>
<!-- The 'loca' table will be calculated by the compiler -->
</loca>
<glyf>
<!-- The xMin, yMin, xMax and yMax values
will be recalculated by the compiler. -->
<TTGlyph name=".notdef"/><!-- contains no outline data -->
<TTGlyph name=".null"/><!-- contains no outline data -->
<TTGlyph name="A"/><!-- contains no outline data -->
</glyf>
<name>
<namerecord nameID="1" platformID="1" platEncID="0" langID="0x0" unicode="True">
HelloTestFont
</namerecord>
<namerecord nameID="2" platformID="1" platEncID="0" langID="0x0" unicode="True">
TotallyNormal
</namerecord>
<namerecord nameID="6" platformID="1" platEncID="0" langID="0x0" unicode="True">
HelloTestFont-TotallyNormal
</namerecord>
<namerecord nameID="1" platformID="1" platEncID="0" langID="0x4" unicode="True">
HalloTestFont
</namerecord>
<namerecord nameID="2" platformID="1" platEncID="0" langID="0x4" unicode="True">
TotaalNormaal
</namerecord>
<namerecord nameID="1" platformID="3" platEncID="1" langID="0x409">
HelloTestFont
</namerecord>
<namerecord nameID="2" platformID="3" platEncID="1" langID="0x409">
TotallyNormal
</namerecord>
<namerecord nameID="6" platformID="3" platEncID="1" langID="0x409">
HelloTestFont-TotallyNormal
</namerecord>
<namerecord nameID="1" platformID="3" platEncID="1" langID="0x413">
HalloTestFont
</namerecord>
<namerecord nameID="2" platformID="3" platEncID="1" langID="0x413">
TotaalNormaal
</namerecord>
</name>
<post>
<formatType value="2.0"/>
<italicAngle value="0.0"/>
<underlinePosition value="0"/>
<underlineThickness value="0"/>
<isFixedPitch value="0"/>
<minMemType42 value="0"/>
<maxMemType42 value="0"/>
<minMemType1 value="0"/>
<maxMemType1 value="0"/>
<psNames>
<!-- This file uses unique glyph names based on the information
found in the 'post' table. Since these names might not be unique,
we have to invent artificial names in case of clashes. In order to
be able to retain the original information, we need a name to
ps name mapping for those cases where they differ. That's what
you see below.
-->
</psNames>
<extraNames>
<!-- following are the name that are not taken from the standard Mac glyph order -->
</extraNames>
</post>
<DSIG>
<!-- note that the Digital Signature will be invalid after recompilation! -->
<tableHeader flag="0x1" numSigs="1" version="1"/>
<SignatureRecord format="1">
-----BEGIN PKCS7-----
0000000100000000
-----END PKCS7-----
</SignatureRecord>
</DSIG>
</ttFont>

View File

@ -1,7 +1,9 @@
from fontTools.misc.testTools import parseXML
from fontTools.misc.timeTools import timestampSinceEpoch
from fontTools.ttLib import TTFont, TTLibError
from fontTools.ttLib.tables.DefaultTable import DefaultTable
from fontTools import ttx
import base64
import getopt
import logging
import os
@ -1016,6 +1018,37 @@ def test_main_ttx_compile_stdin_to_stdout(tmp_path):
assert outpath.is_file()
def test_roundtrip_DSIG_split_at_XML_parse_buffer_size(tmp_path):
    """Compile a TTX whose DSIG text gets split into two parser chunks.

    Regression test for https://github.com/fonttools/fonttools/issues/2614:
    the fixture is re-saved with a dummy table added, compiled with the
    ``fontTools.ttx`` command line, and the resulting DSIG signature is
    compared against the expected decoded payload.
    """
    ttx_name = "roundtrip_DSIG_split_at_XML_parse_buffer_size.ttx"
    fixture_path = Path("Tests") / "ttx" / "data" / ttx_name
    padded_ttx_path = tmp_path / ttx_name
    outpath = tmp_path / "font.ttf"

    font = TTFont()
    font.importXML(fixture_path)

    # just enough dummy bytes to hit the cut off point whereby DSIG data gets
    # split into two chunks and triggers the bug from
    # https://github.com/fonttools/fonttools/issues/2614
    font["DMMY"] = DefaultTable(tag="DMMY")
    font["DMMY"].data = b"\x01\x02\x03\x04" * 2438
    font.saveXML(padded_ttx_path)

    # Compile in a separate interpreter via the ttx module's command line.
    command = [sys.executable, "-m", "fontTools.ttx", "-q", "-o"]
    command.append(str(outpath))
    command.append(str(padded_ttx_path))
    subprocess.run(command, check=True)

    assert outpath.is_file()

    expected_signature = base64.b64decode(b"0000000100000000")
    compiled_signature = TTFont(outpath)["DSIG"].signatureRecords[0].pkcs7
    assert compiled_signature == expected_signature
# ---------------------------
# support functions for tests
# ---------------------------