From a7ac2de5cdaa11351475e258be6c428d9394f0d3 Mon Sep 17 00:00:00 2001 From: Khaled Hosny Date: Mon, 5 Jul 2021 02:13:09 +0200 Subject: [PATCH] [tfmLib] New library for reading TFM files New library for reading TeX Font Metrics files. Does not support writing them back. Fixes https://github.com/fonttools/fonttools/issues/2352 --- Lib/fontTools/tfmLib.py | 460 ++++++++++++++++++++++++++++++ Tests/tfmLib/data/cmex10.tfm | Bin 0 -> 992 bytes Tests/tfmLib/data/cmr10.tfm | Bin 0 -> 1296 bytes Tests/tfmLib/data/cmsy10.tfm | Bin 0 -> 1124 bytes Tests/tfmLib/data/dummy-space.tfm | Bin 0 -> 152 bytes Tests/tfmLib/tfmLib_test.py | 90 ++++++ 6 files changed, 550 insertions(+) create mode 100644 Lib/fontTools/tfmLib.py create mode 100644 Tests/tfmLib/data/cmex10.tfm create mode 100644 Tests/tfmLib/data/cmr10.tfm create mode 100644 Tests/tfmLib/data/cmsy10.tfm create mode 100644 Tests/tfmLib/data/dummy-space.tfm create mode 100644 Tests/tfmLib/tfmLib_test.py diff --git a/Lib/fontTools/tfmLib.py b/Lib/fontTools/tfmLib.py new file mode 100644 index 000000000..673373ffd --- /dev/null +++ b/Lib/fontTools/tfmLib.py @@ -0,0 +1,460 @@ +"""Module for reading TFM (TeX Font Metrics) files. + +The TFM format is described in the TFtoPL WEB source code, whose typeset form +can be found on `CTAN `_. + + >>> from fontTools.tfmLib import TFM + >>> tfm = TFM("Tests/tfmLib/data/cmr10.tfm") + >>> + >>> # Accessing an attribute gets you metadata. + >>> tfm.checksum + 1274110073 + >>> tfm.designsize + 10.0 + >>> tfm.codingscheme + 'TeX text' + >>> tfm.family + 'CMR' + >>> tfm.seven_bit_safe_flag + False + >>> tfm.face + 234 + >>> tfm.extraheader + {} + >>> tfm.fontdimens + {'SLANT': 0.0, 'SPACE': 0.33333396911621094, 'STRETCH': 0.16666698455810547, 'SHRINK': 0.11111164093017578, 'XHEIGHT': 0.4305553436279297, 'QUAD': 1.0000028610229492, 'EXTRASPACE': 0.11111164093017578} + >>> # Accessing a character gets you its metrics. 
+ >>> # “width” is always available, other metrics are available only when + >>> # applicable. All values are relative to “designsize”. + >>> tfm.chars[ord("g")] + {'width': 0.5000019073486328, 'height': 0.4305553436279297, 'depth': 0.1944446563720703, 'italic': 0.013888359069824219} + >>> # Kerning and ligature can be accessed as well. + >>> tfm.kerning[ord("c")] + {104: -0.02777862548828125, 107: -0.02777862548828125} + >>> tfm.ligatures[ord("f")] + {105: ('LIG', 12), 102: ('LIG', 11), 108: ('LIG', 13)} +""" + +from types import SimpleNamespace + +from fontTools.misc.sstruct import calcsize, unpack, unpack2 + +SIZES_FORMAT = """ + > + lf: h # length of the entire file, in words + lh: h # length of the header data, in words + bc: h # smallest character code in the font + ec: h # largest character code in the font + nw: h # number of words in the width table + nh: h # number of words in the height table + nd: h # number of words in the depth table + ni: h # number of words in the italic correction table + nl: h # number of words in the ligature/kern table + nk: h # number of words in the kern table + ne: h # number of words in the extensible character table + np: h # number of font parameter words +""" + +SIZES_SIZE = calcsize(SIZES_FORMAT) + +FIXED_FORMAT = "12.20F" + +HEADER_FORMAT1 = f""" + > + checksum: L + designsize: {FIXED_FORMAT} +""" + +HEADER_FORMAT2 = f""" + {HEADER_FORMAT1} + codingscheme: 40p +""" + +HEADER_FORMAT3 = f""" + {HEADER_FORMAT2} + family: 20p +""" + +HEADER_FORMAT4 = f""" + {HEADER_FORMAT3} + seven_bit_safe_flag: ? 
class TFMException(Exception):
    """Raised when a TFM file is too short or structurally inconsistent."""


class TFM:
    """Parsed contents of a TeX Font Metrics (TFM) file.

    Attributes set by a successful read:

    * ``checksum``, ``designsize``, ``codingscheme``, ``family``,
      ``seven_bit_safe_flag``, ``face``: header fields.  Fields that the
      file's header is too short to contain keep the value obtained from
      unpacking an all-zero header.
    * ``extraheader``: dict of header words past the standard ones, keyed
      ``"HEADER18"``, ``"HEADER19"``, ...
    * ``fonttype``: ``VANILLA``, ``MATHSY`` or ``MATHEX``, derived from
      ``codingscheme``.
    * ``fontdimens``: dict mapping parameter name to value.
    * ``right_boundary_char`` / ``left_boundary_char``: boundary character
      codes, or ``None`` when the lig/kern program declares none.
    * ``chars``: dict mapping character code to a dict with ``"width"`` and,
      when applicable, ``"height"``, ``"depth"``, ``"italic"``,
      ``"nextlarger"`` and ``"varchar"``.
    * ``kerning``: ``{char: {next_char: kern}}``.
    * ``ligatures``: ``{char: {next_char: (operation, result_char)}}``.
    """

    def __init__(self, file):
        """Parse *file*: a path, or an object with a ``read()`` method
        returning the raw bytes."""
        self._read(file)

    def __repr__(self):
        return (
            f"<TFM for {self.family} in {self.codingscheme}"
            f" at {self.designsize:g}pt>"
        )

    @staticmethod
    def _check_sizes(sizes, data_length):
        """Validate the length fields found at the start of the file.

        *sizes* is a namespace carrying the ``lf``, ``lh``, ``bc``, ``ec``,
        ``nw``, ``nh``, ``nd``, ``ni``, ``nl``, ``nk``, ``ne`` and ``np``
        fields (all counted in 32-bit words); *data_length* is the number of
        bytes actually available.

        Raises:
            TFMException: if the fields are inconsistent with each other or
                with *data_length*.
        """
        # Only structural checks are done here.  TeX and TFtoPL do
        # additional functional checks and might even correct "errors" in
        # the input file, but we instead try to output the file as it is as
        # long as it is parsable, even if the data make no sense.
        if sizes.lf <= 0:
            raise TFMException("The file claims to have negative or zero length!")

        if data_length < sizes.lf * 4:
            raise TFMException("The file has fewer bytes than it claims!")

        for name, length in vars(sizes).items():
            if length < 0:
                raise TFMException(f"The subfile size: '{name}' is negative!")

        if sizes.lh < 2:
            raise TFMException(f"The header length is only {sizes.lh}!")

        if sizes.bc > sizes.ec + 1 or sizes.ec > 255:
            raise TFMException(
                f"The character code range {sizes.bc}..{sizes.ec} is illegal!"
            )

        if sizes.nw == 0 or sizes.nh == 0 or sizes.nd == 0 or sizes.ni == 0:
            raise TFMException("Incomplete subfiles for character dimensions!")

        if sizes.ne > 256:
            raise TFMException(f"There are {sizes.ne} extensible recipes!")

        if sizes.lf != (
            6
            + sizes.lh
            + (sizes.ec - sizes.bc + 1)
            + sizes.nw
            + sizes.nh
            + sizes.nd
            + sizes.ni
            + sizes.nl
            + sizes.nk
            + sizes.ne
            + sizes.np
        ):
            raise TFMException("Subfile sizes don’t add up to the stated total")

    def _read(self, file):
        """Load *file* and populate every public attribute of the instance.

        Raises:
            TFMException: if the data is shorter than the sizes preamble or
                fails the structural checks in ``_check_sizes``.
        """
        if hasattr(file, "read"):
            data = file.read()
        else:
            with open(file, "rb") as fp:
                data = fp.read()

        self._data = data

        if len(data) < SIZES_SIZE:
            raise TFMException("Too short input file")

        sizes = SimpleNamespace()
        unpack2(SIZES_FORMAT, data, sizes)
        self._check_sizes(sizes, len(data))

        # Subfile offsets, used in the helper functions below. These all are
        # 32-bit word offsets not 8-bit byte offsets.
        char_base = 6 + sizes.lh - sizes.bc
        width_base = char_base + sizes.ec + 1
        height_base = width_base + sizes.nw
        depth_base = height_base + sizes.nh
        italic_base = depth_base + sizes.nd
        lig_kern_base = italic_base + sizes.ni
        kern_base = lig_kern_base + sizes.nl
        exten_base = kern_base + sizes.nk
        param_base = exten_base + sizes.ne

        # Helper functions for accessing individual data. They mirror the
        # accessors of the literate WEB source of TFtoPL, which is why they
        # do not look like idiomatic Python.

        def char_info(c):
            # Byte offset of the 4-byte char_info word of character c.
            return 4 * (char_base + c)

        def width_index(c):
            return data[char_info(c)]

        def nonexistent(c):
            return c < sizes.bc or c > sizes.ec or width_index(c) == 0

        def height_index(c):
            # High nibble of the second byte.
            return data[char_info(c) + 1] // 16

        def depth_index(c):
            # Low nibble of the second byte.
            return data[char_info(c) + 1] % 16

        def italic_index(c):
            # Upper six bits of the third byte.
            return data[char_info(c) + 2] // 4

        def tag(c):
            # Lower two bits of the third byte.
            return data[char_info(c) + 2] % 4

        def remainder(c):
            return data[char_info(c) + 3]

        def width(c):
            r = 4 * (width_base + width_index(c))
            return read_fixed(r, "v")["v"]

        def height(c):
            r = 4 * (height_base + height_index(c))
            return read_fixed(r, "v")["v"]

        def depth(c):
            r = 4 * (depth_base + depth_index(c))
            return read_fixed(r, "v")["v"]

        def italic(c):
            r = 4 * (italic_base + italic_index(c))
            return read_fixed(r, "v")["v"]

        def exten(c):
            return 4 * (exten_base + remainder(c))

        def lig_step(i):
            return 4 * (lig_kern_base + i)

        def lig_kern_command(i):
            # i is a byte offset, as returned by lig_step().
            command = SimpleNamespace()
            unpack2(LIG_KERN_COMMAND, data[i:], command)
            return command

        def kern(i):
            r = 4 * (kern_base + i)
            return read_fixed(r, "v")["v"]

        def param(i):
            return 4 * (param_base + i)

        def read_fixed(index, key, obj=None):
            # Unpack one fixed-point word at byte offset `index` and store it
            # under `key` in `obj` (or in whatever unpack2 creates when obj is
            # None); the container is returned.
            ret = unpack2(f">;{key}:{FIXED_FORMAT}", data[index:], obj)
            return ret[0]

        # Set all attributes to empty values regardless of the header size.
        unpack(HEADER_FORMAT4, [0] * HEADER_SIZE4, self)

        # The header starts right after the twelve 16-bit size fields.
        offset = 24
        length = sizes.lh * 4
        self.extraheader = {}
        if length >= HEADER_SIZE4:
            rest = unpack2(HEADER_FORMAT4, data[offset:], self)[1]
            if self.face < 18:
                # Turn the face byte into a three-letter code: first letter
                # from "MBL", second from "RI", third from "RCE".
                s = self.face % 2
                b = self.face // 2
                self.face = "MBL"[b % 3] + "RI"[s] + "RCE"[b // 3]
            for i in range(sizes.lh - HEADER_SIZE4 // 4):
                rest = unpack2(f">;HEADER{i + 18}:l", rest, self.extraheader)[1]
        elif length >= HEADER_SIZE3:
            unpack2(HEADER_FORMAT3, data[offset:], self)
        elif length >= HEADER_SIZE2:
            unpack2(HEADER_FORMAT2, data[offset:], self)
        elif length >= HEADER_SIZE1:
            unpack2(HEADER_FORMAT1, data[offset:], self)

        self.fonttype = VANILLA
        scheme = self.codingscheme.upper()
        if scheme.startswith("TEX MATH SY"):
            self.fonttype = MATHSY
        elif scheme.startswith("TEX MATH EX"):
            self.fonttype = MATHEX

        self.fontdimens = {}
        for i in range(sizes.np):
            # Parameters without a well-known name get a positional one.
            name = f"PARAMETER{i+1}"
            if i <= 6:
                name = BASE_PARAMS[i]
            elif self.fonttype == MATHSY and i <= 21:
                name = MATHSY_PARAMS[i - 7]
            elif self.fonttype == MATHEX and i <= 12:
                name = MATHEX_PARAMS[i - 7]
            read_fixed(param(i), name, self.fontdimens)

        # Maps a character code to the index of its first lig/kern command.
        lig_kern_map = {}
        self.right_boundary_char = None
        self.left_boundary_char = None
        if sizes.nl > 0:
            # A skip_byte of 255 in the first command declares the right
            # boundary character, in the last command the left one.
            cmd = lig_kern_command(lig_step(0))
            if cmd.skip_byte == 255:
                self.right_boundary_char = cmd.next_char

            cmd = lig_kern_command(lig_step(sizes.nl - 1))
            if cmd.skip_byte == 255:
                self.left_boundary_char = 256
                r = 256 * cmd.op_byte + cmd.remainder
                lig_kern_map[self.left_boundary_char] = r

        ext_part_names = ("top", "mid", "bot", "rep")
        self.chars = {}
        for c in range(sizes.bc, sizes.ec + 1):
            # A zero width index marks a character that is not in the font.
            if width_index(c) == 0:
                continue
            self.chars[c] = info = {}
            info["width"] = width(c)
            if height_index(c) > 0:
                info["height"] = height(c)
            if depth_index(c) > 0:
                info["depth"] = depth(c)
            if italic_index(c) > 0:
                info["italic"] = italic(c)
            char_tag = tag(c)
            if char_tag == LIG_TAG:
                lig_kern_map[c] = remainder(c)
            elif char_tag == LIST_TAG:
                info["nextlarger"] = remainder(c)
            elif char_tag == EXT_TAG:
                info["varchar"] = varchar = {}
                for i in range(4):
                    part = data[exten(c) + i]
                    # The repeated piece is always recorded; the other
                    # pieces only when they are non-zero.
                    if i == 3 or part > 0:
                        name = ext_part_names[i]
                        varchar[name] = c if nonexistent(part) else part

        self.ligatures = {}
        self.kerning = {}
        for c, i in sorted(lig_kern_map.items()):
            cmd = lig_kern_command(lig_step(i))
            if cmd.skip_byte > STOP_FLAG:
                # Indirect entry: the program really starts elsewhere.
                i = 256 * cmd.op_byte + cmd.remainder

            while i < sizes.nl:
                cmd = lig_kern_command(lig_step(i))
                if cmd.skip_byte <= STOP_FLAG:
                    if cmd.op_byte >= KERN_FLAG:
                        r = 256 * (cmd.op_byte - KERN_FLAG) + cmd.remainder
                        self.kerning.setdefault(c, {})[cmd.next_char] = kern(r)
                    else:
                        r = cmd.op_byte
                        if r == 4 or (r > 7 and r != 11):
                            # Ligature step with nonstandard code, we output
                            # the code verbatim.
                            lig = r
                        else:
                            lig = ""
                            if r % 4 > 1:
                                lig += "/"
                            lig += "LIG"
                            if r % 2 != 0:
                                lig += "/"
                            while r > 3:
                                lig += ">"
                                r -= 4
                        self.ligatures.setdefault(c, {})[cmd.next_char] = (
                            lig,
                            cmd.remainder,
                        )

                if cmd.skip_byte >= STOP_FLAG:
                    break
                i += cmd.skip_byte + 1


if __name__ == "__main__":
    import sys

    tfm = TFM(sys.argv[1])
    print(
        "\n".join(
            [
                f"tfm.checksum={tfm.checksum}",
                f"tfm.designsize={tfm.designsize}",
                f"tfm.codingscheme={tfm.codingscheme}",
                f"tfm.fonttype={tfm.fonttype}",
                f"tfm.family={tfm.family}",
                f"tfm.seven_bit_safe_flag={tfm.seven_bit_safe_flag}",
                f"tfm.face={tfm.face}",
                f"tfm.extraheader={tfm.extraheader}",
                f"tfm.fontdimens={tfm.fontdimens}",
                f"tfm.right_boundary_char={tfm.right_boundary_char}",
                f"tfm.left_boundary_char={tfm.left_boundary_char}",
                f"tfm.kerning={tfm.kerning}",
                f"tfm.ligatures={tfm.ligatures}",
                f"tfm.chars={tfm.chars}",
            ]
        )
    )
    print(tfm)
b/Tests/tfmLib/data/cmex10.tfm new file mode 100644 index 0000000000000000000000000000000000000000..d5427ad3f7f5d6ffbb3bbec8f39ab39e0b9bd1dd GIT binary patch literal 992 zcmZ9~%TE(g6bA6G=k_^mhqgecLuX1AQBaXbc@)rsATer)M8%+@hA0RqAW~i%Sh#V8 zM&knAkhrVS#EpM|8xmyU%C!lJ3tgKSR(>5>C{2Dn=iYnHo!sQ$H(~%*k%kvK6m!5r zc=yA63>!e~%IHvfY9v3N9$m_hPR~uuOgkm}|J=R(eM9>3cAX7B}|KeE%C7qG*3|@t| z@D9FA7QVqD_|`h86aLvP_;;Qmkb8*$_XFGuY^I=Pze8hva5Ie%1R>Ui3hxoxd4bUG zWrSHB5CDuG5%p)IlM0TDd%AT^>&oHb7B82E&TA16pvT$_;^S?&+;72E46Ms0AN=<0MR@ee{sDbxh z)oN!ju5=_%a}!IXOiPMc_qnmD^Yp=sLaB(>68b?rBkiE?F5myYWQoZ-w$0Zz?-KtT zg$8T> z4x?|RHa?2_yU3+BTS6PD?Hx+(HM!Po+>u7;QNgdJw$Inp(_s8fp!eQ-T6kv= z&-{7W@1?FY@7Q3R*pW)drlQ0?@OK*}+-YYp6Ssmk{)Ca2xh<7DHe$q^iA_|1x~WRb zrUrPgkeWOM&W;f~UQU9MS`yUDc;!~G!}pAnfvsan>BVzg<`S=n;D@tN447J^G{fFSX)5M7>dCs}o#cSJ}qbu&msRJgL={78fP@+kqc- z$;q8?rt@@KR8f~SpxrK`_hAh7xQzP@J+Csc%IjioUY}~Wc2SdcWAL`VgYWU*g9AF_ za$0Z!8t5w*xO3>F3;q11zt4s4t-crHg}MyOa0Vt_Uen|rgu~Q-j9%;4`W~<5h0fQ_ z5u1fcXp?^$U4hGR7JTiu_*c*`(TnJUi|7^UD)TSUbFc*D>4Gcxnts0Tix6Y>OJjV@87j;jP`nshe%U1g_|3qP}wAk zm87W*C0p1OrwAZ@YOigJaIg)6srke92t7%ht+%LjvY9DF*2f4pQTUd)uzC=x+Bzzh3Cguu#V>lBC2(pB2X2)v(zjtA$ zv(OTF$p+u8W%ZX(>*}D!CZUylgf?;yc7L2`g$@qAaRhqoIrIlr#0>PgJ~+Dj;TSyy z$HW#mroY2b>S1&qgh92&{Z1Hfqi{L`aE25(&yT>_{u|ETQaEp3fs1;&qTk@E9)c_O z46a8X;Cgxu?&LJMNx8eS2JWsf+&5~GRPqU)>1F!&6{PH%v-IA51KaEvw7FhOXG0h4 zq}@I~3w8q@QQScSU!UCNB7TR-Vn7?MnOw%7X&4w=1j7^#*)qrY1y%^Q7-aX*H zWpJuh{LMrQR4VnHj949Vt^=j{0PTmX{lLN^us9D^R13Df2&}vg?8qe8h2vlYkHIDa z;MGsSPYi%B-G-X%>xg!MU<4-=LM3d3X7#1>WpoB?A_!hG4?Z$VI@(rrSR$;q{C;^Z zau~0FLOV1Ldqxk8AH%S9w?VtL6SkN2Q12W7r~6fw2B8&p057JIUDgb5?y~Ip2S79O AgGXBFQki^;2(ij*x3>g?$BmtRz5U&6L literal 0 HcmV?d00001 diff --git a/Tests/tfmLib/tfmLib_test.py b/Tests/tfmLib/tfmLib_test.py new file mode 100644 index 000000000..ad74ed7fc --- /dev/null +++ b/Tests/tfmLib/tfmLib_test.py @@ -0,0 +1,90 @@ +import glob +import os + +import pytest + +from fontTools import tfmLib + +DATA_DIR = os.path.join(os.path.dirname(__file__), "data") + + 
@pytest.mark.parametrize("path", glob.glob(f"{DATA_DIR}/cm*.tfm"))
def test_read(path):
    """Each cm*.tfm file in the data directory parses to the expected shape."""
    font = tfmLib.TFM(path)
    assert font.designsize == 10.0
    dimens = font.fontdimens
    assert dimens
    assert len(dimens) >= 7
    assert font.extraheader == {}
    assert font.right_boundary_char is None
    assert font.left_boundary_char is None
    assert len(font.chars) == 128


def test_read_boundary_char():
    """dummy-space.tfm declares both a right and a left boundary character."""
    font = tfmLib.TFM(os.path.join(DATA_DIR, "dummy-space.tfm"))
    assert font.right_boundary_char == 1
    assert font.left_boundary_char == 256


def test_read_fontdimens_vanilla():
    """cmr10 exposes exactly the seven base parameters."""
    expected = {
        "SLANT": 0.0,
        "SPACE": 0.33333396911621094,
        "STRETCH": 0.16666698455810547,
        "SHRINK": 0.11111164093017578,
        "XHEIGHT": 0.4305553436279297,
        "QUAD": 1.0000028610229492,
        "EXTRASPACE": 0.11111164093017578,
    }
    font = tfmLib.TFM(os.path.join(DATA_DIR, "cmr10.tfm"))
    assert font.fontdimens == expected


def test_read_fontdimens_mathex():
    """cmex10 adds the math-extension parameters to the base ones."""
    expected = {
        "SLANT": 0.0,
        "SPACE": 0.0,
        "STRETCH": 0.0,
        "SHRINK": 0.0,
        "XHEIGHT": 0.4305553436279297,
        "QUAD": 1.0000028610229492,
        "EXTRASPACE": 0.0,
        "DEFAULTRULETHICKNESS": 0.03999900817871094,
        "BIGOPSPACING1": 0.11111164093017578,
        "BIGOPSPACING2": 0.16666698455810547,
        "BIGOPSPACING3": 0.19999980926513672,
        "BIGOPSPACING4": 0.6000003814697266,
        "BIGOPSPACING5": 0.10000038146972656,
    }
    font = tfmLib.TFM(os.path.join(DATA_DIR, "cmex10.tfm"))
    assert font.fontdimens == expected


def test_read_fontdimens_mathsy():
    """cmsy10 adds the math-symbol parameters to the base ones."""
    expected = {
        "SLANT": 0.25,
        "SPACE": 0.0,
        "STRETCH": 0.0,
        "SHRINK": 0.0,
        "XHEIGHT": 0.4305553436279297,
        "QUAD": 1.0000028610229492,
        "EXTRASPACE": 0.0,
        "NUM1": 0.6765079498291016,
        "NUM2": 0.39373207092285156,
        "NUM3": 0.44373130798339844,
        "DENOM1": 0.6859512329101562,
        "DENOM2": 0.34484100341796875,
        "SUP1": 0.41289234161376953,
        "SUP2": 0.36289215087890625,
        "SUP3": 0.28888893127441406,
        "SUB1": 0.14999961853027344,
        "SUB2": 0.24721717834472656,
        "SUBDROP": 0.05000019073486328,
        "SUPDROP": 0.3861083984375,
        "DELIM1": 2.3899993896484375,
        "DELIM2": 1.010000228881836,
        "AXISHEIGHT": 0.25,
    }
    font = tfmLib.TFM(os.path.join(DATA_DIR, "cmsy10.tfm"))
    assert font.fontdimens == expected