[tfmLib] New library for reading TFM files
New library for reading TeX Font Metrics files. Does not support writing them back. Fixes https://github.com/fonttools/fonttools/issues/2352
This commit is contained in:
parent
383e70fc39
commit
a7ac2de5cd
460
Lib/fontTools/tfmLib.py
Normal file
460
Lib/fontTools/tfmLib.py
Normal file
@ -0,0 +1,460 @@
|
||||
"""Module for reading TFM (TeX Font Metrics) files.
|
||||
|
||||
The TFM format is described in the TFtoPL WEB source code, whose typeset form
|
||||
can be found on `CTAN <http://mirrors.ctan.org/info/knuth-pdf/texware/tftopl.pdf>`_.
|
||||
|
||||
>>> from fontTools.tfmLib import TFM
|
||||
>>> tfm = TFM("Tests/tfmLib/data/cmr10.tfm")
|
||||
>>>
|
||||
>>> # Accessing an attribute gets you metadata.
|
||||
>>> tfm.checksum
|
||||
1274110073
|
||||
>>> tfm.designsize
|
||||
10.0
|
||||
>>> tfm.codingscheme
|
||||
'TeX text'
|
||||
>>> tfm.family
|
||||
'CMR'
|
||||
>>> tfm.seven_bit_safe_flag
|
||||
False
|
||||
>>> tfm.face
|
||||
234
|
||||
>>> tfm.extraheader
|
||||
{}
|
||||
>>> tfm.fontdimens
|
||||
{'SLANT': 0.0, 'SPACE': 0.33333396911621094, 'STRETCH': 0.16666698455810547, 'SHRINK': 0.11111164093017578, 'XHEIGHT': 0.4305553436279297, 'QUAD': 1.0000028610229492, 'EXTRASPACE': 0.11111164093017578}
|
||||
>>> # Accessing a character gets you its metrics.
|
||||
>>> # “width” is always available, other metrics are available only when
|
||||
>>> # applicable. All values are relative to “designsize”.
|
||||
>>> tfm.chars[ord("g")]
|
||||
{'width': 0.5000019073486328, 'height': 0.4305553436279297, 'depth': 0.1944446563720703, 'italic': 0.013888359069824219}
|
||||
>>> # Kerning and ligature can be accessed as well.
|
||||
>>> tfm.kerning[ord("c")]
|
||||
{104: -0.02777862548828125, 107: -0.02777862548828125}
|
||||
>>> tfm.ligatures[ord("f")]
|
||||
{105: ('LIG', 12), 102: ('LIG', 11), 108: ('LIG', 13)}
|
||||
"""
|
||||
|
||||
from types import SimpleNamespace
|
||||
|
||||
from fontTools.misc.sstruct import calcsize, unpack, unpack2
|
||||
|
||||
# sstruct layout of the twelve big-endian 16-bit lengths that open a TFM file.
# TFM._read() uses them to locate every subfile that follows.
SIZES_FORMAT = """
>
lf: h # length of the entire file, in words
lh: h # length of the header data, in words
bc: h # smallest character code in the font
ec: h # largest character code in the font
nw: h # number of words in the width table
nh: h # number of words in the height table
nd: h # number of words in the depth table
ni: h # number of words in the italic correction table
nl: h # number of words in the ligature/kern table
nk: h # number of words in the kern table
ne: h # number of words in the extensible character table
np: h # number of font parameter words
"""

# Size in bytes of the length preamble; shorter inputs are rejected outright.
SIZES_SIZE = calcsize(SIZES_FORMAT)

# sstruct fixed-point specifier (12 integer bits, 20 fractional bits) used for
# the design size and for every metric value read from the file.
FIXED_FORMAT = "12.20F"

# The header may be truncated, so four progressively longer layouts are
# defined; TFM._read() picks the longest one that fits in ``lh`` words.
HEADER_FORMAT1 = f"""
>
checksum: L
designsize: {FIXED_FORMAT}
"""

HEADER_FORMAT2 = f"""
{HEADER_FORMAT1}
codingscheme: 40p
"""

HEADER_FORMAT3 = f"""
{HEADER_FORMAT2}
family: 20p
"""

HEADER_FORMAT4 = f"""
{HEADER_FORMAT3}
seven_bit_safe_flag: ?
ignored: x
ignored: x
face: B
"""

# Byte sizes of the four header layouts, compared against ``lh * 4``.
HEADER_SIZE1 = calcsize(HEADER_FORMAT1)
HEADER_SIZE2 = calcsize(HEADER_FORMAT2)
HEADER_SIZE3 = calcsize(HEADER_FORMAT3)
HEADER_SIZE4 = calcsize(HEADER_FORMAT4)

# One four-byte instruction of the ligature/kern program.
LIG_KERN_COMMAND = """
>
skip_byte: B
next_char: B
op_byte: B
remainder: B
"""

# Names given to the first seven font parameters of every font.
BASE_PARAMS = [
    "SLANT",
    "SPACE",
    "STRETCH",
    "SHRINK",
    "XHEIGHT",
    "QUAD",
    "EXTRASPACE",
]

# Names given to parameters 8-22 when the coding scheme starts with
# "TEX MATH SY".
MATHSY_PARAMS = [
    "NUM1",
    "NUM2",
    "NUM3",
    "DENOM1",
    "DENOM2",
    "SUP1",
    "SUP2",
    "SUP3",
    "SUB1",
    "SUB2",
    "SUPDROP",
    "SUBDROP",
    "DELIM1",
    "DELIM2",
    "AXISHEIGHT",
]

# Names given to parameters 8-13 when the coding scheme starts with
# "TEX MATH EX".
MATHEX_PARAMS = [
    "DEFAULTRULETHICKNESS",
    "BIGOPSPACING1",
    "BIGOPSPACING2",
    "BIGOPSPACING3",
    "BIGOPSPACING4",
    "BIGOPSPACING5",
]

# Possible values of ``TFM.fonttype``, derived from the coding scheme.
VANILLA = 0
MATHSY = 1
MATHEX = 2

# NOTE(review): these three values are not referenced by any code visible in
# this module; presumably reserved for ligature/kern reachability analysis.
UNREACHABLE = 0
PASSTHROUGH = 1
ACCESSABLE = 2

# Character tag (low two bits of the third char_info byte). It selects how the
# character's remainder byte is interpreted: nothing, index of its
# ligature/kern program, code of the next larger character, or index of its
# extensible recipe.
NO_TAG = 0
LIG_TAG = 1
LIST_TAG = 2
EXT_TAG = 3

# skip_byte >= STOP_FLAG ends a ligature/kern program; skip_byte > STOP_FLAG
# marks an instruction that is not executed as a regular step.
STOP_FLAG = 128
# op_byte >= KERN_FLAG marks a kern step rather than a ligature step.
KERN_FLAG = 128
|
||||
|
||||
|
||||
class TFMException(Exception):
    """Error raised when a TFM file fails the reader's structural checks."""

    def __init__(self, message):
        # The message is mandatory and becomes the exception's only argument.
        Exception.__init__(self, message)
|
||||
|
||||
|
||||
class TFM:
    """Parsed contents of a TeX Font Metrics (TFM) file.

    After construction the following attributes are available:

    * ``checksum``, ``designsize``, ``codingscheme``, ``family``,
      ``seven_bit_safe_flag``, ``face``: header fields. Fields missing from a
      truncated header keep their zero/empty default.
    * ``extraheader``: dict of header words beyond the standard 18, keyed
      ``HEADER18``, ``HEADER19``, ...
    * ``fonttype``: ``VANILLA``, ``MATHSY`` or ``MATHEX``, derived from the
      coding scheme.
    * ``fontdimens``: dict mapping parameter names to values.
    * ``right_boundary_char`` / ``left_boundary_char``: boundary character
      codes, or ``None`` when the font does not define them.
    * ``chars``: dict mapping character codes to their metrics dict
      (``width`` always; ``height``, ``depth``, ``italic``, ``nextlarger``
      and ``varchar`` only when applicable).
    * ``kerning``: ``{left: {right: amount}}``.
    * ``ligatures``: ``{left: {right: (operation, result)}}``.

    All dimensions are relative to ``designsize``.
    """

    def __init__(self, file):
        """Read and parse ``file``.

        Args:
            file: Either a path accepted by ``open()`` or an object with a
                ``read()`` method that returns the file's raw bytes.

        Raises:
            TFMException: If the file fails the structural sanity checks.
        """
        self._read(file)

    def __repr__(self):
        return (
            "<TFM"
            f" for {self.family}"
            f" in {self.codingscheme}"
            f" at {self.designsize:g}pt>"
        )

    def _read(self, file):
        """Load ``file`` and populate every public attribute of the instance.

        Raises:
            TFMException: If the data is too short or its subfile sizes are
                inconsistent.
        """
        if hasattr(file, "read"):
            data = file.read()
        else:
            with open(file, "rb") as fp:
                data = fp.read()

        self._data = data

        if len(data) < SIZES_SIZE:
            raise TFMException("Too short input file")

        sizes = SimpleNamespace()
        unpack2(SIZES_FORMAT, data, sizes)

        # Do some file structure sanity checks.
        # TeX and TFtoPL do additional functional checks and might even correct
        # "errors" in the input file, but we instead try to output the file as
        # it is as long as it is parsable, even if the data make no sense.

        # A zero length can never be valid (the size preamble alone takes six
        # words), so reject it here with the message that describes it.
        if sizes.lf <= 0:
            raise TFMException("The file claims to have negative or zero length!")

        if len(data) < sizes.lf * 4:
            raise TFMException("The file has fewer bytes than it claims!")

        for name, length in vars(sizes).items():
            if length < 0:
                raise TFMException(f"The subfile size: '{name}' is negative!")

        if sizes.lh < 2:
            raise TFMException(f"The header length is only {sizes.lh}!")

        if sizes.bc > sizes.ec + 1 or sizes.ec > 255:
            raise TFMException(
                f"The character code range {sizes.bc}..{sizes.ec} is illegal!"
            )

        if sizes.nw == 0 or sizes.nh == 0 or sizes.nd == 0 or sizes.ni == 0:
            raise TFMException("Incomplete subfiles for character dimensions!")

        if sizes.ne > 256:
            raise TFMException(f"There are {sizes.ne} extensible recipes!")

        if sizes.lf != (
            6
            + sizes.lh
            + (sizes.ec - sizes.bc + 1)
            + sizes.nw
            + sizes.nh
            + sizes.nd
            + sizes.ni
            + sizes.nl
            + sizes.nk
            + sizes.ne
            + sizes.np
        ):
            raise TFMException("Subfile sizes don’t add up to the stated total")

        # Subfile offsets, used in the helper functions below. These all are
        # 32-bit word offsets not 8-bit byte offsets.
        char_base = 6 + sizes.lh - sizes.bc
        width_base = char_base + sizes.ec + 1
        height_base = width_base + sizes.nw
        depth_base = height_base + sizes.nh
        italic_base = depth_base + sizes.nd
        lig_kern_base = italic_base + sizes.ni
        kern_base = lig_kern_base + sizes.nl
        exten_base = kern_base + sizes.nk
        param_base = exten_base + sizes.ne

        # Helper functions for accessing individual data. They mirror the
        # accessors of the TFtoPL WEB source; those returning a position
        # return a byte offset into ``data``.
        def char_info(c):
            # Byte offset of the four-byte char_info word of character c.
            return 4 * (char_base + c)

        def width_index(c):
            return data[char_info(c)]

        def nonexistent(c):
            # A character is absent when outside bc..ec or when its width
            # index is zero.
            return c < sizes.bc or c > sizes.ec or width_index(c) == 0

        def height_index(c):
            # High nibble of the second char_info byte.
            return data[char_info(c) + 1] // 16

        def depth_index(c):
            # Low nibble of the second char_info byte.
            return data[char_info(c) + 1] % 16

        def italic_index(c):
            # Upper six bits of the third char_info byte.
            return data[char_info(c) + 2] // 4

        def tag(c):
            # Lower two bits of the third char_info byte.
            return data[char_info(c) + 2] % 4

        def remainder(c):
            return data[char_info(c) + 3]

        def width(c):
            r = 4 * (width_base + width_index(c))
            return read_fixed(r, "v")["v"]

        def height(c):
            r = 4 * (height_base + height_index(c))
            return read_fixed(r, "v")["v"]

        def depth(c):
            r = 4 * (depth_base + depth_index(c))
            return read_fixed(r, "v")["v"]

        def italic(c):
            r = 4 * (italic_base + italic_index(c))
            return read_fixed(r, "v")["v"]

        def exten(c):
            # Byte offset of the extensible recipe of character c.
            return 4 * (exten_base + remainder(c))

        def lig_step(i):
            # Byte offset of the i-th ligature/kern instruction.
            return 4 * (lig_kern_base + i)

        def lig_kern_command(i):
            # Decode the instruction starting at byte offset i.
            command = SimpleNamespace()
            unpack2(LIG_KERN_COMMAND, data[i:], command)
            return command

        def kern(i):
            r = 4 * (kern_base + i)
            return read_fixed(r, "v")["v"]

        def param(i):
            # Byte offset of the i-th (zero-based) font parameter.
            return 4 * (param_base + i)

        def read_fixed(index, key, obj=None):
            # Decode one fixed-point word at byte offset ``index`` and store
            # it under ``key`` in ``obj`` (a new dict when obj is None).
            ret = unpack2(f">;{key}:{FIXED_FORMAT}", data[index:], obj)
            return ret[0]

        # Set all attributes to empty values regardless of the header size.
        unpack(HEADER_FORMAT4, [0] * HEADER_SIZE4, self)

        # The header starts right after the six-word (24-byte) size preamble.
        offset = 24
        length = sizes.lh * 4
        self.extraheader = {}
        if length >= HEADER_SIZE4:
            rest = unpack2(HEADER_FORMAT4, data[offset:], self)[1]
            if self.face < 18:
                # Decode the face byte into a three-letter code: weight
                # (M/B/L), slope (R/I) and expansion (R/C/E).
                s = self.face % 2
                b = self.face // 2
                self.face = "MBL"[b % 3] + "RI"[s] + "RCE"[b // 3]
            # Any header words beyond the standard ones are kept verbatim.
            for i in range(sizes.lh - HEADER_SIZE4 // 4):
                rest = unpack2(f">;HEADER{i + 18}:l", rest, self.extraheader)[1]
        elif length >= HEADER_SIZE3:
            unpack2(HEADER_FORMAT3, data[offset:], self)
        elif length >= HEADER_SIZE2:
            unpack2(HEADER_FORMAT2, data[offset:], self)
        elif length >= HEADER_SIZE1:
            unpack2(HEADER_FORMAT1, data[offset:], self)

        self.fonttype = VANILLA
        scheme = self.codingscheme.upper()
        if scheme.startswith("TEX MATH SY"):
            self.fonttype = MATHSY
        elif scheme.startswith("TEX MATH EX"):
            self.fonttype = MATHEX

        # Font parameters get a symbolic name when one is known for the font
        # type, and a generic one-based "PARAMETERn" name otherwise.
        self.fontdimens = {}
        for i in range(sizes.np):
            name = f"PARAMETER{i+1}"
            if i <= 6:
                name = BASE_PARAMS[i]
            elif self.fonttype == MATHSY and i <= 21:
                name = MATHSY_PARAMS[i - 7]
            elif self.fonttype == MATHEX and i <= 12:
                name = MATHEX_PARAMS[i - 7]
            read_fixed(param(i), name, self.fontdimens)

        # Maps a character code to the index of the first instruction of its
        # ligature/kern program.
        lig_kern_map = {}
        self.right_boundary_char = None
        self.left_boundary_char = None
        if sizes.nl > 0:
            # A first instruction with skip_byte 255 declares the right
            # boundary character.
            cmd = lig_kern_command(lig_step(0))
            if cmd.skip_byte == 255:
                self.right_boundary_char = cmd.next_char

            # A last instruction with skip_byte 255 points at the program of
            # the left boundary character, which is stored under code 256.
            cmd = lig_kern_command(lig_step((sizes.nl - 1)))
            if cmd.skip_byte == 255:
                self.left_boundary_char = 256
                r = 256 * cmd.op_byte + cmd.remainder
                lig_kern_map[self.left_boundary_char] = r

        # Names of the four bytes of an extensible recipe, in file order.
        part_names = ("top", "mid", "bot", "rep")

        self.chars = {}
        for c in range(sizes.bc, sizes.ec + 1):
            if width_index(c) > 0:
                self.chars[c] = info = {}
                info["width"] = width(c)
                if height_index(c) > 0:
                    info["height"] = height(c)
                if depth_index(c) > 0:
                    info["depth"] = depth(c)
                if italic_index(c) > 0:
                    info["italic"] = italic(c)
                char_tag = tag(c)
                if char_tag == NO_TAG:
                    pass
                elif char_tag == LIG_TAG:
                    lig_kern_map[c] = remainder(c)
                elif char_tag == LIST_TAG:
                    info["nextlarger"] = remainder(c)
                elif char_tag == EXT_TAG:
                    info["varchar"] = varchar = {}
                    for i in range(4):
                        part = data[exten(c) + i]
                        # top/mid/bot are optional (zero means absent); the
                        # repeated piece is always recorded.
                        if i == 3 or part > 0:
                            name = part_names[i]
                            # A piece naming a nonexistent character falls
                            # back to the character itself.
                            if nonexistent(part):
                                varchar[name] = c
                            else:
                                varchar[name] = part

        self.ligatures = {}
        self.kerning = {}
        for c, i in sorted(lig_kern_map.items()):
            cmd = lig_kern_command(lig_step(i))
            if cmd.skip_byte > STOP_FLAG:
                # Indirect start: the real program begins elsewhere.
                i = 256 * cmd.op_byte + cmd.remainder

            while i < sizes.nl:
                cmd = lig_kern_command(lig_step(i))
                if cmd.skip_byte > STOP_FLAG:
                    # Not a regular step; it is ignored.
                    pass
                else:
                    if cmd.op_byte >= KERN_FLAG:
                        r = 256 * (cmd.op_byte - KERN_FLAG) + cmd.remainder
                        self.kerning.setdefault(c, {})[cmd.next_char] = kern(r)
                    else:
                        r = cmd.op_byte
                        if r == 4 or (r > 7 and r != 11):
                            # Ligature step with nonstandard code, we output
                            # the code verbatim.
                            lig = r
                        else:
                            # Build the operation name: optional "/" before
                            # and after "LIG", then one ">" per multiple of 4.
                            lig = ""
                            if r % 4 > 1:
                                lig += "/"
                            lig += "LIG"
                            if r % 2 != 0:
                                lig += "/"
                            while r > 3:
                                lig += ">"
                                r -= 4
                        self.ligatures.setdefault(c, {})[cmd.next_char] = (
                            lig,
                            cmd.remainder,
                        )

                if cmd.skip_byte >= STOP_FLAG:
                    break
                i += cmd.skip_byte + 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import sys

    tfm = TFM(sys.argv[1])

    # Dump every parsed attribute as "tfm.<name>=<value>", one per line,
    # followed by the object's repr.
    attribute_names = (
        "checksum",
        "designsize",
        "codingscheme",
        "fonttype",
        "family",
        "seven_bit_safe_flag",
        "face",
        "extraheader",
        "fontdimens",
        "right_boundary_char",
        "left_boundary_char",
        "kerning",
        "ligatures",
        "chars",
    )
    report = [f"tfm.{name}={getattr(tfm, name)}" for name in attribute_names]
    print("\n".join(report))
    print(tfm)
|
BIN
Tests/tfmLib/data/cmex10.tfm
Normal file
BIN
Tests/tfmLib/data/cmex10.tfm
Normal file
Binary file not shown.
BIN
Tests/tfmLib/data/cmr10.tfm
Normal file
BIN
Tests/tfmLib/data/cmr10.tfm
Normal file
Binary file not shown.
BIN
Tests/tfmLib/data/cmsy10.tfm
Normal file
BIN
Tests/tfmLib/data/cmsy10.tfm
Normal file
Binary file not shown.
BIN
Tests/tfmLib/data/dummy-space.tfm
Normal file
BIN
Tests/tfmLib/data/dummy-space.tfm
Normal file
Binary file not shown.
90
Tests/tfmLib/tfmLib_test.py
Normal file
90
Tests/tfmLib/tfmLib_test.py
Normal file
@ -0,0 +1,90 @@
|
||||
import glob
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from fontTools import tfmLib
|
||||
|
||||
DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("path", glob.glob(f"{DATA_DIR}/cm*.tfm"))
def test_read(path):
    """Every bundled ``cm*.tfm`` font parses and has the expected basics."""
    font = tfmLib.TFM(path)

    # Header and parameters.
    assert font.designsize == 10.0
    assert font.fontdimens
    assert len(font.fontdimens) >= 7
    assert font.extraheader == {}

    # None of these fonts declares boundary characters.
    assert font.right_boundary_char is None
    assert font.left_boundary_char is None

    assert len(font.chars) == 128
|
||||
|
||||
|
||||
def test_read_boundary_char():
    """Both boundary characters of ``dummy-space.tfm`` are detected."""
    font = tfmLib.TFM(os.path.join(DATA_DIR, "dummy-space.tfm"))
    assert font.right_boundary_char == 1
    assert font.left_boundary_char == 256
|
||||
|
||||
|
||||
def test_read_fontdimens_vanilla():
    """A text font exposes exactly the seven base parameters."""
    expected = {
        "SLANT": 0.0,
        "SPACE": 0.33333396911621094,
        "STRETCH": 0.16666698455810547,
        "SHRINK": 0.11111164093017578,
        "XHEIGHT": 0.4305553436279297,
        "QUAD": 1.0000028610229492,
        "EXTRASPACE": 0.11111164093017578,
    }
    font = tfmLib.TFM(os.path.join(DATA_DIR, "cmr10.tfm"))
    assert font.fontdimens == expected
|
||||
|
||||
|
||||
def test_read_fontdimens_mathex():
    """A math extension font names its extra parameters accordingly."""
    expected = {
        "SLANT": 0.0,
        "SPACE": 0.0,
        "STRETCH": 0.0,
        "SHRINK": 0.0,
        "XHEIGHT": 0.4305553436279297,
        "QUAD": 1.0000028610229492,
        "EXTRASPACE": 0.0,
        "DEFAULTRULETHICKNESS": 0.03999900817871094,
        "BIGOPSPACING1": 0.11111164093017578,
        "BIGOPSPACING2": 0.16666698455810547,
        "BIGOPSPACING3": 0.19999980926513672,
        "BIGOPSPACING4": 0.6000003814697266,
        "BIGOPSPACING5": 0.10000038146972656,
    }
    font = tfmLib.TFM(os.path.join(DATA_DIR, "cmex10.tfm"))
    assert font.fontdimens == expected
|
||||
|
||||
|
||||
def test_read_fontdimens_mathsy():
    """A math symbols font names its extra parameters accordingly."""
    # Dict comparison ignores key order; entries are listed in file order.
    expected = {
        "SLANT": 0.25,
        "SPACE": 0.0,
        "STRETCH": 0.0,
        "SHRINK": 0.0,
        "XHEIGHT": 0.4305553436279297,
        "QUAD": 1.0000028610229492,
        "EXTRASPACE": 0.0,
        "NUM1": 0.6765079498291016,
        "NUM2": 0.39373207092285156,
        "NUM3": 0.44373130798339844,
        "DENOM1": 0.6859512329101562,
        "DENOM2": 0.34484100341796875,
        "SUP1": 0.41289234161376953,
        "SUP2": 0.36289215087890625,
        "SUP3": 0.28888893127441406,
        "SUB1": 0.14999961853027344,
        "SUB2": 0.24721717834472656,
        "SUPDROP": 0.3861083984375,
        "SUBDROP": 0.05000019073486328,
        "DELIM1": 2.3899993896484375,
        "DELIM2": 1.010000228881836,
        "AXISHEIGHT": 0.25,
    }
    font = tfmLib.TFM(os.path.join(DATA_DIR, "cmsy10.tfm"))
    assert font.fontdimens == expected
|
Loading…
x
Reference in New Issue
Block a user