Allow an encoding to be specified when parsing a Type 1 font

This commit is contained in:
justvanrossum 2018-04-12 14:39:44 +02:00
parent 0d474737d7
commit 52ea50f4bc
2 changed files with 11 additions and 8 deletions

View File

@ -43,13 +43,14 @@ class PSError(Exception): pass
class PSTokenizer(object): class PSTokenizer(object):
def __init__(self, buf=b''): def __init__(self, buf=b'', encoding="ascii"):
# Force self.buf to be a byte string # Force self.buf to be a byte string
buf = tobytes(buf) buf = tobytes(buf)
self.buf = buf self.buf = buf
self.len = len(buf) self.len = len(buf)
self.pos = 0 self.pos = 0
self.closed = False self.closed = False
self.encoding = encoding
def read(self, n=-1): def read(self, n=-1):
"""Read at most 'n' bytes from the buffer, or less if the read """Read at most 'n' bytes from the buffer, or less if the read
@ -122,7 +123,7 @@ class PSTokenizer(object):
_, nextpos = m.span() _, nextpos = m.span()
token = buf[pos:nextpos] token = buf[pos:nextpos]
self.pos = pos + len(token) self.pos = pos + len(token)
token = tostr(token, encoding='ascii') token = tostr(token, encoding=self.encoding)
return tokentype, token return tokentype, token
def skipwhite(self, whitematch=skipwhiteRE.match): def skipwhite(self, whitematch=skipwhiteRE.match):
@ -145,9 +146,10 @@ class PSTokenizer(object):
class PSInterpreter(PSOperators): class PSInterpreter(PSOperators):
def __init__(self): def __init__(self, encoding="ascii"):
systemdict = {} systemdict = {}
userdict = {} userdict = {}
self.encoding = encoding
self.dictstack = [systemdict, userdict] self.dictstack = [systemdict, userdict]
self.stack = [] self.stack = []
self.proclevel = 0 self.proclevel = 0
@ -174,7 +176,7 @@ class PSInterpreter(PSOperators):
self.suckoperators(systemdict, baseclass) self.suckoperators(systemdict, baseclass)
def interpret(self, data, getattr=getattr): def interpret(self, data, getattr=getattr):
tokenizer = self.tokenizer = PSTokenizer(data) tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
getnexttoken = tokenizer.getnexttoken getnexttoken = tokenizer.getnexttoken
do_token = self.do_token do_token = self.do_token
handle_object = self.handle_object handle_object = self.handle_object
@ -345,13 +347,13 @@ def unpack_item(item):
newitem = item.value newitem = item.value
return newitem return newitem
def suckfont(data): def suckfont(data, encoding="ascii"):
m = re.search(br"/FontName\s+/([^ \t\n\r]+)\s+def", data) m = re.search(br"/FontName\s+/([^ \t\n\r]+)\s+def", data)
if m: if m:
fontName = m.group(1) fontName = m.group(1)
else: else:
fontName = None fontName = None
interpreter = PSInterpreter() interpreter = PSInterpreter(encoding=encoding)
interpreter.interpret(b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop") interpreter.interpret(b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop")
interpreter.interpret(data) interpreter.interpret(data)
fontdir = interpreter.dictstack[0]['FontDirectory'].value fontdir = interpreter.dictstack[0]['FontDirectory'].value

View File

@ -49,11 +49,12 @@ class T1Font(object):
Type 1 fonts. Type 1 fonts.
""" """
def __init__(self, path=None): def __init__(self, path=None, encoding="ascii"):
if path is not None: if path is not None:
self.data, type = read(path) self.data, type = read(path)
else: else:
pass # XXX pass # XXX
self.encoding = encoding
def saveAs(self, path, type, dohex=False): def saveAs(self, path, type, dohex=False):
write(path, self.getData(), type, dohex) write(path, self.getData(), type, dohex)
@ -82,7 +83,7 @@ class T1Font(object):
def parse(self): def parse(self):
from fontTools.misc import psLib from fontTools.misc import psLib
from fontTools.misc import psCharStrings from fontTools.misc import psCharStrings
self.font = psLib.suckfont(self.data) self.font = psLib.suckfont(self.data, self.encoding)
charStrings = self.font["CharStrings"] charStrings = self.font["CharStrings"]
lenIV = self.font["Private"].get("lenIV", 4) lenIV = self.font["Private"].get("lenIV", 4)
assert lenIV >= 0 assert lenIV >= 0