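"""psLib.py -- a simple PostScript tokenizer and interpreter, used by
fontTools to parse Type 1 font programs (see suckfont() at the bottom of
this module)."""
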
from fontTools.misc.textTools import bytechr, byteord, bytesjoin, tobytes, tostr
from fontTools.misc import eexec
from .psOperators import (
    PSOperators,
    ps_StandardEncoding,
    ps_array,
    ps_boolean,
    ps_dict,
    ps_integer,
    ps_literal,
    ps_mark,
    ps_name,
    ps_operator,
    ps_procedure,
    ps_procmark,
    ps_real,
    ps_string,
)
import re
from collections.abc import Callable
from string import whitespace
import logging


log = logging.getLogger(__name__)

ps_special = b'()<>[]{}%'  # / is one too, but we take care of that one differently

skipwhiteRE = re.compile(bytesjoin([b"[", whitespace, b"]*"]))
endofthingPat = bytesjoin([b"[^][(){}<>/%", whitespace, b"]*"])
endofthingRE = re.compile(endofthingPat)
commentRE = re.compile(b"%[^\n\r]*")

# XXX This not entirely correct as it doesn't allow *nested* embedded parens:
stringPat = br"""
    \(
        (
            (
                [^()]*   \   [()]
            )
            |
            (
                [^()]*  \(   [^()]*  \)
            )
        )*
        [^()]*
    \)
"""
stringPat = b"".join(stringPat.split())
stringRE = re.compile(stringPat)

hexstringRE = re.compile(bytesjoin([b"<[", whitespace, b"0-9A-Fa-f]*>"]))

class PSTokenError(Exception): pass
class PSError(Exception): pass


class PSTokenizer(object):
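    """A simple tokenizer for PostScript data held fully in memory,
    exposing a minimal file-like interface (read()/close()) over a
    byte string."""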

    def __init__(self, buf=b'', encoding="ascii"):
        # Force self.buf to be a byte string
        buf = tobytes(buf)
        self.buf = buf
        self.len = len(buf)
        self.pos = 0
        self.closed = False
        self.encoding = encoding

    def read(self, n=-1):
        """Read at most 'n' bytes from the buffer, or less if the read
        hits EOF before obtaining 'n' bytes.
        If 'n' is negative or omitted, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def close(self):
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def getnexttoken(self,
            # localize some stuff, for performance
            len=len,
            ps_special=ps_special,
            stringmatch=stringRE.match,
            hexstringmatch=hexstringRE.match,
            commentmatch=commentRE.match,
            endmatch=endofthingRE.match):
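        # Returns a (tokentype, token) pair: tokentype is the name of a
        # do_XXX handler method on the interpreter ('' for plain tokens,
        # which go through do_token), and token is the decoded token text.
        # At EOF, returns (None, None).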
        self.skipwhite()
        if self.pos >= self.len:
            return None, None
        pos = self.pos
        buf = self.buf
        char = bytechr(byteord(buf[pos]))
        if char in ps_special:
            if char in b'{}[]':
                tokentype = 'do_special'
                token = char
            elif char == b'%':
                tokentype = 'do_comment'
                _, nextpos = commentmatch(buf, pos).span()
                token = buf[pos:nextpos]
            elif char == b'(':
                tokentype = 'do_string'
                m = stringmatch(buf, pos)
                if m is None:
                    raise PSTokenError('bad string at character %d' % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            elif char == b'<':
                tokentype = 'do_hexstring'
                m = hexstringmatch(buf, pos)
                if m is None:
                    raise PSTokenError('bad hexstring at character %d' % pos)
                _, nextpos = m.span()
                token = buf[pos:nextpos]
            else:
                raise PSTokenError('bad token at character %d' % pos)
        else:
            if char == b'/':
                tokentype = 'do_literal'
                m = endmatch(buf, pos + 1)
            else:
                tokentype = ''
                m = endmatch(buf, pos)
            if m is None:
                raise PSTokenError('bad token at character %d' % pos)
            _, nextpos = m.span()
            token = buf[pos:nextpos]
        self.pos = pos + len(token)
        token = tostr(token, encoding=self.encoding)
        return tokentype, token

    def skipwhite(self, whitematch=skipwhiteRE.match):
        _, nextpos = whitematch(self.buf, self.pos).span()
        self.pos = nextpos

    def starteexec(self):
        self.pos = self.pos + 1
        self.dirtybuf = self.buf[self.pos:]
        # 55665 is the standard key for eexec decryption; the first four
        # bytes of the decrypted data are random padding, so skip them.
        self.buf, R = eexec.decrypt(self.dirtybuf, 55665)
        self.len = len(self.buf)
        self.pos = 4

    def stopeexec(self):
        if not hasattr(self, 'dirtybuf'):
            return
        self.buf = self.dirtybuf
        del self.dirtybuf


class PSInterpreter(PSOperators):
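    """A minimal PostScript interpreter: executes tokens produced by a
    PSTokenizer against an operand stack and a dict stack, with the
    operators supplied by PSOperators."""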

    def __init__(self, encoding="ascii"):
        systemdict = {}
        userdict = {}
        self.encoding = encoding
        self.dictstack = [systemdict, userdict]
        self.stack = []
        self.proclevel = 0
        self.procmark = ps_procmark()
        self.fillsystemdict()

    def fillsystemdict(self):
        systemdict = self.dictstack[0]
        systemdict['['] = systemdict['mark'] = self.mark = ps_mark()
        systemdict[']'] = ps_operator(']', self.do_makearray)
        systemdict['true'] = ps_boolean(1)
        systemdict['false'] = ps_boolean(0)
        systemdict['StandardEncoding'] = ps_array(ps_StandardEncoding)
        systemdict['FontDirectory'] = ps_dict({})
        self.suckoperators(systemdict, self.__class__)

    def suckoperators(self, systemdict, klass):
        # Register every callable 'ps_*' method of this class (and,
        # recursively, its base classes) as a PostScript operator.
        for name in dir(klass):
            attr = getattr(self, name)
            if isinstance(attr, Callable) and name[:3] == 'ps_':
                name = name[3:]
                systemdict[name] = ps_operator(name, attr)
        for baseclass in klass.__bases__:
            self.suckoperators(systemdict, baseclass)

    def interpret(self, data, getattr=getattr):
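        # Tokenize 'data' and feed each token through its handler; on any
        # error, log up to 50 bytes of context before and after the current
        # position before re-raising.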
        tokenizer = self.tokenizer = PSTokenizer(data, self.encoding)
        getnexttoken = tokenizer.getnexttoken
        do_token = self.do_token
        handle_object = self.handle_object
        try:
            while 1:
                tokentype, token = getnexttoken()
                if not token:
                    break
                if tokentype:
                    handler = getattr(self, tokentype)
                    object = handler(token)
                else:
                    object = do_token(token)
                if object is not None:
                    handle_object(object)
            tokenizer.close()
            self.tokenizer = None
        except:
            if self.tokenizer is not None:
                log.debug(
                    'ps error:\n'
                    '- - - - - - -\n'
                    '%s\n'
                    '>>>\n'
                    '%s\n'
                    '- - - - - - -',
                    self.tokenizer.buf[self.tokenizer.pos-50:self.tokenizer.pos],
                    self.tokenizer.buf[self.tokenizer.pos:self.tokenizer.pos+50])
            raise

    def handle_object(self, object):
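        # Execution semantics: while building a procedure (proclevel > 0),
        # or for literals and procedure objects, the object is simply
        # pushed. Otherwise, executable names are resolved first, and the
        # result is pushed (literal), called (procedure) or invoked
        # (operator).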
        if not (self.proclevel or object.literal or object.type == 'proceduretype'):
            if object.type != 'operatortype':
                object = self.resolve_name(object.value)
            if object.literal:
                self.push(object)
            else:
                if object.type == 'proceduretype':
                    self.call_procedure(object)
                else:
                    object.function()
        else:
            self.push(object)

    def call_procedure(self, proc):
        handle_object = self.handle_object
        for item in proc.value:
            handle_object(item)

    def resolve_name(self, name):
        # Look the name up in the dict stack, topmost dictionary first.
        dictstack = self.dictstack
        for i in range(len(dictstack)-1, -1, -1):
            if name in dictstack[i]:
                return dictstack[i][name]
        raise PSError('name error: ' + str(name))

    def do_token(self, token,
            int=int,
            float=float,
            ps_name=ps_name,
            ps_integer=ps_integer,
            ps_real=ps_real):
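        # Convert a plain token to an integer, a real, or a name object.
        # PostScript radix notation 'base#digits' (e.g. '8#40' == 32) is
        # handled in the innermost branch.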
        try:
            num = int(token)
        except (ValueError, OverflowError):
            try:
                num = float(token)
            except (ValueError, OverflowError):
                if '#' in token:
                    hashpos = token.find('#')
                    try:
                        base = int(token[:hashpos])
                        num = int(token[hashpos+1:], base)
                    except (ValueError, OverflowError):
                        return ps_name(token)
                    else:
                        return ps_integer(num)
                else:
                    return ps_name(token)
            else:
                return ps_real(num)
        else:
            return ps_integer(num)

    def do_comment(self, token):
        pass

    def do_literal(self, token):
        return ps_literal(token[1:])

    def do_string(self, token):
        return ps_string(token[1:-1])

    def do_hexstring(self, token):
        hexStr = "".join(token[1:-1].split())
        if len(hexStr) % 2:
            # An odd number of hex digits implies a trailing zero, per the
            # PostScript spec.
            hexStr = hexStr + '0'
        cleanstr = []
        for i in range(0, len(hexStr), 2):
            cleanstr.append(chr(int(hexStr[i:i+2], 16)))
        cleanstr = "".join(cleanstr)
        return ps_string(cleanstr)

    def do_special(self, token):
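        # '{' pushes a procedure mark and '}' collects everything back to
        # that mark into a procedure object; '[' and ']' build arrays via
        # the mark and makearray operators instead.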
        if token == '{':
            self.proclevel = self.proclevel + 1
            return self.procmark
        elif token == '}':
            proc = []
            while 1:
                topobject = self.pop()
                if topobject == self.procmark:
                    break
                proc.append(topobject)
            self.proclevel = self.proclevel - 1
            proc.reverse()
            return ps_procedure(proc)
        elif token == '[':
            return self.mark
        elif token == ']':
            return ps_name(']')
        else:
            raise PSTokenError('huh?')

    def push(self, object):
        self.stack.append(object)

    def pop(self, *types):
        stack = self.stack
        if not stack:
            raise PSError('stack underflow')
        object = stack[-1]
        if types:
            if object.type not in types:
                raise PSError('typecheck, expected %s, found %s' % (repr(types), object.type))
        del stack[-1]
        return object

    def do_makearray(self):
        array = []
        while 1:
            topobject = self.pop()
            if topobject == self.mark:
                break
            array.append(topobject)
        array.reverse()
        self.push(ps_array(array))

    def close(self):
        """Remove circular references."""
        del self.stack
        del self.dictstack


def unpack_item(item):
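    """Recursively convert a PostScript object into plain Python values:
    dicts and lists are unpacked element by element, procedures become
    tuples, and everything else is returned as its raw value."""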
    tp = type(item.value)
    if tp == dict:
        newitem = {}
        for key, value in item.value.items():
            newitem[key] = unpack_item(value)
    elif tp == list:
        newitem = [None] * len(item.value)
        for i in range(len(item.value)):
            newitem[i] = unpack_item(item.value[i])
        if item.type == 'proceduretype':
            newitem = tuple(newitem)
    else:
        newitem = item.value
    return newitem


def suckfont(data, encoding="ascii"):
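    """Interpret the PostScript code in 'data' (a byte string holding a
    Type 1 font program) and return the font's top-level dictionary
    unpacked into plain Python objects (see unpack_item)."""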
    m = re.search(br"/FontName\s+/([^ \t\n\r]+)\s+def", data)
    if m:
        fontName = m.group(1)
        # The tokenizer decodes tokens to str, so the FontDirectory keys
        # are str; decode the name for the lookup below.
        fontName = fontName.decode()
    else:
        fontName = None
    interpreter = PSInterpreter(encoding=encoding)
    # Pre-register a dummy Helvetica; the fallback below filters it out.
    interpreter.interpret(b"/Helvetica 4 dict dup /Encoding StandardEncoding put definefont pop")
    interpreter.interpret(data)
    fontdir = interpreter.dictstack[0]['FontDirectory'].value
    if fontName in fontdir:
        rawfont = fontdir[fontName]
    else:
        # fall back, in case fontName wasn't found
        fontNames = list(fontdir.keys())
        if len(fontNames) > 1:
            fontNames.remove("Helvetica")
        fontNames.sort()
        rawfont = fontdir[fontNames[0]]
    interpreter.close()
    return unpack_item(rawfont)
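
# A rough usage sketch (illustrative only; 'data' is assumed to hold the
# PostScript source of a Type 1 font program as bytes):
#
#     font = suckfont(data)
#     print(font["FontName"])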