[unicodedata] use bisect.bisect_right function
CPython comes with a fast C implementation of the bisect module. This gives a 4- to 5-fold speed-up over my pure-Python version.
This commit is contained in:
parent
5b3c189f6d
commit
3442da1529
@ -2,7 +2,7 @@ from __future__ import (
|
|||||||
print_function, division, absolute_import, unicode_literals)
|
print_function, division, absolute_import, unicode_literals)
|
||||||
from fontTools.misc.py23 import *
|
from fontTools.misc.py23 import *
|
||||||
|
|
||||||
import functools
|
from bisect import bisect_right
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# use unicodedata backport compatible with python2:
|
# use unicodedata backport compatible with python2:
|
||||||
@ -12,7 +12,7 @@ except ImportError:
|
|||||||
# fall back to built-in unicodedata (possibly outdated)
|
# fall back to built-in unicodedata (possibly outdated)
|
||||||
from unicodedata import *
|
from unicodedata import *
|
||||||
|
|
||||||
from .scripts import SCRIPT_RANGES
|
from .scripts import SCRIPT_RANGES, SCRIPT_NAMES
|
||||||
|
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
@ -36,36 +36,16 @@ __all__ = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def _memoize(func):
|
|
||||||
# Decorator that caches a function's return value each time it is
|
|
||||||
# called, and returns the cached value if called later with the same
|
|
||||||
# argument.
|
|
||||||
cache = func.cache = {}
|
|
||||||
|
|
||||||
@functools.wraps(func)
|
|
||||||
def wrapper(arg):
|
|
||||||
if arg not in cache:
|
|
||||||
cache[arg] = func(arg)
|
|
||||||
return cache[arg]
|
|
||||||
return wrapper
|
|
||||||
|
|
||||||
|
|
||||||
@_memoize
|
|
||||||
def script(char):
|
def script(char):
|
||||||
"""For the unicode character 'char' return the script name."""
|
|
||||||
code = byteord(char)
|
code = byteord(char)
|
||||||
return _binary_search_range(code, SCRIPT_RANGES, default="Unknown")
|
# 'bisect_right(a, x, lo=0, hi=len(a))' returns an insertion point which
|
||||||
|
# comes after (to the right of) any existing entries of x in a, and it
|
||||||
|
# partitions array a into two halves so that, for the left side
|
||||||
def _binary_search_range(code, ranges, default=None):
|
# all(val <= x for val in a[lo:i]), and for the right side
|
||||||
left = 0
|
# all(val > x for val in a[i:hi]).
|
||||||
right = len(ranges) - 1
|
# Our 'SCRIPT_RANGES' is a sorted list of ranges (only their starting
|
||||||
while right >= left:
|
# breakpoints); we want to use `bisect_right` to look up the range that
|
||||||
mid = (left + right) >> 1
|
# contains the given codepoint: i.e. whose start is less than or equal
|
||||||
if code < ranges[mid][0]:
|
# to the codepoint. Thus, we subtract 1 from the index returned.
|
||||||
right = mid - 1
|
i = bisect_right(SCRIPT_RANGES, code)
|
||||||
elif code > ranges[mid][1]:
|
return SCRIPT_NAMES[i-1]
|
||||||
left = mid + 1
|
|
||||||
else:
|
|
||||||
return ranges[mid][2]
|
|
||||||
return default
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user