diff --git a/Lib/fontTools/ttLib/tables/O_S_2f_2.py b/Lib/fontTools/ttLib/tables/O_S_2f_2.py index 078f5167f..9d833d960 100644 --- a/Lib/fontTools/ttLib/tables/O_S_2f_2.py +++ b/Lib/fontTools/ttLib/tables/O_S_2f_2.py @@ -2,6 +2,7 @@ from fontTools.misc.py23 import * from fontTools.misc import sstruct from fontTools.misc.textTools import safeEval, num2binary, binary2num from fontTools.ttLib.tables import DefaultTable +import bisect import logging @@ -475,22 +476,19 @@ OS2_UNICODE_RANGES = ( ) -_unicodeRangeSets = [] +_unicodeStarts = [] +_unicodeValues = [None] -def _getUnicodeRangeSets(): - # build the sets of codepoints for each unicode range bit, and cache result - if not _unicodeRangeSets: - for bit, blocks in enumerate(OS2_UNICODE_RANGES): - rangeset = set() - for _, (start, stop) in blocks: - rangeset.update(set(range(start, stop+1))) - if bit == 57: - # The spec says that bit 57 ("Non Plane 0") implies that there's - # at least one codepoint beyond the BMP; so I also include all - # the non-BMP codepoints here - rangeset.update(set(range(0x10000, 0x110000))) - _unicodeRangeSets.append(rangeset) - return _unicodeRangeSets +def _getUnicodeRanges(): + # build the ranges of codepoints for each unicode range bit, and cache result + if not _unicodeStarts: + unicodeRanges = [ + (start, (stop, bit)) for bit, blocks in enumerate(OS2_UNICODE_RANGES) + for _, (start, stop) in blocks] + for start, (stop, bit) in sorted(unicodeRanges): + _unicodeStarts.append(start) + _unicodeValues.append((stop, bit)) + return _unicodeStarts, _unicodeValues def intersectUnicodeRanges(unicodes, inverse=False): @@ -504,15 +502,22 @@ def intersectUnicodeRanges(unicodes, inverse=False): >>> intersectUnicodeRanges([0x0410, 0x1F000]) == {9, 57, 122} True >>> intersectUnicodeRanges([0x0410, 0x1F000], inverse=True) == ( - ... set(range(123)) - {9, 57, 122}) + ... set(range(len(OS2_UNICODE_RANGES))) - {9, 57, 122}) True """ unicodes = set(unicodes) - uniranges = _getUnicodeRangeSets() - bits = set([ - bit for bit, unirange in enumerate(uniranges) - if not unirange.isdisjoint(unicodes) ^ inverse]) - return bits + unicodestarts, unicodevalues = _getUnicodeRanges() + bits = set() + for code in unicodes: + stop, bit = unicodevalues[bisect.bisect(unicodestarts, code)] + if code <= stop: + bits.add(bit) + # The spec says that bit 57 ("Non Plane 0") implies that there's + # at least one codepoint beyond the BMP; so I also include all + # the non-BMP codepoints here + if any(0x10000 <= code < 0x110000 for code in unicodes): + bits.add(57) + return set(range(len(OS2_UNICODE_RANGES))) - bits if inverse else bits if __name__ == "__main__":