Merge pull request #1150 from anthrotype/ot-tags-from-script
unicodedata: add ot_tags_from_script function
This commit is contained in:
commit
2ed59f20f1
37
Lib/fontTools/unicodedata/OTTags.py
Normal file
37
Lib/fontTools/unicodedata/OTTags.py
Normal file
@ -0,0 +1,37 @@
|
||||
# Data updated to OpenType 1.8.2 as of January 2018.
|
||||
|
||||
# Complete list of OpenType script tags at:
|
||||
# https://www.microsoft.com/typography/otspec/scripttags.htm
|
||||
|
||||
# Most of the script tags are the same as the ISO 15924 tag but lowercased,
|
||||
# so we only have to handle the exceptional cases:
|
||||
# - KATAKANA and HIRAGANA both map to 'kana';
|
||||
# - OpenType tags shorter than four letters are padded with trailing spaces (e.g. 'lao '), whereas ISO 15924 codes always use four letters ('Laoo');
|
||||
# - we map special script codes for Inherited, Common and Unknown to DFLT.
|
||||
|
||||
# Tag used for script codes that have no dedicated OpenType script tag.
DEFAULT_SCRIPT = "DFLT"

# Unicode script codes whose OpenType tag is NOT simply the script code with
# its first letter lowercased. OpenType tags are always exactly four
# characters long, so names shorter than that are padded with trailing spaces.
SCRIPT_EXCEPTIONS = {
    "Hira": "kana",
    "Hrkt": "kana",
    "Laoo": "lao ",
    "Yiii": "yi  ",  # two trailing spaces: the tag must be four characters
    "Nkoo": "nko ",
    "Vaii": "vai ",
    "Zinh": DEFAULT_SCRIPT,
    "Zyyy": DEFAULT_SCRIPT,
    "Zzzz": DEFAULT_SCRIPT,
}
|
||||
|
||||
# Extra OpenType script tags for certain Unicode script codes, keyed by the
# script code. Each value is a tuple so that a script can list several tags.
NEW_SCRIPT_TAGS = dict(
    Beng=("bng2",),
    Deva=("dev2",),
    Gujr=("gjr2",),
    Guru=("gur2",),
    Knda=("knd2",),
    Mlym=("mlm2",),
    Orya=("ory2",),
    Taml=("tml2",),
    Telu=("tel2",),
    Mymr=("mym2",),
)
|
@ -13,7 +13,7 @@ except ImportError: # pragma: no cover
|
||||
# fall back to built-in unicodedata (possibly outdated)
|
||||
from unicodedata import *
|
||||
|
||||
from . import Blocks, Scripts, ScriptExtensions
|
||||
from . import Blocks, Scripts, ScriptExtensions, OTTags
|
||||
|
||||
|
||||
__all__ = [tostr(s) for s in (
|
||||
@ -38,6 +38,7 @@ __all__ = [tostr(s) for s in (
|
||||
"script_extension",
|
||||
"script_name",
|
||||
"script_code",
|
||||
"ot_tags_from_script",
|
||||
)]
|
||||
|
||||
|
||||
@ -147,3 +148,24 @@ def block(char):
|
||||
code = byteord(char)
|
||||
i = bisect_right(Blocks.RANGES, code)
|
||||
return Blocks.VALUES[i-1]
|
||||
|
||||
|
||||
def ot_tags_from_script(script_code):
    """Look up the OpenType script tags for a Unicode script code.

    The result is always a list. When the script has entries in
    ``OTTags.NEW_SCRIPT_TAGS``, those tags come first (in the reverse of
    their listed order) and the base tag comes last; otherwise the list
    holds only the base tag.

    Script codes that are not found in ``Scripts.NAMES`` yield
    ``[OTTags.DEFAULT_SCRIPT]``.
    """
    if script_code not in Scripts.NAMES:
        return [OTTags.DEFAULT_SCRIPT]

    # Base tag: the script code with its first letter lowercased, unless the
    # code is listed as an exception.
    lowercased = script_code[0].lower() + script_code[1:]
    base_tag = OTTags.SCRIPT_EXCEPTIONS.get(script_code, lowercased)

    if script_code not in OTTags.NEW_SCRIPT_TAGS:
        return [base_tag]

    # Newer tags take precedence, so they are placed ahead of the base tag.
    newer_tags = list(OTTags.NEW_SCRIPT_TAGS[script_code])
    newer_tags.reverse()
    return newer_tags + [base_tag]
|
||||
|
@ -203,6 +203,21 @@ def test_block():
|
||||
assert unicodedata.block("\u1c90") == "No_Block"
|
||||
|
||||
|
||||
def test_ot_tags_from_script():
    """Check ot_tags_from_script against a table of (script code, tags)."""
    expectations = [
        # simple
        ("Latn", ["latn"]),
        # script mapped to multiple new and old script tags
        ("Deva", ["dev2", "deva"]),
        # exceptions
        ("Hira", ["kana"]),
        # special script codes map to DFLT
        ("Zinh", ["DFLT"]),
        ("Zyyy", ["DFLT"]),
        ("Zzzz", ["DFLT"]),
        # this is invalid or unknown
        ("Aaaa", ["DFLT"]),
    ]
    for script_code, expected_tags in expectations:
        assert unicodedata.ot_tags_from_script(script_code) == expected_tags
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run this module's tests with pytest when executed as a script, and use
    # pytest's return value as the process exit status.
    import sys

    raise SystemExit(pytest.main(sys.argv))
|
||||
|
Loading…
x
Reference in New Issue
Block a user