Merge pull request #1150 from anthrotype/ot-tags-from-script
unicodedata: add ot_tags_from_script function
This commit is contained in:
commit
2ed59f20f1
37
Lib/fontTools/unicodedata/OTTags.py
Normal file
37
Lib/fontTools/unicodedata/OTTags.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
# Data updated to OpenType 1.8.2 as of January 2018.
|
||||||
|
|
||||||
|
# Complete list of OpenType script tags at:
|
||||||
|
# https://www.microsoft.com/typography/otspec/scripttags.htm
|
||||||
|
|
||||||
|
# Most of the script tags are the same as the ISO 15924 tag but lowercased,
|
||||||
|
# so we only have to handle the exceptional cases:
|
||||||
|
# - KATAKANA and HIRAGANA both map to 'kana';
|
||||||
|
# - tags shorter than four characters are padded with trailing spaces
#   (e.g. 'lao '), whereas ISO 15924 codes are always four letters;
|
||||||
|
# - we map special script codes for Inherited, Common and Unknown to DFLT.
|
||||||
|
|
||||||
|
# Script tag used for the special script codes below.
DEFAULT_SCRIPT = "DFLT"

# ISO 15924 script codes whose OpenType script tag is not simply the code
# with its first letter lowercased.
# An OpenType tag is always exactly four characters long: tags with fewer
# letters are padded with trailing spaces, so the padding below is
# significant and must not be stripped.
SCRIPT_EXCEPTIONS = {
    "Hira": "kana",
    "Hrkt": "kana",
    "Laoo": "lao ",
    "Yiii": "yi  ",  # two trailing spaces: the tag has only two letters
    "Nkoo": "nko ",
    "Vaii": "vai ",
    # Inherited, Common and Unknown have no script tag of their own.
    "Zinh": DEFAULT_SCRIPT,
    "Zyyy": DEFAULT_SCRIPT,
    "Zzzz": DEFAULT_SCRIPT,
}
|
||||||
|
|
||||||
|
# Additional OpenType script tags, keyed by ISO 15924 script code.
# Each value is a tuple so that one script can carry several extra tags.
NEW_SCRIPT_TAGS = dict(
    Beng=("bng2",),
    Deva=("dev2",),
    Gujr=("gjr2",),
    Guru=("gur2",),
    Knda=("knd2",),
    Mlym=("mlm2",),
    Orya=("ory2",),
    Taml=("tml2",),
    Telu=("tel2",),
    Mymr=("mym2",),
)
|
@ -13,7 +13,7 @@ except ImportError: # pragma: no cover
|
|||||||
# fall back to built-in unicodedata (possibly outdated)
|
# fall back to built-in unicodedata (possibly outdated)
|
||||||
from unicodedata import *
|
from unicodedata import *
|
||||||
|
|
||||||
from . import Blocks, Scripts, ScriptExtensions
|
from . import Blocks, Scripts, ScriptExtensions, OTTags
|
||||||
|
|
||||||
|
|
||||||
__all__ = [tostr(s) for s in (
|
__all__ = [tostr(s) for s in (
|
||||||
@ -38,6 +38,7 @@ __all__ = [tostr(s) for s in (
|
|||||||
"script_extension",
|
"script_extension",
|
||||||
"script_name",
|
"script_name",
|
||||||
"script_code",
|
"script_code",
|
||||||
|
"ot_tags_from_script",
|
||||||
)]
|
)]
|
||||||
|
|
||||||
|
|
||||||
@ -147,3 +148,24 @@ def block(char):
|
|||||||
code = byteord(char)
|
code = byteord(char)
|
||||||
i = bisect_right(Blocks.RANGES, code)
|
i = bisect_right(Blocks.RANGES, code)
|
||||||
return Blocks.VALUES[i-1]
|
return Blocks.VALUES[i-1]
|
||||||
|
|
||||||
|
|
||||||
|
def ot_tags_from_script(script_code):
    """Look up the OpenType script tags for a Unicode script code.

    The result is always a list. A code that is not a key of
    ``Scripts.NAMES`` yields ``[OTTags.DEFAULT_SCRIPT]``. Otherwise the
    list ends with the script's base tag (its entry in
    ``OTTags.SCRIPT_EXCEPTIONS``, or the code with its first letter
    lowercased), preceded by any tags from ``OTTags.NEW_SCRIPT_TAGS`` in
    reverse order.
    """
    if script_code not in Scripts.NAMES:
        return [OTTags.DEFAULT_SCRIPT]

    # Base tag: explicit exception if there is one, else the lowercased code.
    lowercased_code = script_code[0].lower() + script_code[1:]
    base_tag = OTTags.SCRIPT_EXCEPTIONS.get(script_code, lowercased_code)

    extra_tags = OTTags.NEW_SCRIPT_TAGS.get(script_code)
    if extra_tags is None:
        return [base_tag]

    # Extra tags take precedence, so they go in front of the base tag.
    return list(reversed(extra_tags)) + [base_tag]
|
||||||
|
@ -203,6 +203,21 @@ def test_block():
|
|||||||
assert unicodedata.block("\u1c90") == "No_Block"
|
assert unicodedata.block("\u1c90") == "No_Block"
|
||||||
|
|
||||||
|
|
||||||
|
def test_ot_tags_from_script():
    """Check ot_tags_from_script for each kind of script code mapping."""
    expectations = [
        # simple
        ("Latn", ["latn"]),
        # script mapped to multiple new and old script tags
        ("Deva", ["dev2", "deva"]),
        # exceptions
        ("Hira", ["kana"]),
        # special script codes map to DFLT
        ("Zinh", ["DFLT"]),
        ("Zyyy", ["DFLT"]),
        ("Zzzz", ["DFLT"]),
        # this is invalid or unknown
        ("Aaaa", ["DFLT"]),
    ]
    for script_code, expected_tags in expectations:
        assert unicodedata.ot_tags_from_script(script_code) == expected_tags
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
sys.exit(pytest.main(sys.argv))
|
sys.exit(pytest.main(sys.argv))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user