From c9259c47238d2c1874468111c404c2c79ccf4c86 Mon Sep 17 00:00:00 2001 From: Cosimo Lupo Date: Thu, 18 Jan 2018 20:20:17 +0000 Subject: [PATCH] unicodedata: add ot_tags_from_script function Fixes https://github.com/fonttools/fonttools/issues/1112 This implements the same logic found in harfbuzz hb-ot-tag.cc to convert from Unicode (or ISO 15924) script codes to OpenType script tags as defined at: https://www.microsoft.com/typography/otspec/scripttags.htm https://github.com/harfbuzz/harfbuzz/blob/461a605fdec3361a038d3715adf615353c4f91fa/src/hb-ot-tag.cc#L127 --- Lib/fontTools/unicodedata/OTTags.py | 37 +++++++++++++++++++++++++++ Lib/fontTools/unicodedata/__init__.py | 23 ++++++++++++++++- Tests/unicodedata_test.py | 15 +++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 Lib/fontTools/unicodedata/OTTags.py diff --git a/Lib/fontTools/unicodedata/OTTags.py b/Lib/fontTools/unicodedata/OTTags.py new file mode 100644 index 000000000..a40aec2ef --- /dev/null +++ b/Lib/fontTools/unicodedata/OTTags.py @@ -0,0 +1,37 @@ +# Data updated to OpenType 1.8.2 as of January 2018. + +# Complete list of OpenType script tags at: +# https://www.microsoft.com/typography/otspec/scripttags.htm + +# Most of the script tags are the same as the ISO 15924 tag but lowercased, +# so we only have to handle the exceptional cases: +# - KATAKANA and HIRAGANA both map to 'kana'; +# - spaces at the end are preserved, unlike ISO 15924; +# - we map special script codes for Inherited, Common and Unknown to DFLT. 
+ +DEFAULT_SCRIPT = "DFLT" + +SCRIPT_EXCEPTIONS = { + "Hira": "kana", + "Hrkt": "kana", + "Laoo": "lao ", + "Yiii": "yi ", + "Nkoo": "nko ", + "Vaii": "vai ", + "Zinh": DEFAULT_SCRIPT, + "Zyyy": DEFAULT_SCRIPT, + "Zzzz": DEFAULT_SCRIPT, +} + +NEW_SCRIPT_TAGS = { + "Beng": ("bng2",), + "Deva": ("dev2",), + "Gujr": ("gjr2",), + "Guru": ("gur2",), + "Knda": ("knd2",), + "Mlym": ("mlm2",), + "Orya": ("ory2",), + "Taml": ("tml2",), + "Telu": ("tel2",), + "Mymr": ("mym2",), +} diff --git a/Lib/fontTools/unicodedata/__init__.py b/Lib/fontTools/unicodedata/__init__.py index c2bc29e15..bec2b4f0b 100644 --- a/Lib/fontTools/unicodedata/__init__.py +++ b/Lib/fontTools/unicodedata/__init__.py @@ -13,7 +13,7 @@ except ImportError: # pragma: no cover # fall back to built-in unicodedata (possibly outdated) from unicodedata import * -from . import Blocks, Scripts, ScriptExtensions +from . import Blocks, Scripts, ScriptExtensions, OTTags __all__ = [tostr(s) for s in ( @@ -147,3 +147,24 @@ def block(char): code = byteord(char) i = bisect_right(Blocks.RANGES, code) return Blocks.VALUES[i-1] + + +def ot_tags_from_script(script_code): + """ Return a list of OpenType script tags associated with a given + Unicode script code. + Return ['DFLT'] script tag for invalid/unknown script codes. 
+ """ + if script_code not in Scripts.NAMES: + return [OTTags.DEFAULT_SCRIPT] + + script_tags = [ + OTTags.SCRIPT_EXCEPTIONS.get( + script_code, + script_code[0].lower() + script_code[1:] + ) + ] + if script_code in OTTags.NEW_SCRIPT_TAGS: + script_tags.extend(OTTags.NEW_SCRIPT_TAGS[script_code]) + script_tags.reverse() # last in, first out + + return script_tags diff --git a/Tests/unicodedata_test.py b/Tests/unicodedata_test.py index dba02e7c9..a3ac66249 100644 --- a/Tests/unicodedata_test.py +++ b/Tests/unicodedata_test.py @@ -203,6 +203,21 @@ def test_block(): assert unicodedata.block("\u1c90") == "No_Block" +def test_ot_tags_from_script(): + # simple + assert unicodedata.ot_tags_from_script("Latn") == ["latn"] + # script mapped to multiple new and old script tags + assert unicodedata.ot_tags_from_script("Deva") == ["dev2", "deva"] + # exceptions + assert unicodedata.ot_tags_from_script("Hira") == ["kana"] + # special script codes map to DFLT + assert unicodedata.ot_tags_from_script("Zinh") == ["DFLT"] + assert unicodedata.ot_tags_from_script("Zyyy") == ["DFLT"] + assert unicodedata.ot_tags_from_script("Zzzz") == ["DFLT"] + # this is invalid or unknown + assert unicodedata.ot_tags_from_script("Aaaa") == ["DFLT"] + + if __name__ == "__main__": import sys sys.exit(pytest.main(sys.argv))