From 677954d5b94b4f06af60dda51cd06bdc59760094 Mon Sep 17 00:00:00 2001
From: Cosimo Lupo
Date: Tue, 23 Jan 2018 11:45:20 -0800
Subject: [PATCH] unicodedata: add ot_tag_to_script function

returns the Unicode script code for a given OpenType script tag, or None
if no match is found
---
 Lib/fontTools/unicodedata/OTTags.py   |  4 +++
 Lib/fontTools/unicodedata/__init__.py | 35 +++++++++++++++++++++++++++
 Tests/unicodedata_test.py             | 20 +++++++++++++++
 3 files changed, 59 insertions(+)

diff --git a/Lib/fontTools/unicodedata/OTTags.py b/Lib/fontTools/unicodedata/OTTags.py
index a40aec2ef..39226805d 100644
--- a/Lib/fontTools/unicodedata/OTTags.py
+++ b/Lib/fontTools/unicodedata/OTTags.py
@@ -35,3 +35,7 @@ NEW_SCRIPT_TAGS = {
     "Telu": ("tel2",),
     "Mymr": ("mym2",),
 }
+
+NEW_SCRIPT_TAGS_REVERSED = {
+    value: key for key, values in NEW_SCRIPT_TAGS.items() for value in values
+}
diff --git a/Lib/fontTools/unicodedata/__init__.py b/Lib/fontTools/unicodedata/__init__.py
index 689f04e73..0a69dbd44 100644
--- a/Lib/fontTools/unicodedata/__init__.py
+++ b/Lib/fontTools/unicodedata/__init__.py
@@ -40,6 +40,7 @@ __all__ = [tostr(s) for s in (
     "script_code",
     "script_horizontal_direction",
     "ot_tags_from_script",
+    "ot_tag_to_script",
 )]
 
 
@@ -239,3 +240,37 @@ def ot_tags_from_script(script_code):
     script_tags.reverse()  # last in, first out
 
     return script_tags
+
+
+def ot_tag_to_script(tag):
+    """Return the Unicode script code for the given OpenType script tag, or
+    None for "DFLT" tag or if there is no Unicode script associated with it.
+    Raises ValueError if the tag is empty, has inner spaces or exceeds 4 chars.
+    """
+    tag = tostr(tag).strip()
+    if not tag or " " in tag or len(tag) > 4:
+        raise ValueError("invalid OpenType tag: %r" % tag)
+
+    # pad with spaces up to the fixed 4-character length of OpenType tags
+    tag = tag.ljust(4)
+
+    if tag == OTTags.DEFAULT_SCRIPT:
+        # it's unclear which Unicode script the "DFLT" OpenType tag maps to,
+        # so here we return None
+        return None
+
+    if tag in OTTags.NEW_SCRIPT_TAGS_REVERSED:
+        return OTTags.NEW_SCRIPT_TAGS_REVERSED[tag]
+
+    # This side of the conversion is fully algorithmic
+
+    # Any spaces at the end of the tag are replaced by repeating the last
+    # letter. Eg 'nko ' -> 'Nkoo'.
+    # Change first char to uppercase
+    script_code = tag[0].upper() + tag[1]
+    for i in range(2, 4):
+        script_code += (script_code[i-1] if tag[i] == " " else tag[i])
+
+    if script_code not in Scripts.NAMES:
+        return None
+    return script_code
diff --git a/Tests/unicodedata_test.py b/Tests/unicodedata_test.py
index 0d0f2bf36..96c0f01dd 100644
--- a/Tests/unicodedata_test.py
+++ b/Tests/unicodedata_test.py
@@ -218,6 +218,26 @@ def test_ot_tags_from_script():
     assert unicodedata.ot_tags_from_script("Aaaa") == ["DFLT"]
 
 
+def test_ot_tag_to_script():
+    assert unicodedata.ot_tag_to_script("latn") == "Latn"
+    assert unicodedata.ot_tag_to_script("kana") == "Kana"
+    assert unicodedata.ot_tag_to_script("DFLT") is None
+    assert unicodedata.ot_tag_to_script("aaaa") is None
+    assert unicodedata.ot_tag_to_script("beng") == "Beng"
+    assert unicodedata.ot_tag_to_script("bng2") == "Beng"
+    assert unicodedata.ot_tag_to_script("dev2") == "Deva"
+    assert unicodedata.ot_tag_to_script("gjr2") == "Gujr"
+    assert unicodedata.ot_tag_to_script("yi ") == "Yiii"
+    assert unicodedata.ot_tag_to_script("nko ") == "Nkoo"
+    assert unicodedata.ot_tag_to_script("vai ") == "Vaii"
+    assert unicodedata.ot_tag_to_script("lao ") == "Laoo"
+    assert unicodedata.ot_tag_to_script("yi") == "Yiii"
+
+    for invalid_value in ("", " ", "z zz", "zzzzz"):
+        with pytest.raises(ValueError, match="invalid OpenType tag"):
+            unicodedata.ot_tag_to_script(invalid_value)
+
+
 def test_script_horizontal_direction():
     assert unicodedata.script_horizontal_direction("Latn") == "LTR"
     assert unicodedata.script_horizontal_direction("Arab") == "RTL"