unicodedata: add ot_tag_to_script function
Returns the Unicode script code for a given OpenType script tag, or None if no match is found (including for the "DFLT" tag). Raises ValueError if the tag is invalid.
This commit is contained in:
parent
29deb7e6fb
commit
677954d5b9
@ -35,3 +35,7 @@ NEW_SCRIPT_TAGS = {
|
|||||||
"Telu": ("tel2",),
|
"Telu": ("tel2",),
|
||||||
"Mymr": ("mym2",),
|
"Mymr": ("mym2",),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Reverse lookup table built from NEW_SCRIPT_TAGS: each OpenType tag listed in
# a script's tuple maps back to that script's code (e.g. "tel2" -> "Telu").
NEW_SCRIPT_TAGS_REVERSED = {
    ot_tag: script_code
    for script_code, ot_tags in NEW_SCRIPT_TAGS.items()
    for ot_tag in ot_tags
}
|
||||||
|
@ -40,6 +40,7 @@ __all__ = [tostr(s) for s in (
|
|||||||
"script_code",
|
"script_code",
|
||||||
"script_horizontal_direction",
|
"script_horizontal_direction",
|
||||||
"ot_tags_from_script",
|
"ot_tags_from_script",
|
||||||
|
"ot_tag_to_script",
|
||||||
)]
|
)]
|
||||||
|
|
||||||
|
|
||||||
@ -239,3 +240,37 @@ def ot_tags_from_script(script_code):
|
|||||||
script_tags.reverse() # last in, first out
|
script_tags.reverse() # last in, first out
|
||||||
|
|
||||||
return script_tags
|
return script_tags
|
||||||
|
|
||||||
|
|
||||||
|
def ot_tag_to_script(tag):
    """Map an OpenType script tag to its Unicode script code.

    The tag is converted with ``tostr`` and stripped of surrounding
    whitespace; tags shorter than four characters are right-padded with
    spaces before being looked up.

    Returns the script code string, or None when the tag is the default
    script tag ("DFLT") or when the derived code is not a known Unicode
    script.

    Raises ValueError if the stripped tag is empty, longer than four
    characters, or contains an embedded space.
    """
    stripped = tostr(tag).strip()
    if not (0 < len(stripped) <= 4) or " " in stripped:
        raise ValueError("invalid OpenType tag: %r" % stripped)

    # OpenType tags are always four characters wide; pad short ones with
    # trailing spaces.
    padded = stripped.ljust(4)

    if padded == OTTags.DEFAULT_SCRIPT:
        # it's unclear which Unicode script the "DFLT" OpenType tag maps to,
        # so None is returned for it
        return None

    # Tags listed explicitly in the reversed table take precedence over the
    # algorithmic conversion below.
    try:
        return OTTags.NEW_SCRIPT_TAGS_REVERSED[padded]
    except KeyError:
        pass

    # The remaining conversion is purely algorithmic: upper-case the first
    # character, and replace each trailing space by repeating the character
    # that precedes it in the result (e.g. 'nko ' -> 'Nkoo').
    chars = [padded[0].upper(), padded[1]]
    for ch in padded[2:]:
        chars.append(chars[-1] if ch == " " else ch)
    candidate = chars[0] + chars[1] + chars[2] + chars[3]

    return candidate if candidate in Scripts.NAMES else None
|
||||||
|
@ -218,6 +218,26 @@ def test_ot_tags_from_script():
|
|||||||
assert unicodedata.ot_tags_from_script("Aaaa") == ["DFLT"]
|
assert unicodedata.ot_tags_from_script("Aaaa") == ["DFLT"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_ot_tag_to_script():
    """Check ot_tag_to_script on plain, special-cased, padded and invalid tags."""
    # Tags that convert by simply upper-casing the first letter.
    assert unicodedata.ot_tag_to_script("latn") == "Latn"
    assert unicodedata.ot_tag_to_script("kana") == "Kana"

    # The default tag and tags with no matching Unicode script yield None.
    # Compare with `is` because None is a singleton.
    assert unicodedata.ot_tag_to_script("DFLT") is None
    assert unicodedata.ot_tag_to_script("aaaa") is None

    # Both the old-style tag and the explicitly listed new-style tags map to
    # the same script code.
    assert unicodedata.ot_tag_to_script("beng") == "Beng"
    assert unicodedata.ot_tag_to_script("bng2") == "Beng"
    assert unicodedata.ot_tag_to_script("dev2") == "Deva"
    assert unicodedata.ot_tag_to_script("gjr2") == "Gujr"

    # Space-padded (or short) tags repeat their last letter to fill four
    # characters.
    assert unicodedata.ot_tag_to_script("yi ") == "Yiii"
    assert unicodedata.ot_tag_to_script("nko ") == "Nkoo"
    assert unicodedata.ot_tag_to_script("vai ") == "Vaii"
    assert unicodedata.ot_tag_to_script("lao ") == "Laoo"
    assert unicodedata.ot_tag_to_script("yi") == "Yiii"

    # Empty, blank, space-containing and over-long tags are rejected.
    for invalid_value in ("", " ", "z zz", "zzzzz"):
        with pytest.raises(ValueError, match="invalid OpenType tag"):
            unicodedata.ot_tag_to_script(invalid_value)
|
||||||
|
|
||||||
|
|
||||||
def test_script_horizontal_direction():
|
def test_script_horizontal_direction():
|
||||||
assert unicodedata.script_horizontal_direction("Latn") == "LTR"
|
assert unicodedata.script_horizontal_direction("Latn") == "LTR"
|
||||||
assert unicodedata.script_horizontal_direction("Arab") == "RTL"
|
assert unicodedata.script_horizontal_direction("Arab") == "RTL"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user