diff --git a/Lib/fontTools/unicodedata/__init__.py b/Lib/fontTools/unicodedata/__init__.py index 4c6e346aa..7428b5416 100644 --- a/Lib/fontTools/unicodedata/__init__.py +++ b/Lib/fontTools/unicodedata/__init__.py @@ -39,15 +39,15 @@ __all__ = [ def script(char): - """ Return the script property assigned to the Unicode character 'char' - as string. + """ Return the four-letter script code assigned to the Unicode character + 'char' as string. >>> script("a") - 'Latin' + 'Latn' >>> script(",") - 'Common' + 'Zyyy' >>> script(unichr(0x10FFFF)) - 'Unknown' + 'Zzzz' """ code = byteord(char) # 'bisect_right(a, x, lo=0, hi=len(a))' returns an insertion point which @@ -67,11 +67,11 @@ def script_extension(char): """ Return the script extension property assigned to the Unicode character 'char' as a set of string. - >>> script_extension("a") == {'Latin'} + >>> script_extension("a") == {'Latn'} True >>> script_extension(unichr(0x060C)) == {'Arab', 'Syrc', 'Thaa'} True - >>> script_extension(unichr(0x10FFFF)) == {'Unknown'} + >>> script_extension(unichr(0x10FFFF)) == {'Zzzz'} True """ code = byteord(char) diff --git a/Tests/unicodedata_test.py b/Tests/unicodedata_test.py index 8c42f3196..ba9a3c448 100644 --- a/Tests/unicodedata_test.py +++ b/Tests/unicodedata_test.py @@ -6,160 +6,160 @@ from fontTools import unicodedata def test_script(): - assert unicodedata.script("a") == "Latin" - assert unicodedata.script(unichr(0)) == "Common" - assert unicodedata.script(unichr(0x0378)) == "Unknown" - assert unicodedata.script(unichr(0x10FFFF)) == "Unknown" + assert unicodedata.script("a") == "Latn" + assert unicodedata.script(unichr(0)) == "Zyyy" + assert unicodedata.script(unichr(0x0378)) == "Zzzz" + assert unicodedata.script(unichr(0x10FFFF)) == "Zzzz" # these were randomly sampled, one character per script - assert unicodedata.script(unichr(0x1E918)) == 'Adlam' + assert unicodedata.script(unichr(0x1E918)) == 'Adlm' assert unicodedata.script(unichr(0x1170D)) == 'Ahom' - assert unicodedata.script(unichr(0x145A0)) == 'Anatolian_Hieroglyphs' - assert unicodedata.script(unichr(0x0607)) == 'Arabic' - assert unicodedata.script(unichr(0x056C)) == 'Armenian' - assert unicodedata.script(unichr(0x10B27)) == 'Avestan' - assert unicodedata.script(unichr(0x1B41)) == 'Balinese' - assert unicodedata.script(unichr(0x168AD)) == 'Bamum' - assert unicodedata.script(unichr(0x16ADD)) == 'Bassa_Vah' - assert unicodedata.script(unichr(0x1BE5)) == 'Batak' - assert unicodedata.script(unichr(0x09F3)) == 'Bengali' - assert unicodedata.script(unichr(0x11C5B)) == 'Bhaiksuki' - assert unicodedata.script(unichr(0x3126)) == 'Bopomofo' - assert unicodedata.script(unichr(0x1103B)) == 'Brahmi' - assert unicodedata.script(unichr(0x2849)) == 'Braille' - assert unicodedata.script(unichr(0x1A0A)) == 'Buginese' - assert unicodedata.script(unichr(0x174E)) == 'Buhid' - assert unicodedata.script(unichr(0x18EE)) == 'Canadian_Aboriginal' - assert unicodedata.script(unichr(0x102B7)) == 'Carian' - assert unicodedata.script(unichr(0x1053D)) == 'Caucasian_Albanian' - assert unicodedata.script(unichr(0x11123)) == 'Chakma' + assert unicodedata.script(unichr(0x145A0)) == 'Hluw' + assert unicodedata.script(unichr(0x0607)) == 'Arab' + assert unicodedata.script(unichr(0x056C)) == 'Armn' + assert unicodedata.script(unichr(0x10B27)) == 'Avst' + assert unicodedata.script(unichr(0x1B41)) == 'Bali' + assert unicodedata.script(unichr(0x168AD)) == 'Bamu' + assert unicodedata.script(unichr(0x16ADD)) == 'Bass' + assert unicodedata.script(unichr(0x1BE5)) == 'Batk' + assert unicodedata.script(unichr(0x09F3)) == 'Beng' + assert unicodedata.script(unichr(0x11C5B)) == 'Bhks' + assert unicodedata.script(unichr(0x3126)) == 'Bopo' + assert unicodedata.script(unichr(0x1103B)) == 'Brah' + assert unicodedata.script(unichr(0x2849)) == 'Brai' + assert unicodedata.script(unichr(0x1A0A)) == 'Bugi' + assert unicodedata.script(unichr(0x174E)) == 'Buhd' + assert unicodedata.script(unichr(0x18EE)) == 'Cans' + assert unicodedata.script(unichr(0x102B7)) == 'Cari' + assert unicodedata.script(unichr(0x1053D)) == 'Aghb' + assert unicodedata.script(unichr(0x11123)) == 'Cakm' assert unicodedata.script(unichr(0xAA1F)) == 'Cham' - assert unicodedata.script(unichr(0xAB95)) == 'Cherokee' - assert unicodedata.script(unichr(0x1F0C7)) == 'Common' - assert unicodedata.script(unichr(0x2C85)) == 'Coptic' - assert unicodedata.script(unichr(0x12014)) == 'Cuneiform' - assert unicodedata.script(unichr(0x1082E)) == 'Cypriot' - assert unicodedata.script(unichr(0xA686)) == 'Cyrillic' - assert unicodedata.script(unichr(0x10417)) == 'Deseret' - assert unicodedata.script(unichr(0x093E)) == 'Devanagari' - assert unicodedata.script(unichr(0x1BC4B)) == 'Duployan' - assert unicodedata.script(unichr(0x1310C)) == 'Egyptian_Hieroglyphs' - assert unicodedata.script(unichr(0x1051C)) == 'Elbasan' - assert unicodedata.script(unichr(0x2DA6)) == 'Ethiopic' - assert unicodedata.script(unichr(0x10AD)) == 'Georgian' - assert unicodedata.script(unichr(0x2C52)) == 'Glagolitic' - assert unicodedata.script(unichr(0x10343)) == 'Gothic' - assert unicodedata.script(unichr(0x11371)) == 'Grantha' - assert unicodedata.script(unichr(0x03D0)) == 'Greek' - assert unicodedata.script(unichr(0x0AAA)) == 'Gujarati' - assert unicodedata.script(unichr(0x0A4C)) == 'Gurmukhi' - assert unicodedata.script(unichr(0x23C9F)) == 'Han' - assert unicodedata.script(unichr(0xC259)) == 'Hangul' - assert unicodedata.script(unichr(0x1722)) == 'Hanunoo' - assert unicodedata.script(unichr(0x108F5)) == 'Hatran' - assert unicodedata.script(unichr(0x05C2)) == 'Hebrew' - assert unicodedata.script(unichr(0x1B072)) == 'Hiragana' - assert unicodedata.script(unichr(0x10847)) == 'Imperial_Aramaic' - assert unicodedata.script(unichr(0x033A)) == 'Inherited' - assert unicodedata.script(unichr(0x10B66)) == 'Inscriptional_Pahlavi' - assert unicodedata.script(unichr(0x10B4B)) == 'Inscriptional_Parthian' - assert unicodedata.script(unichr(0xA98A)) == 'Javanese' - assert unicodedata.script(unichr(0x110B2)) == 'Kaithi' - assert unicodedata.script(unichr(0x0CC6)) == 'Kannada' - assert unicodedata.script(unichr(0x3337)) == 'Katakana' - assert unicodedata.script(unichr(0xA915)) == 'Kayah_Li' - assert unicodedata.script(unichr(0x10A2E)) == 'Kharoshthi' - assert unicodedata.script(unichr(0x17AA)) == 'Khmer' - assert unicodedata.script(unichr(0x11225)) == 'Khojki' - assert unicodedata.script(unichr(0x112B6)) == 'Khudawadi' - assert unicodedata.script(unichr(0x0ED7)) == 'Lao' - assert unicodedata.script(unichr(0xAB3C)) == 'Latin' - assert unicodedata.script(unichr(0x1C48)) == 'Lepcha' - assert unicodedata.script(unichr(0x1923)) == 'Limbu' - assert unicodedata.script(unichr(0x1071D)) == 'Linear_A' - assert unicodedata.script(unichr(0x100EC)) == 'Linear_B' + assert unicodedata.script(unichr(0xAB95)) == 'Cher' + assert unicodedata.script(unichr(0x1F0C7)) == 'Zyyy' + assert unicodedata.script(unichr(0x2C85)) == 'Copt' + assert unicodedata.script(unichr(0x12014)) == 'Xsux' + assert unicodedata.script(unichr(0x1082E)) == 'Cprt' + assert unicodedata.script(unichr(0xA686)) == 'Cyrl' + assert unicodedata.script(unichr(0x10417)) == 'Dsrt' + assert unicodedata.script(unichr(0x093E)) == 'Deva' + assert unicodedata.script(unichr(0x1BC4B)) == 'Dupl' + assert unicodedata.script(unichr(0x1310C)) == 'Egyp' + assert unicodedata.script(unichr(0x1051C)) == 'Elba' + assert unicodedata.script(unichr(0x2DA6)) == 'Ethi' + assert unicodedata.script(unichr(0x10AD)) == 'Geor' + assert unicodedata.script(unichr(0x2C52)) == 'Glag' + assert unicodedata.script(unichr(0x10343)) == 'Goth' + assert unicodedata.script(unichr(0x11371)) == 'Gran' + assert unicodedata.script(unichr(0x03D0)) == 'Grek' + assert unicodedata.script(unichr(0x0AAA)) == 'Gujr' + assert unicodedata.script(unichr(0x0A4C)) == 'Guru' + assert unicodedata.script(unichr(0x23C9F)) == 'Hani' + assert unicodedata.script(unichr(0xC259)) == 'Hang' + assert unicodedata.script(unichr(0x1722)) == 'Hano' + assert unicodedata.script(unichr(0x108F5)) == 'Hatr' + assert unicodedata.script(unichr(0x05C2)) == 'Hebr' + assert unicodedata.script(unichr(0x1B072)) == 'Hira' + assert unicodedata.script(unichr(0x10847)) == 'Armi' + assert unicodedata.script(unichr(0x033A)) == 'Zinh' + assert unicodedata.script(unichr(0x10B66)) == 'Phli' + assert unicodedata.script(unichr(0x10B4B)) == 'Prti' + assert unicodedata.script(unichr(0xA98A)) == 'Java' + assert unicodedata.script(unichr(0x110B2)) == 'Kthi' + assert unicodedata.script(unichr(0x0CC6)) == 'Knda' + assert unicodedata.script(unichr(0x3337)) == 'Kana' + assert unicodedata.script(unichr(0xA915)) == 'Kali' + assert unicodedata.script(unichr(0x10A2E)) == 'Khar' + assert unicodedata.script(unichr(0x17AA)) == 'Khmr' + assert unicodedata.script(unichr(0x11225)) == 'Khoj' + assert unicodedata.script(unichr(0x112B6)) == 'Sind' + assert unicodedata.script(unichr(0x0ED7)) == 'Laoo' + assert unicodedata.script(unichr(0xAB3C)) == 'Latn' + assert unicodedata.script(unichr(0x1C48)) == 'Lepc' + assert unicodedata.script(unichr(0x1923)) == 'Limb' + assert unicodedata.script(unichr(0x1071D)) == 'Lina' + assert unicodedata.script(unichr(0x100EC)) == 'Linb' assert unicodedata.script(unichr(0xA4E9)) == 'Lisu' - assert unicodedata.script(unichr(0x10284)) == 'Lycian' - assert unicodedata.script(unichr(0x10926)) == 'Lydian' - assert unicodedata.script(unichr(0x11161)) == 'Mahajani' - assert unicodedata.script(unichr(0x0D56)) == 'Malayalam' - assert unicodedata.script(unichr(0x0856)) == 'Mandaic' - assert unicodedata.script(unichr(0x10AF0)) == 'Manichaean' - assert unicodedata.script(unichr(0x11CB0)) == 'Marchen' - assert unicodedata.script(unichr(0x11D28)) == 'Masaram_Gondi' - assert unicodedata.script(unichr(0xABDD)) == 'Meetei_Mayek' - assert unicodedata.script(unichr(0x1E897)) == 'Mende_Kikakui' - assert unicodedata.script(unichr(0x109B0)) == 'Meroitic_Cursive' - assert unicodedata.script(unichr(0x10993)) == 'Meroitic_Hieroglyphs' - assert unicodedata.script(unichr(0x16F5D)) == 'Miao' + assert unicodedata.script(unichr(0x10284)) == 'Lyci' + assert unicodedata.script(unichr(0x10926)) == 'Lydi' + assert unicodedata.script(unichr(0x11161)) == 'Mahj' + assert unicodedata.script(unichr(0x0D56)) == 'Mlym' + assert unicodedata.script(unichr(0x0856)) == 'Mand' + assert unicodedata.script(unichr(0x10AF0)) == 'Mani' + assert unicodedata.script(unichr(0x11CB0)) == 'Marc' + assert unicodedata.script(unichr(0x11D28)) == 'Gonm' + assert unicodedata.script(unichr(0xABDD)) == 'Mtei' + assert unicodedata.script(unichr(0x1E897)) == 'Mend' + assert unicodedata.script(unichr(0x109B0)) == 'Merc' + assert unicodedata.script(unichr(0x10993)) == 'Mero' + assert unicodedata.script(unichr(0x16F5D)) == 'Plrd' assert unicodedata.script(unichr(0x1160B)) == 'Modi' - assert unicodedata.script(unichr(0x18A8)) == 'Mongolian' - assert unicodedata.script(unichr(0x16A48)) == 'Mro' - assert unicodedata.script(unichr(0x1128C)) == 'Multani' - assert unicodedata.script(unichr(0x105B)) == 'Myanmar' - assert unicodedata.script(unichr(0x108AF)) == 'Nabataean' - assert unicodedata.script(unichr(0x19B3)) == 'New_Tai_Lue' + assert unicodedata.script(unichr(0x18A8)) == 'Mong' + assert unicodedata.script(unichr(0x16A48)) == 'Mroo' + assert unicodedata.script(unichr(0x1128C)) == 'Mult' + assert unicodedata.script(unichr(0x105B)) == 'Mymr' + assert unicodedata.script(unichr(0x108AF)) == 'Nbat' + assert unicodedata.script(unichr(0x19B3)) == 'Talu' assert unicodedata.script(unichr(0x1143D)) == 'Newa' - assert unicodedata.script(unichr(0x07F4)) == 'Nko' - assert unicodedata.script(unichr(0x1B192)) == 'Nushu' - assert unicodedata.script(unichr(0x169C)) == 'Ogham' - assert unicodedata.script(unichr(0x1C56)) == 'Ol_Chiki' - assert unicodedata.script(unichr(0x10CE9)) == 'Old_Hungarian' - assert unicodedata.script(unichr(0x10316)) == 'Old_Italic' - assert unicodedata.script(unichr(0x10A93)) == 'Old_North_Arabian' - assert unicodedata.script(unichr(0x1035A)) == 'Old_Permic' - assert unicodedata.script(unichr(0x103D5)) == 'Old_Persian' - assert unicodedata.script(unichr(0x10A65)) == 'Old_South_Arabian' - assert unicodedata.script(unichr(0x10C09)) == 'Old_Turkic' - assert unicodedata.script(unichr(0x0B60)) == 'Oriya' - assert unicodedata.script(unichr(0x104CF)) == 'Osage' - assert unicodedata.script(unichr(0x104A8)) == 'Osmanya' - assert unicodedata.script(unichr(0x16B12)) == 'Pahawh_Hmong' - assert unicodedata.script(unichr(0x10879)) == 'Palmyrene' - assert unicodedata.script(unichr(0x11AF1)) == 'Pau_Cin_Hau' - assert unicodedata.script(unichr(0xA869)) == 'Phags_Pa' - assert unicodedata.script(unichr(0x10909)) == 'Phoenician' - assert unicodedata.script(unichr(0x10B81)) == 'Psalter_Pahlavi' - assert unicodedata.script(unichr(0xA941)) == 'Rejang' - assert unicodedata.script(unichr(0x16C3)) == 'Runic' - assert unicodedata.script(unichr(0x0814)) == 'Samaritan' - assert unicodedata.script(unichr(0xA88C)) == 'Saurashtra' - assert unicodedata.script(unichr(0x111C8)) == 'Sharada' - assert unicodedata.script(unichr(0x1045F)) == 'Shavian' - assert unicodedata.script(unichr(0x115AD)) == 'Siddham' - assert unicodedata.script(unichr(0x1D8C0)) == 'SignWriting' - assert unicodedata.script(unichr(0x0DB9)) == 'Sinhala' - assert unicodedata.script(unichr(0x110F9)) == 'Sora_Sompeng' - assert unicodedata.script(unichr(0x11A60)) == 'Soyombo' - assert unicodedata.script(unichr(0x1B94)) == 'Sundanese' - assert unicodedata.script(unichr(0xA81F)) == 'Syloti_Nagri' - assert unicodedata.script(unichr(0x0740)) == 'Syriac' - assert unicodedata.script(unichr(0x1714)) == 'Tagalog' - assert unicodedata.script(unichr(0x1761)) == 'Tagbanwa' - assert unicodedata.script(unichr(0x1965)) == 'Tai_Le' - assert unicodedata.script(unichr(0x1A32)) == 'Tai_Tham' - assert unicodedata.script(unichr(0xAA86)) == 'Tai_Viet' - assert unicodedata.script(unichr(0x116A5)) == 'Takri' - assert unicodedata.script(unichr(0x0B8E)) == 'Tamil' - assert unicodedata.script(unichr(0x1754D)) == 'Tangut' - assert unicodedata.script(unichr(0x0C40)) == 'Telugu' - assert unicodedata.script(unichr(0x07A4)) == 'Thaana' + assert unicodedata.script(unichr(0x07F4)) == 'Nkoo' + assert unicodedata.script(unichr(0x1B192)) == 'Nshu' + assert unicodedata.script(unichr(0x169C)) == 'Ogam' + assert unicodedata.script(unichr(0x1C56)) == 'Olck' + assert unicodedata.script(unichr(0x10CE9)) == 'Hung' + assert unicodedata.script(unichr(0x10316)) == 'Ital' + assert unicodedata.script(unichr(0x10A93)) == 'Narb' + assert unicodedata.script(unichr(0x1035A)) == 'Perm' + assert unicodedata.script(unichr(0x103D5)) == 'Xpeo' + assert unicodedata.script(unichr(0x10A65)) == 'Sarb' + assert unicodedata.script(unichr(0x10C09)) == 'Orkh' + assert unicodedata.script(unichr(0x0B60)) == 'Orya' + assert unicodedata.script(unichr(0x104CF)) == 'Osge' + assert unicodedata.script(unichr(0x104A8)) == 'Osma' + assert unicodedata.script(unichr(0x16B12)) == 'Hmng' + assert unicodedata.script(unichr(0x10879)) == 'Palm' + assert unicodedata.script(unichr(0x11AF1)) == 'Pauc' + assert unicodedata.script(unichr(0xA869)) == 'Phag' + assert unicodedata.script(unichr(0x10909)) == 'Phnx' + assert unicodedata.script(unichr(0x10B81)) == 'Phlp' + assert unicodedata.script(unichr(0xA941)) == 'Rjng' + assert unicodedata.script(unichr(0x16C3)) == 'Runr' + assert unicodedata.script(unichr(0x0814)) == 'Samr' + assert unicodedata.script(unichr(0xA88C)) == 'Saur' + assert unicodedata.script(unichr(0x111C8)) == 'Shrd' + assert unicodedata.script(unichr(0x1045F)) == 'Shaw' + assert unicodedata.script(unichr(0x115AD)) == 'Sidd' + assert unicodedata.script(unichr(0x1D8C0)) == 'Sgnw' + assert unicodedata.script(unichr(0x0DB9)) == 'Sinh' + assert unicodedata.script(unichr(0x110F9)) == 'Sora' + assert unicodedata.script(unichr(0x11A60)) == 'Soyo' + assert unicodedata.script(unichr(0x1B94)) == 'Sund' + assert unicodedata.script(unichr(0xA81F)) == 'Sylo' + assert unicodedata.script(unichr(0x0740)) == 'Syrc' + assert unicodedata.script(unichr(0x1714)) == 'Tglg' + assert unicodedata.script(unichr(0x1761)) == 'Tagb' + assert unicodedata.script(unichr(0x1965)) == 'Tale' + assert unicodedata.script(unichr(0x1A32)) == 'Lana' + assert unicodedata.script(unichr(0xAA86)) == 'Tavt' + assert unicodedata.script(unichr(0x116A5)) == 'Takr' + assert unicodedata.script(unichr(0x0B8E)) == 'Taml' + assert unicodedata.script(unichr(0x1754D)) == 'Tang' + assert unicodedata.script(unichr(0x0C40)) == 'Telu' + assert unicodedata.script(unichr(0x07A4)) == 'Thaa' assert unicodedata.script(unichr(0x0E42)) == 'Thai' - assert unicodedata.script(unichr(0x0F09)) == 'Tibetan' - assert unicodedata.script(unichr(0x2D3A)) == 'Tifinagh' - assert unicodedata.script(unichr(0x114B0)) == 'Tirhuta' - assert unicodedata.script(unichr(0x1038B)) == 'Ugaritic' - assert unicodedata.script(unichr(0xA585)) == 'Vai' - assert unicodedata.script(unichr(0x118CF)) == 'Warang_Citi' - assert unicodedata.script(unichr(0xA066)) == 'Yi' - assert unicodedata.script(unichr(0x11A31)) == 'Zanabazar_Square' + assert unicodedata.script(unichr(0x0F09)) == 'Tibt' + assert unicodedata.script(unichr(0x2D3A)) == 'Tfng' + assert unicodedata.script(unichr(0x114B0)) == 'Tirh' + assert unicodedata.script(unichr(0x1038B)) == 'Ugar' + assert unicodedata.script(unichr(0xA585)) == 'Vaii' + assert unicodedata.script(unichr(0x118CF)) == 'Wara' + assert unicodedata.script(unichr(0xA066)) == 'Yiii' + assert unicodedata.script(unichr(0x11A31)) == 'Zanb' def test_script_extension(): - assert unicodedata.script_extension("a") == {"Latin"} - assert unicodedata.script_extension(unichr(0)) == {"Common"} - assert unicodedata.script_extension(unichr(0x0378)) == {"Unknown"} - assert unicodedata.script_extension(unichr(0x10FFFF)) == {"Unknown"} + assert unicodedata.script_extension("a") == {"Latn"} + assert unicodedata.script_extension(unichr(0)) == {"Zyyy"} + assert unicodedata.script_extension(unichr(0x0378)) == {"Zzzz"} + assert unicodedata.script_extension(unichr(0x10FFFF)) == {"Zzzz"} assert unicodedata.script_extension("\u0660") == {'Arab', 'Thaa'} assert unicodedata.script_extension("\u0964") == {