diff --git a/Lib/fontTools/unicodedata/Blocks.py b/Lib/fontTools/unicodedata/Blocks.py index 132b09548..0755074b7 100644 --- a/Lib/fontTools/unicodedata/Blocks.py +++ b/Lib/fontTools/unicodedata/Blocks.py @@ -4,8 +4,8 @@ # Source: https://unicode.org/Public/UNIDATA/Blocks.txt # License: http://unicode.org/copyright.html#License # -# Blocks-12.1.0.txt -# Date: 2019-03-08, 23:59:00 GMT [KW] +# Blocks-13.0.0.txt +# Date: 2019-07-10, 19:06:00 GMT [KW] # © 2019 Unicode®, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # @@ -234,10 +234,12 @@ RANGES = [ 0x10D00, # .. 0x10D3F ; Hanifi Rohingya 0x10D40, # .. 0x10E5F ; No_Block 0x10E60, # .. 0x10E7F ; Rumi Numeral Symbols - 0x10E80, # .. 0x10EFF ; No_Block + 0x10E80, # .. 0x10EBF ; Yezidi + 0x10EC0, # .. 0x10EFF ; No_Block 0x10F00, # .. 0x10F2F ; Old Sogdian 0x10F30, # .. 0x10F6F ; Sogdian - 0x10F70, # .. 0x10FDF ; No_Block + 0x10F70, # .. 0x10FAF ; No_Block + 0x10FB0, # .. 0x10FDF ; Chorasmian 0x10FE0, # .. 0x10FFF ; Elymaic 0x11000, # .. 0x1107F ; Brahmi 0x11080, # .. 0x110CF ; Kaithi @@ -265,7 +267,8 @@ RANGES = [ 0x11800, # .. 0x1184F ; Dogra 0x11850, # .. 0x1189F ; No_Block 0x118A0, # .. 0x118FF ; Warang Citi - 0x11900, # .. 0x1199F ; No_Block + 0x11900, # .. 0x1195F ; Dives Akuru + 0x11960, # .. 0x1199F ; No_Block 0x119A0, # .. 0x119FF ; Nandinagari 0x11A00, # .. 0x11A4F ; Zanabazar Square 0x11A50, # .. 0x11AAF ; Soyombo @@ -279,7 +282,8 @@ RANGES = [ 0x11D60, # .. 0x11DAF ; Gunjala Gondi 0x11DB0, # .. 0x11EDF ; No_Block 0x11EE0, # .. 0x11EFF ; Makasar - 0x11F00, # .. 0x11FBF ; No_Block + 0x11F00, # .. 0x11FAF ; No_Block + 0x11FB0, # .. 0x11FBF ; Lisu Supplement 0x11FC0, # .. 0x11FFF ; Tamil Supplement 0x12000, # .. 0x123FF ; Cuneiform 0x12400, # .. 0x1247F ; Cuneiform Numbers and Punctuation @@ -303,7 +307,9 @@ RANGES = [ 0x16FE0, # .. 0x16FFF ; Ideographic Symbols and Punctuation 0x17000, # .. 0x187FF ; Tangut 0x18800, # .. 0x18AFF ; Tangut Components - 0x18B00, # .. 
0x1AFFF ; No_Block + 0x18B00, # .. 0x18CFF ; Khitan Small Script + 0x18D00, # .. 0x18D8F ; Tangut Supplement + 0x18D90, # .. 0x1AFFF ; No_Block 0x1B000, # .. 0x1B0FF ; Kana Supplement 0x1B100, # .. 0x1B12F ; Kana Extended-A 0x1B130, # .. 0x1B16F ; Small Kana Extension @@ -354,7 +360,8 @@ RANGES = [ 0x1F900, # .. 0x1F9FF ; Supplemental Symbols and Pictographs 0x1FA00, # .. 0x1FA6F ; Chess Symbols 0x1FA70, # .. 0x1FAFF ; Symbols and Pictographs Extended-A - 0x1FB00, # .. 0x1FFFF ; No_Block + 0x1FB00, # .. 0x1FBFF ; Symbols for Legacy Computing + 0x1FC00, # .. 0x1FFFF ; No_Block 0x20000, # .. 0x2A6DF ; CJK Unified Ideographs Extension B 0x2A6E0, # .. 0x2A6FF ; No_Block 0x2A700, # .. 0x2B73F ; CJK Unified Ideographs Extension C @@ -363,7 +370,9 @@ RANGES = [ 0x2CEB0, # .. 0x2EBEF ; CJK Unified Ideographs Extension F 0x2EBF0, # .. 0x2F7FF ; No_Block 0x2F800, # .. 0x2FA1F ; CJK Compatibility Ideographs Supplement - 0x2FA20, # .. 0xDFFFF ; No_Block + 0x2FA20, # .. 0x2FFFF ; No_Block + 0x30000, # .. 0x3134F ; CJK Unified Ideographs Extension G + 0x31350, # .. 0xDFFFF ; No_Block 0xE0000, # .. 0xE007F ; Tags 0xE0080, # .. 0xE00FF ; No_Block 0xE0100, # .. 
0xE01EF ; Variation Selectors Supplement @@ -590,10 +599,12 @@ VALUES = [ 'Hanifi Rohingya', # 10D00..10D3F 'No_Block', # 10D40..10E5F 'Rumi Numeral Symbols', # 10E60..10E7F - 'No_Block', # 10E80..10EFF + 'Yezidi', # 10E80..10EBF + 'No_Block', # 10EC0..10EFF 'Old Sogdian', # 10F00..10F2F 'Sogdian', # 10F30..10F6F - 'No_Block', # 10F70..10FDF + 'No_Block', # 10F70..10FAF + 'Chorasmian', # 10FB0..10FDF 'Elymaic', # 10FE0..10FFF 'Brahmi', # 11000..1107F 'Kaithi', # 11080..110CF @@ -621,7 +632,8 @@ VALUES = [ 'Dogra', # 11800..1184F 'No_Block', # 11850..1189F 'Warang Citi', # 118A0..118FF - 'No_Block', # 11900..1199F + 'Dives Akuru', # 11900..1195F + 'No_Block', # 11960..1199F 'Nandinagari', # 119A0..119FF 'Zanabazar Square', # 11A00..11A4F 'Soyombo', # 11A50..11AAF @@ -635,7 +647,8 @@ VALUES = [ 'Gunjala Gondi', # 11D60..11DAF 'No_Block', # 11DB0..11EDF 'Makasar', # 11EE0..11EFF - 'No_Block', # 11F00..11FBF + 'No_Block', # 11F00..11FAF + 'Lisu Supplement', # 11FB0..11FBF 'Tamil Supplement', # 11FC0..11FFF 'Cuneiform', # 12000..123FF 'Cuneiform Numbers and Punctuation', # 12400..1247F @@ -659,7 +672,9 @@ VALUES = [ 'Ideographic Symbols and Punctuation', # 16FE0..16FFF 'Tangut', # 17000..187FF 'Tangut Components', # 18800..18AFF - 'No_Block', # 18B00..1AFFF + 'Khitan Small Script', # 18B00..18CFF + 'Tangut Supplement', # 18D00..18D8F + 'No_Block', # 18D90..1AFFF 'Kana Supplement', # 1B000..1B0FF 'Kana Extended-A', # 1B100..1B12F 'Small Kana Extension', # 1B130..1B16F @@ -710,7 +725,8 @@ VALUES = [ 'Supplemental Symbols and Pictographs', # 1F900..1F9FF 'Chess Symbols', # 1FA00..1FA6F 'Symbols and Pictographs Extended-A', # 1FA70..1FAFF - 'No_Block', # 1FB00..1FFFF + 'Symbols for Legacy Computing', # 1FB00..1FBFF + 'No_Block', # 1FC00..1FFFF 'CJK Unified Ideographs Extension B', # 20000..2A6DF 'No_Block', # 2A6E0..2A6FF 'CJK Unified Ideographs Extension C', # 2A700..2B73F @@ -719,7 +735,9 @@ VALUES = [ 'CJK Unified Ideographs Extension F', # 2CEB0..2EBEF 'No_Block', # 
2EBF0..2F7FF 'CJK Compatibility Ideographs Supplement', # 2F800..2FA1F - 'No_Block', # 2FA20..DFFFF + 'No_Block', # 2FA20..2FFFF + 'CJK Unified Ideographs Extension G', # 30000..3134F + 'No_Block', # 31350..DFFFF 'Tags', # E0000..E007F 'No_Block', # E0080..E00FF 'Variation Selectors Supplement', # E0100..E01EF diff --git a/Lib/fontTools/unicodedata/ScriptExtensions.py b/Lib/fontTools/unicodedata/ScriptExtensions.py index d7d2e3c8a..b4e09cd27 100644 --- a/Lib/fontTools/unicodedata/ScriptExtensions.py +++ b/Lib/fontTools/unicodedata/ScriptExtensions.py @@ -4,9 +4,9 @@ # Source: https://unicode.org/Public/UNIDATA/ScriptExtensions.txt # License: http://unicode.org/copyright.html#License # -# ScriptExtensions-12.1.0.txt -# Date: 2019-04-01, 09:10:42 GMT -# © 2019 Unicode®, Inc. +# ScriptExtensions-13.0.0.txt +# Date: 2020-01-22, 00:07:43 GMT +# © 2020 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # @@ -52,21 +52,19 @@ RANGES = [ 0x0484, # .. 0x0484 ; {'Cyrl', 'Glag'} 0x0485, # .. 0x0486 ; {'Cyrl', 'Latn'} 0x0487, # .. 0x0487 ; {'Cyrl', 'Glag'} - 0x0488, # .. 0x0588 ; None - 0x0589, # .. 0x0589 ; {'Armn', 'Geor'} - 0x058A, # .. 0x060B ; None - 0x060C, # .. 0x060C ; {'Arab', 'Rohg', 'Syrc', 'Thaa'} + 0x0488, # .. 0x060B ; None + 0x060C, # .. 0x060C ; {'Arab', 'Rohg', 'Syrc', 'Thaa', 'Yezi'} 0x060D, # .. 0x061A ; None - 0x061B, # .. 0x061B ; {'Arab', 'Rohg', 'Syrc', 'Thaa'} + 0x061B, # .. 0x061B ; {'Arab', 'Rohg', 'Syrc', 'Thaa', 'Yezi'} 0x061C, # .. 0x061C ; {'Arab', 'Syrc', 'Thaa'} 0x061D, # .. 0x061E ; None - 0x061F, # .. 0x061F ; {'Arab', 'Rohg', 'Syrc', 'Thaa'} + 0x061F, # .. 0x061F ; {'Arab', 'Rohg', 'Syrc', 'Thaa', 'Yezi'} 0x0620, # .. 0x063F ; None 0x0640, # .. 0x0640 ; {'Adlm', 'Arab', 'Mand', 'Mani', 'Phlp', 'Rohg', 'Sogd', 'Syrc'} 0x0641, # .. 0x064A ; None 0x064B, # .. 0x0655 ; {'Arab', 'Syrc'} 0x0656, # .. 
0x065F ; None - 0x0660, # .. 0x0669 ; {'Arab', 'Thaa'} + 0x0660, # .. 0x0669 ; {'Arab', 'Thaa', 'Yezi'} 0x066A, # .. 0x066F ; None 0x0670, # .. 0x0670 ; {'Arab', 'Syrc'} 0x0671, # .. 0x06D3 ; None @@ -129,7 +127,9 @@ RANGES = [ 0x1CFA, # .. 0x1CFA ; {'Nand'} 0x1CFB, # .. 0x1DBF ; None 0x1DC0, # .. 0x1DC1 ; {'Grek'} - 0x1DC2, # .. 0x202E ; None + 0x1DC2, # .. 0x1DF7 ; None + 0x1DF8, # .. 0x1DF8 ; {'Cyrl', 'Syrc'} + 0x1DF9, # .. 0x202E ; None 0x202F, # .. 0x202F ; {'Latn', 'Mong'} 0x2030, # .. 0x20EF ; None 0x20F0, # .. 0x20F0 ; {'Deva', 'Gran', 'Latn'} @@ -183,7 +183,9 @@ RANGES = [ 0x33E0, # .. 0x33FE ; {'Hani'} 0x33FF, # .. 0xA66E ; None 0xA66F, # .. 0xA66F ; {'Cyrl', 'Glag'} - 0xA670, # .. 0xA82F ; None + 0xA670, # .. 0xA6FF ; None + 0xA700, # .. 0xA707 ; {'Hani', 'Latn'} + 0xA708, # .. 0xA82F ; None 0xA830, # .. 0xA832 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Knda', 'Kthi', 'Mahj', 'Mlym', 'Modi', 'Nand', 'Sind', 'Takr', 'Tirh'} 0xA833, # .. 0xA835 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Knda', 'Kthi', 'Mahj', 'Modi', 'Nand', 'Sind', 'Takr', 'Tirh'} 0xA836, # .. 
0xA839 ; {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Kthi', 'Mahj', 'Modi', 'Sind', 'Takr', 'Tirh'} @@ -246,21 +248,19 @@ VALUES = [ {'Cyrl', 'Glag'}, # 0484..0484 {'Cyrl', 'Latn'}, # 0485..0486 {'Cyrl', 'Glag'}, # 0487..0487 - None, # 0488..0588 - {'Armn', 'Geor'}, # 0589..0589 - None, # 058A..060B - {'Arab', 'Rohg', 'Syrc', 'Thaa'}, # 060C..060C + None, # 0488..060B + {'Arab', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}, # 060C..060C None, # 060D..061A - {'Arab', 'Rohg', 'Syrc', 'Thaa'}, # 061B..061B + {'Arab', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}, # 061B..061B {'Arab', 'Syrc', 'Thaa'}, # 061C..061C None, # 061D..061E - {'Arab', 'Rohg', 'Syrc', 'Thaa'}, # 061F..061F + {'Arab', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}, # 061F..061F None, # 0620..063F {'Adlm', 'Arab', 'Mand', 'Mani', 'Phlp', 'Rohg', 'Sogd', 'Syrc'}, # 0640..0640 None, # 0641..064A {'Arab', 'Syrc'}, # 064B..0655 None, # 0656..065F - {'Arab', 'Thaa'}, # 0660..0669 + {'Arab', 'Thaa', 'Yezi'}, # 0660..0669 None, # 066A..066F {'Arab', 'Syrc'}, # 0670..0670 None, # 0671..06D3 @@ -323,7 +323,9 @@ VALUES = [ {'Nand'}, # 1CFA..1CFA None, # 1CFB..1DBF {'Grek'}, # 1DC0..1DC1 - None, # 1DC2..202E + None, # 1DC2..1DF7 + {'Cyrl', 'Syrc'}, # 1DF8..1DF8 + None, # 1DF9..202E {'Latn', 'Mong'}, # 202F..202F None, # 2030..20EF {'Deva', 'Gran', 'Latn'}, # 20F0..20F0 @@ -377,7 +379,9 @@ VALUES = [ {'Hani'}, # 33E0..33FE None, # 33FF..A66E {'Cyrl', 'Glag'}, # A66F..A66F - None, # A670..A82F + None, # A670..A6FF + {'Hani', 'Latn'}, # A700..A707 + None, # A708..A82F {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Knda', 'Kthi', 'Mahj', 'Mlym', 'Modi', 'Nand', 'Sind', 'Takr', 'Tirh'}, # A830..A832 {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Knda', 'Kthi', 'Mahj', 'Modi', 'Nand', 'Sind', 'Takr', 'Tirh'}, # A833..A835 {'Deva', 'Dogr', 'Gujr', 'Guru', 'Khoj', 'Kthi', 'Mahj', 'Modi', 'Sind', 'Takr', 'Tirh'}, # A836..A839 diff --git a/Lib/fontTools/unicodedata/Scripts.py b/Lib/fontTools/unicodedata/Scripts.py index dc8c1e2b0..12f9a0e31 100644 --- 
a/Lib/fontTools/unicodedata/Scripts.py +++ b/Lib/fontTools/unicodedata/Scripts.py @@ -4,9 +4,9 @@ # Source: https://unicode.org/Public/UNIDATA/Scripts.txt # License: http://unicode.org/copyright.html#License # -# Scripts-12.1.0.txt -# Date: 2019-04-01, 09:10:42 GMT -# © 2019 Unicode®, Inc. +# Scripts-13.0.0.txt +# Date: 2020-01-22, 00:07:43 GMT +# © 2020 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # @@ -68,9 +68,7 @@ RANGES = [ 0x0530, # .. 0x0530 ; Unknown 0x0531, # .. 0x0556 ; Armenian 0x0557, # .. 0x0558 ; Unknown - 0x0559, # .. 0x0588 ; Armenian - 0x0589, # .. 0x0589 ; Common - 0x058A, # .. 0x058A ; Armenian + 0x0559, # .. 0x058A ; Armenian 0x058B, # .. 0x058C ; Unknown 0x058D, # .. 0x058F ; Armenian 0x0590, # .. 0x0590 ; Unknown @@ -122,8 +120,8 @@ RANGES = [ 0x086B, # .. 0x089F ; Unknown 0x08A0, # .. 0x08B4 ; Arabic 0x08B5, # .. 0x08B5 ; Unknown - 0x08B6, # .. 0x08BD ; Arabic - 0x08BE, # .. 0x08D2 ; Unknown + 0x08B6, # .. 0x08C7 ; Arabic + 0x08C8, # .. 0x08D2 ; Unknown 0x08D3, # .. 0x08E1 ; Arabic 0x08E2, # .. 0x08E2 ; Common 0x08E3, # .. 0x08FF ; Arabic @@ -239,8 +237,8 @@ RANGES = [ 0x0B47, # .. 0x0B48 ; Oriya 0x0B49, # .. 0x0B4A ; Unknown 0x0B4B, # .. 0x0B4D ; Oriya - 0x0B4E, # .. 0x0B55 ; Unknown - 0x0B56, # .. 0x0B57 ; Oriya + 0x0B4E, # .. 0x0B54 ; Unknown + 0x0B55, # .. 0x0B57 ; Oriya 0x0B58, # .. 0x0B5B ; Unknown 0x0B5C, # .. 0x0B5D ; Oriya 0x0B5E, # .. 0x0B5E ; Unknown @@ -329,9 +327,7 @@ RANGES = [ 0x0CF0, # .. 0x0CF0 ; Unknown 0x0CF1, # .. 0x0CF2 ; Kannada 0x0CF3, # .. 0x0CFF ; Unknown - 0x0D00, # .. 0x0D03 ; Malayalam - 0x0D04, # .. 0x0D04 ; Unknown - 0x0D05, # .. 0x0D0C ; Malayalam + 0x0D00, # .. 0x0D0C ; Malayalam 0x0D0D, # .. 0x0D0D ; Unknown 0x0D0E, # .. 0x0D10 ; Malayalam 0x0D11, # .. 0x0D11 ; Unknown @@ -344,8 +340,8 @@ RANGES = [ 0x0D54, # .. 0x0D63 ; Malayalam 0x0D64, # .. 
0x0D65 ; Unknown 0x0D66, # .. 0x0D7F ; Malayalam - 0x0D80, # .. 0x0D81 ; Unknown - 0x0D82, # .. 0x0D83 ; Sinhala + 0x0D80, # .. 0x0D80 ; Unknown + 0x0D81, # .. 0x0D83 ; Sinhala 0x0D84, # .. 0x0D84 ; Unknown 0x0D85, # .. 0x0D96 ; Sinhala 0x0D97, # .. 0x0D99 ; Unknown @@ -537,8 +533,8 @@ RANGES = [ 0x1A9A, # .. 0x1A9F ; Unknown 0x1AA0, # .. 0x1AAD ; Tai_Tham 0x1AAE, # .. 0x1AAF ; Unknown - 0x1AB0, # .. 0x1ABE ; Inherited - 0x1ABF, # .. 0x1AFF ; Unknown + 0x1AB0, # .. 0x1AC0 ; Inherited + 0x1AC1, # .. 0x1AFF ; Unknown 0x1B00, # .. 0x1B4B ; Balinese 0x1B4C, # .. 0x1B4F ; Unknown 0x1B50, # .. 0x1B7C ; Balinese @@ -658,8 +654,8 @@ RANGES = [ 0x2900, # .. 0x2B73 ; Common 0x2B74, # .. 0x2B75 ; Unknown 0x2B76, # .. 0x2B95 ; Common - 0x2B96, # .. 0x2B97 ; Unknown - 0x2B98, # .. 0x2BFF ; Common + 0x2B96, # .. 0x2B96 ; Unknown + 0x2B97, # .. 0x2BFF ; Common 0x2C00, # .. 0x2C2E ; Glagolitic 0x2C2F, # .. 0x2C2F ; Unknown 0x2C30, # .. 0x2C5E ; Glagolitic @@ -698,8 +694,8 @@ RANGES = [ 0x2DD8, # .. 0x2DDE ; Ethiopic 0x2DDF, # .. 0x2DDF ; Unknown 0x2DE0, # .. 0x2DFF ; Cyrillic - 0x2E00, # .. 0x2E4F ; Common - 0x2E50, # .. 0x2E7F ; Unknown + 0x2E00, # .. 0x2E52 ; Common + 0x2E53, # .. 0x2E7F ; Unknown 0x2E80, # .. 0x2E99 ; Han 0x2E9A, # .. 0x2E9A ; Unknown 0x2E9B, # .. 0x2EF3 ; Han @@ -735,8 +731,7 @@ RANGES = [ 0x3131, # .. 0x318E ; Hangul 0x318F, # .. 0x318F ; Unknown 0x3190, # .. 0x319F ; Common - 0x31A0, # .. 0x31BA ; Bopomofo - 0x31BB, # .. 0x31BF ; Unknown + 0x31A0, # .. 0x31BF ; Bopomofo 0x31C0, # .. 0x31E3 ; Common 0x31E4, # .. 0x31EF ; Unknown 0x31F0, # .. 0x31FF ; Katakana @@ -749,11 +744,10 @@ RANGES = [ 0x32FF, # .. 0x32FF ; Common 0x3300, # .. 0x3357 ; Katakana 0x3358, # .. 0x33FF ; Common - 0x3400, # .. 0x4DB5 ; Han - 0x4DB6, # .. 0x4DBF ; Unknown + 0x3400, # .. 0x4DBF ; Han 0x4DC0, # .. 0x4DFF ; Common - 0x4E00, # .. 0x9FEF ; Han - 0x9FF0, # .. 0x9FFF ; Unknown + 0x4E00, # .. 0x9FFC ; Han + 0x9FFD, # .. 0x9FFF ; Unknown 0xA000, # .. 0xA48C ; Yi 0xA48D, # .. 
0xA48F ; Unknown 0xA490, # .. 0xA4C6 ; Yi @@ -769,11 +763,11 @@ RANGES = [ 0xA788, # .. 0xA78A ; Common 0xA78B, # .. 0xA7BF ; Latin 0xA7C0, # .. 0xA7C1 ; Unknown - 0xA7C2, # .. 0xA7C6 ; Latin - 0xA7C7, # .. 0xA7F6 ; Unknown - 0xA7F7, # .. 0xA7FF ; Latin - 0xA800, # .. 0xA82B ; Syloti_Nagri - 0xA82C, # .. 0xA82F ; Unknown + 0xA7C2, # .. 0xA7CA ; Latin + 0xA7CB, # .. 0xA7F4 ; Unknown + 0xA7F5, # .. 0xA7FF ; Latin + 0xA800, # .. 0xA82C ; Syloti_Nagri + 0xA82D, # .. 0xA82F ; Unknown 0xA830, # .. 0xA839 ; Common 0xA83A, # .. 0xA83F ; Unknown 0xA840, # .. 0xA877 ; Phags_Pa @@ -826,8 +820,9 @@ RANGES = [ 0xAB5B, # .. 0xAB5B ; Common 0xAB5C, # .. 0xAB64 ; Latin 0xAB65, # .. 0xAB65 ; Greek - 0xAB66, # .. 0xAB67 ; Latin - 0xAB68, # .. 0xAB6F ; Unknown + 0xAB66, # .. 0xAB69 ; Latin + 0xAB6A, # .. 0xAB6B ; Common + 0xAB6C, # .. 0xAB6F ; Unknown 0xAB70, # .. 0xABBF ; Cherokee 0xABC0, # .. 0xABED ; Meetei_Mayek 0xABEE, # .. 0xABEF ; Unknown @@ -932,8 +927,8 @@ RANGES = [ 0x10137, # .. 0x1013F ; Common 0x10140, # .. 0x1018E ; Greek 0x1018F, # .. 0x1018F ; Unknown - 0x10190, # .. 0x1019B ; Common - 0x1019C, # .. 0x1019F ; Unknown + 0x10190, # .. 0x1019C ; Common + 0x1019D, # .. 0x1019F ; Unknown 0x101A0, # .. 0x101A0 ; Greek 0x101A1, # .. 0x101CF ; Unknown 0x101D0, # .. 0x101FC ; Common @@ -1069,11 +1064,19 @@ RANGES = [ 0x10D30, # .. 0x10D39 ; Hanifi_Rohingya 0x10D3A, # .. 0x10E5F ; Unknown 0x10E60, # .. 0x10E7E ; Arabic - 0x10E7F, # .. 0x10EFF ; Unknown + 0x10E7F, # .. 0x10E7F ; Unknown + 0x10E80, # .. 0x10EA9 ; Yezidi + 0x10EAA, # .. 0x10EAA ; Unknown + 0x10EAB, # .. 0x10EAD ; Yezidi + 0x10EAE, # .. 0x10EAF ; Unknown + 0x10EB0, # .. 0x10EB1 ; Yezidi + 0x10EB2, # .. 0x10EFF ; Unknown 0x10F00, # .. 0x10F27 ; Old_Sogdian 0x10F28, # .. 0x10F2F ; Unknown 0x10F30, # .. 0x10F59 ; Sogdian - 0x10F5A, # .. 0x10FDF ; Unknown + 0x10F5A, # .. 0x10FAF ; Unknown + 0x10FB0, # .. 0x10FCB ; Chorasmian + 0x10FCC, # .. 0x10FDF ; Unknown 0x10FE0, # .. 0x10FF6 ; Elymaic 0x10FF7, # .. 
0x10FFF ; Unknown 0x11000, # .. 0x1104D ; Brahmi @@ -1091,13 +1094,11 @@ RANGES = [ 0x110FA, # .. 0x110FF ; Unknown 0x11100, # .. 0x11134 ; Chakma 0x11135, # .. 0x11135 ; Unknown - 0x11136, # .. 0x11146 ; Chakma - 0x11147, # .. 0x1114F ; Unknown + 0x11136, # .. 0x11147 ; Chakma + 0x11148, # .. 0x1114F ; Unknown 0x11150, # .. 0x11176 ; Mahajani 0x11177, # .. 0x1117F ; Unknown - 0x11180, # .. 0x111CD ; Sharada - 0x111CE, # .. 0x111CF ; Unknown - 0x111D0, # .. 0x111DF ; Sharada + 0x11180, # .. 0x111DF ; Sharada 0x111E0, # .. 0x111E0 ; Unknown 0x111E1, # .. 0x111F4 ; Sinhala 0x111F5, # .. 0x111FF ; Unknown @@ -1150,12 +1151,10 @@ RANGES = [ 0x1136D, # .. 0x1136F ; Unknown 0x11370, # .. 0x11374 ; Grantha 0x11375, # .. 0x113FF ; Unknown - 0x11400, # .. 0x11459 ; Newa - 0x1145A, # .. 0x1145A ; Unknown - 0x1145B, # .. 0x1145B ; Newa + 0x11400, # .. 0x1145B ; Newa 0x1145C, # .. 0x1145C ; Unknown - 0x1145D, # .. 0x1145F ; Newa - 0x11460, # .. 0x1147F ; Unknown + 0x1145D, # .. 0x11461 ; Newa + 0x11462, # .. 0x1147F ; Unknown 0x11480, # .. 0x114C7 ; Tirhuta 0x114C8, # .. 0x114CF ; Unknown 0x114D0, # .. 0x114D9 ; Tirhuta @@ -1185,7 +1184,22 @@ RANGES = [ 0x118A0, # .. 0x118F2 ; Warang_Citi 0x118F3, # .. 0x118FE ; Unknown 0x118FF, # .. 0x118FF ; Warang_Citi - 0x11900, # .. 0x1199F ; Unknown + 0x11900, # .. 0x11906 ; Dives_Akuru + 0x11907, # .. 0x11908 ; Unknown + 0x11909, # .. 0x11909 ; Dives_Akuru + 0x1190A, # .. 0x1190B ; Unknown + 0x1190C, # .. 0x11913 ; Dives_Akuru + 0x11914, # .. 0x11914 ; Unknown + 0x11915, # .. 0x11916 ; Dives_Akuru + 0x11917, # .. 0x11917 ; Unknown + 0x11918, # .. 0x11935 ; Dives_Akuru + 0x11936, # .. 0x11936 ; Unknown + 0x11937, # .. 0x11938 ; Dives_Akuru + 0x11939, # .. 0x1193A ; Unknown + 0x1193B, # .. 0x11946 ; Dives_Akuru + 0x11947, # .. 0x1194F ; Unknown + 0x11950, # .. 0x11959 ; Dives_Akuru + 0x1195A, # .. 0x1199F ; Unknown 0x119A0, # .. 0x119A7 ; Nandinagari 0x119A8, # .. 0x119A9 ; Unknown 0x119AA, # .. 
0x119D7 ; Nandinagari @@ -1239,7 +1253,9 @@ RANGES = [ 0x11DA0, # .. 0x11DA9 ; Gunjala_Gondi 0x11DAA, # .. 0x11EDF ; Unknown 0x11EE0, # .. 0x11EF8 ; Makasar - 0x11EF9, # .. 0x11FBF ; Unknown + 0x11EF9, # .. 0x11FAF ; Unknown + 0x11FB0, # .. 0x11FB0 ; Lisu + 0x11FB1, # .. 0x11FBF ; Unknown 0x11FC0, # .. 0x11FF1 ; Tamil 0x11FF2, # .. 0x11FFE ; Unknown 0x11FFF, # .. 0x11FFF ; Tamil @@ -1290,11 +1306,17 @@ RANGES = [ 0x16FE0, # .. 0x16FE0 ; Tangut 0x16FE1, # .. 0x16FE1 ; Nushu 0x16FE2, # .. 0x16FE3 ; Common - 0x16FE4, # .. 0x16FFF ; Unknown + 0x16FE4, # .. 0x16FE4 ; Khitan_Small_Script + 0x16FE5, # .. 0x16FEF ; Unknown + 0x16FF0, # .. 0x16FF1 ; Han + 0x16FF2, # .. 0x16FFF ; Unknown 0x17000, # .. 0x187F7 ; Tangut 0x187F8, # .. 0x187FF ; Unknown - 0x18800, # .. 0x18AF2 ; Tangut - 0x18AF3, # .. 0x1AFFF ; Unknown + 0x18800, # .. 0x18AFF ; Tangut + 0x18B00, # .. 0x18CD5 ; Khitan_Small_Script + 0x18CD6, # .. 0x18CFF ; Unknown + 0x18D00, # .. 0x18D08 ; Tangut + 0x18D09, # .. 0x1AFFF ; Unknown 0x1B000, # .. 0x1B000 ; Katakana 0x1B001, # .. 0x1B11E ; Hiragana 0x1B11F, # .. 0x1B14F ; Unknown @@ -1500,12 +1522,8 @@ RANGES = [ 0x1F0D0, # .. 0x1F0D0 ; Unknown 0x1F0D1, # .. 0x1F0F5 ; Common 0x1F0F6, # .. 0x1F0FF ; Unknown - 0x1F100, # .. 0x1F10C ; Common - 0x1F10D, # .. 0x1F10F ; Unknown - 0x1F110, # .. 0x1F16C ; Common - 0x1F16D, # .. 0x1F16F ; Unknown - 0x1F170, # .. 0x1F1AC ; Common - 0x1F1AD, # .. 0x1F1E5 ; Unknown + 0x1F100, # .. 0x1F1AD ; Common + 0x1F1AE, # .. 0x1F1E5 ; Unknown 0x1F1E6, # .. 0x1F1FF ; Common 0x1F200, # .. 0x1F200 ; Hiragana 0x1F201, # .. 0x1F202 ; Common @@ -1518,12 +1536,12 @@ RANGES = [ 0x1F252, # .. 0x1F25F ; Unknown 0x1F260, # .. 0x1F265 ; Common 0x1F266, # .. 0x1F2FF ; Unknown - 0x1F300, # .. 0x1F6D5 ; Common - 0x1F6D6, # .. 0x1F6DF ; Unknown + 0x1F300, # .. 0x1F6D7 ; Common + 0x1F6D8, # .. 0x1F6DF ; Unknown 0x1F6E0, # .. 0x1F6EC ; Common 0x1F6ED, # .. 0x1F6EF ; Unknown - 0x1F6F0, # .. 0x1F6FA ; Common - 0x1F6FB, # .. 0x1F6FF ; Unknown + 0x1F6F0, # .. 
0x1F6FC ; Common + 0x1F6FD, # .. 0x1F6FF ; Unknown 0x1F700, # .. 0x1F773 ; Common 0x1F774, # .. 0x1F77F ; Unknown 0x1F780, # .. 0x1F7D8 ; Common @@ -1539,33 +1557,39 @@ RANGES = [ 0x1F860, # .. 0x1F887 ; Common 0x1F888, # .. 0x1F88F ; Unknown 0x1F890, # .. 0x1F8AD ; Common - 0x1F8AE, # .. 0x1F8FF ; Unknown - 0x1F900, # .. 0x1F90B ; Common - 0x1F90C, # .. 0x1F90C ; Unknown - 0x1F90D, # .. 0x1F971 ; Common - 0x1F972, # .. 0x1F972 ; Unknown - 0x1F973, # .. 0x1F976 ; Common - 0x1F977, # .. 0x1F979 ; Unknown - 0x1F97A, # .. 0x1F9A2 ; Common - 0x1F9A3, # .. 0x1F9A4 ; Unknown - 0x1F9A5, # .. 0x1F9AA ; Common - 0x1F9AB, # .. 0x1F9AD ; Unknown - 0x1F9AE, # .. 0x1F9CA ; Common - 0x1F9CB, # .. 0x1F9CC ; Unknown + 0x1F8AE, # .. 0x1F8AF ; Unknown + 0x1F8B0, # .. 0x1F8B1 ; Common + 0x1F8B2, # .. 0x1F8FF ; Unknown + 0x1F900, # .. 0x1F978 ; Common + 0x1F979, # .. 0x1F979 ; Unknown + 0x1F97A, # .. 0x1F9CB ; Common + 0x1F9CC, # .. 0x1F9CC ; Unknown 0x1F9CD, # .. 0x1FA53 ; Common 0x1FA54, # .. 0x1FA5F ; Unknown 0x1FA60, # .. 0x1FA6D ; Common 0x1FA6E, # .. 0x1FA6F ; Unknown - 0x1FA70, # .. 0x1FA73 ; Common - 0x1FA74, # .. 0x1FA77 ; Unknown + 0x1FA70, # .. 0x1FA74 ; Common + 0x1FA75, # .. 0x1FA77 ; Unknown 0x1FA78, # .. 0x1FA7A ; Common 0x1FA7B, # .. 0x1FA7F ; Unknown - 0x1FA80, # .. 0x1FA82 ; Common - 0x1FA83, # .. 0x1FA8F ; Unknown - 0x1FA90, # .. 0x1FA95 ; Common - 0x1FA96, # .. 0x1FFFF ; Unknown - 0x20000, # .. 0x2A6D6 ; Han - 0x2A6D7, # .. 0x2A6FF ; Unknown + 0x1FA80, # .. 0x1FA86 ; Common + 0x1FA87, # .. 0x1FA8F ; Unknown + 0x1FA90, # .. 0x1FAA8 ; Common + 0x1FAA9, # .. 0x1FAAF ; Unknown + 0x1FAB0, # .. 0x1FAB6 ; Common + 0x1FAB7, # .. 0x1FABF ; Unknown + 0x1FAC0, # .. 0x1FAC2 ; Common + 0x1FAC3, # .. 0x1FACF ; Unknown + 0x1FAD0, # .. 0x1FAD6 ; Common + 0x1FAD7, # .. 0x1FAFF ; Unknown + 0x1FB00, # .. 0x1FB92 ; Common + 0x1FB93, # .. 0x1FB93 ; Unknown + 0x1FB94, # .. 0x1FBCA ; Common + 0x1FBCB, # .. 0x1FBEF ; Unknown + 0x1FBF0, # .. 0x1FBF9 ; Common + 0x1FBFA, # .. 
0x1FFFF ; Unknown + 0x20000, # .. 0x2A6DD ; Han + 0x2A6DE, # .. 0x2A6FF ; Unknown 0x2A700, # .. 0x2B734 ; Han 0x2B735, # .. 0x2B73F ; Unknown 0x2B740, # .. 0x2B81D ; Han @@ -1575,7 +1599,9 @@ RANGES = [ 0x2CEB0, # .. 0x2EBE0 ; Han 0x2EBE1, # .. 0x2F7FF ; Unknown 0x2F800, # .. 0x2FA1D ; Han - 0x2FA1E, # .. 0xE0000 ; Unknown + 0x2FA1E, # .. 0x2FFFF ; Unknown + 0x30000, # .. 0x3134A ; Han + 0x3134B, # .. 0xE0000 ; Unknown 0xE0001, # .. 0xE0001 ; Common 0xE0002, # .. 0xE001F ; Unknown 0xE0020, # .. 0xE007F ; Common @@ -1632,9 +1658,7 @@ VALUES = [ 'Zzzz', # 0530..0530 ; Unknown 'Armn', # 0531..0556 ; Armenian 'Zzzz', # 0557..0558 ; Unknown - 'Armn', # 0559..0588 ; Armenian - 'Zyyy', # 0589..0589 ; Common - 'Armn', # 058A..058A ; Armenian + 'Armn', # 0559..058A ; Armenian 'Zzzz', # 058B..058C ; Unknown 'Armn', # 058D..058F ; Armenian 'Zzzz', # 0590..0590 ; Unknown @@ -1686,8 +1710,8 @@ VALUES = [ 'Zzzz', # 086B..089F ; Unknown 'Arab', # 08A0..08B4 ; Arabic 'Zzzz', # 08B5..08B5 ; Unknown - 'Arab', # 08B6..08BD ; Arabic - 'Zzzz', # 08BE..08D2 ; Unknown + 'Arab', # 08B6..08C7 ; Arabic + 'Zzzz', # 08C8..08D2 ; Unknown 'Arab', # 08D3..08E1 ; Arabic 'Zyyy', # 08E2..08E2 ; Common 'Arab', # 08E3..08FF ; Arabic @@ -1803,8 +1827,8 @@ VALUES = [ 'Orya', # 0B47..0B48 ; Oriya 'Zzzz', # 0B49..0B4A ; Unknown 'Orya', # 0B4B..0B4D ; Oriya - 'Zzzz', # 0B4E..0B55 ; Unknown - 'Orya', # 0B56..0B57 ; Oriya + 'Zzzz', # 0B4E..0B54 ; Unknown + 'Orya', # 0B55..0B57 ; Oriya 'Zzzz', # 0B58..0B5B ; Unknown 'Orya', # 0B5C..0B5D ; Oriya 'Zzzz', # 0B5E..0B5E ; Unknown @@ -1893,9 +1917,7 @@ VALUES = [ 'Zzzz', # 0CF0..0CF0 ; Unknown 'Knda', # 0CF1..0CF2 ; Kannada 'Zzzz', # 0CF3..0CFF ; Unknown - 'Mlym', # 0D00..0D03 ; Malayalam - 'Zzzz', # 0D04..0D04 ; Unknown - 'Mlym', # 0D05..0D0C ; Malayalam + 'Mlym', # 0D00..0D0C ; Malayalam 'Zzzz', # 0D0D..0D0D ; Unknown 'Mlym', # 0D0E..0D10 ; Malayalam 'Zzzz', # 0D11..0D11 ; Unknown @@ -1908,8 +1930,8 @@ VALUES = [ 'Mlym', # 0D54..0D63 ; Malayalam 'Zzzz', # 
0D64..0D65 ; Unknown 'Mlym', # 0D66..0D7F ; Malayalam - 'Zzzz', # 0D80..0D81 ; Unknown - 'Sinh', # 0D82..0D83 ; Sinhala + 'Zzzz', # 0D80..0D80 ; Unknown + 'Sinh', # 0D81..0D83 ; Sinhala 'Zzzz', # 0D84..0D84 ; Unknown 'Sinh', # 0D85..0D96 ; Sinhala 'Zzzz', # 0D97..0D99 ; Unknown @@ -2101,8 +2123,8 @@ VALUES = [ 'Zzzz', # 1A9A..1A9F ; Unknown 'Lana', # 1AA0..1AAD ; Tai_Tham 'Zzzz', # 1AAE..1AAF ; Unknown - 'Zinh', # 1AB0..1ABE ; Inherited - 'Zzzz', # 1ABF..1AFF ; Unknown + 'Zinh', # 1AB0..1AC0 ; Inherited + 'Zzzz', # 1AC1..1AFF ; Unknown 'Bali', # 1B00..1B4B ; Balinese 'Zzzz', # 1B4C..1B4F ; Unknown 'Bali', # 1B50..1B7C ; Balinese @@ -2222,8 +2244,8 @@ VALUES = [ 'Zyyy', # 2900..2B73 ; Common 'Zzzz', # 2B74..2B75 ; Unknown 'Zyyy', # 2B76..2B95 ; Common - 'Zzzz', # 2B96..2B97 ; Unknown - 'Zyyy', # 2B98..2BFF ; Common + 'Zzzz', # 2B96..2B96 ; Unknown + 'Zyyy', # 2B97..2BFF ; Common 'Glag', # 2C00..2C2E ; Glagolitic 'Zzzz', # 2C2F..2C2F ; Unknown 'Glag', # 2C30..2C5E ; Glagolitic @@ -2262,8 +2284,8 @@ VALUES = [ 'Ethi', # 2DD8..2DDE ; Ethiopic 'Zzzz', # 2DDF..2DDF ; Unknown 'Cyrl', # 2DE0..2DFF ; Cyrillic - 'Zyyy', # 2E00..2E4F ; Common - 'Zzzz', # 2E50..2E7F ; Unknown + 'Zyyy', # 2E00..2E52 ; Common + 'Zzzz', # 2E53..2E7F ; Unknown 'Hani', # 2E80..2E99 ; Han 'Zzzz', # 2E9A..2E9A ; Unknown 'Hani', # 2E9B..2EF3 ; Han @@ -2299,8 +2321,7 @@ VALUES = [ 'Hang', # 3131..318E ; Hangul 'Zzzz', # 318F..318F ; Unknown 'Zyyy', # 3190..319F ; Common - 'Bopo', # 31A0..31BA ; Bopomofo - 'Zzzz', # 31BB..31BF ; Unknown + 'Bopo', # 31A0..31BF ; Bopomofo 'Zyyy', # 31C0..31E3 ; Common 'Zzzz', # 31E4..31EF ; Unknown 'Kana', # 31F0..31FF ; Katakana @@ -2313,11 +2334,10 @@ VALUES = [ 'Zyyy', # 32FF..32FF ; Common 'Kana', # 3300..3357 ; Katakana 'Zyyy', # 3358..33FF ; Common - 'Hani', # 3400..4DB5 ; Han - 'Zzzz', # 4DB6..4DBF ; Unknown + 'Hani', # 3400..4DBF ; Han 'Zyyy', # 4DC0..4DFF ; Common - 'Hani', # 4E00..9FEF ; Han - 'Zzzz', # 9FF0..9FFF ; Unknown + 'Hani', # 4E00..9FFC ; Han + 'Zzzz', 
# 9FFD..9FFF ; Unknown 'Yiii', # A000..A48C ; Yi 'Zzzz', # A48D..A48F ; Unknown 'Yiii', # A490..A4C6 ; Yi @@ -2333,11 +2353,11 @@ VALUES = [ 'Zyyy', # A788..A78A ; Common 'Latn', # A78B..A7BF ; Latin 'Zzzz', # A7C0..A7C1 ; Unknown - 'Latn', # A7C2..A7C6 ; Latin - 'Zzzz', # A7C7..A7F6 ; Unknown - 'Latn', # A7F7..A7FF ; Latin - 'Sylo', # A800..A82B ; Syloti_Nagri - 'Zzzz', # A82C..A82F ; Unknown + 'Latn', # A7C2..A7CA ; Latin + 'Zzzz', # A7CB..A7F4 ; Unknown + 'Latn', # A7F5..A7FF ; Latin + 'Sylo', # A800..A82C ; Syloti_Nagri + 'Zzzz', # A82D..A82F ; Unknown 'Zyyy', # A830..A839 ; Common 'Zzzz', # A83A..A83F ; Unknown 'Phag', # A840..A877 ; Phags_Pa @@ -2390,8 +2410,9 @@ VALUES = [ 'Zyyy', # AB5B..AB5B ; Common 'Latn', # AB5C..AB64 ; Latin 'Grek', # AB65..AB65 ; Greek - 'Latn', # AB66..AB67 ; Latin - 'Zzzz', # AB68..AB6F ; Unknown + 'Latn', # AB66..AB69 ; Latin + 'Zyyy', # AB6A..AB6B ; Common + 'Zzzz', # AB6C..AB6F ; Unknown 'Cher', # AB70..ABBF ; Cherokee 'Mtei', # ABC0..ABED ; Meetei_Mayek 'Zzzz', # ABEE..ABEF ; Unknown @@ -2496,8 +2517,8 @@ VALUES = [ 'Zyyy', # 10137..1013F ; Common 'Grek', # 10140..1018E ; Greek 'Zzzz', # 1018F..1018F ; Unknown - 'Zyyy', # 10190..1019B ; Common - 'Zzzz', # 1019C..1019F ; Unknown + 'Zyyy', # 10190..1019C ; Common + 'Zzzz', # 1019D..1019F ; Unknown 'Grek', # 101A0..101A0 ; Greek 'Zzzz', # 101A1..101CF ; Unknown 'Zyyy', # 101D0..101FC ; Common @@ -2633,11 +2654,19 @@ VALUES = [ 'Rohg', # 10D30..10D39 ; Hanifi_Rohingya 'Zzzz', # 10D3A..10E5F ; Unknown 'Arab', # 10E60..10E7E ; Arabic - 'Zzzz', # 10E7F..10EFF ; Unknown + 'Zzzz', # 10E7F..10E7F ; Unknown + 'Yezi', # 10E80..10EA9 ; Yezidi + 'Zzzz', # 10EAA..10EAA ; Unknown + 'Yezi', # 10EAB..10EAD ; Yezidi + 'Zzzz', # 10EAE..10EAF ; Unknown + 'Yezi', # 10EB0..10EB1 ; Yezidi + 'Zzzz', # 10EB2..10EFF ; Unknown 'Sogo', # 10F00..10F27 ; Old_Sogdian 'Zzzz', # 10F28..10F2F ; Unknown 'Sogd', # 10F30..10F59 ; Sogdian - 'Zzzz', # 10F5A..10FDF ; Unknown + 'Zzzz', # 10F5A..10FAF ; Unknown + 'Chrs', 
# 10FB0..10FCB ; Chorasmian + 'Zzzz', # 10FCC..10FDF ; Unknown 'Elym', # 10FE0..10FF6 ; Elymaic 'Zzzz', # 10FF7..10FFF ; Unknown 'Brah', # 11000..1104D ; Brahmi @@ -2655,13 +2684,11 @@ VALUES = [ 'Zzzz', # 110FA..110FF ; Unknown 'Cakm', # 11100..11134 ; Chakma 'Zzzz', # 11135..11135 ; Unknown - 'Cakm', # 11136..11146 ; Chakma - 'Zzzz', # 11147..1114F ; Unknown + 'Cakm', # 11136..11147 ; Chakma + 'Zzzz', # 11148..1114F ; Unknown 'Mahj', # 11150..11176 ; Mahajani 'Zzzz', # 11177..1117F ; Unknown - 'Shrd', # 11180..111CD ; Sharada - 'Zzzz', # 111CE..111CF ; Unknown - 'Shrd', # 111D0..111DF ; Sharada + 'Shrd', # 11180..111DF ; Sharada 'Zzzz', # 111E0..111E0 ; Unknown 'Sinh', # 111E1..111F4 ; Sinhala 'Zzzz', # 111F5..111FF ; Unknown @@ -2714,12 +2741,10 @@ VALUES = [ 'Zzzz', # 1136D..1136F ; Unknown 'Gran', # 11370..11374 ; Grantha 'Zzzz', # 11375..113FF ; Unknown - 'Newa', # 11400..11459 ; Newa - 'Zzzz', # 1145A..1145A ; Unknown - 'Newa', # 1145B..1145B ; Newa + 'Newa', # 11400..1145B ; Newa 'Zzzz', # 1145C..1145C ; Unknown - 'Newa', # 1145D..1145F ; Newa - 'Zzzz', # 11460..1147F ; Unknown + 'Newa', # 1145D..11461 ; Newa + 'Zzzz', # 11462..1147F ; Unknown 'Tirh', # 11480..114C7 ; Tirhuta 'Zzzz', # 114C8..114CF ; Unknown 'Tirh', # 114D0..114D9 ; Tirhuta @@ -2749,7 +2774,22 @@ VALUES = [ 'Wara', # 118A0..118F2 ; Warang_Citi 'Zzzz', # 118F3..118FE ; Unknown 'Wara', # 118FF..118FF ; Warang_Citi - 'Zzzz', # 11900..1199F ; Unknown + 'Diak', # 11900..11906 ; Dives_Akuru + 'Zzzz', # 11907..11908 ; Unknown + 'Diak', # 11909..11909 ; Dives_Akuru + 'Zzzz', # 1190A..1190B ; Unknown + 'Diak', # 1190C..11913 ; Dives_Akuru + 'Zzzz', # 11914..11914 ; Unknown + 'Diak', # 11915..11916 ; Dives_Akuru + 'Zzzz', # 11917..11917 ; Unknown + 'Diak', # 11918..11935 ; Dives_Akuru + 'Zzzz', # 11936..11936 ; Unknown + 'Diak', # 11937..11938 ; Dives_Akuru + 'Zzzz', # 11939..1193A ; Unknown + 'Diak', # 1193B..11946 ; Dives_Akuru + 'Zzzz', # 11947..1194F ; Unknown + 'Diak', # 11950..11959 ; 
Dives_Akuru + 'Zzzz', # 1195A..1199F ; Unknown 'Nand', # 119A0..119A7 ; Nandinagari 'Zzzz', # 119A8..119A9 ; Unknown 'Nand', # 119AA..119D7 ; Nandinagari @@ -2803,7 +2843,9 @@ VALUES = [ 'Gong', # 11DA0..11DA9 ; Gunjala_Gondi 'Zzzz', # 11DAA..11EDF ; Unknown 'Maka', # 11EE0..11EF8 ; Makasar - 'Zzzz', # 11EF9..11FBF ; Unknown + 'Zzzz', # 11EF9..11FAF ; Unknown + 'Lisu', # 11FB0..11FB0 ; Lisu + 'Zzzz', # 11FB1..11FBF ; Unknown 'Taml', # 11FC0..11FF1 ; Tamil 'Zzzz', # 11FF2..11FFE ; Unknown 'Taml', # 11FFF..11FFF ; Tamil @@ -2854,11 +2896,17 @@ VALUES = [ 'Tang', # 16FE0..16FE0 ; Tangut 'Nshu', # 16FE1..16FE1 ; Nushu 'Zyyy', # 16FE2..16FE3 ; Common - 'Zzzz', # 16FE4..16FFF ; Unknown + 'Kits', # 16FE4..16FE4 ; Khitan_Small_Script + 'Zzzz', # 16FE5..16FEF ; Unknown + 'Hani', # 16FF0..16FF1 ; Han + 'Zzzz', # 16FF2..16FFF ; Unknown 'Tang', # 17000..187F7 ; Tangut 'Zzzz', # 187F8..187FF ; Unknown - 'Tang', # 18800..18AF2 ; Tangut - 'Zzzz', # 18AF3..1AFFF ; Unknown + 'Tang', # 18800..18AFF ; Tangut + 'Kits', # 18B00..18CD5 ; Khitan_Small_Script + 'Zzzz', # 18CD6..18CFF ; Unknown + 'Tang', # 18D00..18D08 ; Tangut + 'Zzzz', # 18D09..1AFFF ; Unknown 'Kana', # 1B000..1B000 ; Katakana 'Hira', # 1B001..1B11E ; Hiragana 'Zzzz', # 1B11F..1B14F ; Unknown @@ -3064,12 +3112,8 @@ VALUES = [ 'Zzzz', # 1F0D0..1F0D0 ; Unknown 'Zyyy', # 1F0D1..1F0F5 ; Common 'Zzzz', # 1F0F6..1F0FF ; Unknown - 'Zyyy', # 1F100..1F10C ; Common - 'Zzzz', # 1F10D..1F10F ; Unknown - 'Zyyy', # 1F110..1F16C ; Common - 'Zzzz', # 1F16D..1F16F ; Unknown - 'Zyyy', # 1F170..1F1AC ; Common - 'Zzzz', # 1F1AD..1F1E5 ; Unknown + 'Zyyy', # 1F100..1F1AD ; Common + 'Zzzz', # 1F1AE..1F1E5 ; Unknown 'Zyyy', # 1F1E6..1F1FF ; Common 'Hira', # 1F200..1F200 ; Hiragana 'Zyyy', # 1F201..1F202 ; Common @@ -3082,12 +3126,12 @@ VALUES = [ 'Zzzz', # 1F252..1F25F ; Unknown 'Zyyy', # 1F260..1F265 ; Common 'Zzzz', # 1F266..1F2FF ; Unknown - 'Zyyy', # 1F300..1F6D5 ; Common - 'Zzzz', # 1F6D6..1F6DF ; Unknown + 'Zyyy', # 1F300..1F6D7 ; Common 
+ 'Zzzz', # 1F6D8..1F6DF ; Unknown 'Zyyy', # 1F6E0..1F6EC ; Common 'Zzzz', # 1F6ED..1F6EF ; Unknown - 'Zyyy', # 1F6F0..1F6FA ; Common - 'Zzzz', # 1F6FB..1F6FF ; Unknown + 'Zyyy', # 1F6F0..1F6FC ; Common + 'Zzzz', # 1F6FD..1F6FF ; Unknown 'Zyyy', # 1F700..1F773 ; Common 'Zzzz', # 1F774..1F77F ; Unknown 'Zyyy', # 1F780..1F7D8 ; Common @@ -3103,33 +3147,39 @@ VALUES = [ 'Zyyy', # 1F860..1F887 ; Common 'Zzzz', # 1F888..1F88F ; Unknown 'Zyyy', # 1F890..1F8AD ; Common - 'Zzzz', # 1F8AE..1F8FF ; Unknown - 'Zyyy', # 1F900..1F90B ; Common - 'Zzzz', # 1F90C..1F90C ; Unknown - 'Zyyy', # 1F90D..1F971 ; Common - 'Zzzz', # 1F972..1F972 ; Unknown - 'Zyyy', # 1F973..1F976 ; Common - 'Zzzz', # 1F977..1F979 ; Unknown - 'Zyyy', # 1F97A..1F9A2 ; Common - 'Zzzz', # 1F9A3..1F9A4 ; Unknown - 'Zyyy', # 1F9A5..1F9AA ; Common - 'Zzzz', # 1F9AB..1F9AD ; Unknown - 'Zyyy', # 1F9AE..1F9CA ; Common - 'Zzzz', # 1F9CB..1F9CC ; Unknown + 'Zzzz', # 1F8AE..1F8AF ; Unknown + 'Zyyy', # 1F8B0..1F8B1 ; Common + 'Zzzz', # 1F8B2..1F8FF ; Unknown + 'Zyyy', # 1F900..1F978 ; Common + 'Zzzz', # 1F979..1F979 ; Unknown + 'Zyyy', # 1F97A..1F9CB ; Common + 'Zzzz', # 1F9CC..1F9CC ; Unknown 'Zyyy', # 1F9CD..1FA53 ; Common 'Zzzz', # 1FA54..1FA5F ; Unknown 'Zyyy', # 1FA60..1FA6D ; Common 'Zzzz', # 1FA6E..1FA6F ; Unknown - 'Zyyy', # 1FA70..1FA73 ; Common - 'Zzzz', # 1FA74..1FA77 ; Unknown + 'Zyyy', # 1FA70..1FA74 ; Common + 'Zzzz', # 1FA75..1FA77 ; Unknown 'Zyyy', # 1FA78..1FA7A ; Common 'Zzzz', # 1FA7B..1FA7F ; Unknown - 'Zyyy', # 1FA80..1FA82 ; Common - 'Zzzz', # 1FA83..1FA8F ; Unknown - 'Zyyy', # 1FA90..1FA95 ; Common - 'Zzzz', # 1FA96..1FFFF ; Unknown - 'Hani', # 20000..2A6D6 ; Han - 'Zzzz', # 2A6D7..2A6FF ; Unknown + 'Zyyy', # 1FA80..1FA86 ; Common + 'Zzzz', # 1FA87..1FA8F ; Unknown + 'Zyyy', # 1FA90..1FAA8 ; Common + 'Zzzz', # 1FAA9..1FAAF ; Unknown + 'Zyyy', # 1FAB0..1FAB6 ; Common + 'Zzzz', # 1FAB7..1FABF ; Unknown + 'Zyyy', # 1FAC0..1FAC2 ; Common + 'Zzzz', # 1FAC3..1FACF ; Unknown + 'Zyyy', # 1FAD0..1FAD6 ; 
Common + 'Zzzz', # 1FAD7..1FAFF ; Unknown + 'Zyyy', # 1FB00..1FB92 ; Common + 'Zzzz', # 1FB93..1FB93 ; Unknown + 'Zyyy', # 1FB94..1FBCA ; Common + 'Zzzz', # 1FBCB..1FBEF ; Unknown + 'Zyyy', # 1FBF0..1FBF9 ; Common + 'Zzzz', # 1FBFA..1FFFF ; Unknown + 'Hani', # 20000..2A6DD ; Han + 'Zzzz', # 2A6DE..2A6FF ; Unknown 'Hani', # 2A700..2B734 ; Han 'Zzzz', # 2B735..2B73F ; Unknown 'Hani', # 2B740..2B81D ; Han @@ -3139,7 +3189,9 @@ VALUES = [ 'Hani', # 2CEB0..2EBE0 ; Han 'Zzzz', # 2EBE1..2F7FF ; Unknown 'Hani', # 2F800..2FA1D ; Han - 'Zzzz', # 2FA1E..E0000 ; Unknown + 'Zzzz', # 2FA1E..2FFFF ; Unknown + 'Hani', # 30000..3134A ; Han + 'Zzzz', # 3134B..E0000 ; Unknown 'Zyyy', # E0001..E0001 ; Common 'Zzzz', # E0002..E001F ; Unknown 'Zyyy', # E0020..E007F ; Common @@ -3172,10 +3224,12 @@ NAMES = { 'Cari': 'Carian', 'Cham': 'Cham', 'Cher': 'Cherokee', + 'Chrs': 'Chorasmian', 'Copt': 'Coptic', 'Cprt': 'Cypriot', 'Cyrl': 'Cyrillic', 'Deva': 'Devanagari', + 'Diak': 'Dives_Akuru', 'Dogr': 'Dogra', 'Dsrt': 'Deseret', 'Dupl': 'Duployan', @@ -3210,6 +3264,7 @@ NAMES = { 'Khar': 'Kharoshthi', 'Khmr': 'Khmer', 'Khoj': 'Khojki', + 'Kits': 'Khitan_Small_Script', 'Knda': 'Kannada', 'Kthi': 'Kaithi', 'Lana': 'Tai_Tham', @@ -3298,6 +3353,7 @@ NAMES = { 'Wcho': 'Wancho', 'Xpeo': 'Old_Persian', 'Xsux': 'Cuneiform', + 'Yezi': 'Yezidi', 'Yiii': 'Yi', 'Zanb': 'Zanabazar_Square', 'Zinh': 'Inherited', diff --git a/Tests/unicodedata_test.py b/Tests/unicodedata_test.py index affdd2668..a1ab6f122 100644 --- a/Tests/unicodedata_test.py +++ b/Tests/unicodedata_test.py @@ -161,7 +161,7 @@ def test_script_extension(): assert unicodedata.script_extension(unichr(0x0378)) == {"Zzzz"} assert unicodedata.script_extension(unichr(0x10FFFF)) == {"Zzzz"} - assert unicodedata.script_extension("\u0660") == {'Arab', 'Thaa'} + assert unicodedata.script_extension("\u0660") == {'Arab', 'Thaa', 'Yezi'} assert unicodedata.script_extension("\u0964") == { 'Beng', 'Deva', 'Dogr', 'Gong', 'Gonm', 'Gran', 'Gujr', 'Guru', 
'Knda', 'Mahj', 'Mlym', 'Nand', 'Orya', 'Sind', 'Sinh', 'Sylo', 'Takr', 'Taml', diff --git a/requirements.txt b/requirements.txt index 02ca490c1..acff4343a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # extension 'brotlipy' on PyPy brotli==1.0.7; platform_python_implementation != "PyPy" brotlipy==0.7.0; platform_python_implementation == "PyPy" -unicodedata2==12.1.0; python_version < '3.8' and platform_python_implementation != "PyPy" +unicodedata2==13.0.0; python_version < '3.9' and platform_python_implementation != "PyPy" scipy==1.4.1; platform_python_implementation != "PyPy" munkres==1.1.2; platform_python_implementation == "PyPy" zopfli==0.1.6 diff --git a/setup.py b/setup.py index 1e54f6dff..abda7c7e4 100755 --- a/setup.py +++ b/setup.py @@ -52,8 +52,8 @@ extras_require = { # the unicodedata2 extension module doesn't work on PyPy. - # Python 3.8 already has Unicode 12.1, so the backport is not needed. + # Python 3.9 already has Unicode 13.0, so the backport is not needed. ( - "unicodedata2 >= 12.1.0; " - "python_version < '3.8' and platform_python_implementation != 'PyPy'" + "unicodedata2 >= 13.0.0; " + "python_version < '3.9' and platform_python_implementation != 'PyPy'" ), ], # for graphite type tables in ttLib/tables (Silf, Glat, Gloc)