Encode-Guess-Educated

 view release on metacpan or  search on metacpan

lib/Encode/Guess/Educated.pm  view on Meta::CPAN

    0x001F45 =>     491,   #  ὅ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
    0x001F4D =>       6,   #  Ὅ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
    0x001F43 =>       4,   #  ὃ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH DASIA AND VARIA
    0x0003CC =>    8044,   #  ό  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH TONOS
    0x001F78 =>     241,   #  ὸ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMICRON WITH VARIA
    0x0003C0 =>    9528,   #  π  gc=Ll   sc=Greek      GREEK SMALL LETTER PI
    0x0003A0 =>     217,   #  Π  gc=Lu   sc=Greek      GREEK CAPITAL LETTER PI
    0x0003D8 =>     125,   #  Ϙ  gc=Lu   sc=Greek      GREEK LETTER ARCHAIC KOPPA
    0x0003C1 =>   15430,   #  ρ  gc=Ll   sc=Greek      GREEK SMALL LETTER RHO
    0x0003A1 =>      27,   #  Ρ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER RHO
    0x001FE5 =>     476,   #  ῥ  gc=Ll   sc=Greek      GREEK SMALL LETTER RHO WITH DASIA
    0x001FEC =>       6,   #  Ῥ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER RHO WITH DASIA
    0x0003C3 =>   10221,   #  σ  gc=Ll   sc=Greek      GREEK SMALL LETTER SIGMA
    0x0003A3 =>     313,   #  Σ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER SIGMA
    0x001D6BA =>      13,   #  𝚺  gc=Lu   sc=Common     MATHEMATICAL BOLD CAPITAL SIGMA
    0x0003C2 =>   18113,   #  ς  gc=Ll   sc=Greek      GREEK SMALL LETTER FINAL SIGMA
    0x0003C4 =>   14119,   #  τ  gc=Ll   sc=Greek      GREEK SMALL LETTER TAU
    0x0003A4 =>      89,   #  Τ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER TAU
    0x0003C5 =>    4269,   #  υ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON
    0x0003A5 =>      31,   #  Υ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER UPSILON
    0x001F51 =>    1287,   #  ὑ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH DASIA
    0x001F59 =>      14,   #  Ὑ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER UPSILON WITH DASIA
    0x001F55 =>     277,   #  ὕ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH DASIA AND OXIA
    0x001F5D =>       5,   #  Ὕ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
    0x0003CD =>    2857,   #  ύ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH TONOS
    0x001F7A =>      32,   #  ὺ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH VARIA
    0x001FE0 =>     771,   #  ῠ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH VRACHY
    0x0003CB =>       4,   #  ϋ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH DIALYTIKA
    0x0003B0 =>       1,   #  ΰ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
    0x001FE1 =>     365,   #  ῡ  gc=Ll   sc=Greek      GREEK SMALL LETTER UPSILON WITH MACRON
    0x0003C6 =>    4597,   #  φ  gc=Ll   sc=Greek      GREEK SMALL LETTER PHI
    0x0003A6 =>      73,   #  Φ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER PHI
    0x0003C7 =>    3506,   #  χ  gc=Ll   sc=Greek      GREEK SMALL LETTER CHI
    0x0003A7 =>      90,   #  Χ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER CHI
    0x0003C8 =>     777,   #  ψ  gc=Ll   sc=Greek      GREEK SMALL LETTER PSI
    0x0003A8 =>      29,   #  Ψ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER PSI
    0x0003C9 =>    3872,   #  ω  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA
    0x0003A9 =>      72,   #  Ω  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMEGA
    0x001F61 =>     177,   #  ὡ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH DASIA
    0x001F69 =>       4,   #  Ὡ  gc=Lu   sc=Greek      GREEK CAPITAL LETTER OMEGA WITH DASIA
    0x001F65 =>      70,   #  ὥ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA
    0x001FA1 =>      47,   #  ᾡ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
    0x0003CE =>     870,   #  ώ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH TONOS
    0x001FF4 =>       6,   #  ῴ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
    0x001F7C =>      12,   #  ὼ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH VARIA
    0x001FF3 =>     221,   #  ῳ  gc=Ll   sc=Greek      GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
    0x0003E1 =>       2,   #  ϡ  gc=Ll   sc=Greek      GREEK SMALL LETTER SAMPI
    0x002C84 =>       2,   #  Ⲅ  gc=Lu   sc=Coptic     COPTIC CAPITAL LETTER GAMMA
    0x002CA4 =>       7,   #  Ⲥ  gc=Lu   sc=Coptic     COPTIC CAPITAL LETTER SIMA
    0x0004A8 =>       1,   #  Ҩ  gc=Lu   sc=Cyrillic   CYRILLIC CAPITAL LETTER ABKHASIAN HA
    0x0005D7 =>       6,   #  ח  gc=Lo   sc=Hebrew     HEBREW LETTER HET
    0x010907 =>       1,   #  𐤇  gc=Lo   sc=Phoenician PHOENICIAN LETTER HET
    0x01090B =>       1,   #  𐤋  gc=Lo   sc=Phoenician PHOENICIAN LETTER LAMD
    0x010913 =>       1,   #  𐤓  gc=Lo   sc=Phoenician PHOENICIAN LETTER ROSH
    0x00FEE9 =>       1,   #  ﻩ  gc=Lo   sc=Arabic     ARABIC LETTER HEH ISOLATED FORM
    0x0016B9 =>       1,   #  ᚹ  gc=Lo   sc=Runic      RUNIC LETTER WUNJO WYNN W
    0x00209F =>      32,   # <unnamed codepoint>
);

my %elsevier_training = (
    0x00202A =>      21,   # <-> gc=Cf   sc=Common     LEFT-TO-RIGHT EMBEDDING
    0x002061 =>     154,   # <-> gc=Cf   sc=Common     FUNCTION APPLICATION
    0x00202B =>       1,   # <-> gc=Cf   sc=Common     RIGHT-TO-LEFT EMBEDDING
    0x002062 =>     143,   # <-> gc=Cf   sc=Common     INVISIBLE TIMES
    0x000092 =>      16,   # <-> gc=Cc   sc=Common     <control>
    0x000341 =>       6,   # ◌ ́  gc=Mn   sc=Inherited  COMBINING ACUTE TONE MARK
    0x000301 =>   57910,   # ◌ ́  gc=Mn   sc=Inherited  COMBINING ACUTE ACCENT
    0x000340 =>       4,   # ◌ ̀  gc=Mn   sc=Inherited  COMBINING GRAVE TONE MARK
    0x000300 =>    1230,   # ◌ ̀  gc=Mn   sc=Inherited  COMBINING GRAVE ACCENT
    0x000306 =>    1526,   # ◌ ̆  gc=Mn   sc=Inherited  COMBINING BREVE
    0x000302 =>    7848,   # ◌ ̂  gc=Mn   sc=Inherited  COMBINING CIRCUMFLEX ACCENT
    0x00030C =>    1919,   # ◌ ̌  gc=Mn   sc=Inherited  COMBINING CARON
    0x00030A =>     724,   # ◌ ̊  gc=Mn   sc=Inherited  COMBINING RING ABOVE
    0x000342 =>       4,   # ◌ ͂  gc=Mn   sc=Inherited  COMBINING GREEK PERISPOMENI
    0x000308 =>   13366,   # ◌ ̈  gc=Mn   sc=Inherited  COMBINING DIAERESIS
    0x00030B =>     516,   # ◌ ̋  gc=Mn   sc=Inherited  COMBINING DOUBLE ACUTE ACCENT
    0x000303 =>    2475,   # ◌ ̃  gc=Mn   sc=Inherited  COMBINING TILDE
    0x000307 =>   16962,   # ◌ ̇  gc=Mn   sc=Inherited  COMBINING DOT ABOVE
    0x000338 =>      33,   # ◌ ̸  gc=Mn   sc=Inherited  COMBINING LONG SOLIDUS OVERLAY
    0x000327 =>    1947,   # ◌ ̧  gc=Mn   sc=Inherited  COMBINING CEDILLA
    0x000328 =>     498,   # ◌ ̨  gc=Mn   sc=Inherited  COMBINING OGONEK
    0x000304 =>   10335,   # ◌ ̄  gc=Mn   sc=Inherited  COMBINING MACRON
    0x00032C =>       9,   # ◌ ̬  gc=Mn   sc=Inherited  COMBINING CARON BELOW
    0x00033A =>       6,   # ◌ ̺  gc=Mn   sc=Inherited  COMBINING INVERTED BRIDGE BELOW
    0x00033B =>      18,   # ◌ ̻  gc=Mn   sc=Inherited  COMBINING SQUARE BELOW
    0x00033C =>       1,   # ◌ ̼  gc=Mn   sc=Inherited  COMBINING SEAGULL BELOW
    0x000336 =>     178,   # ◌ ̶  gc=Mn   sc=Inherited  COMBINING LONG STROKE OVERLAY
    0x000337 =>      39,   # ◌ ̷  gc=Mn   sc=Inherited  COMBINING SHORT SOLIDUS OVERLAY
    0x0020DD =>      13,   # ◌ ⃝  gc=Me   sc=Inherited  COMBINING ENCLOSING CIRCLE
    0x0020DF =>       6,   # ◌ ⃟  gc=Me   sc=Inherited  COMBINING ENCLOSING DIAMOND
    0x000321 =>       4,   # ◌ ̡  gc=Mn   sc=Inherited  COMBINING PALATALIZED HOOK BELOW
    0x000322 =>      27,   # ◌ ̢  gc=Mn   sc=Inherited  COMBINING RETROFLEX HOOK BELOW
    0x000323 =>      15,   # ◌ ̣  gc=Mn   sc=Inherited  COMBINING DOT BELOW
    0x000326 =>     109,   # ◌ ̦  gc=Mn   sc=Inherited  COMBINING COMMA BELOW
    0x000331 =>    1593,   # ◌ ̱  gc=Mn   sc=Inherited  COMBINING MACRON BELOW
    0x000335 =>     139,   # ◌ ̵  gc=Mn   sc=Inherited  COMBINING SHORT STROKE OVERLAY
    0x0005B9 =>       1,   # ◌ ֹ  gc=Mn   sc=Hebrew     HEBREW POINT HOLAM
    0x0005BC =>       1,   # ◌ ּ  gc=Mn   sc=Hebrew     HEBREW POINT DAGESH OR MAPIQ
    0x000650 =>       1,   # ◌ ِ  gc=Mn   sc=Inherited  ARABIC KASRA
    0x0020D0 =>       3,   # ◌ ⃐  gc=Mn   sc=Inherited  COMBINING LEFT HARPOON ABOVE
    0x0020D1 =>       2,   # ◌ ⃑  gc=Mn   sc=Inherited  COMBINING RIGHT HARPOON ABOVE
    0x0020D7 =>     239,   # ◌ ⃗  gc=Mn   sc=Inherited  COMBINING RIGHT ARROW ABOVE
    0x0020DB =>       7,   # ◌ ⃛  gc=Mn   sc=Inherited  COMBINING THREE DOTS ABOVE
    0x003000 =>       1,   # <-> gc=Zs   sc=Common     IDEOGRAPHIC SPACE
    0x002002 =>       9,   # <-> gc=Zs   sc=Common     EN SPACE
    0x002003 =>      67,   # <-> gc=Zs   sc=Common     EM SPACE
    0x002005 =>      12,   # <-> gc=Zs   sc=Common     FOUR-PER-EM SPACE
    0x002008 =>  162990,   # <-> gc=Zs   sc=Common     PUNCTUATION SPACE
    0x002009 =>    7191,   # <-> gc=Zs   sc=Common     THIN SPACE
    0x00200A =>       2,   # <-> gc=Zs   sc=Common     HAIR SPACE
    0x0000A0 =>  249770,   # <-> gc=Zs   sc=Common     NO-BREAK SPACE
    0x0000B4 =>    1587,   #  ´  gc=Sk   sc=Common     ACUTE ACCENT
    0x000384 =>      82,   #  ΄  gc=Sk   sc=Greek      GREEK TONOS
    0x0002DC =>     316,   #  ˜  gc=Sk   sc=Common     SMALL TILDE
    0x0000AF =>     148,   #  ¯  gc=Sk   sc=Common     MACRON
    0x0002D8 =>       8,   #  ˘  gc=Sk   sc=Common     BREVE
    0x0002D9 =>      53,   #  ˙  gc=Sk   sc=Common     DOT ABOVE
    0x0000A8 =>    1445,   #  ¨  gc=Sk   sc=Common     DIAERESIS
    0x000385 =>       4,   #  ΅  gc=Sk   sc=Common     GREEK DIALYTIKA TONOS
    0x0002DA =>      69,   #  ˚  gc=Sk   sc=Common     RING ABOVE
    0x0002DD =>     239,   #  ˝  gc=Sk   sc=Common     DOUBLE ACUTE ACCENT
    0x0000B8 =>      42,   #  ¸  gc=Sk   sc=Common     CEDILLA
    0x0002DB =>       2,   #  ˛  gc=Sk   sc=Common     OGONEK

lib/Encode/Guess/Educated.pm  view on Meta::CPAN

    0x00C758 =>       4,  #  의 gc=Lo   sc=Hangul     HANGUL SYLLABLE YI
    0x00C774 =>      11,  #  이 gc=Lo   sc=Hangul     HANGUL SYLLABLE I
    0x00C778 =>      14,  #  인 gc=Lo   sc=Hangul     HANGUL SYLLABLE IN
    0x00C77C =>       1,  #  일 gc=Lo   sc=Hangul     HANGUL SYLLABLE IL
    0x00C77D =>       6,  #  읽 gc=Lo   sc=Hangul     HANGUL SYLLABLE ILG
    0x00C785 =>       2,  #  입 gc=Lo   sc=Hangul     HANGUL SYLLABLE IB
    0x00C788 =>       3,  #  있 gc=Lo   sc=Hangul     HANGUL SYLLABLE ISS
    0x00C790 =>       3,  #  자 gc=Lo   sc=Hangul     HANGUL SYLLABLE JA
    0x00C798 =>       4,  #  잘 gc=Lo   sc=Hangul     HANGUL SYLLABLE JAL
    0x00C7A5 =>       1,  #  장 gc=Lo   sc=Hangul     HANGUL SYLLABLE JANG
    0x00C801 =>       2,  #  적 gc=Lo   sc=Hangul     HANGUL SYLLABLE JEOG
    0x00C804 =>       4,  #  전 gc=Lo   sc=Hangul     HANGUL SYLLABLE JEON
    0x00C815 =>       4,  #  정 gc=Lo   sc=Hangul     HANGUL SYLLABLE JEONG
    0x00C885 =>       4,  #  종 gc=Lo   sc=Hangul     HANGUL SYLLABLE JONG
    0x00C8FC =>       1,  #  주 gc=Lo   sc=Hangul     HANGUL SYLLABLE JU
    0x00C911 =>       7,  #  중 gc=Lo   sc=Hangul     HANGUL SYLLABLE JUNG
    0x00C9C0 =>       6,  #  지 gc=Lo   sc=Hangul     HANGUL SYLLABLE JI
    0x00CC45 =>       7,  #  책 gc=Lo   sc=Hangul     HANGUL SYLLABLE CAEG
    0x00CD5C =>       2,  #  최 gc=Lo   sc=Hangul     HANGUL SYLLABLE COE
    0x00CE58 =>       2,  #  치 gc=Lo   sc=Hangul     HANGUL SYLLABLE CI
    0x00CE5C =>       2,  #  친 gc=Lo   sc=Hangul     HANGUL SYLLABLE CIN
    0x00D0C0 =>       1,  #  타 gc=Lo   sc=Hangul     HANGUL SYLLABLE TA
    0x00D2B9 =>       1,  #  특 gc=Lo   sc=Hangul     HANGUL SYLLABLE TEUG
    0x00D3B8 =>       1,  #  편 gc=Lo   sc=Hangul     HANGUL SYLLABLE PYEON
    0x00D558 =>       7,  #  하 gc=Lo   sc=Hangul     HANGUL SYLLABLE HA
    0x00D559 =>       4,  #  학 gc=Lo   sc=Hangul     HANGUL SYLLABLE HAG
    0x00D55C =>      24,  #  한 gc=Lo   sc=Hangul     HANGUL SYLLABLE HAN
    0x00D560 =>       1,  #  할 gc=Lo   sc=Hangul     HANGUL SYLLABLE HAL
    0x00D56D =>       2,  #  항 gc=Lo   sc=Hangul     HANGUL SYLLABLE HANG
    0x00D574 =>       4,  #  해 gc=Lo   sc=Hangul     HANGUL SYLLABLE HAE
    0x00D638 =>       2,  #  호 gc=Lo   sc=Hangul     HANGUL SYLLABLE HO
    0x00D6C4 =>       1,  #  후 gc=Lo   sc=Hangul     HANGUL SYLLABLE HU
    0x00FF95 =>       1,  #  ユ  gc=Lo   sc=Katakana   HALFWIDTH KATAKANA LETTER YU
    0x006240 =>       2,  #  所 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-6240
    0x006587 =>       2,  #  文 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-6587
    0x006709 =>       2,  #  有 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-6709
    0x00689D =>       2,  #  條 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-689D
    0x007368 =>       1,  #  獨 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-7368
    0x007974 =>       1,  #  祴 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-7974
    0x008230 =>       1,  #  舰 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-8230
    0x008713 =>       3,  #  蜓 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-8713
    0x009792 =>       1,  #  鞒 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-9792
    0x009794 =>       1,  #  鞔 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-9794
    0x0036E7 =>       1,  #  㛧 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36E7
    0x0036E8 =>       1,  #  㛨 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36E8
    0x0036E9 =>       1,  #  㛩 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36E9
    0x0036EA =>       1,  #  㛪 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36EA
    0x0036EB =>       1,  #  㛫 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36EB
    0x0036EC =>       1,  #  㛬 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36EC
    0x0036ED =>       1,  #  㛭 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36ED
    0x0036EE =>       1,  #  㛮 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36EE
    0x0036EF =>       1,  #  㛯 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-36EF
    0x003B12 =>       1,  #  㬒 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-3B12
    0x003B21 =>       1,  #  㬡 gc=Lo   sc=Han        CJK UNIFIED IDEOGRAPH-3B21
    0x000085 =>     264,  # <--->gc=Cc   sc=Common     <control>
    0x000086 =>      14,  # <--->gc=Cc   sc=Common     <control>
    0x002061 =>   10097,  # <--->gc=Cf   sc=Common     FUNCTION APPLICATION
    0x00206C =>      17,  # <--->gc=Cf   sc=Common     INHIBIT ARABIC FORM SHAPING
    0x00206A =>       1,  # <--->gc=Cf   sc=Common     INHIBIT SYMMETRIC SWAPPING
    0x002062 =>     204,  # <--->gc=Cf   sc=Common     INVISIBLE TIMES
    0x00202A =>       1,  # <--->gc=Cf   sc=Common     LEFT-TO-RIGHT EMBEDDING
    0x00200E =>       5,  # <--->gc=Cf   sc=Common     LEFT-TO-RIGHT MARK
    0x0000AD =>    3587,  # <--->gc=Cf   sc=Common     SOFT HYPHEN
    0x00FEFF =>       9,  # <--->gc=Cf   sc=Common     ZERO WIDTH NO-BREAK SPACE
    0x00200B =>     128,  # <--->gc=Cf   sc=Common     ZERO WIDTH SPACE
    0x00200D =>      59,  # <--->gc=Cf   sc=Inherited  ZERO WIDTH JOINER
    0x100002 =>       2,  # <--->gc=Co   sc=Unknown    <unnamed code point in block=Supplementary Private Use Area-B>
    0x002028 =>   10940,  # <--->gc=Zl   sc=Common     LINE SEPARATOR
    0x002003 =>  602377,  # <--->gc=Zs   sc=Common     EM SPACE
    0x002000 =>       1,  # <--->gc=Zs   sc=Common     EN QUAD
    0x002002 =>    8517,  # <--->gc=Zs   sc=Common     EN SPACE
    0x002007 =>     422,  # <--->gc=Zs   sc=Common     FIGURE SPACE
    0x002005 =>   21027,  # <--->gc=Zs   sc=Common     FOUR-PER-EM SPACE
    0x00200A =>  491842,  # <--->gc=Zs   sc=Common     HAIR SPACE
    0x003000 =>      17,  # <--->gc=Zs   sc=Common     IDEOGRAPHIC SPACE
    0x00205F =>      28,  # <--->gc=Zs   sc=Common     MEDIUM MATHEMATICAL SPACE
    0x00202F =>    1682,  # <--->gc=Zs   sc=Common     NARROW NO-BREAK SPACE
    0x0000A0 => 1065594,  # <--->gc=Zs   sc=Common     NO-BREAK SPACE
    0x002008 =>     702,  # <--->gc=Zs   sc=Common     PUNCTUATION SPACE
    0x002006 =>      90,  # <--->gc=Zs   sc=Common     SIX-PER-EM SPACE
    0x002009 =>  420888,  # <--->gc=Zs   sc=Common     THIN SPACE
    0x002004 =>      26,  # <--->gc=Zs   sc=Common     THREE-PER-EM SPACE
    0x0020DE =>     217,  # ◌ ⃞  gc=Me   sc=Inherited  COMBINING ENCLOSING SQUARE
    0x000597 =>       2,  # ◌ ֗  gc=Mn   sc=Hebrew     HEBREW ACCENT REVIA
    0x0005BF =>       2,  # ◌ ֿ  gc=Mn   sc=Hebrew     HEBREW POINT RAFE
    0x000652 =>       1,  # ◌ ْ  gc=Mn   sc=Inherited  ARABIC SUKUN
    0x000301 =>      40,  # ◌ ́  gc=Mn   sc=Inherited  COMBINING ACUTE ACCENT
    0x000341 =>       5,  # ◌ ́  gc=Mn   sc=Inherited  COMBINING ACUTE TONE MARK
    0x000306 =>      19,  # ◌ ̆  gc=Mn   sc=Inherited  COMBINING BREVE
    0x00030C =>       3,  # ◌ ̌  gc=Mn   sc=Inherited  COMBINING CARON
    0x000327 =>       8,  # ◌ ̧  gc=Mn   sc=Inherited  COMBINING CEDILLA
    0x000302 =>    1249,  # ◌ ̂  gc=Mn   sc=Inherited  COMBINING CIRCUMFLEX ACCENT
    0x000308 =>       6,  # ◌ ̈  gc=Mn   sc=Inherited  COMBINING DIAERESIS
    0x000307 =>     458,  # ◌ ̇  gc=Mn   sc=Inherited  COMBINING DOT ABOVE
    0x000358 =>       3,  # ◌ ͘  gc=Mn   sc=Inherited  COMBINING DOT ABOVE RIGHT
    0x000323 =>       7,  # ◌ ̣  gc=Mn   sc=Inherited  COMBINING DOT BELOW
    0x000323 =>       6,  # ◌ ̣  gc=Mn   sc=Inherited  COMBINING DOT BELOW
    0x00030B =>       3,  # ◌ ̋  gc=Mn   sc=Inherited  COMBINING DOUBLE ACUTE ACCENT
    0x000300 =>      85,  # ◌ ̀  gc=Mn   sc=Inherited  COMBINING GRAVE ACCENT
    0x000344 =>       1,  # ◌ ̈́  gc=Mn   sc=Inherited  COMBINING GREEK DIALYTIKA TONOS
    0x000343 =>       1,  # ◌ ̓  gc=Mn   sc=Inherited  COMBINING GREEK KORONIS
    0x000342 =>      55,  # ◌ ͂  gc=Mn   sc=Inherited  COMBINING GREEK PERISPOMENI
    0x000311 =>       9,  # ◌ ̑  gc=Mn   sc=Inherited  COMBINING INVERTED BREVE
    0x000332 =>     150,  # ◌ ̲  gc=Mn   sc=Inherited  COMBINING LOW LINE
    0x000304 =>     624,  # ◌ ̄  gc=Mn   sc=Inherited  COMBINING MACRON
    0x000304 =>       1,  # ◌ ̄  gc=Mn   sc=Inherited  COMBINING MACRON
    0x000328 =>       1,  # ◌ ̨  gc=Mn   sc=Inherited  COMBINING OGONEK
    0x000305 =>    1093,  # ◌ ̅  gc=Mn   sc=Inherited  COMBINING OVERLINE
    0x0020D7 =>     335,  # ◌ ⃗  gc=Mn   sc=Inherited  COMBINING RIGHT ARROW ABOVE
    0x0020D1 =>       8,  # ◌ ⃑  gc=Mn   sc=Inherited  COMBINING RIGHT HARPOON ABOVE
    0x00030A =>      35,  # ◌ ̊  gc=Mn   sc=Inherited  COMBINING RING ABOVE
    0x000337 =>       6,  # ◌ ̷  gc=Mn   sc=Inherited  COMBINING SHORT SOLIDUS OVERLAY
    0x000335 =>      11,  # ◌ ̵  gc=Mn   sc=Inherited  COMBINING SHORT STROKE OVERLAY
    0x0020DB =>       3,  # ◌ ⃛  gc=Mn   sc=Inherited  COMBINING THREE DOTS ABOVE
    0x000303 =>     440,  # ◌ ̃  gc=Mn   sc=Inherited  COMBINING TILDE
    0x00FE00 =>      12,  # ◌ ︀  gc=Mn   sc=Inherited  VARIATION SELECTOR-1
    0x001036 =>       1,  # ◌ ံ  gc=Mn   sc=Myanmar    MYANMAR SIGN ANUSVARA
    0x000EBC =>       1,  # ◌ ຼ  gc=Mn   sc=Lao        LAO SEMIVOWEL SIGN LO
    0x000F9E =>       1,  # ◌ ྞ  gc=Mn   sc=Tibetan    TIBETAN SUBJOINED LETTER NNA

);



( run in 0.387 second using v1.01-cache-2.11-cpan-71847e10f99 )