You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
385 lines
8.0 KiB
385 lines
8.0 KiB
# © 2016 and later: Unicode, Inc. and others.
|
|
# License & terms of use: http://www.unicode.org/copyright.html
|
|
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
|
|
#
|
|
# File: Latin_InterIndic.txt
|
|
# Generated from CLDR
|
|
#
|
|
|
|
# Latin-InterIndic
|
|
#:: NFD;
|
|
#\uE000 reserved
|
|
# signs (chandrabindu, anusvara, visarga)
|
|
$chandrabindu=\uE001;
|
|
$anusvara=\uE002;
|
|
$visarga=\uE003;
|
|
#\uE004 reserved
|
|
# w←vowel→ represents the stand-alone form
|
|
$wa=\uE005;
|
|
$waa=\uE006;
|
|
$wi=\uE007;
|
|
$wii=\uE008;
|
|
$wu=\uE009;
|
|
$wuu=\uE00A;
|
|
$wr=\uE00B;
|
|
$wl=\uE00C;
|
|
$wce=\uE00D; # LETTER CANDRA E
|
|
$wse=\uE00E; # LETTER SHORT E
|
|
$we=\uE00F; # ए LETTER E
|
|
$wai=\uE010;
|
|
$wco=\uE011; # LETTER CANDRA O
|
|
$wso=\uE012; # LETTER SHORT O
|
|
$wo=\uE013; # ओ LETTER O
|
|
$wau=\uE014;
|
|
$ka=\uE015;
|
|
$kha=\uE016;
|
|
$ga=\uE017;
|
|
$gha=\uE018;
|
|
$nga=\uE019;
|
|
$ca=\uE01A;
|
|
$cha=\uE01B;
|
|
$ja=\uE01C;
|
|
$jha=\uE01D;
|
|
$nya=\uE01E;
|
|
$tta=\uE01F;
|
|
$ttha=\uE020;
|
|
$dda=\uE021;
|
|
$ddha=\uE022;
|
|
$nna=\uE023;
|
|
$ta=\uE024;
|
|
$tha=\uE025;
|
|
$da=\uE026;
|
|
$dha=\uE027;
|
|
$na=\uE028;
|
|
$ena=\uE029; #compatibility
|
|
$pa=\uE02A;
|
|
$pha=\uE02B;
|
|
$ba=\uE02C;
|
|
$bha=\uE02D;
|
|
$ma=\uE02E;
|
|
$ya=\uE02F;
|
|
$ra=\uE030;
|
|
$rra=\uE031;
|
|
$la=\uE032;
|
|
$lla=\uE033;
|
|
$ela=\uE034; #compatibility
|
|
$va=\uE035;
|
|
$vva=\uE081;
|
|
$sha=\uE036;
|
|
$ssa=\uE037;
|
|
$sa=\uE038;
|
|
$ha=\uE039;
|
|
#\u093A Reserved
|
|
#\u093B Reserved
|
|
$nukta=\uE03C;
|
|
$avagraha=\uE03D; # SIGN AVAGRAHA
|
|
# ←vowel→ represents the dependent form
|
|
$aa=\uE03E;
|
|
$i=\uE03F;
|
|
$ii=\uE040;
|
|
$u=\uE041;
|
|
$uu=\uE042;
|
|
$rh=\uE043;
|
|
$rrh=\uE044;
|
|
$ce=\uE045; #VOWEL SIGN CANDRA E
|
|
$se=\uE046; #VOWEL SIGN SHORT E
|
|
$e=\uE047;
|
|
$ai=\uE048;
|
|
$co=\uE049; # VOWEL SIGN CANDRA O
|
|
$so=\uE04A; # VOWEL SIGN SHORT O
|
|
$o=\uE04B; # ो
|
|
$au=\uE04C;
|
|
$virama=\uE04D;
|
|
# \u094E Reserved
|
|
# \u094F Reserved
|
|
$om = \uE050; # OM
|
|
# \u0951→; # UNMAPPED STRESS SIGN UDATTA
|
|
# \u0952→; # UNMAPPED STRESS SIGN ANUDATTA
|
|
# \u0953→; # UNMAPPED GRAVE ACCENT
|
|
# \u0954→; # UNMAPPED ACUTE ACCENT
|
|
$lm = \uE055;# Telugu Length Mark
|
|
$ailm=\uE056;# AI Length Mark
|
|
$aulm=\uE057;# AU Length Mark
|
|
# Urdu compatibility forms
|
|
$uka=\uE058;
|
|
$ukha=\uE059;
|
|
$ugha=\uE05A;
|
|
$ujha=\uE05B;
|
|
$uddha=\uE05C;
|
|
$udha=\uE05D;
|
|
$ufa=\uE05E;
|
|
$uya=\uE05F;
|
|
$wrr=\uE060;
|
|
$wll=\uE061;
|
|
$lh=\uE062;
|
|
$llh=\uE063;
|
|
$danda=\uE064;
|
|
$doubleDanda=\uE065;
|
|
$zero=\uE066; # DIGIT ZERO
|
|
$one=\uE067; # DIGIT ONE
|
|
$two=\uE068; # DIGIT TWO
|
|
$three=\uE069; # DIGIT THREE
|
|
$four=\uE06A; # DIGIT FOUR
|
|
$five=\uE06B; # DIGIT FIVE
|
|
$six=\uE06C; # DIGIT SIX
|
|
$seven=\uE06D; # DIGIT SEVEN
|
|
$eight=\uE06E; # DIGIT EIGHT
|
|
$nine=\uE06F; # DIGIT NINE
|
|
$dgs=\uE082;
|
|
# For all other scripts
|
|
$ecp0=\uE070;
|
|
$ecp1=\uE071;
|
|
$ecp2=\uE072;
|
|
$ecp3=\uE073;
|
|
$ecp4=\uE074;
|
|
$ecp5=\uE075;
|
|
$ecp6=\uE076;
|
|
$ecp7=\uE077;
|
|
$ecp8=\uE078;
|
|
$ecp9=\uE079;
|
|
$ecpA=\uE07A;
|
|
$ecpB=\uE07B;
|
|
$ecpC=\uE07C;
|
|
$ecpD=\uE07D;
|
|
$ecpE=\uE07E;
|
|
$ecpF=\uE07F;
|
|
# Khanda-ta
|
|
$kta=\uE083;
|
|
# ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN
|
|
# Character sets used as context by the rules below.
# $depVowelAbove covers \uE03E-\uE040 ($aa, $i, $ii) and \uE045-\uE04C
# ($ce, $se, $e, $ai, $co, $so, $o, $au); a '~' after one of these becomes $anusvara.
$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
|
|
# $depVowelBelow covers \uE041-\uE044 ($u, $uu, $rh, $rrh);
# a '~' after one of these becomes $chandrabindu.
$depVowelBelow=[\uE041-\uE044];
|
|
# Sentence-final marks; a $virama directly before one of these is deleted.
$endThing=[$danda$doubleDanda];
|
|
# $x was originally called '§'; $z was '%'
|
|
# $x: virama plus dependent vowel signs.
# NOTE(review): $x is not referenced by any rule visible in this file — confirm whether it is still needed.
$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
|
|
# $z: the lowercase ASCII letters other than a, e, i, o, u.
$z=[bcdfghjklmnpqrstvwxyz];
|
|
# $consonants: anything that counts as a preceding consonant for the
# "$consonants{ <vowel>" rules: the InterIndic range $ka-$ha (\uE015-\uE039),
# the Latin letters in $z, and the KA-HA ranges of Devanagari, Bengali, Gurmukhi,
# Gujarati, Oriya, Tamil, Telugu, Kannada and Malayalam.
# Note that $vva (\uE081) lies outside the $ka-$ha range.
$consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]];
|
|
# Sign rules. They appear before the plain-letter rules for 'm' and 'h' further down,
# so the letter + combining-mark sequences are matched first (earlier rules take priority).
# U+0315 (combining comma above right) maps to the avagraha sign.
\u0315 → $avagraha;
|
|
# A bare U+0303 (combining tilde) produces chandrabindu followed by anusvara.
\u0303→$chandrabindu$anusvara;
|
|
# 'm' + U+0310 (combining candrabindu) maps to chandrabindu alone.
m\u0310→$chandrabindu;
|
|
# 'h' + U+0323 (combining dot below) maps to visarga.
h\u0323→$visarga;
|
|
# Latin 'x' expands to the conjunct ka + virama + sa.
x→$ka$virama$sa;
|
|
# convert to independent forms at start of word or syllable:
|
|
# dependent forms for roundtrip
|
|
\u0314a\u0304→$aa;
|
|
\u0314ai→$ai;
|
|
\u0314au→$au;
|
|
\u0314ii→$ii;
|
|
\u0314i\u0304→$ii;
|
|
\u0314i→$i;
|
|
\u0314u\u0304→$uu;
|
|
\u0314u→$u;
|
|
\u0314r\u0325\u0304→$rrh;
|
|
\u0314r\u0325→$rh;
|
|
\u0314l\u0325\u0304→$llh;
|
|
\u0314lh→$lh;
|
|
\u0314l\u0325→$lh;
|
|
\u0314e\u0304→$e;
|
|
\u0314o\u0304→$o;
|
|
\u0314a→;
|
|
\u0314e\u0306→$ce;
|
|
\u0314o\u0306→$co;
|
|
\u0314e→$se;
|
|
\u0314o→$so;
|
|
# preceded by consonants
|
|
$consonants{ a\u0304→$aa;
|
|
$consonants{ ai→$ai;
|
|
$consonants{ au→$au;
|
|
$consonants{ ii→$ii;
|
|
$consonants{ i\u0304→$ii;
|
|
$consonants{ i→$i;
|
|
$consonants{ u\u0304→$uu;
|
|
$consonants{ u→$u;
|
|
$consonants{ r\u0325\u0304→$rrh;
|
|
$consonants{ r\u0325a→$rh;
|
|
$consonants{ r\u0325→$rh;
|
|
$consonants{ l\u0325\u0304→$llh;
|
|
$consonants{ lh→$lh;
|
|
$consonants{ l\u0325→$lh;
|
|
$consonants{ e\u0304→$e;
|
|
$consonants{ o\u0304→$o;
|
|
$consonants{ e\u0306→$ce;
|
|
$consonants{ o\u0306→$co;
|
|
$consonants{ e→$se;
|
|
$consonants{ o→$so;
|
|
# e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai})
|
|
a\u0304→$waa;
|
|
ai→$wai;
|
|
au→$wau;
|
|
i\u0304→$wii;
|
|
i→$wi;
|
|
u\u0304→$wuu;
|
|
u→$wu;
|
|
r\u0325\u0304→$wrr;
|
|
r\u0325→$wr;
|
|
l\u0325\u0304→$wll;
|
|
lh→$wl;
|
|
l\u0325→$wl;
|
|
e\u0304→$we;
|
|
o\u0304→$wo;
|
|
a→$wa;
|
|
e\u0306→$wce;
|
|
o\u0306→$wco;
|
|
e→$wse;
|
|
''om→$om;
|
|
o→$wso;
|
|
# rules for anusvara
# Syntax reminder: text after '}' is look-ahead context (it must match but is not
# replaced); '|' in the replacement marks where the cursor is left, here before
# $virama so that the later "$virama <vowel>" rules can still act on it.
|
|
# Guard rules: keep 'n' as a real NA (+ virama) before vocalic r (r + U+0325),
# vocalic l (l + U+0325) and before "na". Without them the broader rules
# n}[ylvshr] and n}[tdn] below would turn this 'n' into anusvara.
n}r\u0325 → $na|$virama;
|
|
n}l\u0325 → $na|$virama;
|
|
n}na → $na|$virama;
|
|
# n + U+0307 (dot above) before k/g, or before another n + U+0307, becomes anusvara.
n\u0307}[kg] → $anusvara;
|
|
n\u0307}n\u0307 → $anusvara;
|
|
# n + U+0304 (macron) before c/j, or before n + U+0303 (tilde), becomes anusvara.
n\u0304}[cj] → $anusvara;
|
|
n\u0304}n\u0303 → $anusvara;
|
|
# n + U+0323 (dot below) before a dotted t/d/n becomes anusvara.
n\u0323}[tdn]\u0323 → $anusvara;
|
|
# Plain n before t/d/n, and plain m before p/b/m, become anusvara.
n}[tdn] → $anusvara;
|
|
m}[pbm] → $anusvara;
|
|
# Plain n before y, l, v, s, h or r becomes anusvara.
n}[ylvshr] → $anusvara;
|
|
# m + U+0307 (dot above) is always anusvara, regardless of what follows.
m\u0307 → $anusvara;
|
|
# Urdu compatibility
# Each rule emits the InterIndic letter followed by $virama and leaves the cursor
# ('|') before the virama, so a following vowel can replace it via the later
# "$virama <vowel>" rules.
|
|
q→$uka|$virama;
|
|
# k and h each carry U+0331 (combining macron below).
k\u0331h\u0331→$ukha |$virama;
|
|
g\u0307→ $ugha | $virama;
|
|
z → $ujha |$virama;
|
|
f → $ufa|$virama;
|
|
# t + U+0331 maps to $kta, which is defined under the "Khanda-ta" comment;
# unlike the rules above it emits no virama.
t\u0331→$kta;
|
|
# dev
|
|
y\u0307→$uya|$virama;
|
|
l\u0331→$ela|$virama;
|
|
n\u0331→$ena|$virama;
|
|
n\u0307→$nga|$virama;
|
|
n\u0303→$nya|$virama;
|
|
n\u0323→$nna|$virama;
|
|
t\u0323h→$ttha|$virama;
|
|
t\u0323→$tta|$virama;
|
|
r\u0323h→$udha|$virama;
|
|
r\u0323→$uddha|$virama;
|
|
d\u0323h→$ddha|$virama;
|
|
d\u0323→$dda|$virama;
|
|
kh→$kha|$virama;
|
|
k→$ka|$virama;
|
|
gh→$gha|$virama;
|
|
g→$ga|$virama;
|
|
ch→$cha|$virama;
|
|
c→$ca|$virama;
|
|
jh→$jha|$virama;
|
|
j→$ja|$virama;
|
|
ny→$nya|$virama;
|
|
tth→$ttha|$virama;
|
|
ddh→$ddha|$virama;
|
|
th→$tha|$virama;
|
|
t→$ta|$virama;
|
|
dh→$dha|$virama;
|
|
d→$da|$virama;
|
|
n→$na|$virama;
|
|
ph→$pha|$virama;
|
|
p→$pa|$virama;
|
|
bh→$bha|$virama;
|
|
b→$ba|$virama;
|
|
m→$ma|$virama;
|
|
y→$ya|$virama;
|
|
r\u0331→$rra|$virama;
|
|
r→$ra|$virama;
|
|
l\u0323→$lla|$virama;
|
|
l→$la|$virama;
|
|
v→$va|$virama;
|
|
w\u0307→$vva|$virama;
|
|
w→$va|$virama;
|
|
sh→$sha|$virama;
|
|
ss→$ssa|$virama;
|
|
s\u0323→$ssa|$virama;
|
|
s\u0301→$sha|$virama;
|
|
s→$sa|$virama;
|
|
h→$ha|$virama;
|
|
# Punctuation and nasalisation marks.
# A literal full stop becomes the InterIndic danda.
'.'→$danda;
|
|
# A danda immediately followed by a literal '.' becomes a double danda.
# NOTE(review): the preceding rule converts '.' first and leaves the cursor after
# the danda it produced, so it looks like this rule may not fire for input ".." —
# confirm against the transliterator's actual output.
$danda'.'→$doubleDanda;
|
|
# '~' after a vowel sign in $depVowelAbove becomes anusvara ...
$depVowelAbove{'~'→$anusvara;
|
|
# ... and '~' after a vowel sign in $depVowelBelow becomes chandrabindu.
$depVowelBelow{'~'→$chandrabindu;
|
|
# convert to dependent forms after consonant with no vowel:
|
|
# e.g. kai -→ {ka}{virama}ai -→ {ka}{ai}
|
|
#$virama aa→$aa;
|
|
$virama a\u0304→$aa;
|
|
$virama ai→$ai;
|
|
$virama au→$au;
|
|
$virama ii→$ii;
|
|
$virama i\u0304→$ii;
|
|
$virama i→$i;
|
|
#$virama uu→$uu;
|
|
$virama u\u0304→$uu;
|
|
$virama u→$u;
|
|
#$virama rrh→$rrh;
|
|
$virama r\u0325\u0304→$rrh;
|
|
#$virama rh→$rh;
|
|
$virama r\u0325a→$rh;
|
|
$virama r\u0325→$rh;
|
|
$virama l\u0325\u0304→$llh;
|
|
$virama lh→$lh;
|
|
$virama l\u0325→$lh;
|
|
$virama e\u0304→$e;
|
|
$virama o\u0304→$o;
|
|
$virama a→;
|
|
$virama e\u0306→$ce;
|
|
$virama o\u0306→$co;
|
|
$virama e→$se;
|
|
$virama o→$so;
|
|
# otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai}
|
|
#$virama''aa→$waa;
|
|
$virama''a\u0304→$waa;
|
|
$virama''ai→$wai;
|
|
$virama''au→$wau;
|
|
#$virama''ii→$wii;
|
|
$virama''i\u0304→$wii;
|
|
$virama''i→$wi;
|
|
#$virama''uu→$wuu;
|
|
$virama''u\u0304→$wuu;
|
|
$virama''u→$wu;
|
|
#$virama''rrh→$wrr;
|
|
$virama''r\u0325\u0304→$wrr;
|
|
#$virama''rh→$wr;
|
|
$virama''r\u0325→$wr;
|
|
$virama''l\u0325\u0304→$wll;
|
|
#$virama''lh→$wl;
|
|
$virama''l\u0325→$wl;
|
|
$virama''e\u0304→$we;
|
|
$virama''o\u0304→$wo;
|
|
$virama''a→$wa;
|
|
$virama''e\u0306→$wce;
|
|
$virama''o\u0306→$wco;
|
|
$virama''e→$wse;
|
|
$virama''o→$wso;
|
|
# no virama
|
|
''a\u0304→$waa;
|
|
''ai→$wai;
|
|
''au→$wau;
|
|
''i\u0304→$wii;
|
|
''i→$wi;
|
|
''u\u0304→$wuu;
|
|
''u→$wu;
|
|
''r\u0325\u0304→$wrr;
|
|
''r\u0325→$wr;
|
|
''l\u0325\u0304→$wll;
|
|
''l\u0325→$wl;
|
|
''e\u0304→$we;
|
|
''o\u0304→$wo;
|
|
''a→$wa;
|
|
''e\u0306→$wce;
|
|
''o\u0306→$wco;
|
|
''e→$wse;
|
|
''o→$wso;
|
|
# Handling of a $virama that was not replaced by a vowel rule.
# Before a Latin consonant ($z) the virama is kept: it is rewritten to itself.
$virama } [$z] → $virama;
|
|
# Before a space the virama is likewise kept.
$virama } ' ' → $virama ;
|
|
# Before a danda or double danda ($endThing) the virama is deleted.
$virama}$endThing→;
|
|
ʔ→$dgs; # Glottal Stop
|
|
# ASCII digits map one-to-one onto the InterIndic digits $zero..$nine (\uE066-\uE06F).
0→$zero;
|
|
1→$one;
|
|
2→$two;
|
|
3→$three;
|
|
4→$four;
|
|
5→$five;
|
|
6→$six;
|
|
7→$seven;
|
|
8→$eight;
|
|
9→$nine;
|
|
# '' is the rule-syntax escape for one literal apostrophe. Any apostrophe not
# consumed by an earlier rule (e.g. the ''<vowel> separator rules) is deleted here.
''→;
|
|
#:: NFC (NFD) ;
|
|
|