You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
392 lines
7.7 KiB
392 lines
7.7 KiB
<?xml version="1.0" encoding="UTF-8" ?>
|
|
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
|
|
<!--
|
|
Copyright © 1991-2013 Unicode, Inc.
|
|
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
|
|
For terms of use, see http://www.unicode.org/copyright.html
|
|
-->
|
|
<supplementalData>
|
|
<version number="$Revision$"/>
|
|
<transforms>
|
|
<transform source="Latin" target="InterIndic" direction="forward" visibility="internal">
|
|
<tRule>
|
|
# Latin-InterIndic
|
|
#:: NFD;
|
|
#\uE000 reserved
|
|
# Start of the InterIndic variable table: every $name defined from here on is
# bound to a single private-use code point in the \uE0xx range.
# NOTE(review): the heading below sits above three sign variables; the consonant
# letters themselves ($ka ... $ha) are defined later in the table.
#consonants
|
|
$chandrabindu=\uE001;
|
|
$anusvara=\uE002;
|
|
$visarga=\uE003;
|
|
#\uE004 reserved
|
|
# Stand-alone vowel letters, assigned consecutively from \uE005 to \uE014.
# These are the targets of the vowel rules that apply when no consonant precedes.
# w←vowel→ represents the stand-alone form
|
|
$wa=\uE005;
|
|
$waa=\uE006;
|
|
$wi=\uE007;
|
|
$wii=\uE008;
|
|
$wu=\uE009;
|
|
$wuu=\uE00A;
|
|
$wr=\uE00B;
|
|
$wl=\uE00C;
|
|
$wce=\uE00D; # LETTER CANDRA E
|
|
$wse=\uE00E; # LETTER SHORT E
|
|
$we=\uE00F; # ए LETTER E
|
|
$wai=\uE010;
|
|
$wco=\uE011; # LETTER CANDRA O
|
|
$wso=\uE012; # LETTER SHORT O
|
|
$wo=\uE013; # ओ LETTER O
|
|
$wau=\uE014;
|
|
# Consonant letters, \uE015 ($ka) through \uE039 ($ha), followed by $nukta and
# $avagraha. The span $ka-$ha is used as a range inside the $consonants class,
# so the numeric order of these assignments matters.
$ka=\uE015;
|
|
$kha=\uE016;
|
|
$ga=\uE017;
|
|
$gha=\uE018;
|
|
$nga=\uE019;
|
|
$ca=\uE01A;
|
|
$cha=\uE01B;
|
|
$ja=\uE01C;
|
|
$jha=\uE01D;
|
|
$nya=\uE01E;
|
|
$tta=\uE01F;
|
|
$ttha=\uE020;
|
|
$dda=\uE021;
|
|
$ddha=\uE022;
|
|
$nna=\uE023;
|
|
$ta=\uE024;
|
|
$tha=\uE025;
|
|
$da=\uE026;
|
|
$dha=\uE027;
|
|
$na=\uE028;
|
|
$ena=\uE029; #compatibility
|
|
$pa=\uE02A;
|
|
$pha=\uE02B;
|
|
$ba=\uE02C;
|
|
$bha=\uE02D;
|
|
$ma=\uE02E;
|
|
$ya=\uE02F;
|
|
$ra=\uE030;
|
|
$rra=\uE031;
|
|
$la=\uE032;
|
|
$lla=\uE033;
|
|
$ela=\uE034; #compatibility
|
|
$va=\uE035;
|
|
# $vva is assigned out of sequence (\uE081), so it lies outside the $ka-$ha range.
$vva=\uE081;
|
|
$sha=\uE036;
|
|
$ssa=\uE037;
|
|
$sa=\uE038;
|
|
$ha=\uE039;
|
|
# The two slots between $ha (\uE039) and $nukta (\uE03C) are left unassigned.
#\u093A Reserved
|
|
#\u093B Reserved
|
|
$nukta=\uE03C;
|
|
$avagraha=\uE03D; # SIGN AVAGRAHA
|
|
# Dependent vowel signs, assigned consecutively from \uE03E to \uE04C, followed
# by $virama (\uE04D). There is no variable for a dependent short "a": the rules
# below map "a" after a consonant to an empty replacement.
# ←vowel→ represents the dependent form
|
|
$aa=\uE03E;
|
|
$i=\uE03F;
|
|
$ii=\uE040;
|
|
$u=\uE041;
|
|
$uu=\uE042;
|
|
$rh=\uE043;
|
|
$rrh=\uE044;
|
|
$ce=\uE045; #VOWEL SIGN CANDRA E
|
|
$se=\uE046; #VOWEL SIGN SHORT E
|
|
$e=\uE047;
|
|
$ai=\uE048;
|
|
$co=\uE049; # VOWEL SIGN CANDRA O
|
|
$so=\uE04A; # VOWEL SIGN SHORT O
|
|
$o=\uE04B; # ो
|
|
$au=\uE04C;
|
|
# $virama is emitted after every consonant by the consonant rules and is then
# replaced or removed by the vowel rules that begin with $virama.
$virama=\uE04D;
|
|
# Slots after $virama: two reserved, then $om, then four accent signs that are
# intentionally left without a variable (\uE051-\uE054 are skipped), then three
# length marks.
# \u094E Reserved
|
|
# \u094F Reserved
|
|
$om = \uE050; # OM
|
|
# ॑→; # UNMAPPED STRESS SIGN UDATTA
|
|
# ॒→; # UNMAPPED STRESS SIGN ANUDATTA
|
|
# ॓→; # UNMAPPED GRAVE ACCENT
|
|
# ॔→; # UNMAPPED ACUTE ACCENT
|
|
# None of the three length-mark variables is referenced by a rule in this tRule.
$lm = \uE055;# Telugu Length Mark
|
|
$ailm=\uE056;# AI Length Mark
|
|
$aulm=\uE057;# AU Length Mark
|
|
#urdu compatibility forms
|
|
# Compatibility consonants ($uka ... $uya, \uE058-\uE05F); each is the target of
# one rule in the "urdu compatibility" / consonant rule sections below.
$uka=\uE058;
|
|
$ukha=\uE059;
|
|
$ugha=\uE05A;
|
|
$ujha=\uE05B;
|
|
$uddha=\uE05C;
|
|
$udha=\uE05D;
|
|
$ufa=\uE05E;
|
|
$uya=\uE05F;
|
|
# Long vocalic letters: $wrr/$wll are the stand-alone forms, $lh/$llh the
# dependent forms (the dependent $rh/$rrh are defined with the other vowel signs).
$wrr=\uE060;
|
|
$wll=\uE061;
|
|
$lh=\uE062;
|
|
$llh=\uE063;
|
|
# Punctuation; both are members of the $endThing class.
$danda=\uE064;
|
|
$doubleDanda=\uE065;
|
|
# Digits 0-9, assigned consecutively from \uE066 to \uE06F; each is the target of
# the matching ASCII-digit rule near the end of the rule list.
$zero=\uE066; # DIGIT ZERO
|
|
$one=\uE067; # DIGIT ONE
|
|
$two=\uE068; # DIGIT TWO
|
|
$three=\uE069; # DIGIT THREE
|
|
$four=\uE06A; # DIGIT FOUR
|
|
$five=\uE06B; # DIGIT FIVE
|
|
$six=\uE06C; # DIGIT SIX
|
|
$seven=\uE06D; # DIGIT SEVEN
|
|
$eight=\uE06E; # DIGIT EIGHT
|
|
$nine=\uE06F; # DIGIT NINE
|
|
# Out-of-sequence slot (\uE082); target of the glottal stop rule (ʔ→$dgs).
$dgs=\uE082;
|
|
# Sixteen extra slots, \uE070-\uE07F, named $ecp0 ... $ecpF after their last hex
# digit. No rule in this tRule references any of them.
# For all other scripts
|
|
$ecp0=\uE070;
|
|
$ecp1=\uE071;
|
|
$ecp2=\uE072;
|
|
$ecp3=\uE073;
|
|
$ecp4=\uE074;
|
|
$ecp5=\uE075;
|
|
$ecp6=\uE076;
|
|
$ecp7=\uE077;
|
|
$ecp8=\uE078;
|
|
$ecp9=\uE079;
|
|
$ecpA=\uE07A;
|
|
$ecpB=\uE07B;
|
|
$ecpC=\uE07C;
|
|
$ecpD=\uE07D;
|
|
$ecpE=\uE07E;
|
|
$ecpF=\uE07F;
|
|
# $kta (\uE083) is the target of the ṯ rule in the compatibility rule section.
# Khanda-ta
|
|
$kta=\uE083;
|
|
# ॰→; # nothing in Latin maps to InterIndic ABBREVIATION SIGN
|
|
# Character classes used as rule context.
# $depVowelAbove covers $aa, $i, $ii and $ce through $au; it is the context in
# which '~' becomes $anusvara.
$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
|
|
# $depVowelBelow covers $u, $uu, $rh, $rrh; it is the context in which '~'
# becomes $chandrabindu.
$depVowelBelow=[\uE041-\uE044];
|
|
# Before either danda a pending $virama is removed (see the clean-up rules).
$endThing=[$danda$doubleDanda];
|
|
# $x was originally called '§'; $z was '%'
|
|
# $x ($virama plus dependent vowel signs) is not referenced by any rule in this tRule.
$x=[$virama$aa$ai$au$ii$i$uu$u$rrh$rh$lh$e$o$se$ce$so$co];
|
|
# $z is the set of ASCII Latin consonant letters (every lowercase letter except a, e, i, o, u).
$z=[bcdfghjklmnpqrstvwxyz];
|
|
# $consonants is the "preceded by a consonant" context for the dependent-vowel
# rules: InterIndic $ka-$ha, the Latin letters in $z, and a first-to-last
# consonant range written literally for each of nine Indic scripts.
$consonants=[[$ka-$ha]$z[क-ह][ক-হ][ਕ-ਹ][ક-હ][କ-ହ][க-ஹ][క-హ][ಕ-ಹ][ക-ഹ]];
|
|
# First conversion rules. Rules are tried in the order written, so everything
# in this group takes priority over the vowel and consonant rules that follow.
# The first two rules match a bare combining mark; the next two match a Latin
# letter carrying a diacritic.
̕ → $avagraha;
|
|
̃→$chandrabindu$anusvara;
|
|
m̐→$chandrabindu;
|
|
ḥ→$visarga;
|
|
# "x" expands to the three-code-point sequence $ka $virama $sa.
x→$ka$virama$sa;
|
|
# convert to independent forms at start of word or syllable:
|
|
# Each rule in this group matches a combining mark followed by a Latin vowel and
# emits the DEPENDENT vowel sign directly, with no consonant context required.
# Longer spellings (e.g. the long vocalic r/l with macron) are listed before
# their shorter prefixes so the longer match is tried first.
# dependent forms for roundtrip
|
|
̔ā→$aa;
|
|
̔ai→$ai;
|
|
̔au→$au;
|
|
̔ii→$ii;
|
|
̔ī→$ii;
|
|
̔i→$i;
|
|
̔ū→$uu;
|
|
̔u→$u;
|
|
̔r̥̄→$rrh;
|
|
̔r̥→$rh;
|
|
̔l̥̄→$llh;
|
|
̔lh→$lh;
|
|
̔l̥→$lh;
|
|
̔ē→$e;
|
|
̔ō→$o;
|
|
# Marked "a" has an empty replacement: both the mark and the "a" are deleted.
̔a→;
|
|
̔ĕ→$ce;
|
|
̔ŏ→$co;
|
|
̔e→$se;
|
|
̔o→$so;
|
|
# preceded by consonants
|
|
# "$consonants{" is a before-context: the vowel is converted only when the
# preceding character is in $consonants, and that character is left unchanged.
# In that position the Latin vowel becomes the DEPENDENT vowel sign.
# There is no plain "a" rule in this group.
$consonants{ ā→$aa;
|
|
$consonants{ ai→$ai;
|
|
$consonants{ au→$au;
|
|
$consonants{ ii→$ii;
|
|
$consonants{ ī→$ii;
|
|
$consonants{ i→$i;
|
|
$consonants{ ū→$uu;
|
|
$consonants{ u→$u;
|
|
$consonants{ r̥̄→$rrh;
|
|
# A trailing "a" after vocalic r is consumed together with it.
$consonants{ r̥a→$rh;
|
|
$consonants{ r̥→$rh;
|
|
$consonants{ l̥̄→$llh;
|
|
$consonants{ lh→$lh;
|
|
$consonants{ l̥→$lh;
|
|
$consonants{ ē→$e;
|
|
$consonants{ ō→$o;
|
|
$consonants{ ĕ→$ce;
|
|
$consonants{ ŏ→$co;
|
|
# Unmarked e/o map to the SHORT vowel signs; the macron forms above map to $e/$o.
$consonants{ e→$se;
|
|
$consonants{ o→$so;
|
|
# Context-free vowel rules: any vowel that reaches this point (i.e. was not
# matched by an earlier, more specific rule) becomes the STAND-ALONE vowel letter.
# e.g. keai -→ {ka}{e}{wai}; k'ai -→ {ka}{wai}; (ai) -→ ({wai})
|
|
ā→$waa;
|
|
ai→$wai;
|
|
au→$wau;
|
|
ī→$wii;
|
|
i→$wi;
|
|
ū→$wuu;
|
|
u→$wu;
|
|
r̥̄→$wrr;
|
|
r̥→$wr;
|
|
l̥̄→$wll;
|
|
lh→$wl;
|
|
l̥→$wl;
|
|
ē→$we;
|
|
ō→$wo;
|
|
a→$wa;
|
|
ĕ→$wce;
|
|
ŏ→$wco;
|
|
e→$wse;
|
|
# '' is the rule-syntax spelling of one literal apostrophe: the input 'om
# (apostrophe, o, m) becomes the single $om sign.
''om→$om;
|
|
o→$wso;
|
|
# "X } Y" matches X only when Y follows; Y itself is not consumed.
# These rules appear before the general consonant rules, so they are tried first.
# rules for anusvara
|
|
# Exceptions: n before vocalic r, vocalic l, or "na" stays a full consonant
# ($na + $virama, cursor left before the virama) instead of becoming $anusvara.
n}r̥ → $na|$virama;
|
|
n}l̥ → $na|$virama;
|
|
n}na → $na|$virama;
|
|
# A nasal followed by one of the listed consonants becomes $anusvara.
ṅ}[kg] → $anusvara;
|
|
ṅ}ṅ → $anusvara;
|
|
n̄}[cj] → $anusvara;
|
|
n̄}ñ → $anusvara;
|
|
# The after-context here is one of t/d/n followed by a separate combining mark.
ṇ}[tdn]̣ → $anusvara;
|
|
n}[tdn] → $anusvara;
|
|
m}[pbm] → $anusvara;
|
|
n}[ylvshr] → $anusvara;
|
|
# Unconditional: this letter always maps to $anusvara.
ṁ → $anusvara;
|
|
# Each rule emits a compatibility consonant followed by $virama. The "|" in the
# replacement sets the cursor before the $virama, so the next match attempt
# starts at the virama and the "$virama <vowel>" rules can consume it.
# Whitespace around "→" and "|" varies from rule to rule in this group.
#urdu compatibility
|
|
q→$uka|$virama;
|
|
ḵẖ→$ukha |$virama;
|
|
ġ→ $ugha | $virama;
|
|
z → $ujha |$virama;
|
|
f → $ufa|$virama;
|
|
# Unlike the rules above, this one emits $kta alone, with no $virama.
ṯ→$kta;
|
|
# General consonant rules. Every rule emits <consonant> + $virama and uses "|"
# to leave the cursor before the $virama, so that a following vowel can be
# merged by the "$virama <vowel>" rules (or the virama kept when no vowel follows).
# Multi-letter spellings (e.g. kh, tth) are listed before their one-letter
# prefixes (k, t) so the longer match is tried first.
# dev
|
|
ẏ→$uya|$virama;
|
|
ḻ→$ela|$virama;
|
|
ṉ→$ena|$virama;
|
|
ṅ→$nga|$virama;
|
|
ñ→$nya|$virama;
|
|
ṇ→$nna|$virama;
|
|
ṭh→$ttha|$virama;
|
|
ṭ→$tta|$virama;
|
|
ṛh→$udha|$virama;
|
|
ṛ→$uddha|$virama;
|
|
ḍh→$ddha|$virama;
|
|
ḍ→$dda|$virama;
|
|
kh→$kha|$virama;
|
|
k→$ka|$virama;
|
|
gh→$gha|$virama;
|
|
g→$ga|$virama;
|
|
ch→$cha|$virama;
|
|
c→$ca|$virama;
|
|
jh→$jha|$virama;
|
|
j→$ja|$virama;
|
|
# ASCII-only spellings that reach the same targets as the diacritic forms above
# (ny like ñ, tth like ṭh, ddh like ḍh).
ny→$nya|$virama;
|
|
tth→$ttha|$virama;
|
|
ddh→$ddha|$virama;
|
|
th→$tha|$virama;
|
|
t→$ta|$virama;
|
|
dh→$dha|$virama;
|
|
d→$da|$virama;
|
|
n→$na|$virama;
|
|
ph→$pha|$virama;
|
|
p→$pa|$virama;
|
|
bh→$bha|$virama;
|
|
b→$ba|$virama;
|
|
m→$ma|$virama;
|
|
y→$ya|$virama;
|
|
ṟ→$rra|$virama;
|
|
r→$ra|$virama;
|
|
ḷ→$lla|$virama;
|
|
l→$la|$virama;
|
|
v→$va|$virama;
|
|
ẇ→$vva|$virama;
|
|
# Plain w shares its target with v.
w→$va|$virama;
|
|
# sh and ś both map to $sha; ss and ṣ both map to $ssa.
sh→$sha|$virama;
|
|
ss→$ssa|$virama;
|
|
ṣ→$ssa|$virama;
|
|
ś→$sha|$virama;
|
|
s→$sa|$virama;
|
|
h→$ha|$virama;
|
|
# Punctuation: a full stop becomes $danda.
'.'→$danda;
|
|
# NOTE(review): this rule needs an existing $danda immediately followed by '.'.
# The rule above leaves the cursor after the $danda it produced and is listed
# first, so for the input ".." it looks like the second '.' is also turned into
# $danda rather than reaching this rule — confirm whether $doubleDanda can ever
# be produced from Latin input.
$danda'.'→$doubleDanda;
|
|
# '~' is converted only when it directly follows a dependent vowel sign:
# after a sign in $depVowelAbove it becomes $anusvara, after a sign in
# $depVowelBelow it becomes $chandrabindu.
$depVowelAbove{'~'→$anusvara;
|
|
$depVowelBelow{'~'→$chandrabindu;
|
|
# These rules consume the $virama left behind by a consonant rule together with
# the Latin vowel that follows it, replacing both with the dependent vowel sign.
# Lines that start with "#$virama" are alternative ASCII spellings that are
# disabled (commented out).
# convert to dependent forms after consonant with no vowel:
|
|
# e.g. kai -→ {ka}{virama}ai -→ {ka}{ai}
|
|
#$virama aa→$aa;
|
|
$virama ā→$aa;
|
|
$virama ai→$ai;
|
|
$virama au→$au;
|
|
$virama ii→$ii;
|
|
$virama ī→$ii;
|
|
$virama i→$i;
|
|
#$virama uu→$uu;
|
|
$virama ū→$uu;
|
|
$virama u→$u;
|
|
#$virama rrh→$rrh;
|
|
$virama r̥̄→$rrh;
|
|
#$virama rh→$rh;
|
|
$virama r̥a→$rh;
|
|
$virama r̥→$rh;
|
|
$virama l̥̄→$llh;
|
|
$virama lh→$lh;
|
|
$virama l̥→$lh;
|
|
$virama ē→$e;
|
|
$virama ō→$o;
|
|
# Empty replacement: $virama followed by "a" is deleted entirely, leaving the
# bare consonant.
$virama a→;
|
|
$virama ĕ→$ce;
|
|
$virama ŏ→$co;
|
|
$virama e→$se;
|
|
$virama o→$so;
|
|
# '' is one literal apostrophe. Each rule matches $virama + apostrophe + vowel
# and emits the STAND-ALONE vowel letter.
# NOTE(review): the replacements contain no $virama, so as written the matched
# $virama is removed; the example in the comment below shows {virama} being
# kept — confirm which is intended.
# Lines that start with "#$virama" are disabled alternative spellings.
# otherwise convert independent forms when separated by ': k'ai -→ {ka}{virama}{wai}
|
|
#$virama''aa→$waa;
|
|
$virama''ā→$waa;
|
|
$virama''ai→$wai;
|
|
$virama''au→$wau;
|
|
#$virama''ii→$wii;
|
|
$virama''ī→$wii;
|
|
$virama''i→$wi;
|
|
#$virama''uu→$wuu;
|
|
$virama''ū→$wuu;
|
|
$virama''u→$wu;
|
|
#$virama''rrh→$wrr;
|
|
$virama''r̥̄→$wrr;
|
|
#$virama''rh→$wr;
|
|
$virama''r̥→$wr;
|
|
$virama''l̥̄→$wll;
|
|
#$virama''lh→$wl;
|
|
$virama''l̥→$wl;
|
|
$virama''ē→$we;
|
|
$virama''ō→$wo;
|
|
$virama''a→$wa;
|
|
$virama''ĕ→$wce;
|
|
$virama''ŏ→$wco;
|
|
$virama''e→$wse;
|
|
$virama''o→$wso;
|
|
# Apostrophe + vowel with no preceding $virama: the apostrophe is consumed and
# the vowel becomes the STAND-ALONE vowel letter.
# no virama
|
|
''ā→$waa;
|
|
''ai→$wai;
|
|
''au→$wau;
|
|
''ī→$wii;
|
|
''i→$wi;
|
|
''ū→$wuu;
|
|
''u→$wu;
|
|
''r̥̄→$wrr;
|
|
''r̥→$wr;
|
|
''l̥̄→$wll;
|
|
''l̥→$wl;
|
|
''ē→$we;
|
|
''ō→$wo;
|
|
''a→$wa;
|
|
''ĕ→$wce;
|
|
''ŏ→$wco;
|
|
''e→$wse;
|
|
''o→$wso;
|
|
# Clean-up for a $virama that no vowel rule consumed.
# Followed by a Latin consonant letter ($z): the virama is kept; rewriting it to
# itself moves the cursor past it so the consonant can be processed next.
$virama } [$z] → $virama;
|
|
# Followed by a space: the virama is likewise kept.
$virama } ' ' → $virama ;
|
|
# Followed by $danda or $doubleDanda: the virama is deleted (empty replacement).
$virama}$endThing→;
|
|
ʔ→$dgs; # Glottal Stop
|
|
# ASCII digits map one-to-one onto the InterIndic digit variables.
0→$zero;
|
|
1→$one;
|
|
2→$two;
|
|
3→$three;
|
|
4→$four;
|
|
5→$five;
|
|
6→$six;
|
|
7→$seven;
|
|
8→$eight;
|
|
9→$nine;
|
|
# Last rule: any apostrophe that was not consumed by an earlier rule is deleted.
''→;
|
|
# The trailing normalization step is disabled (commented out), as is the
# ":: NFD;" step at the top of the rule list.
#:: NFC (NFD) ;
|
|
</tRule>
|
|
</transform>
|
|
</transforms>
|
|
</supplementalData>
|