|
|
|
|
<?xml version="1.0" encoding="UTF-8" ?>
|
|
|
|
|
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
|
|
|
|
|
<!--
|
|
|
|
|
Copyright © 1991-2016 Unicode, Inc.
|
|
|
|
|
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
|
|
|
|
|
For terms of use, see http://www.unicode.org/copyright.html
|
|
|
|
|
-->
|
|
|
|
|
<supplementalData>
|
|
|
|
|
<version number="$Revision$"/>
|
|
|
|
|
<transforms>
|
|
|
|
|
<transform source="und_FONIPA" target="ar" direction="forward" alias="ar-t-und-fonipa">
|
|
|
|
|
<tRule><![CDATA[
|
|
|
|
|
# Vowels
|
|
|
|
|
# ------
|
|
|
|
|
# In these rules, we produce ي و ا both for short and for long vowels.
|
|
|
|
|
# This would be wrong for writing Arabic, but when transliterating
|
|
|
|
|
# foreign words and names, it is strongly preferred to vowel marks.
|
|
|
|
|
# However, we omit short schwa [ə] and a few other, schwa-like vowels.
|
|
|
|
|
|
|
|
|
|
$IVowel = [i ɪ e {e̞}];
|
|
|
|
|
$UVowel = [y {ɨ} {ʉ} ɯ u ʏ {ɪ̈} {ʊ̈} {ɯ̽} {ʊ} ø ɤ o {ø̞} {ɤ̞} {o̞} ɞ ɔ w {w̥} ʍ ʷ];
|
|
|
|
|
$AVowel = [ɛ œ ɜ ʌ æ ɐ a ɶ {ä} {ɒ̈} ɑ ɒ];
|
|
|
|
|
$SchwaVowel = [ɘ ɵ ə {ɵ̞}];
|
|
|
|
|
$Vowel = [$IVowel $UVowel $AVowel $SchwaVowel];
|
|
|
|
|
$Click = [ʘ ɋ ǀ ʇ ǃ ʗ ǂ ʄ ǁ ʖ];
|
|
|
|
|
# A boundary is any character that is not a letter, a mark or a number.
# NOTE(review): presumably this is also relied on to match at the very
# start and end of the text; confirm against ICU's handling of negated sets.
$Boundary = [^[:L:][:M:][:N:]];
|
|
|
|
|
|
|
|
|
|
::NFD;
|
|
|
|
|
[ʰ ʱ ʼ ̃ ̰ ̋ ́ ̄ ̀ ̏ ̌ ̂ ˥ ˦ ˧ ˨ ˩ ꜜ ꜛ ↗ ↘ ͡ ͜ ̯] → ;
|
|
|
|
|
ʲ → j;
|
|
|
|
|
ᵐ → m;
|
|
|
|
|
ⁿ → n;
|
|
|
|
|
ᵑ → ŋ;
|
|
|
|
|
::NFC;
|
|
|
|
|
|
|
|
|
|
# TODO: Diphthongs probably need more work.
|
|
|
|
|
|
|
|
|
|
# Romanian [sekujesk] → [sekujask], for emitting سيكوياسك not سيكويسك
|
|
|
|
|
$UVowel [j $IVowel] [e {e̞} $SchwaVowel] → uia;
|
|
|
|
|
|
|
|
|
|
# Kazakh Аягөз [ɑjɑɡy̯ʉz] → [ɑjɑɡiuz]. TODO: Document the intended Arabic output.
|
|
|
|
|
yʉ → iu;
|
|
|
|
|
|
|
|
|
|
::NULL;
|
|
|
|
|
|
|
|
|
|
# Vowels
|
|
|
|
|
$Boundary {ʔ? $IVowel ː} → إِي;
|
|
|
|
|
$Boundary {ʔ? $IVowel} → إِ;
|
|
|
|
|
{$IVowel ʔ} $Boundary → ئ;
|
|
|
|
|
{$IVowel ː ʔ} $Boundary → يء;
|
|
|
|
|
{$IVowel ː ʔ} [$Vowel] → ئ;
|
|
|
|
|
$IVowel ː? → ي;
|
|
|
|
|
|
|
|
|
|
$Boundary {ʔ? $UVowel ː} → أو;
|
|
|
|
|
$Boundary {ʔ? $UVowel} → أ;
|
|
|
|
|
{$UVowel ʔ} $Boundary → ؤ;
|
|
|
|
|
{$UVowel ː ʔ} $Boundary → وء;
|
|
|
|
|
$UVowel ː? → و;
|
|
|
|
|
|
|
|
|
|
$Boundary {ʔ? $AVowel ː} → آ;
|
|
|
|
|
$Boundary {ʔ? $AVowel} → أ;
|
|
|
|
|
{$AVowel ʔ} $Boundary → أ;
|
|
|
|
|
{$AVowel ː ʔ} $Boundary → اء;
|
|
|
|
|
$AVowel ː? ʔ $AVowel ː? → اءا;
|
|
|
|
|
$AVowel ː? → ا;
|
|
|
|
|
|
|
|
|
|
$Boundary {ʔ? $SchwaVowel ː} → إِي;
|
|
|
|
|
$Boundary {ʔ? $SchwaVowel} → أ;
|
|
|
|
|
$SchwaVowel ː → ي;
|
|
|
|
|
$SchwaVowel → ;
|
|
|
|
|
|
|
|
|
|
# TODO: Handle glottal stop.
|
|
|
|
|
# The lowered glottal stop [ʔ̞] is written like [h]. It has to be matched
# before the bare ʔ is dropped: rules are tried in order, so otherwise the
# rule below would consume the ʔ and leave a stray U+031E in the output.
{ʔ̞} → ه;
# Any other glottal stop that was not handled by the vowel rules is dropped.
ʔ → ;
|
|
|
|
|
|
|
|
|
|
# Shadda for long (geminated) consonants
|
|
|
|
|
ː → ّ;
|
|
|
|
|
|
|
|
|
|
# Affricates
|
|
|
|
|
# The first pass strips the tie bars (U+0361, U+035C), so a tied affricate
# reaches this pass as plain tʃ. Match that spelling as well, so that it
# gets the same output (with sukun) as the ligature ʧ.
[{t͡ʃ} {tʃ} ʧ] → تْش;
|
|
|
|
|
|
|
|
|
|
# Clicks
|
|
|
|
|
[ɡ g ɠ k] $Click → كْش;
|
|
|
|
|
$Click → تْش;
|
|
|
|
|
|
|
|
|
|
# Nasal stops
|
|
|
|
|
[{m̥} m ɱ] → م;
|
|
|
|
|
[{n̼̊} {n̼} {n̥} n {ɳ̊} ɳ {ɲ̊} {ɲ̥} ɲ] → ن;
|
|
|
|
|
[{ŋ̊} ŋ {ɴ̥} ɴ] k → نك;
|
|
|
|
|
[{ŋ̊} ŋ {ɴ̥} ɴ] [ɡ g ɠ]? → نْغ;
|
|
|
|
|
|
|
|
|
|
# Non-nasal stops
|
|
|
|
|
[p b {p̪} {b̪} ɓ] → ب;
|
|
|
|
|
[{d̼} d ɗ ᶑ] → د;
|
|
|
|
|
[{t̼} t] → ت;
|
|
|
|
|
[ʈ] → ط;
|
|
|
|
|
[ɖ] → ض;
|
|
|
|
|
c → تْش;
|
|
|
|
|
ɟ → دج;
|
|
|
|
|
k → ك;
|
|
|
|
|
[ɡ g ɠ] → غ;
|
|
|
|
|
[q ɢ ʡ ʛ] → ق;
|
|
|
|
|
|
|
|
|
|
# Sibilant fricatives
|
|
|
|
|
s → س;
|
|
|
|
|
z → ز;
|
|
|
|
|
# Voiceless postalveolar, retroflex and alveolo-palatal sibilants.
# NOTE(review): ʄ is also a member of $Click, and the $Click rules appear
# earlier in this pass, so the ʄ alternative here looks unreachable.
# Confirm which mapping is intended for ʄ.
[ʃ ʂ ɕ ʄ] → ش;
|
|
|
|
|
[ʒ ʐ ʑ] → ج;
|
|
|
|
|
|
|
|
|
|
# Non-sibilant fricatives
|
|
|
|
|
[ɸ f v] → ف;
|
|
|
|
|
β → ب;
|
|
|
|
|
[{θ̼} θ {θ̱}] → ث;
|
|
|
|
|
[{ð̼} ð {ð̠}] → ذ;
|
|
|
|
|
ç → ش;
|
|
|
|
|
ʝ $IVowel? ː? → ي;
|
|
|
|
|
[x χ] → خ;
|
|
|
|
|
[ɣ ʁ] → غ;
|
|
|
|
|
ħ → ح;
|
|
|
|
|
ʕ → ع;
|
|
|
|
|
[h ɦ {ʔ̞}] → ه;
|
|
|
|
|
|
|
|
|
|
# Approximants, trills, flaps
|
|
|
|
|
ʋ → و;
|
|
|
|
|
ʙ → بر;
|
|
|
|
|
{r̝} → رش;
|
|
|
|
|
[{ɹ̥} {ɹ} {ɻ̊} {ɻ} {ɾ̥} ɾ {ɽ̊} ɽ {r̼} {r̥} r] → ر;
|
|
|
|
|
[{ʀ̥} ʀ] → غ;
|
|
|
|
|
ʜ → ح;
|
|
|
|
|
ʢ → ع;
|
|
|
|
|
j $IVowel? ː? → ي;
|
|
|
|
|
|
|
|
|
|
# Laterals
|
|
|
|
|
ɬ → شْل;
|
|
|
|
|
ɮ → جْل;
|
|
|
|
|
{[{ʎ̥} ʎ]} [^ $IVowel j ʝ] → لي;
|
|
|
|
|
[{l̼} {l̥} l {ɭ̊} ɭ {ʎ̥} ʎ] → ل;
|
|
|
|
|
[ʟ {ʟ̠}] → غ;
|
|
|
|
|
|
|
|
|
|
# Independent pass for misc cleanup.
|
|
|
|
|
::NULL;
|
|
|
|
|
|
|
|
|
|
# Strip off syllable markers
|
|
|
|
|
\. → ;
|
|
|
|
|
|
|
|
|
|
# Sequences of three or more ووو look very confusing; we shorten them.
|
|
|
|
|
# Polish Darłowo [darwɔvɔ] → داروو → داروووو
|
|
|
|
|
ووو+ → وو;
|
|
|
|
|
]]></tRule>
|
|
|
|
|
</transform>
|
|
|
|
|
</transforms>
|
|
|
|
|
</supplementalData>
|