<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE supplementalData SYSTEM "../../common/dtd/ldmlSupplemental.dtd">
<!--
Copyright © 1991-2015 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<supplementalData>
<version number="$Revision$"/>
<transforms>
<transform source="sat_Olck" target="sat_FONIPA" direction="forward" alias="sat-fonipa-t-sat-olck">
<tRule><![CDATA[
# Santali (Ol Chiki) → Santali (International Phonetic Alphabet)


# Output
# ------
# m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
# p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ ɡ ʔ
# s sː h
# d͡ʒ
# ɽ r
# l lː
# w wː w̃ w̃ː
#
# i iː ĩ ĩː u uː ũ ũː
# e eː ẽ ẽː ə əː ə̃ ə̃ː o oː õ õː
# ɛ ɛː ɛ̃ ɛ̃ː ɔ ɔː ɔ̃ ɔ̃ː
# a aː ã ãː


# References
# ----------
# [1] Michael Everson: Final proposal to encode the Ol Chiki script
# in the UCS. ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R,
# September 21, 2005. http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf
#
# [2] George L. Campbell: Compendium of the World's Languages.
# Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3. Taylor & Francis, 2000.
# Pages 1454 to 1458.


# Notes
# -----
# According to [1] (page 3), ᱽ (U+1C7D AHAD) can only follow the four
# ejective consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become
# ᱵᱽ /b/, ᱡᱽ /d͡ʒ/, ᱫᱽ /d/, and ᱜᱽ /ɡ/. In online texts, however,
# we have occasionally encountered ᱽ following non-ejective plosives,
# for example after ᱯ /p/. These may be typos. Our rules
# try to be resilient and handle ᱯᱽ as /b/.
#
# According to [1] (page 2), U+1C7C PHAARKAA follows the four “glottal”
# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually
# ejective, not glottal). In online texts, however, we have frequently
# encountered ᱼ following non-ejective consonants.

# Matches any letter (General_Category L) or combining mark (M). The ejective
# rules for ᱜ, ᱡ, ᱫ and ᱵ use it as a preceding context to detect that the
# consonant is not the first character of a word.
$inword = [[:L:][:M:]];

# Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG.
# Fold the two component marks ᱹ and ᱸ, in either order, into the single
# mark ᱺ. The ::null() ends this pass, so every later pass only ever sees
# the composed form.
ᱹᱸ → ᱺ ;
ᱸᱹ → ᱺ ;
::null();

# To simplify the rules below, enforce a uniform ordering of marks:
# the vowel-modifying marks ᱹ, ᱸ and ᱺ always come before ᱻ and ᱼ.
# The letter rules further down only list sequences in this order.
ᱻᱹ → ᱹᱻ ;
ᱻᱸ → ᱸᱻ ;
ᱻᱺ → ᱺᱻ ;
ᱼᱹ → ᱹᱼ ;
ᱼᱸ → ᱸᱼ ;
ᱼᱺ → ᱺᱼ ;
::null();

# Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating
# long phonemes, presumably because the graphemes look similar in some fonts.
# Since phaarkaa is used for voicing ejectives and plosives (which cannot
# be lengthened), we rewrite phaarkaa to relaa.
#
# Only a ᱼ that follows one of the letters in the set below, optionally with
# ᱹ, ᱸ or ᱺ in between, is rewritten; the braces mark ᱼ as the only text
# that is replaced, everything before it is context.
[ᱚᱟᱤᱩᱮᱳᱶᱢᱝᱞᱱ] [ᱹᱸᱺ]* {ᱼ} → ᱻ ;
::null();

# ᱚ: vowel /ɔ/. ᱸ and ᱺ both nasalize it, ᱻ lengthens it, and ᱹ leaves the
# quality unchanged. Longer sequences are listed first so that they win over
# the bare-letter fallback.
ᱚᱹᱻ → ɔː ;
ᱚᱹ → ɔ ;
ᱚᱸᱻ → ɔ̃ː ;
ᱚᱸ → ɔ̃ ;
ᱚᱺᱻ → ɔ̃ː ;
ᱚᱺ → ɔ̃ ;
ᱚᱻ → ɔː ;
ᱚ → ɔ ;

# ᱛ: plain (non-ejective) /t/. Like the other plain plosives ᱠ, ᱪ, ᱯ and ᱴ,
# it is voiced only when explicitly followed by ᱽ. It must not be voiced
# merely because it occurs inside a word: that behaviour is reserved for the
# four ejectives ᱵ, ᱡ, ᱫ and ᱜ, so there is deliberately no $inword rule here.
ᱛᱼ → t ;
ᱛᱷ → tʰ ;
ᱛᱽ → d ;
ᱛ → t ;

# Within each letter's group the longest sequence is listed first, so a
# letter followed by a modifier is consumed before the bare-letter fallback.

# ᱜ: ejective /kʼ/; voiced to /ɡ/ before ᱽ or when preceded by a letter or
# mark ($inword).
ᱜᱼ → kʼ ;
ᱜᱷ → kʰ ;
ᱜᱽ → ɡ ;
$inword {ᱜ} → ɡ ;
ᱜ → kʼ ;

# ᱝ: /ŋ/, lengthened by ᱻ.
ᱝᱻ → ŋː ;
ᱝ → ŋ ;

# ᱞ: /l/, lengthened by ᱻ.
ᱞᱻ → lː ;
ᱞ → l ;

# ᱟ: vowel /a/. ᱹ changes it to /ə/, ᱸ nasalizes it, ᱺ does both, and ᱻ
# lengthens it.
ᱟᱹᱻ → əː ;
ᱟᱹ → ə ;
ᱟᱸᱻ → ãː ;
ᱟᱸ → ã ;
ᱟᱺᱻ → ə̃ː ;
ᱟᱺ → ə̃ ;
ᱟᱻ → aː ;
ᱟ → a ;

# ᱠ: plain /k/; voiced to /ɡ/ only before ᱽ.
ᱠᱼ → k ;
ᱠᱷ → kʰ ;
ᱠᱽ → ɡ ;
ᱠ → k ;

# ᱡ: ejective /cʼ/; voiced to /d͡ʒ/ before ᱽ or when preceded by a letter or
# mark ($inword).
ᱡᱼ → cʼ ;
ᱡᱷ → cʰ ;
ᱡᱽ → d͡ʒ ;
$inword {ᱡ} → d͡ʒ ;
ᱡ → cʼ ;

# ᱢ: /m/, lengthened by ᱻ.
ᱢᱻ → mː ;
ᱢ → m ;

# Within each letter's group the longest sequence is listed first, so a
# letter followed by a modifier is consumed before the bare-letter fallback.

# According to [1], ᱣ is sometimes /v/ and sometimes /w/.
# TODO: Find out if there is a rule for this.
ᱣᱸ → w̃ ;
ᱣ → w ;

# ᱤ: vowel /i/. ᱸ and ᱺ both nasalize it, ᱻ lengthens it, and ᱹ leaves the
# quality unchanged.
ᱤᱹᱻ → iː ;
ᱤᱹ → i ;
ᱤᱸᱻ → ĩː ;
ᱤᱸ → ĩ ;
ᱤᱺᱻ → ĩː ;
ᱤᱺ → ĩ ;
ᱤᱻ → iː ;
ᱤ → i ;

# ᱥ: /s/, lengthened by ᱻ.
ᱥᱻ → sː ;
ᱥ → s ;

# According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/.
# TODO: Find out if there is a rule for this.
ᱦ → h ;

# ᱧ: /ɲ/, lengthened by ᱻ.
ᱧᱻ → ɲː ;
ᱧ → ɲ ;

# ᱨ: /r/. A following ᱻ is consumed but produces no length mark.
ᱨᱻ → r ;
ᱨ → r ;

# ᱩ: vowel /u/. ᱸ and ᱺ both nasalize it, ᱻ lengthens it, and ᱹ leaves the
# quality unchanged.
ᱩᱹᱻ → uː ;
ᱩᱹ → u ;
ᱩᱸᱻ → ũː ;
ᱩᱸ → ũ ;
ᱩᱺᱻ → ũː ;
ᱩᱺ → ũ ;
ᱩᱻ → uː ;
ᱩ → u ;

# ᱪ: plain /c/; voiced to /d͡ʒ/ only before ᱽ.
ᱪᱼ → c ;
ᱪᱷ → cʰ ;
ᱪᱽ → d͡ʒ ;
ᱪ → c ;

# ᱫ: ejective /tʼ/; voiced to /d/ before ᱽ or when preceded by a letter or
# mark ($inword). With ᱷ it yields the voiced aspirate /dʰ/, parallel to
# ᱵᱷ → bʰ; the plain aspirate /tʰ/ is already produced by ᱛᱷ, and /dʰ/ is
# listed in the documented output inventory of this transform.
# NOTE(review): confirm /dʰ/ against the CLDR test data for this transform.
ᱫᱼ → tʼ ;
ᱫᱷ → dʰ ;
ᱫᱽ → d ;
$inword {ᱫ} → d ;
ᱫ → tʼ ;

# Within each letter's group the longest sequence is listed first, so a
# letter followed by a modifier is consumed before the bare-letter fallback.

# ᱬ: /ɳ/, lengthened by ᱻ.
ᱬᱻ → ɳː ;
ᱬ → ɳ ;

# TODO: ᱵᱷᱭᱨᱚᱵ → bʰhrɔb seems unlikely; would be good to verify.
ᱭ → h ;

# ᱮ: vowel /e/. ᱹ changes it to /ɛ/, ᱸ nasalizes it, ᱺ does both, and ᱻ
# lengthens it.
ᱮᱹᱻ → ɛː ;
ᱮᱹ → ɛ ;
ᱮᱺᱻ → ɛ̃ː ;
ᱮᱺ → ɛ̃ ;
ᱮᱸᱻ → ẽː ;
ᱮᱸ → ẽ ;
ᱮᱻ → eː ;
ᱮ → e ;

# ᱯ: plain /p/; voiced to /b/ only before ᱽ.
ᱯᱼ → p ;
ᱯᱷ → pʰ ;
ᱯᱽ → b ;
ᱯ → p ;

# ᱰ: /ɖ/, aspirated by ᱷ.
ᱰᱷ → ɖʰ ;
ᱰ → ɖ ;

# ᱱ: /n/, lengthened by ᱻ.
ᱱᱻ → nː ;
ᱱ → n ;

# ᱲ: /ɽ/. A following ᱻ is consumed but produces no length mark.
ᱲᱻ → ɽ ;
ᱲ → ɽ ;

# ᱳ: vowel /o/, nasalized by ᱸ and lengthened by ᱻ.
ᱳᱸᱻ → õː ;
ᱳᱸ → õ ;
ᱳᱻ → oː ;
ᱳ → o ;

# ᱴ: plain /ʈ/; voiced to /ɖ/ only before ᱽ.
ᱴᱼ → ʈ ;
ᱴᱷ → ʈʰ ;
ᱴᱽ → ɖ ;
ᱴ → ʈ ;

# ᱵ: ejective /pʼ/; voiced to /b/ before ᱽ or when preceded by a letter or
# mark ($inword). With ᱷ it yields the voiced aspirate /bʰ/.
ᱵᱼ → pʼ ;
ᱵᱷ → bʰ ;
ᱵᱽ → b ;
$inword {ᱵ} → b ;
ᱵ → pʼ ;

# ᱶ: nasalized /w̃/, lengthened by ᱻ.
ᱶᱻ → w̃ː ;
ᱶ → w̃ ;

]]></tRule>
</transform>
</transforms>
</supplementalData>