|
|
# © 2016 and later: Unicode, Inc. and others.
|
|
|
# License & terms of use: http://www.unicode.org/copyright.html
|
|
|
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
|
|
|
#
|
|
|
# File: Hira_Kana.txt
|
|
|
# Generated from CLDR
|
|
|
#
|
|
|
|
|
|
# Global filter for the forward (Hiragana-to-Katakana) direction: only
# characters in this set are offered to the rules below.
# note: a global filter is more efficient, but MUST include all source chars
# The set subtracts \u309B and \u309C from the listed ranges and properties.
# NOTE(review): 。 and ー each appear twice in the set as written; the group
# after ァ-ー may originally have been a different (e.g. halfwidth) range that
# was altered by text normalization -- confirm against the CLDR source.
|
|
|
:: [[\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]-[\u309B \u309C]];
|
|
|
# Forward direction: normalize the input to NFKC before the rules run.
# The parenthesized NFC is the step used when running in reverse.
:: NFKC (NFC);
|
|
|
# Hiragana-Katakana
|
|
|
# This is largely a one-to-one mapping, but it has a
|
|
|
# few kinks:
|
|
|
# 1. The Katakana va/vi/ve/vo (30F7-30FA) have no
|
|
|
# Hiragana equivalents. We use Hiragana wa/wi/we/wo
|
|
|
# (308F-3092) with a voicing mark (3099), which is
|
|
|
# semantically equivalent. However, this is a non-
|
|
|
# roundtripping transformation.
|
|
|
# 2. The Katakana small ka/ke (30F5,30F6) have no
|
|
|
# Hiragana equivalents. We convert them to normal
|
|
|
# Hiragana ka/ke (304B,3051). This is a one-way
|
|
|
# information-losing transformation and precludes
|
|
|
# round-tripping of 30F5 and 30F6.
|
|
|
# 3. The combining marks 3099-309C are in the Hiragana
|
|
|
# block, but they apply to Katakana as well, so we
|
|
|
# leave them untouched.
|
|
|
# 4. The Katakana prolonged sound mark 30FC doubles the
|
|
|
# preceding vowel. This is a one-way information-
|
|
|
# losing transformation from Katakana to Hiragana.
|
|
|
# 5. The Katakana middle dot separates words in foreign
|
|
|
# expressions; we leave this unmodified.
|
|
|
# The above points preclude successful round-trip
|
|
|
# transformations of arbitrary input text. However,
|
|
|
# they provide naturalistic results that should conform
|
|
|
# to user expectations.
|
|
|
# Combining equivalents va/vi/ve/vo
|
|
|
# Each rule pairs Hiragana wa/wi/we/wo followed by the combining voicing
# mark U+3099 with the single Katakana va/vi/ve/vo letter.
\u308F\u3099 ↔ \u30F7;  # わ + U+3099 ↔ ヷ (va)
|
|
|
\u3090\u3099 ↔ \u30F8;  # ゐ + U+3099 ↔ ヸ (vi)
|
|
|
\u3091\u3099 ↔ \u30F9;  # ゑ + U+3099 ↔ ヹ (ve)
|
|
|
\u3092\u3099 ↔ \u30FA;  # を + U+3099 ↔ ヺ (vo)
|
|
|
# One-to-one mappings, main block
|
|
|
# 3041:3094 ↔ 30A1:30F4
|
|
|
# 309D,E ↔ 30FD,E
|
|
|
# Two-way single-character mappings, written as code point escapes.
# Every Katakana code point is the Hiragana code point plus 0x60; the
# trailing comment on each rule shows the literal characters.
\u3041 ↔ \u30A1;  # ぁ ↔ ァ
|
|
|
\u3042 ↔ \u30A2;  # あ ↔ ア
|
|
|
\u3043 ↔ \u30A3;  # ぃ ↔ ィ
|
|
|
\u3044 ↔ \u30A4;  # い ↔ イ
|
|
|
\u3045 ↔ \u30A5;  # ぅ ↔ ゥ
|
|
|
\u3046 ↔ \u30A6;  # う ↔ ウ
|
|
|
\u3047 ↔ \u30A7;  # ぇ ↔ ェ
|
|
|
\u3048 ↔ \u30A8;  # え ↔ エ
|
|
|
\u3049 ↔ \u30A9;  # ぉ ↔ ォ
|
|
|
\u304A ↔ \u30AA;  # お ↔ オ
|
|
|
\u304B ↔ \u30AB;  # か ↔ カ
|
|
|
\u304C ↔ \u30AC;  # が ↔ ガ
|
|
|
\u304D ↔ \u30AD;  # き ↔ キ
|
|
|
\u304E ↔ \u30AE;  # ぎ ↔ ギ
|
|
|
\u304F ↔ \u30AF;  # く ↔ ク
|
|
|
\u3050 ↔ \u30B0;  # ぐ ↔ グ
|
|
|
\u3051 ↔ \u30B1;  # け ↔ ケ
|
|
|
\u3052 ↔ \u30B2;  # げ ↔ ゲ
|
|
|
\u3053 ↔ \u30B3;  # こ ↔ コ
|
|
|
\u3054 ↔ \u30B4;  # ご ↔ ゴ
|
|
|
\u3055 ↔ \u30B5;  # さ ↔ サ
|
|
|
\u3056 ↔ \u30B6;  # ざ ↔ ザ
|
|
|
\u3057 ↔ \u30B7;  # し ↔ シ
|
|
|
\u3058 ↔ \u30B8;  # じ ↔ ジ
|
|
|
\u3059 ↔ \u30B9;  # す ↔ ス
|
|
|
\u305A ↔ \u30BA;  # ず ↔ ズ
|
|
|
\u305B ↔ \u30BB;  # せ ↔ セ
|
|
|
\u305C ↔ \u30BC;  # ぜ ↔ ゼ
|
|
|
\u305D ↔ \u30BD;  # そ ↔ ソ
|
|
|
\u305E ↔ \u30BE;  # ぞ ↔ ゾ
|
|
|
\u305F ↔ \u30BF;  # た ↔ タ
|
|
|
\u3060 ↔ \u30C0;  # だ ↔ ダ
|
|
|
\u3061 ↔ \u30C1;  # ち ↔ チ
|
|
|
\u3062 ↔ \u30C2;  # ぢ ↔ ヂ
|
|
|
\u3063 ↔ \u30C3;  # っ ↔ ッ
|
|
|
\u3064 ↔ \u30C4;  # つ ↔ ツ
|
|
|
\u3065 ↔ \u30C5;  # づ ↔ ヅ
|
|
|
\u3066 ↔ \u30C6;  # て ↔ テ
|
|
|
\u3067 ↔ \u30C7;  # で ↔ デ
|
|
|
\u3068 ↔ \u30C8;  # と ↔ ト
|
|
|
\u3069 ↔ \u30C9;  # ど ↔ ド
|
|
|
\u306A ↔ \u30CA;  # な ↔ ナ
|
|
|
\u306B ↔ \u30CB;  # に ↔ ニ
|
|
|
\u306C ↔ \u30CC;  # ぬ ↔ ヌ
|
|
|
\u306D ↔ \u30CD;  # ね ↔ ネ
|
|
|
\u306E ↔ \u30CE;  # の ↔ ノ
|
|
|
\u306F ↔ \u30CF;  # は ↔ ハ
|
|
|
\u3070 ↔ \u30D0;  # ば ↔ バ
|
|
|
\u3071 ↔ \u30D1;  # ぱ ↔ パ
|
|
|
\u3072 ↔ \u30D2;  # ひ ↔ ヒ
|
|
|
\u3073 ↔ \u30D3;  # び ↔ ビ
|
|
|
\u3074 ↔ \u30D4;  # ぴ ↔ ピ
|
|
|
\u3075 ↔ \u30D5;  # ふ ↔ フ
|
|
|
\u3076 ↔ \u30D6;  # ぶ ↔ ブ
|
|
|
\u3077 ↔ \u30D7;  # ぷ ↔ プ
|
|
|
\u3078 ↔ \u30D8;  # へ ↔ ヘ
|
|
|
\u3079 ↔ \u30D9;  # べ ↔ ベ
|
|
|
\u307A ↔ \u30DA;  # ぺ ↔ ペ
|
|
|
\u307B ↔ \u30DB;  # ほ ↔ ホ
|
|
|
\u307C ↔ \u30DC;  # ぼ ↔ ボ
|
|
|
\u307D ↔ \u30DD;  # ぽ ↔ ポ
|
|
|
\u307E ↔ \u30DE;  # ま ↔ マ
|
|
|
\u307F ↔ \u30DF;  # み ↔ ミ
|
|
|
\u3080 ↔ \u30E0;  # む ↔ ム
|
|
|
\u3081 ↔ \u30E1;  # め ↔ メ
|
|
|
\u3082 ↔ \u30E2;  # も ↔ モ
|
|
|
\u3083 ↔ \u30E3;  # ゃ ↔ ャ
|
|
|
\u3084 ↔ \u30E4;  # や ↔ ヤ
|
|
|
\u3085 ↔ \u30E5;  # ゅ ↔ ュ
|
|
|
\u3086 ↔ \u30E6;  # ゆ ↔ ユ
|
|
|
\u3087 ↔ \u30E7;  # ょ ↔ ョ
|
|
|
\u3088 ↔ \u30E8;  # よ ↔ ヨ
|
|
|
\u3089 ↔ \u30E9;  # ら ↔ ラ
|
|
|
\u308A ↔ \u30EA;  # り ↔ リ
|
|
|
\u308B ↔ \u30EB;  # る ↔ ル
|
|
|
\u308C ↔ \u30EC;  # れ ↔ レ
|
|
|
\u308D ↔ \u30ED;  # ろ ↔ ロ
|
|
|
\u308E ↔ \u30EE;  # ゎ ↔ ヮ
|
|
|
\u308F ↔ \u30EF;  # わ ↔ ワ
|
|
|
\u3090 ↔ \u30F0;  # ゐ ↔ ヰ
|
|
|
\u3091 ↔ \u30F1;  # ゑ ↔ ヱ
|
|
|
\u3092 ↔ \u30F2;  # を ↔ ヲ
|
|
|
\u3093 ↔ \u30F3;  # ん ↔ ン
|
|
|
\u3094 ↔ \u30F4;  # ゔ ↔ ヴ
|
|
|
\u309D ↔ \u30FD;  # ゝ ↔ ヽ (iteration mark)
|
|
|
\u309E ↔ \u30FE;  # ゞ ↔ ヾ (voiced iteration mark)
|
|
|
# One-way Katakana-Hiragana xform of small K ka/ke to
|
|
|
# normal H ka/ke.
|
|
|
# Reverse-only rules (←): the small Katakana letters become the normal-size
# Hiragana letters; there is no forward counterpart.
\u304B ← \u30F5;  # か ← ヵ (small ka)
|
|
|
\u3051 ← \u30F6;  # け ← ヶ (small ke)
|
|
|
# Katakana followed by a prolonged sound mark 30FC has
|
|
|
# its final vowel doubled. This is a Katakana-Hiragana
|
|
|
# one-way information-losing transformation. We
|
|
|
# include the small kana (e.g., small A, by then Hiragana 3041) and
|
|
|
# do not distinguish them from their large
|
|
|
# counterparts. It doesn't make sense to double a
|
|
|
# small counterpart vowel as a small Hiragana vowel, so
|
|
|
# we don't do so. In natural text this should never
|
|
|
# occur anyway. If a 30FC is seen without a preceding
|
|
|
# vowel sound (e.g., after n 30F3) we do not change it.
|
|
|
### $long = ー;
|
|
|
# The following categories are Hiragana, not Katakana
|
|
|
# as might be expected, since by the time we get to the
|
|
|
# 30FC, the preceding character will have already been
|
|
|
# transformed to Hiragana.
|
|
|
# {The following was mechanically generated from the
|
|
|
# Unicode 3.0 data:}
|
|
|
# $xa: Hiragana letters listed for the vowel "a". The set is used as the
# preceding context of the rule that rewrites a prolonged sound mark to あ.
$xa = [ \
|
|
|
ぁ あ か が さ ざ \
|
|
|
た だ な は ば ぱ \
|
|
|
ま ゃ や ら ゎ わ \
|
|
|
];
|
|
|
# $xi: Hiragana letters listed for the vowel "i". The set is used as the
# preceding context of the rule that rewrites a prolonged sound mark to い.
$xi = [ \
|
|
|
ぃ い き ぎ し じ \
|
|
|
ち ぢ に ひ び ぴ \
|
|
|
み り ゐ \
|
|
|
];
|
|
|
# $xu: Hiragana letters listed for the vowel "u". The set is used as the
# preceding context of the rule that rewrites a prolonged sound mark to う.
# NOTE(review): the set includes small っ alongside つ -- confirm that a
# prolonged sound mark after っ is meant to become う.
$xu = [ \
|
|
|
ぅ う く ぐ す ず \
|
|
|
っ つ づ ぬ ふ ぶ \
|
|
|
ぷ む ゅ ゆ る ゔ \
|
|
|
];
|
|
|
# $xe: Hiragana letters listed for the vowel "e". The set is used as the
# preceding context of the rule that rewrites a prolonged sound mark to え.
$xe = [ \
|
|
|
ぇ え け げ せ ぜ \
|
|
|
て で ね へ べ ぺ \
|
|
|
め れ ゑ \
|
|
|
];
|
|
|
# $xo: Hiragana letters listed for the vowel "o". The set is used as the
# preceding context of the rule that rewrites a prolonged sound mark to お.
$xo = [ \
|
|
|
ぉ お こ ご そ ぞ \
|
|
|
と ど の ほ ぼ ぽ \
|
|
|
も ょ よ ろ を \
|
|
|
];
|
|
|
# Reverse-only rules: a prolonged sound mark (U+30FC, inside the braces)
# that follows a Hiragana letter from the given vowel set is replaced by
# the matching plain Hiragana vowel. The set itself is context only.
\u3042 ← $xa { \u30FC };  # あ ← (a-vowel kana) ー
|
|
|
\u3044 ← $xi { \u30FC };  # い ← (i-vowel kana) ー
|
|
|
\u3046 ← $xu { \u30FC };  # う ← (u-vowel kana) ー
|
|
|
\u3048 ← $xe { \u30FC };  # え ← (e-vowel kana) ー
|
|
|
\u304A ← $xo { \u30FC };  # お ← (o-vowel kana) ー
|
|
|
# Forward direction: finish by normalizing the output to NFC.
# The parenthesized NFKC is the step used when running in reverse.
:: NFC (NFKC) ;
|
|
|
# Global filter for the reverse direction (hence the enclosing parentheses).
# It is written with the same set text as the filter at the top of the file.
# note: a global filter is more efficient, but MUST include all source chars!!
# NOTE(review): 。 and ー each appear twice in the set as written; the group
# after ァ-ー may originally have been a different (e.g. halfwidth) range that
# was altered by text normalization -- confirm against the CLDR source.
|
|
|
:: ([[\u0000-\u007E 、。 \u3099-゜ ァ-ー 。-゚ー[:Hiragana:] [:Katakana:] [:nonspacing mark:]]-[\u309B \u309C]]);
|
|
|
# eof
|
|
|
|