|
|
# © 2016 and later: Unicode, Inc. and others.
|
|
|
# License & terms of use: http://www.unicode.org/copyright.html
|
|
|
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
|
|
|
#
|
|
|
# File: Grek_Latn.txt
|
|
|
# Generated from CLDR
|
|
|
#
|
|
|
|
|
|
# Rules are predicated on running NFD first, and NFC afterwards
|
|
|
# :: [\u0000-\u007F \u0370-Ͽ [:Greek:] [:nonspacing mark:]] ;
|
|
|
# MINIMAL FILTER GENERATED FOR: Greek-Latin
|
|
|
:: [;µ·ÄËÏÖÜäëïöüÿ-āĒ-ēĪ-īŌ-ōŪ-ūŸǕ-ǜǞ-ǣǬ-ǭȪ-ȭȰ-ȳ\u0304\u0308\u0313-\u0314\u0342-\u0345ͺ;Ά-ΊΌΎ-ΡΣ-ώϐ-ϗϛϝϟϡϣϥϧϩϫϭϯ-ϵϷ-\u07FBЁЇёїӒ-ӓӚ-ӟӢ-ӧӪ-ӱӴ-ӵӸ-ӹḔ-ḗḠ-ḡḦ-ḧḮ-ḯḸ-ḹṎ-ṓṜ-ṝṺ-ṻẄ-ẅẌ-ẍẗἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼι῁-ῄῆ-ῌ῏-ΐῖ-Ί῟-Ῥῲ-ῴῶ-ῼΩϹ] ;
|
|
|
# Pre-normalization: decompose to NFD before the forward rules run, so the
# rules below can match base letters and combining marks separately.
# The parenthesised NFC is the step used when the transform runs in reverse.
:: NFD (NFC) ;
|
|
|
# TEST CASES
|
|
|
# Ὀλίγοι ἔμφονες πολλῶν ἀφρόνων φοβερώτεροι — Πλάτωνος
|
|
|
# ᾂ ᾒ ᾢ ᾃ ᾓ ᾣ
|
|
|
# ᾳ ῃ ῳ ὃ ὄ
|
|
|
# ὠς ὡς ὢς ὣς
|
|
|
# Ὠς Ὡς Ὢς Ὣς
|
|
|
# ὨΣ ὩΣ ὪΣ ὫΣ
|
|
|
# Ạ, ạ, Ẹ, ẹ, Ọ, ọ
|
|
|
# Useful variables
|
|
|
# Character classes and single-character aliases shared by the rules below.
# $lower: lowercase letters of either script (used by $beforeLower).
$lower = [[:latin:][:greek:] & [:Ll:]];
|
|
|
# $glower: Greek lowercase letters only.
$glower = [[:greek:] & [:Ll:]];
|
|
|
# $upper: uppercase letters of either script.
$upper = [[:latin:][:greek:] & [:Lu:]] ;
|
|
|
# $accent: any combining mark.
$accent = [:M:] ;
|
|
|
# NOTE: restrict to just the Greek & Latin accents that we care about
|
|
|
# TODO: broaden out once iteration is fixed
|
|
|
# $accentMinus: marks in U+0300..U+0345, excluding U+0338.
# Not to be confused with $accent_minus, which is defined in the IOTA section.
$accentMinus = [ [\u0300-\u0345] & [:M:] - [\u0338]] ;
|
|
|
# U+0304 COMBINING MACRON
$macron = \u0304 ;
|
|
|
# U+0308 COMBINING DIAERESIS
$ddot = \u0308 ;
|
|
|
# Either a diaeresis or a macron.
$ddotmac = [$ddot$macron];
|
|
|
# Greek vowels, lowercase / uppercase / both.
$lcgvowel = [αεηιουω] ;
|
|
|
$ucgvowel = [ΑΕΗΙΟΥΩ] ;
|
|
|
$gvowel = [$lcgvowel $ucgvowel] ;
|
|
|
# NOTE(review): no rule visible in this file references $lcgvowelC.
$lcgvowelC = [$lcgvowel $accent] ;
|
|
|
# Latin ("English") vowels; $evowel2 is the subset that can be the second
# element of a vowel pair in the reverse breathing rules.
$evowel = [aeiouyAEIOUY];
|
|
|
$evowel2 = [iuyIUY];
|
|
|
# Vowels of either script.
$vowel = [ $evowel $gvowel] ;
|
|
|
# Greek letters before which gamma is written as n (and nu needs a separator),
# and the Latin letters that play the same role in the reverse direction.
$gammaLike = [ΓΚΞΧγκξχϰ] ;
|
|
|
$egammaLike = [GKXCgkxc] ;
|
|
|
# U+0313 COMBINING COMMA ABOVE (smooth breathing)
$smooth = \u0313 ;
|
|
|
# U+0314 COMBINING REVERSED COMMA ABOVE (rough breathing)
$rough = \u0314 ;
|
|
|
# U+0345 COMBINING GREEK YPOGEGRAMMENI (iota subscript)
$iotasub = \u0345 ;
|
|
|
# $evowel without i/I.
# NOTE(review): no rule visible in this file references $evowel_i.
$evowel_i = [$evowel-[iI]] ;
|
|
|
# $evowel2 without i/I.
$evowel2_i = [uyUY];
|
|
|
# U+0331 COMBINING MACRON BELOW; marks an "exceptional" sigma form.
$underbar = \u0331;
|
|
|
# A letter followed by any run of marks/apostrophes (used as before-context).
$afterLetter = [:L:] [[:M:]\']* ;
|
|
|
# Any run of marks/apostrophes followed by a letter (used as after-context).
$beforeLetter = [[:M:]\']* [:L:] ;
|
|
|
# Any run of marks followed by a lowercase letter; selects titlecase output.
$beforeLower = $accent * $lower ;
|
|
|
# Anything that is neither a letter nor a mark.
$notLetter = [^[:L:][:M:]] ;
|
|
|
# Same code point as $underbar (U+0331); used by the punctuation rules.
$under = \u0331;
|
|
|
# Fix punctuation
|
|
|
# preserve original
|
|
|
# A literal ':' or '?' in the Greek text is tagged with $under so that the
# reverse direction can tell it apart from the ':' and '?' produced by the two
# mapping rules below. These tagging rules must come first.
\: ↔ \: $under ;
|
|
|
\? ↔ \? $under ;
|
|
|
# Greek ';' maps to Latin '?', and the raised dot maps to Latin ':'.
\; ↔ \? ;
|
|
|
· ↔ \: ;
|
|
|
# CIRCUMFLEX: convert greek circumflex to normal one. Could use tilde or inverted breve
|
|
|
# U+0342 (Greek perispomeni) <-> U+0302 (combining circumflex accent).
\u0342 ↔ \u0302 ;
|
|
|
# IOTA: convert iota subscript to iota
|
|
|
# first make previous alpha long!
|
|
|
# Marks other than iota subscript and macron. The macron is excluded so that,
# once the two rules below have inserted one, they cannot match again even
# though the cursor (|) is placed back before the alpha.
$accent_minus = [[$accent]-[$iotasub$macron]];
|
|
|
# Alpha followed (after other accents) by an iota subscript gains a macron.
Α } $accent_minus * $iotasub → | Α $macron ;
|
|
|
α } $accent_minus * $iotasub → | α $macron ;
|
|
|
# now convert to uppercase if after uppercase, otherwise to lowercase
|
|
|
$upper $accent * { $iotasub → I ;
|
|
|
$iotasub → i ;
|
|
|
# Reverse only: i/I after a Latin vowel carrying a macron (plus optional
# accents from $accentMinus) becomes an iota subscript. The captured vowel
# group ($1) is re-emitted after the cursor so later rules convert it.
| $1 $iotasub ← ($evowel $macron $accentMinus *) i ;
|
|
|
| $1 $iotasub ← ($evowel $macron $accentMinus *) I ;
|
|
|
# BREATHING
|
|
|
# Convert rough breathing to h, and move before letters.
|
|
|
# Make A ` x = → H a x
|
|
|
# Titlecase case 1: an uppercase vowel carrying the rough breathing and
# followed by a lowercase letter. Emit H, then rewrite the vowel as lowercase
# after the cursor (|) so the later letter rules transliterate it.
# $1 carries over an optional macron/diaeresis where the rule captures one.
Α ($macron?) $rough } $beforeLower → H | α $1;
|
|
|
Ε $rough } $beforeLower → H | ε;
|
|
|
Η $rough } $beforeLower → H | η ;
|
|
|
Ι ($ddot?) $rough } $beforeLower → H | ι $1;
|
|
|
Ο $rough } $beforeLower → H | ο ;
|
|
|
Υ $rough } $beforeLower → H | υ ;
|
|
|
Ω ($ddot?) $rough } $beforeLower → H | ω $1;
|
|
|
# Make A x ` = → H a x
|
|
|
# Titlecase case 2: the breathing sits on the second letter of the pair
# (uppercase vowel + Greek lowercase letter + rough). The captured lowercase
# letter ($1) is kept after the lowercased first vowel.
Α ($glower $macron?) $rough → H | α $1 ;
|
|
|
Ε ($glower) $rough → H | ε $1 ;
|
|
|
Η ($glower) $rough → H | η $1 ;
|
|
|
Ι ($glower $ddot?) $rough → H | ι $1 ;
|
|
|
Ο ($glower) $rough → H | ο $1 ;
|
|
|
Υ ($glower) $rough → H | υ $1 ;
|
|
|
Ω ($glower $ddot?) $rough → H | ω $1 ;
|
|
|
#Otherwise, make x ` into h x and X ` into H X
|
|
|
# The all-lowercase rule must precede the general one: $gvowel also contains
# the lowercase vowels, so the second rule only decides for runs that include
# an uppercase vowel.
($lcgvowel + $ddotmac? ) $rough → h | $1 ;
|
|
|
($gvowel + $ddotmac? ) $rough → H | $1 ;
|
|
|
# Go backwards with H
|
|
|
# Reverse only: h/H followed by a Latin vowel group becomes a rough breathing
# placed after that group. The group ($1) is re-emitted after the cursor (|)
# so the later letter rules still convert it to Greek.
# Within each case the rules run from the longest vowel pattern to the
# shortest, so the most specific match is tried first.
| $1 $rough ← h ($evowel $macron $ddot? $evowel2_i $macron?) ;
|
|
|
| $1 $rough ← h ($evowel $ddot? $evowel2 $macron?) ;
|
|
|
| $1 $rough ← h ($evowel $macron? $ddot?) ;
|
|
|
# Uppercase H followed by an uppercase vowel: vowel group kept as-is.
| $1 $rough ← H ([AEIOUY] $macron $ddot? $evowel2_i $macron?) ;
|
|
|
| $1 $rough ← H ([AEIOUY] $ddot? $evowel2 $macron?) ;
|
|
|
| $1 $rough ← H ([AEIOUY] $macron? $ddot?) ;
|
|
|
# titlecase, have to fix individually
|
|
|
# in the future, we should add &uppercase() to make this easier
|
|
|
# Titlecase "Ha..." etc.: the H disappears, so the following lowercase vowel
# is rewritten as its uppercase form, one rule per vowel.
# Group 1: vowel + macron + second vowel (u/y).
| A $1 $rough ← H a ($macron $ddot? $evowel2_i $macron?) ;
|
|
|
| E $1 $rough ← H e ($macron $ddot? $evowel2_i $macron?) ;
|
|
|
| I $1 $rough ← H i ($macron $ddot? $evowel2_i $macron?) ;
|
|
|
| O $1 $rough ← H o ($macron $ddot? $evowel2_i $macron?) ;
|
|
|
| U $1 $rough ← H u ($macron $ddot? $evowel2_i $macron?) ;
|
|
|
| Y $1 $rough ← H y ($macron $ddot? $evowel2_i $macron?) ;
|
|
|
# Group 2: vowel + second vowel (i/u/y), no macron on the first vowel.
| A $1 $rough ← H a ($ddot? $evowel2 $macron?) ;
|
|
|
| E $1 $rough ← H e ($ddot? $evowel2 $macron?) ;
|
|
|
| I $1 $rough ← H i ($ddot? $evowel2 $macron?) ;
|
|
|
| O $1 $rough ← H o ($ddot? $evowel2 $macron?) ;
|
|
|
| U $1 $rough ← H u ($ddot? $evowel2 $macron?) ;
|
|
|
| Y $1 $rough ← H y ($ddot? $evowel2 $macron?) ;
|
|
|
# Group 3: single vowel with optional macron/diaeresis.
| A $1 $rough ← H a ($macron? $ddot? ) ;
|
|
|
| E $1 $rough ← H e ($macron? $ddot? ) ;
|
|
|
| I $1 $rough ← H i ($macron? $ddot? ) ;
|
|
|
| O $1 $rough ← H o ($macron? $ddot? ) ;
|
|
|
| U $1 $rough ← H u ($macron? $ddot? ) ;
|
|
|
| Y $1 $rough ← H y ($macron? $ddot? ) ;
|
|
|
# Now do smooth
|
|
|
#delete smooth breathing for Latin
|
|
|
# Forward only: the smooth breathing has no Latin counterpart and is dropped.
$smooth → ;
|
|
|
# insert in Greek
|
|
|
# the assumption is that all Marks are on letters.
|
|
|
# Reverse only: re-insert a smooth breathing on a word-initial r/R or vowel
# group. "Word-initial" means preceded by $notLetter. The after-context keeps
# the rule from firing when a breathing is already present (and, for r/R,
# when an h/H follows).
| $1 $smooth ← $notLetter { ([rR]) } [^hH$smooth$rough] ;
|
|
|
# Two-vowel group first, then the single-vowel fallback. The fallback
# excludes a following $evowel2 so it does not split a pair.
| $1 $smooth ← $notLetter { ($evowel $macron? $evowel2 $macron?) } [^$smooth$rough] ;
|
|
|
| $1 $smooth ← $notLetter { ($evowel $macron?) } [^$evowel2$smooth$rough] ;
|
|
|
# TODO: preserve smooth/rough breathing if not
|
|
|
# on initial vowel sequence
|
|
|
# need to have these up here so the rules don't mask
|
|
|
# remove now superfluous macron when returning
|
|
|
# These multi-character Latin sequences must be listed before the
# single-letter rules in the NORMAL section. Otherwise "a", "e", "o", "p"
# would match first and mask them.
# Reverse only: a/A + macron collapses back to plain alpha.
Α ← A $macron ;
|
|
|
α ← a $macron ;
|
|
|
# Eta is e + macron.
η ↔ e $macron ;
|
|
|
Η ↔ E $macron ;
|
|
|
φ ↔ ph ;
|
|
|
# Uppercase psi/phi: titlecase digraph before a lowercase letter, otherwise
# an all-caps digraph.
Ψ } $beforeLower ↔ Ps ;
|
|
|
Ψ ↔ PS ;
|
|
|
Φ } $beforeLower ↔ Ph ;
|
|
|
Φ ↔ PH ;
|
|
|
ψ ↔ ps ;
|
|
|
# Omega is o + macron.
ω ↔ o $macron ;
|
|
|
Ω ↔ O $macron;
|
|
|
# NORMAL
|
|
|
# One-to-one letter mappings, alpha through rho. Context-dependent variants
# are listed before the plain mapping for the same letter so that they are
# tried first.
α ↔ a ;
|
|
|
Α ↔ A ;
|
|
|
β ↔ b ;
|
|
|
Β ↔ B ;
|
|
|
# Gamma before a $gammaLike letter is written n; in reverse, n before an
# $egammaLike letter becomes gamma.
γ } $gammaLike ↔ n } $egammaLike ;
|
|
|
γ ↔ g ;
|
|
|
Γ } $gammaLike ↔ N } $egammaLike ;
|
|
|
Γ ↔ G ;
|
|
|
δ ↔ d ;
|
|
|
Δ ↔ D ;
|
|
|
ε ↔ e ;
|
|
|
Ε ↔ E ;
|
|
|
ζ ↔ z ;
|
|
|
Ζ ↔ Z ;
|
|
|
θ ↔ th ;
|
|
|
# Uppercase theta: "Th" before a lowercase letter, otherwise "TH".
Θ } $beforeLower ↔ Th ;
|
|
|
Θ ↔ TH ;
|
|
|
ι ↔ i ;
|
|
|
Ι ↔ I ;
|
|
|
κ ↔ k ;
|
|
|
Κ ↔ K ;
|
|
|
λ ↔ l ;
|
|
|
Λ ↔ L ;
|
|
|
μ ↔ m ;
|
|
|
Μ ↔ M ;
|
|
|
# Nu before a $gammaLike letter gets an apostrophe separator so the result is
# not confused with the gamma-as-n rule above when converting back.
# NOTE(review): the lowercase rule is forward-only (→) while the uppercase
# one is bidirectional (↔); confirm the asymmetry is intentional.
ν } $gammaLike → n\' ;
|
|
|
ν ↔ n ;
|
|
|
Ν } $gammaLike ↔ N\' ;
|
|
|
Ν ↔ N ;
|
|
|
ξ ↔ x ;
|
|
|
Ξ ↔ X ;
|
|
|
ο ↔ o ;
|
|
|
Ο ↔ O ;
|
|
|
π ↔ p ;
|
|
|
Π ↔ P ;
|
|
|
# Rho with rough breathing is rh / Rh / RH; these rules precede plain rho.
ρ $rough ↔ rh;
|
|
|
Ρ $rough } $beforeLower ↔ Rh ;
|
|
|
Ρ $rough ↔ RH ;
|
|
|
ρ ↔ r ;
|
|
|
Ρ ↔ R ;
|
|
|
# insert separator before things that turn into s
|
|
|
# Forward only: after an already-transliterated P/p, insert an apostrophe
# before any sigma-like letter so that "p" + "s" is not read back as psi.
[Pp] { } [ςσΣϷϸϺϻ] → \' ;
|
|
|
# special S variants
|
|
|
# Sho maps to s + U+030C; san maps to s + U+0302.
Ϸ ↔ S\u030C ; # Ϸ GREEK CAPITAL LETTER SHO Uppercase_Letter Grek - L
|
|
|
ϸ ↔ s\u030C ; #ϸ GREEK SMALL LETTER SHO Lowercase_Letter Grek - L
|
|
|
Ϻ ↔ S\u0302 ; # Ϻ GREEK CAPITAL LETTER SAN Uppercase_Letter Grek - L
|
|
|
ϻ ↔ s\u0302 ; # ϻ GREEK SMALL LETTER SAN Lowercase_Letter Grek - L
|
|
|
# underbar means exception
|
|
|
# before a letter, initial
|
|
|
# The expected form gives plain "s"; the unexpected form is tagged with
# $underbar so it round-trips. Before a letter, σ is expected and ς is the
# exception.
ς } $beforeLetter ↔ s $underbar } $beforeLetter;
|
|
|
σ } $beforeLetter ↔ s } $beforeLetter;
|
|
|
# otherwise, after a letter = final
|
|
|
# In final position the roles swap: ς is expected and σ is the exception.
$afterLetter { σ ↔ $afterLetter { s $underbar;
|
|
|
$afterLetter { ς ↔ $afterLetter { s ;
|
|
|
# otherwise (isolated) = initial
|
|
|
ς ↔ s $underbar;
|
|
|
σ ↔ s ;
|
|
|
# [Pp] { Σ ↔ \'S ;
|
|
|
Σ ↔ S ;
|
|
|
# Remaining one-to-one letters, tau through chi.
τ ↔ t ;
|
|
|
Τ ↔ T ;
|
|
|
# Upsilon is u/U when preceded by a vowel, otherwise y/Y.
$vowel {υ } ↔ u ;
|
|
|
υ ↔ y ;
|
|
|
$vowel { Υ ↔ U ;
|
|
|
Υ ↔ Y ;
|
|
|
χ ↔ ch ;
|
|
|
# Uppercase chi: "Ch" before a lowercase letter, otherwise "CH".
Χ } $beforeLower ↔ Ch ;
|
|
|
Χ ↔ CH ;
|
|
|
# Completeness for ASCII
|
|
|
# Any run of marks or apostrophes; skipped when looking for the neighbouring
# uppercase letter in the leftover-breathing rules further down.
$ignore = [[:Mark:]''] * ;
|
|
|
# Reverse only: Latin letters with no Greek counterpart are rewritten to a
# letter (or digraph) that has one. The leading cursor (|) puts the
# replacement back in front of the rule engine so the normal reverse rules
# then convert it.
| k ← c ;
|
|
|
| ph ← f ;
|
|
|
| i ← j ;
|
|
|
| k ← q ;
|
|
|
# v/w: b before a vowel, otherwise u. The contextual rules come first.
| b ← v } $vowel ;
|
|
|
| b ← w } $vowel;
|
|
|
| u ← v ;
|
|
|
| u ← w;
|
|
|
| K ← C ;
|
|
|
| Ph ← F ;
|
|
|
| I ← J ;
|
|
|
| K ← Q ;
|
|
|
| B ← V } $vowel ;
|
|
|
| B ← W } $vowel ;
|
|
|
| U ← V ;
|
|
|
| U ← W ;
|
|
|
# Leftover rough breathings not consumed by the BREATHING section:
# forward, H when an uppercase letter follows or immediately precedes,
# otherwise h; reverse, any remaining H or h becomes a rough breathing.
$rough } $ignore [:UppercaseLetter:] → H ;
|
|
|
$ignore [:UppercaseLetter:] { $rough → H ;
|
|
|
$rough ← H ;
|
|
|
$rough ↔ h ;
|
|
|
# Completeness for Greek
|
|
|
# Forward only: Greek symbol/variant letter forms are rewritten to the
# ordinary letter. Where the replacement follows a cursor (|), the ordinary
# letter is then transliterated by the rules above.
ϐ → | β ;
|
|
|
ϑ → | θ ;
|
|
|
ϒ → | Υ ;
|
|
|
ϕ → | φ ;
|
|
|
ϖ → | π ;
|
|
|
ϰ → | κ ;
|
|
|
ϱ → | ρ ;
|
|
|
ϲ → | σ ;
|
|
|
Ϲ → | Σ; #U+03F9 GREEK CAPITAL LUNATE SIGMA SYMBOL
|
|
|
# No cursor here: yot is mapped straight to Latin j.
ϳ → j ;
|
|
|
ϴ → | Θ ;
|
|
|
ϵ → | ε ;
|
|
|
# Left side is the micro sign, right side is Greek small mu.
µ → | μ ;
|
|
|
# No cursor here: mapped straight to Latin i.
ͺ → i;
|
|
|
# delete any trailing ' marks used for roundtripping
|
|
|
# Reverse only: once pi or nu has been restored, drop the apostrophe separator
# that the forward rules inserted before a following s/S or $egammaLike letter.
← [Ππ] { \' } [Ss] ;
|
|
|
← [Νν] { \' } $egammaLike ;
|
|
|
# Post-normalization: recompose to NFC after the forward rules.
# The parenthesised NFD is the step used when the transform runs in reverse.
::NFC (NFD) ;
|
|
|
# ([\u0000-\u007F [:Latin:] [:Greek:] [:nonspacing mark:]]) ;
|
|
|
# ([\u0000-\u007F · [:Latin:] [:nonspacing mark:]]) ;
|
|
|
# MINIMAL FILTER GENERATED FOR: Latin-Greek BACKWARD
|
|
|
:: ( [':?A-Za-zÀ-ÅÇ-ÏÑ-ÖÙ-Ýà-åç-ïñ-öù-ýÿ-ďĒ-ĥĨ-İĴ-ķĹ-ľŃ-ňŌ-őŔ-ťŨ-žƠ-ơƯ-ưǍ-ǜǞ-ǣǦ-ǰǴ-ǵǸ-țȞ-ȟȦ-ȳ\u0300-\u0337\u0339-\u0345΅-ΆΈ-ΊΌΎ-ΐΪ-ΰϊ-ώϓ-ϔЀ-ЁЃЇЌ-ЎЙйѐ-ёѓїќ-ўѶ-ѷӁ-ӂӐ-ӓӖ-ӗӚ-ӟӢ-ӧӪ-ӵӸ-ӹḀ-ẙẛẠ-ỹἀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼ῁-ῄῆ-ΐῖ-Ί῝-΅ῲ-ῴῶ-ῼK-Å] ) ;
|
|
|
|