You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

427 lines
24 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<!--
Copyright © 1991-2015 Unicode, Inc.
CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
For terms of use, see http://www.unicode.org/copyright.html
-->
<ldml>
<identity>
  <!-- Identity of this data file: a version stamp plus the locale it belongs to (language "root"). -->
  <version number="$Revision$"/>
  <language type="root"/>
</identity>
<segmentations>
<segmentation type="GraphemeClusterBreak">
  <variables>
    <!-- Character classes: one variable per Grapheme_Cluster_Break property value used by the rules. -->
    <variable id="$CR">\p{Grapheme_Cluster_Break=CR}</variable>
    <variable id="$LF">\p{Grapheme_Cluster_Break=LF}</variable>
    <variable id="$Control">\p{Grapheme_Cluster_Break=Control}</variable>
    <variable id="$Extend">\p{Grapheme_Cluster_Break=Extend}</variable>
    <variable id="$ZWJ">\p{Grapheme_Cluster_Break=ZWJ}</variable>
    <variable id="$RI">\p{Grapheme_Cluster_Break=Regional_Indicator}</variable>
    <variable id="$Prepend">\p{Grapheme_Cluster_Break=Prepend}</variable>
    <variable id="$SpacingMark">\p{Grapheme_Cluster_Break=SpacingMark}</variable>
    <!-- Hangul classes (used by rules 6 to 8). -->
    <variable id="$L">\p{Grapheme_Cluster_Break=L}</variable>
    <variable id="$V">\p{Grapheme_Cluster_Break=V}</variable>
    <variable id="$T">\p{Grapheme_Cluster_Break=T}</variable>
    <variable id="$LV">\p{Grapheme_Cluster_Break=LV}</variable>
    <variable id="$LVT">\p{Grapheme_Cluster_Break=LVT}</variable>
    <!-- The classes below are built from other properties and may overlap with the classes above. -->
    <!-- $Virama / $LinkingConsonant: viramas and consonants (by Indic_Syllabic_Category) limited to six scripts. -->
    <!-- NOTE(review): \p{Gujr} is written without the sc= prefix used for the other five scripts; presumably it selects the same script property. Confirm before normalizing. -->
    <variable id="$Virama">[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}&amp;\p{Indic_Syllabic_Category=Virama}]</variable>
    <variable id="$LinkingConsonant">[\p{Gujr}\p{sc=Telu}\p{sc=Mlym}\p{sc=Orya}\p{sc=Beng}\p{sc=Deva}&amp;\p{Indic_Syllabic_Category=Consonant}]</variable>
    <variable id="$ExtPict">\p{Extended_Pictographic}</variable>
    <!-- $ExtCccZwj: Extend characters whose combining class is not 0, plus ZWJ. -->
    <variable id="$ExtCccZwj">[[$Extend-\p{ccc=0}] $ZWJ]</variable>
  </variables>
  <segmentRules>
    <!-- Rule notation: × means "do not break here", ÷ means "break here". -->
    <!-- Break at the start and end of text, unless the text is empty. -->
    <!-- Keep CR LF together; otherwise break on both sides of a control, CR or LF. -->
    <rule id="3"> $CR × $LF </rule>
    <rule id="4"> ( $Control | $CR | $LF ) ÷ </rule>
    <rule id="5"> ÷ ( $Control | $CR | $LF ) </rule>
    <!-- Keep Hangul syllable sequences together. -->
    <rule id="6"> $L × ( $L | $V | $LV | $LVT ) </rule>
    <rule id="7"> ( $LV | $V ) × ( $V | $T ) </rule>
    <rule id="8"> ( $LVT | $T) × $T </rule>
    <!-- Never break before an Extend character or a ZWJ. -->
    <rule id="9"> × ($Extend | $ZWJ) </rule>
    <!-- Extended grapheme clusters only: no break before a SpacingMark and no break after a Prepend character. -->
    <rule id="9.1"> × $SpacingMark </rule>
    <rule id="9.2"> $Prepend × </rule>
    <!-- No break before a linking consonant that follows linking consonant + virama; $ExtCccZwj characters may surround the virama. -->
    <rule id="9.3"> $LinkingConsonant $ExtCccZwj* $Virama $ExtCccZwj* × $LinkingConsonant </rule>
    <!-- Keep emoji ZWJ sequences together. -->
    <rule id="11"> $ExtPict $Extend* $ZWJ × $ExtPict </rule>
    <!-- Keep emoji flag sequences together: no break between two regional indicators (RI) when an odd number of RI characters precedes the break point. -->
    <!-- Rule 12 covers a run of RIs starting at the beginning of the text, rule 13 a run that follows any non-RI character. -->
    <rule id="12"> ^ ($RI $RI)* $RI × $RI </rule>
    <rule id="13"> [^$RI] ($RI $RI)* $RI × $RI </rule>
    <!-- Everywhere else, break. -->
  </segmentRules>
</segmentation>
<segmentation type="LineBreak">
  <variables>
    <!-- VARIABLES: one class per Line_Break property value. -->
    <variable id="$AI">\p{Line_Break=Ambiguous}</variable>
    <variable id="$AL">\p{Line_Break=Alphabetic}</variable>
    <variable id="$B2">\p{Line_Break=Break_Both}</variable>
    <variable id="$BA">\p{Line_Break=Break_After}</variable>
    <variable id="$BB">\p{Line_Break=Break_Before}</variable>
    <variable id="$BK">\p{Line_Break=Mandatory_Break}</variable>
    <variable id="$CB">\p{Line_Break=Contingent_Break}</variable>
    <variable id="$CL">\p{Line_Break=Close_Punctuation}</variable>
    <variable id="$CP">\p{Line_Break=CP}</variable>
    <!-- $CP30: CP minus characters whose East Asian Width is F, W or H (used by rule 30.02). -->
    <variable id="$CP30">[$CP - [\p{ea=F}\p{ea=W}\p{ea=H}]]</variable>
    <variable id="$CM1">\p{Line_Break=Combining_Mark}</variable>
    <variable id="$CR">\p{Line_Break=Carriage_Return}</variable>
    <variable id="$EX">\p{Line_Break=Exclamation}</variable>
    <variable id="$GL">\p{Line_Break=Glue}</variable>
    <variable id="$H2">\p{Line_Break=H2}</variable>
    <variable id="$H3">\p{Line_Break=H3}</variable>
    <variable id="$HL">\p{Line_Break=HL}</variable>
    <variable id="$HY">\p{Line_Break=Hyphen}</variable>
    <variable id="$ID">\p{Line_Break=Ideographic}</variable>
    <variable id="$IN">\p{Line_Break=Inseparable}</variable>
    <variable id="$IS">\p{Line_Break=Infix_Numeric}</variable>
    <variable id="$JL">\p{Line_Break=JL}</variable>
    <variable id="$JT">\p{Line_Break=JT}</variable>
    <variable id="$JV">\p{Line_Break=JV}</variable>
    <variable id="$LF">\p{Line_Break=Line_Feed}</variable>
    <variable id="$NL">\p{Line_Break=Next_Line}</variable>
    <variable id="$NS">\p{Line_Break=Nonstarter}</variable>
    <variable id="$NU">\p{Line_Break=Numeric}</variable>
    <variable id="$OP">\p{Line_Break=Open_Punctuation}</variable>
    <!-- $OP30: OP minus characters whose East Asian Width is F, W or H (used by rule 30.01). -->
    <variable id="$OP30">[$OP - [\p{ea=F}\p{ea=W}\p{ea=H}]]</variable>
    <variable id="$PO">\p{Line_Break=Postfix_Numeric}</variable>
    <variable id="$PR">\p{Line_Break=Prefix_Numeric}</variable>
    <variable id="$QU">\p{Line_Break=Quotation}</variable>
    <variable id="$SA">\p{Line_Break=Complex_Context}</variable>
    <variable id="$SG">\p{Line_Break=Surrogate}</variable>
    <variable id="$SP">\p{Line_Break=Space}</variable>
    <variable id="$SY">\p{Line_Break=Break_Symbols}</variable>
    <variable id="$WJ">\p{Line_Break=Word_Joiner}</variable>
    <variable id="$XX">\p{Line_Break=Unknown}</variable>
    <variable id="$ZW">\p{Line_Break=ZWSpace}</variable>
    <variable id="$CJ">\p{Line_Break=Conditional_Japanese_Starter}</variable>
    <variable id="$RI">\p{Line_Break=Regional_Indicator}</variable>
    <variable id="$EB">\p{Line_Break=E_Base}</variable>
    <variable id="$EM">\p{Line_Break=E_Modifier}</variable>
    <!-- $ZWJ_O keeps the plain ZWJ class for rule 8.1; $ZWJ itself is redefined further down with a trailing $X. -->
    <variable id="$ZWJ_O">\p{Line_Break=ZWJ}</variable>
    <variable id="$ZWJ">\p{Line_Break=ZWJ}</variable>
    <!-- SPECIAL EXTENSIONS -->
    <!-- $CM covers both combining marks and ZWJ. -->
    <variable id="$CM">[$CM1 $ZWJ]</variable>
    <!-- LB 1 Assign a line breaking class to each code point of the input. -->
    <!-- Resolve AI, CB, SA, SG, and XX into other line breaking classes depending on criteria outside the scope of this algorithm. -->
    <!-- NOTE: CB is ok to fall through, but must handle others here. -->
    <!-- Here AI, SG, XX and SA are folded into AL, and CJ into NS. -->
    <variable id="$AL">[$AI $AL $SG $XX $SA]</variable>
    <variable id="$NS">[$NS $CJ]</variable>
    <!-- WARNING: Fixes for Rule 9 -->
    <!-- Treat X (CM | ZWJ)* as if it were X. -->
    <!-- Where X is any line break class except SP, BK, CR, LF, NL or ZW. -->
    <variable id="$X">$CM*</variable>
    <!-- MACROS: helper sets used as left-hand context in rules 9, 12.x, 13.x and 20.09, and in the LB 10 definition below. -->
    <variable id="$Spec1_">[$SP $BK $CR $LF $NL $ZW]</variable>
    <variable id="$Spec2_">[^ $SP $BK $CR $LF $NL $ZW]</variable>
    <variable id="$Spec3a_">[^ $SP $BA $HY $CM]</variable>
    <variable id="$Spec3b_">[^ $BA $HY $CM]</variable>
    <variable id="$Spec4_">[^ $NU $CM]</variable>
    <variable id="$Spec5_">[$BK $CB $CR $LF $NL $SP $ZW]</variable>
    <!-- Redefine each class as "class followed by $X", so that a class also consumes the combining marks / ZWJ that trail it. -->
    <!-- SP, BK, CR, LF, NL and ZW are deliberately not redefined (they are the exceptions named for Rule 9 above). -->
    <variable id="$AI">($AI $X)</variable>
    <variable id="$AL">($AL $X)</variable>
    <variable id="$B2">($B2 $X)</variable>
    <variable id="$BA">($BA $X)</variable>
    <variable id="$BB">($BB $X)</variable>
    <variable id="$CB">($CB $X)</variable>
    <variable id="$CL">($CL $X)</variable>
    <variable id="$CP">($CP $X)</variable>
    <variable id="$CP30">($CP30 $X)</variable>
    <variable id="$CM">($CM $X)</variable>
    <variable id="$EX">($EX $X)</variable>
    <variable id="$GL">($GL $X)</variable>
    <variable id="$H2">($H2 $X)</variable>
    <variable id="$H3">($H3 $X)</variable>
    <variable id="$HL">($HL $X)</variable>
    <variable id="$HY">($HY $X)</variable>
    <variable id="$ID">($ID $X)</variable>
    <variable id="$IN">($IN $X)</variable>
    <variable id="$IS">($IS $X)</variable>
    <variable id="$JL">($JL $X)</variable>
    <variable id="$JT">($JT $X)</variable>
    <variable id="$JV">($JV $X)</variable>
    <variable id="$NS">($NS $X)</variable>
    <variable id="$NU">($NU $X)</variable>
    <variable id="$OP">($OP $X)</variable>
    <variable id="$OP30">($OP30 $X)</variable>
    <variable id="$PO">($PO $X)</variable>
    <variable id="$PR">($PR $X)</variable>
    <variable id="$QU">($QU $X)</variable>
    <variable id="$SA">($SA $X)</variable>
    <variable id="$SG">($SG $X)</variable>
    <variable id="$SY">($SY $X)</variable>
    <variable id="$WJ">($WJ $X)</variable>
    <variable id="$XX">($XX $X)</variable>
    <variable id="$RI">($RI $X)</variable>
    <variable id="$EB">($EB $X)</variable>
    <variable id="$EM">($EM $X)</variable>
    <variable id="$ZWJ">($ZWJ $X)</variable>
    <!-- OUT OF ORDER ON PURPOSE -->
    <!-- LB 10 Treat any remaining combining mark as AL. -->
    <!-- That is: a $CM at the start of the text, or directly after one of the $Spec1_ characters, counts as $AL. -->
    <variable id="$AL">($AL | ^ $CM | (?&lt;=$Spec1_) $CM)</variable>
  </variables>
  <segmentRules>
    <!-- RULES: × means "do not break here", ÷ means "break here". -->
    <!-- LB 4 Always break after hard line breaks (but never between CR and LF). -->
    <rule id="4"> $BK ÷ </rule>
    <!-- LB 5 Treat CR followed by LF, as well as CR, LF and NL as hard line breaks. -->
    <rule id="5.01"> $CR × $LF </rule>
    <rule id="5.02"> $CR ÷ </rule>
    <rule id="5.03"> $LF ÷ </rule>
    <rule id="5.04"> $NL ÷ </rule>
    <!-- LB 6 Do not break before hard line breaks. -->
    <rule id="6"> × ( $BK | $CR | $LF | $NL ) </rule>
    <!-- LB 7 Do not break before spaces or zero-width space. -->
    <rule id="7.01"> × $SP </rule>
    <rule id="7.02"> × $ZW </rule>
    <!-- LB 8 Break before any character following a zero-width space, even if one or more spaces intervene. -->
    <rule id="8"> $ZW $SP* ÷ </rule>
    <!-- LB 8a Do not break after a zero width joiner (keeps emoji ZWJ sequences together). -->
    <rule id="8.1"> $ZWJ_O × </rule>
    <!-- LB 9 Do not break a combining character sequence; treat it as if it has the LB class of the base character -->
    <!-- in all of the following rules. (Where X is any line break class except SP, BK, CR, LF, NL or ZW.) -->
    <rule id="9"> $Spec2_ × $CM </rule>
    <!-- LB 11 Do not break before or after WORD JOINER and related characters. -->
    <rule id="11.01"> × $WJ </rule>
    <rule id="11.02"> $WJ × </rule>
    <!-- LB 12 Do not break after NBSP and related characters. -->
    <rule id="12"> $GL × </rule>
    <rule id="12.1"> $Spec3a_ × $GL </rule>
    <rule id="12.2"> $Spec3b_ $CM+ × $GL </rule>
    <rule id="12.3"> ^ $CM+ × $GL </rule>
    <!-- LB 13 Do not break before ']' or '!' or ';' or '/', even after spaces. -->
    <!-- Using customization 7. -->
    <rule id="13.01"> × $EX </rule>
    <rule id="13.02"> $Spec4_ × ($CL | $CP | $IS | $SY) </rule>
    <rule id="13.03"> $Spec4_ $CM+ × ($CL | $CP | $IS | $SY) </rule>
    <rule id="13.04"> ^ $CM+ × ($CL | $CP | $IS | $SY) </rule>
    <!-- LB 14 Do not break after '[', even after spaces. -->
    <rule id="14"> $OP $SP* × </rule>
    <!-- LB 15 Do not break within '"[', even with intervening spaces. -->
    <rule id="15"> $QU $SP* × $OP </rule>
    <!-- LB 16 Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces. -->
    <rule id="16"> ($CL | $CP) $SP* × $NS </rule>
    <!-- LB 17 Do not break within '——' (two em dashes), even with intervening spaces. -->
    <rule id="17"> $B2 $SP* × $B2 </rule>
    <!-- LB 18 Break after spaces. -->
    <rule id="18"> $SP ÷ </rule>
    <!-- LB 19 Do not break before or after '"'. -->
    <rule id="19.01"> × $QU </rule>
    <rule id="19.02"> $QU × </rule>
    <!-- LB 20 Break before and after unresolved CB. -->
    <rule id="20.01"> ÷ $CB </rule>
    <rule id="20.02"> $CB ÷ </rule>
    <!-- LB 20.9 Don't break between Hyphens and Letters when there is a break preceding the hyphen. -->
    <!-- Originally added as a Finnish tailoring, now promoted to default CLDR behavior. -->
    <!-- Must be before LB 21. Note: this is not default UAX-14 behaviour. See ICU issue ICU-8151. -->
    <!-- (Unlike in ICU, here we just check a limited set of known breaks, ignoring some cases like LB 14). -->
    <rule id="20.09"> $Spec5_ $HY × $AL </rule>
    <!-- LB 21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana and other non-starters, or after acute accents. -->
    <rule id="21.01"> × $BA </rule>
    <rule id="21.02"> × $HY </rule>
    <rule id="21.03"> × $NS </rule>
    <rule id="21.04"> $BB × </rule>
    <!-- LB 21a Don't break after Hebrew + Hyphen. -->
    <rule id="21.1"> $HL ($HY | $BA) × </rule>
    <!-- LB 21b Don't break between Solidus and Hebrew letters. -->
    <rule id="21.2"> $SY × $HL </rule>
    <!-- LB 22 Do not break before ellipsis. -->
    <rule id="22.01"> × $IN </rule>
    <!-- LB 23 Do not break between digits and letters. -->
    <rule id="23.02"> ($AL | $HL) × $NU </rule>
    <rule id="23.03"> $NU × ($AL | $HL) </rule>
    <!-- LB 23a Do not break between numeric prefixes and ideographs (or emoji bases/modifiers), or between those and numeric postfixes. -->
    <rule id="23.12"> $PR × ($ID | $EB | $EM) </rule>
    <rule id="23.13"> ($ID | $EB | $EM) × $PO </rule>
    <!-- LB 24 Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix. -->
    <rule id="24.02"> ($PR | $PO) × ($AL | $HL) </rule>
    <rule id="24.03"> ($AL | $HL) × ($PR | $PO) </rule>
    <!-- Using customization 7 -->
    <!-- LB Alternative: ( PR | PO) ? ( OP | HY ) ? NU (NU | SY | IS) * (CL | CP) ? ( PR | PO) ? -->
    <!-- Insert × every place it could go. However, make sure that at least one thing is concrete, otherwise would cause $NU to not break before or after -->
    <rule id="25.01"> ($PR | $PO) × ( $OP | $HY )? $NU </rule>
    <rule id="25.02"> ( $OP | $HY ) × $NU </rule>
    <rule id="25.03"> $NU × ($NU | $SY | $IS) </rule>
    <rule id="25.04"> $NU ($NU | $SY | $IS)* × ($NU | $SY | $IS | $CL | $CP) </rule>
    <rule id="25.05"> $NU ($NU | $SY | $IS)* ($CL | $CP)? × ($PO | $PR) </rule>
    <!-- LB 26 Do not break a Korean syllable. -->
    <!-- Alternations are parenthesized explicitly (as everywhere else in this file) so the grouping on each side of × does not depend on operator precedence. -->
    <rule id="26.01"> $JL × ($JL | $JV | $H2 | $H3) </rule>
    <rule id="26.02"> ($JV | $H2) × ($JV | $JT) </rule>
    <rule id="26.03"> ($JT | $H3) × $JT </rule>
    <!-- LB 27 Treat a Korean Syllable Block the same as ID. -->
    <rule id="27.01"> ($JL | $JV | $JT | $H2 | $H3) × $IN </rule>
    <rule id="27.02"> ($JL | $JV | $JT | $H2 | $H3) × $PO </rule>
    <rule id="27.03"> $PR × ($JL | $JV | $JT | $H2 | $H3) </rule>
    <!-- LB 28 Do not break between alphabetics ("at"). -->
    <rule id="28"> ($AL | $HL) × ($AL | $HL) </rule>
    <!-- LB 29 Do not break between numeric punctuation and alphabetics ("e.g."). -->
    <rule id="29"> $IS × ($AL | $HL) </rule>
    <!-- LB 30 Do not break between letters, numbers or ordinary symbols and non-East-Asian opening or closing punctuation. -->
    <rule id="30.01"> ($AL | $HL | $NU) × $OP30 </rule>
    <rule id="30.02"> $CP30 × ($AL | $HL | $NU) </rule>
    <!-- LB 30a Break between two Regional Indicators if and only if there is an even number of them before the point being considered. -->
    <rule id="30.11"> ^ ($RI $RI)* $RI × $RI </rule>
    <rule id="30.12"> [^$RI] ($RI $RI)* $RI × $RI </rule>
    <rule id="30.13"> $RI ÷ $RI </rule>
    <!-- LB 30b Do not break between an emoji base and an emoji modifier. -->
    <rule id="30.2"> $EB × $EM </rule>
  </segmentRules>
</segmentation>
<segmentation type="SentenceBreak">
  <variables>
    <!-- Character classes: one variable per Sentence_Break property value, plus $Any for any character. -->
    <variable id="$CR">\p{Sentence_Break=CR}</variable>
    <variable id="$LF">\p{Sentence_Break=LF}</variable>
    <variable id="$Extend">\p{Sentence_Break=Extend}</variable>
    <variable id="$Format">\p{Sentence_Break=Format}</variable>
    <variable id="$Sep">\p{Sentence_Break=Sep}</variable>
    <variable id="$Sp">\p{Sentence_Break=Sp}</variable>
    <variable id="$Lower">\p{Sentence_Break=Lower}</variable>
    <variable id="$Upper">\p{Sentence_Break=Upper}</variable>
    <variable id="$OLetter">\p{Sentence_Break=OLetter}</variable>
    <variable id="$Numeric">\p{Sentence_Break=Numeric}</variable>
    <variable id="$ATerm">\p{Sentence_Break=ATerm}</variable>
    <variable id="$STerm">\p{Sentence_Break=STerm}</variable>
    <variable id="$Close">\p{Sentence_Break=Close}</variable>
    <variable id="$SContinue">\p{Sentence_Break=SContinue}</variable>
    <variable id="$Any">.</variable>
    <!-- SPECIAL EXTENSIONS -->
    <!-- WARNING: to implement rule 5, every class except Sep, CR, LF, Format and Extend is redefined below to also absorb trailing Format/Extend characters ($FE*). -->
    <variable id="$FE">[$Format $Extend]</variable>
    <!-- $NotPreLower_ is defined before the redefinitions below, i.e. from the plain classes; rule 8 uses it for the characters skipped before a $Lower. -->
    <variable id="$NotPreLower_">[^ $OLetter $Upper $Lower $Sep $CR $LF $STerm $ATerm]</variable>
    <variable id="$Sp">($Sp $FE*)</variable>
    <variable id="$Lower">($Lower $FE*)</variable>
    <variable id="$Upper">($Upper $FE*)</variable>
    <variable id="$OLetter">($OLetter $FE*)</variable>
    <variable id="$Numeric">($Numeric $FE*)</variable>
    <variable id="$ATerm">($ATerm $FE*)</variable>
    <variable id="$STerm">($STerm $FE*)</variable>
    <variable id="$Close">($Close $FE*)</variable>
    <variable id="$SContinue">($SContinue $FE*)</variable>
    <!-- MACROS: paragraph separators and sentence terminators. -->
    <variable id="$ParaSep">($Sep | $CR | $LF)</variable>
    <variable id="$SATerm">($STerm | $ATerm)</variable>
  </variables>
  <segmentRules>
    <!-- Rule notation: × means "do not break here", ÷ means "break here". -->
    <!-- Break at the start and end of text, unless the text is empty. -->
    <!-- Keep CR LF together. -->
    <rule id="3"> $CR × $LF </rule>
    <!-- Break after a paragraph separator. -->
    <rule id="4"> $ParaSep ÷ </rule>
    <!-- Ignore Format and Extend characters, except after sot, ParaSep, and within CRLF. (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend) -->
    <!-- WARNING: implemented as "no break before Format/Extend" (rule 4 has already handled the position after a line break), -->
    <!-- combined with the $FE* suffix added to the variable definitions used by the rules after this point. -->
    <rule id="5"> × [$Format $Extend] </rule>
    <!-- Do not break after full stop in certain contexts. [See note below.] -->
    <!-- An ambiguous terminator such as a period does not end the sentence when it is immediately followed by a number, -->
    <!-- when it follows a letter and precedes an uppercase letter, or when the first following letter (optionally after certain punctuation) is lowercase. -->
    <!-- For example, a period may be an abbreviation or numeric period, and not mark the end of a sentence. -->
    <rule id="6"> $ATerm × $Numeric </rule>
    <rule id="7"> ($Upper | $Lower) $ATerm × $Upper </rule>
    <rule id="8"> $ATerm $Close* $Sp* × $NotPreLower_* $Lower </rule>
    <rule id="8.1"> $SATerm $Close* $Sp* × ($SContinue | $SATerm) </rule>
    <!-- Break after sentence terminators, but include closing punctuation, trailing spaces, and (optionally) a paragraph separator. [See note below.] -->
    <rule id="9"> $SATerm $Close* × ( $Close | $Sp | $ParaSep ) </rule>
    <!-- Note the fix to $Sp*, $Sep? -->
    <rule id="10"> $SATerm $Close* $Sp* × ( $Sp | $ParaSep ) </rule>
    <rule id="11"> $SATerm $Close* $Sp* $ParaSep? ÷ </rule>
    <!-- Anywhere else, do not break. -->
    <rule id="998"> × $Any </rule>
  </segmentRules>
</segmentation>
<segmentation type="WordBreak">
  <variables>
    <!-- Character classes: one variable per Word_Break property value. -->
    <!-- The first four are not redefined with a $FE* suffix further down. -->
    <variable id="$CR">\p{Word_Break=CR}</variable>
    <variable id="$LF">\p{Word_Break=LF}</variable>
    <variable id="$Newline">\p{Word_Break=Newline}</variable>
    <variable id="$Extend">\p{Word_Break=Extend}</variable>
    <!-- Ordinary classes. -->
    <variable id="$Format">\p{Word_Break=Format}</variable>
    <variable id="$Katakana">\p{Word_Break=Katakana}</variable>
    <variable id="$ALetter">\p{Word_Break=ALetter}</variable>
    <variable id="$MidLetter">\p{Word_Break=MidLetter}</variable>
    <variable id="$MidNum">\p{Word_Break=MidNum}</variable>
    <variable id="$MidNumLet">\p{Word_Break=MidNumLet}</variable>
    <variable id="$Numeric">\p{Word_Break=Numeric}</variable>
    <variable id="$ExtendNumLet">\p{Word_Break=ExtendNumLet}</variable>
    <variable id="$RI">\p{Word_Break=Regional_Indicator}</variable>
    <variable id="$Hebrew_Letter">\p{Word_Break=Hebrew_Letter}</variable>
    <variable id="$Double_Quote">\p{Word_Break=Double_Quote}</variable>
    <variable id="$Single_Quote">\p{Word_Break=Single_Quote}</variable>
    <variable id="$ZWJ">\p{Word_Break=ZWJ}</variable>
    <!-- Note: the classes below may overlap with the ones above. -->
    <variable id="$ExtPict">\p{Extended_Pictographic}</variable>
    <variable id="$WSegSpace">\p{Word_Break=WSegSpace}</variable>
    <!-- MACROS: letters of either kind, and MidNumLet together with the single quote. -->
    <variable id="$AHLetter">($ALetter | $Hebrew_Letter)</variable>
    <variable id="$MidNumLetQ">($MidNumLet | $Single_Quote)</variable>
    <!-- SPECIAL EXTENSIONS -->
    <!-- Redefine the classes and macros below so that each also absorbs trailing Format, Extend and ZWJ characters ($FE*). -->
    <variable id="$FE">[$Format $Extend $ZWJ]</variable>
    <!-- $NotBreak_: any character that is not Newline, CR or LF (left context of rule 4). -->
    <variable id="$NotBreak_">[^ $Newline $CR $LF ]</variable>
    <variable id="$Katakana">($Katakana $FE*)</variable>
    <variable id="$ALetter">($ALetter $FE*)</variable>
    <variable id="$MidLetter">($MidLetter $FE*)</variable>
    <variable id="$MidNum">($MidNum $FE*)</variable>
    <variable id="$MidNumLet">($MidNumLet $FE*)</variable>
    <variable id="$Numeric">($Numeric $FE*)</variable>
    <variable id="$ExtendNumLet">($ExtendNumLet $FE*)</variable>
    <variable id="$RI">($RI $FE*)</variable>
    <variable id="$Hebrew_Letter">($Hebrew_Letter $FE*)</variable>
    <variable id="$Double_Quote">($Double_Quote $FE*)</variable>
    <variable id="$Single_Quote">($Single_Quote $FE*)</variable>
    <variable id="$AHLetter">($AHLetter $FE*)</variable>
    <variable id="$MidNumLetQ">($MidNumLetQ $FE*)</variable>
  </variables>
  <segmentRules>
    <!-- Rule notation: × means "do not break here", ÷ means "break here". -->
    <!-- Break at the start and end of text, unless the text is empty. -->
    <!-- Keep CR LF together. -->
    <rule id="3"> $CR × $LF </rule>
    <!-- Otherwise break on both sides of a Newline, CR or LF. -->
    <rule id="3.1"> ($Newline | $CR | $LF) ÷ </rule>
    <rule id="3.2"> ÷ ($Newline | $CR | $LF) </rule>
    <!-- Keep emoji ZWJ sequences together. -->
    <rule id="3.3"> $ZWJ × $ExtPict </rule>
    <!-- Do not break between two WSegSpace characters. -->
    <rule id="3.4"> $WSegSpace × $WSegSpace </rule>
    <!-- Ignore Format and Extend characters, except after sot, CR, LF, and Newline. (See Section 6.2, Replacing Ignore Rules.) This also has the effect of: Any × (Format | Extend) -->
    <!-- WARNING: implemented as "no break before Format/Extend/ZWJ unless the preceding character is Newline, CR or LF", -->
    <!-- combined with the $FE* suffix added to the variable definitions used by the rules after this point. -->
    <rule id="4"> $NotBreak_ × [$Format $Extend $ZWJ] </rule>
    <!-- VANILLA RULES -->
    <!-- No break between most letters. -->
    <rule id="5"> $AHLetter × $AHLetter </rule>
    <!-- No break on either side of certain punctuation that sits between two letters. -->
    <rule id="6"> $AHLetter × ($MidLetter | $MidNumLetQ) $AHLetter </rule>
    <rule id="7"> $AHLetter ($MidLetter | $MidNumLetQ) × $AHLetter </rule>
    <!-- Hebrew letters: no break before a following single quote, nor around a double quote between two Hebrew letters. -->
    <rule id="7.1"> $Hebrew_Letter × $Single_Quote </rule>
    <rule id="7.2"> $Hebrew_Letter × $Double_Quote $Hebrew_Letter </rule>
    <rule id="7.3"> $Hebrew_Letter $Double_Quote × $Hebrew_Letter </rule>
    <!-- No break inside a run of digits, or between a digit and an adjacent letter (“3a”, or “A3”). -->
    <rule id="8"> $Numeric × $Numeric </rule>
    <rule id="9"> $AHLetter × $Numeric </rule>
    <rule id="10"> $Numeric × $AHLetter </rule>
    <!-- No break inside numeric sequences such as “3.2” or “3,456.789”. -->
    <rule id="11"> $Numeric ($MidNum | $MidNumLetQ) × $Numeric </rule>
    <rule id="12"> $Numeric × ($MidNum | $MidNumLetQ) $Numeric </rule>
    <!-- No break between Katakana. -->
    <rule id="13"> $Katakana × $Katakana </rule>
    <!-- No break on either side of an ExtendNumLet that adjoins a letter, digit, Katakana or another ExtendNumLet. -->
    <rule id="13.1"> ($AHLetter | $Numeric | $Katakana | $ExtendNumLet) × $ExtendNumLet </rule>
    <rule id="13.2"> $ExtendNumLet × ($AHLetter | $Numeric | $Katakana) </rule>
    <!-- Keep emoji flag sequences together: no break between two regional indicators (RI) when an odd number of RI characters precedes the break point. -->
    <!-- Rule 15 covers a run of RIs starting at the beginning of the text, rule 16 a run that follows any non-RI character. -->
    <rule id="15"> ^ ($RI $RI)* $RI × $RI </rule>
    <rule id="16"> [^$RI] ($RI $RI)* $RI × $RI </rule>
    <!-- Everywhere else, break (including around ideographs). -->
  </segmentRules>
</segmentation>
</segmentations>
</ldml>