// © 2017 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html #include "unicode/utypes.h" #if !UCONFIG_NO_FORMATTING // Allow implicit conversion from char16_t* to UnicodeString for this file: // Helpful in toString methods and elsewhere. #define UNISTR_FROM_STRING_EXPLICIT #define UNISTR_FROM_CHAR_EXPLICIT #include "uassert.h" #include "number_patternstring.h" #include "unicode/utf16.h" #include "number_utils.h" #include "number_roundingutils.h" #include "number_mapper.h" using namespace icu; using namespace icu::number; using namespace icu::number::impl; void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, UErrorCode& status) { patternInfo.consumePattern(patternString, status); } DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, UErrorCode& status) { DecimalFormatProperties properties; parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); return properties; } DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern, UErrorCode& status) { return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status); } void PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties, IgnoreRounding ignoreRounding, UErrorCode& status) { parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); } char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const { const Endpoints& endpoints = getEndpoints(flags); if (index < 0 || index >= endpoints.end - endpoints.start) { UPRV_UNREACHABLE; } return pattern.charAt(endpoints.start + index); } int32_t ParsedPatternInfo::length(int32_t flags) const { return getLengthFromEndpoints(getEndpoints(flags)); } int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) { return endpoints.end - endpoints.start; } UnicodeString ParsedPatternInfo::getString(int32_t flags) const { const Endpoints& endpoints = getEndpoints(flags); if (endpoints.start == endpoints.end) { return UnicodeString(); } // Create a new UnicodeString return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start); } const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const { bool prefix = (flags & AFFIX_PREFIX) != 0; bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0; bool padding = (flags & AFFIX_PADDING) != 0; if (isNegative && padding) { return negative.paddingEndpoints; } else if (padding) { return positive.paddingEndpoints; } else if (prefix && isNegative) { return negative.prefixEndpoints; } else if (prefix) { return positive.prefixEndpoints; } else if (isNegative) { return negative.suffixEndpoints; } else { return positive.suffixEndpoints; } } bool ParsedPatternInfo::positiveHasPlusSign() const { return positive.hasPlusSign; } bool ParsedPatternInfo::hasNegativeSubpattern() const { return fHasNegativeSubpattern; } bool ParsedPatternInfo::negativeHasMinusSign() const { return negative.hasMinusSign; } bool ParsedPatternInfo::hasCurrencySign() const { return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign); } bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const { return AffixUtils::containsType(pattern, type, status); } bool ParsedPatternInfo::hasBody() const { return positive.integerTotal > 0; } ///////////////////////////////////////////////////// /// BEGIN RECURSIVE DESCENT PARSER IMPLEMENTATION /// ///////////////////////////////////////////////////// UChar32 ParsedPatternInfo::ParserState::peek() { if (offset == pattern.length()) { return -1; } else { return pattern.char32At(offset); } } UChar32 ParsedPatternInfo::ParserState::next() { int codePoint = peek(); offset += U16_LENGTH(codePoint); return codePoint; } void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) { if (U_FAILURE(status)) { return; } this->pattern = patternString; // This class is not intended for writing twice! // Use move assignment to overwrite instead. U_ASSERT(state.offset == 0); // pattern := subpattern (';' subpattern)? currentSubpattern = &positive; consumeSubpattern(status); if (U_FAILURE(status)) { return; } if (state.peek() == u';') { state.next(); // consume the ';' // Don't consume the negative subpattern if it is empty (trailing ';') if (state.peek() != -1) { fHasNegativeSubpattern = true; currentSubpattern = &negative; consumeSubpattern(status); if (U_FAILURE(status)) { return; } } } if (state.peek() != -1) { state.toParseException(u"Found unquoted special character"); status = U_UNQUOTED_SPECIAL; } } void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) { // subpattern := literals? number exponent? literals? consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status); if (U_FAILURE(status)) { return; } consumeAffix(currentSubpattern->prefixEndpoints, status); if (U_FAILURE(status)) { return; } consumePadding(PadPosition::UNUM_PAD_AFTER_PREFIX, status); if (U_FAILURE(status)) { return; } consumeFormat(status); if (U_FAILURE(status)) { return; } consumeExponent(status); if (U_FAILURE(status)) { return; } consumePadding(PadPosition::UNUM_PAD_BEFORE_SUFFIX, status); if (U_FAILURE(status)) { return; } consumeAffix(currentSubpattern->suffixEndpoints, status); if (U_FAILURE(status)) { return; } consumePadding(PadPosition::UNUM_PAD_AFTER_SUFFIX, status); if (U_FAILURE(status)) { return; } } void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) { if (state.peek() != u'*') { return; } if (currentSubpattern->hasPadding) { state.toParseException(u"Cannot have multiple pad specifiers"); status = U_MULTIPLE_PAD_SPECIFIERS; return; } currentSubpattern->paddingLocation = paddingLocation; currentSubpattern->hasPadding = true; state.next(); // consume the '*' currentSubpattern->paddingEndpoints.start = state.offset; consumeLiteral(status); currentSubpattern->paddingEndpoints.end = state.offset; } void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) { // literals := { literal } endpoints.start = state.offset; while (true) { switch (state.peek()) { case u'#': case u'@': case u';': case u'*': case u'.': case u',': case u'0': case u'1': case u'2': case u'3': case u'4': case u'5': case u'6': case u'7': case u'8': case u'9': case -1: // Characters that cannot appear unquoted in a literal // break outer; goto after_outer; case u'%': currentSubpattern->hasPercentSign = true; break; case u'‰': currentSubpattern->hasPerMilleSign = true; break; case u'¤': currentSubpattern->hasCurrencySign = true; break; case u'-': currentSubpattern->hasMinusSign = true; break; case u'+': currentSubpattern->hasPlusSign = true; break; default: break; } consumeLiteral(status); if (U_FAILURE(status)) { return; } } after_outer: endpoints.end = state.offset; } void ParsedPatternInfo::consumeLiteral(UErrorCode& status) { if (state.peek() == -1) { state.toParseException(u"Expected unquoted literal but found EOL"); status = U_PATTERN_SYNTAX_ERROR; return; } else if (state.peek() == u'\'') { state.next(); // consume the starting quote while (state.peek() != u'\'') { if (state.peek() == -1) { state.toParseException(u"Expected quoted literal but found EOL"); status = U_PATTERN_SYNTAX_ERROR; return; } else { state.next(); // consume a quoted character } } state.next(); // consume the ending quote } else { // consume a non-quoted literal character state.next(); } } void ParsedPatternInfo::consumeFormat(UErrorCode& status) { consumeIntegerFormat(status); if (U_FAILURE(status)) { return; } if (state.peek() == u'.') { state.next(); // consume the decimal point currentSubpattern->hasDecimal = true; currentSubpattern->widthExceptAffixes += 1; consumeFractionFormat(status); if (U_FAILURE(status)) { return; } } } void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) { // Convenience reference: ParsedSubpatternInfo& result = *currentSubpattern; while (true) { switch (state.peek()) { case u',': result.widthExceptAffixes += 1; result.groupingSizes <<= 16; break; case u'#': if (result.integerNumerals > 0) { state.toParseException(u"# cannot follow 0 before decimal point"); status = U_UNEXPECTED_TOKEN; return; } result.widthExceptAffixes += 1; result.groupingSizes += 1; if (result.integerAtSigns > 0) { result.integerTrailingHashSigns += 1; } else { result.integerLeadingHashSigns += 1; } result.integerTotal += 1; break; case u'@': if (result.integerNumerals > 0) { state.toParseException(u"Cannot mix 0 and @"); status = U_UNEXPECTED_TOKEN; return; } if (result.integerTrailingHashSigns > 0) { state.toParseException(u"Cannot nest # inside of a run of @"); status = U_UNEXPECTED_TOKEN; return; } result.widthExceptAffixes += 1; result.groupingSizes += 1; result.integerAtSigns += 1; result.integerTotal += 1; break; case u'0': case u'1': case u'2': case u'3': case u'4': case u'5': case u'6': case u'7': case u'8': case u'9': if (result.integerAtSigns > 0) { state.toParseException(u"Cannot mix @ and 0"); status = U_UNEXPECTED_TOKEN; return; } result.widthExceptAffixes += 1; result.groupingSizes += 1; result.integerNumerals += 1; result.integerTotal += 1; if (!result.rounding.isZeroish() || state.peek() != u'0') { result.rounding.appendDigit(static_cast(state.peek() - u'0'), 0, true); } break; default: goto after_outer; } state.next(); // consume the symbol } after_outer: // Disallow patterns with a trailing ',' or with two ',' next to each other auto grouping1 = static_cast (result.groupingSizes & 0xffff); auto grouping2 = static_cast ((result.groupingSizes >> 16) & 0xffff); auto grouping3 = static_cast ((result.groupingSizes >> 32) & 0xffff); if (grouping1 == 0 && grouping2 != -1) { state.toParseException(u"Trailing grouping separator is invalid"); status = U_UNEXPECTED_TOKEN; return; } if (grouping2 == 0 && grouping3 != -1) { state.toParseException(u"Grouping width of zero is invalid"); status = U_PATTERN_SYNTAX_ERROR; return; } } void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) { // Convenience reference: ParsedSubpatternInfo& result = *currentSubpattern; int32_t zeroCounter = 0; while (true) { switch (state.peek()) { case u'#': result.widthExceptAffixes += 1; result.fractionHashSigns += 1; result.fractionTotal += 1; zeroCounter++; break; case u'0': case u'1': case u'2': case u'3': case u'4': case u'5': case u'6': case u'7': case u'8': case u'9': if (result.fractionHashSigns > 0) { state.toParseException(u"0 cannot follow # after decimal point"); status = U_UNEXPECTED_TOKEN; return; } result.widthExceptAffixes += 1; result.fractionNumerals += 1; result.fractionTotal += 1; if (state.peek() == u'0') { zeroCounter++; } else { result.rounding .appendDigit(static_cast(state.peek() - u'0'), zeroCounter, false); zeroCounter = 0; } break; default: return; } state.next(); // consume the symbol } } void ParsedPatternInfo::consumeExponent(UErrorCode& status) { // Convenience reference: ParsedSubpatternInfo& result = *currentSubpattern; if (state.peek() != u'E') { return; } if ((result.groupingSizes & 0xffff0000L) != 0xffff0000L) { state.toParseException(u"Cannot have grouping separator in scientific notation"); status = U_MALFORMED_EXPONENTIAL_PATTERN; return; } state.next(); // consume the E result.widthExceptAffixes++; if (state.peek() == u'+') { state.next(); // consume the + result.exponentHasPlusSign = true; result.widthExceptAffixes++; } while (state.peek() == u'0') { state.next(); // consume the 0 result.exponentZeros += 1; result.widthExceptAffixes++; } } /////////////////////////////////////////////////// /// END RECURSIVE DESCENT PARSER IMPLEMENTATION /// /////////////////////////////////////////////////// void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties& properties, IgnoreRounding ignoreRounding, UErrorCode& status) { if (pattern.length() == 0) { // Backwards compatibility requires that we reset to the default values. // TODO: Only overwrite the properties that "saveToProperties" normally touches? properties.clear(); return; } ParsedPatternInfo patternInfo; parseToPatternInfo(pattern, patternInfo, status); if (U_FAILURE(status)) { return; } patternInfoToProperties(properties, patternInfo, ignoreRounding, status); } void PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo, IgnoreRounding _ignoreRounding, UErrorCode& status) { // Translate from PatternParseResult to Properties. // Note that most data from "negative" is ignored per the specification of DecimalFormat. const ParsedSubpatternInfo& positive = patternInfo.positive; bool ignoreRounding; if (_ignoreRounding == IGNORE_ROUNDING_NEVER) { ignoreRounding = false; } else if (_ignoreRounding == IGNORE_ROUNDING_IF_CURRENCY) { ignoreRounding = positive.hasCurrencySign; } else { U_ASSERT(_ignoreRounding == IGNORE_ROUNDING_ALWAYS); ignoreRounding = true; } // Grouping settings auto grouping1 = static_cast (positive.groupingSizes & 0xffff); auto grouping2 = static_cast ((positive.groupingSizes >> 16) & 0xffff); auto grouping3 = static_cast ((positive.groupingSizes >> 32) & 0xffff); if (grouping2 != -1) { properties.groupingSize = grouping1; properties.groupingUsed = true; } else { properties.groupingSize = -1; properties.groupingUsed = false; } if (grouping3 != -1) { properties.secondaryGroupingSize = grouping2; } else { properties.secondaryGroupingSize = -1; } // For backwards compatibility, require that the pattern emit at least one min digit. int minInt, minFrac; if (positive.integerTotal == 0 && positive.fractionTotal > 0) { // patterns like ".##" minInt = 0; minFrac = uprv_max(1, positive.fractionNumerals); } else if (positive.integerNumerals == 0 && positive.fractionNumerals == 0) { // patterns like "#.##" minInt = 1; minFrac = 0; } else { minInt = positive.integerNumerals; minFrac = positive.fractionNumerals; } // Rounding settings // Don't set basic rounding when there is a currency sign; defer to CurrencyUsage if (positive.integerAtSigns > 0) { properties.minimumFractionDigits = -1; properties.maximumFractionDigits = -1; properties.roundingIncrement = 0.0; properties.minimumSignificantDigits = positive.integerAtSigns; properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns; } else if (!positive.rounding.isZeroish()) { if (!ignoreRounding) { properties.minimumFractionDigits = minFrac; properties.maximumFractionDigits = positive.fractionTotal; properties.roundingIncrement = positive.rounding.toDouble(); } else { properties.minimumFractionDigits = -1; properties.maximumFractionDigits = -1; properties.roundingIncrement = 0.0; } properties.minimumSignificantDigits = -1; properties.maximumSignificantDigits = -1; } else { if (!ignoreRounding) { properties.minimumFractionDigits = minFrac; properties.maximumFractionDigits = positive.fractionTotal; properties.roundingIncrement = 0.0; } else { properties.minimumFractionDigits = -1; properties.maximumFractionDigits = -1; properties.roundingIncrement = 0.0; } properties.minimumSignificantDigits = -1; properties.maximumSignificantDigits = -1; } // If the pattern ends with a '.' then force the decimal point. if (positive.hasDecimal && positive.fractionTotal == 0) { properties.decimalSeparatorAlwaysShown = true; } else { properties.decimalSeparatorAlwaysShown = false; } // Scientific notation settings if (positive.exponentZeros > 0) { properties.exponentSignAlwaysShown = positive.exponentHasPlusSign; properties.minimumExponentDigits = positive.exponentZeros; if (positive.integerAtSigns == 0) { // patterns without '@' can define max integer digits, used for engineering notation properties.minimumIntegerDigits = positive.integerNumerals; properties.maximumIntegerDigits = positive.integerTotal; } else { // patterns with '@' cannot define max integer digits properties.minimumIntegerDigits = 1; properties.maximumIntegerDigits = -1; } } else { properties.exponentSignAlwaysShown = false; properties.minimumExponentDigits = -1; properties.minimumIntegerDigits = minInt; properties.maximumIntegerDigits = -1; } // Compute the affix patterns (required for both padding and affixes) UnicodeString posPrefix = patternInfo.getString(AffixPatternProvider::AFFIX_PREFIX); UnicodeString posSuffix = patternInfo.getString(0); // Padding settings if (positive.hasPadding) { // The width of the positive prefix and suffix templates are included in the padding int paddingWidth = positive.widthExceptAffixes + AffixUtils::estimateLength(posPrefix, status) + AffixUtils::estimateLength(posSuffix, status); properties.formatWidth = paddingWidth; UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING); if (rawPaddingString.length() == 1) { properties.padString = rawPaddingString; } else if (rawPaddingString.length() == 2) { if (rawPaddingString.charAt(0) == u'\'') { properties.padString.setTo(u"'", -1); } else { properties.padString = rawPaddingString; } } else { properties.padString = UnicodeString(rawPaddingString, 1, rawPaddingString.length() - 2); } properties.padPosition = positive.paddingLocation; } else { properties.formatWidth = -1; properties.padString.setToBogus(); properties.padPosition.nullify(); } // Set the affixes // Always call the setter, even if the prefixes are empty, especially in the case of the // negative prefix pattern, to prevent default values from overriding the pattern. properties.positivePrefixPattern = posPrefix; properties.positiveSuffixPattern = posSuffix; if (patternInfo.fHasNegativeSubpattern) { properties.negativePrefixPattern = patternInfo.getString( AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN | AffixPatternProvider::AFFIX_PREFIX); properties.negativeSuffixPattern = patternInfo.getString( AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN); } else { properties.negativePrefixPattern.setToBogus(); properties.negativeSuffixPattern.setToBogus(); } // Set the magnitude multiplier if (positive.hasPercentSign) { properties.magnitudeMultiplier = 2; } else if (positive.hasPerMilleSign) { properties.magnitudeMultiplier = 3; } else { properties.magnitudeMultiplier = 0; } } /////////////////////////////////////////////////////////////////// /// End PatternStringParser.java; begin PatternStringUtils.java /// /////////////////////////////////////////////////////////////////// // Determine whether a given roundingIncrement should be ignored for formatting // based on the current maxFrac value (maximum fraction digits). For example a // roundingIncrement of 0.01 should be ignored if maxFrac is 1, but not if maxFrac // is 2 or more. Note that roundingIncrements are rounded in significance, so // a roundingIncrement of 0.006 is treated like 0.01 for this determination, i.e. // it should not be ignored if maxFrac is 2 or more (but a roundingIncrement of // 0.005 is treated like 0.001 for significance). This is the reason for the // initial doubling below. // roundIncr must be non-zero. bool PatternStringUtils::ignoreRoundingIncrement(double roundIncr, int32_t maxFrac) { if (maxFrac < 0) { return false; } int32_t frac = 0; roundIncr *= 2.0; for (frac = 0; frac <= maxFrac && roundIncr <= 1.0; frac++, roundIncr *= 10.0); return (frac > maxFrac); } UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties, UErrorCode& status) { UnicodeString sb; // Convenience references // The uprv_min() calls prevent DoS int32_t dosMax = 100; int32_t grouping1 = uprv_max(0, uprv_min(properties.groupingSize, dosMax)); int32_t grouping2 = uprv_max(0, uprv_min(properties.secondaryGroupingSize, dosMax)); bool useGrouping = properties.groupingUsed; int32_t paddingWidth = uprv_min(properties.formatWidth, dosMax); NullableValue paddingLocation = properties.padPosition; UnicodeString paddingString = properties.padString; int32_t minInt = uprv_max(0, uprv_min(properties.minimumIntegerDigits, dosMax)); int32_t maxInt = uprv_min(properties.maximumIntegerDigits, dosMax); int32_t minFrac = uprv_max(0, uprv_min(properties.minimumFractionDigits, dosMax)); int32_t maxFrac = uprv_min(properties.maximumFractionDigits, dosMax); int32_t minSig = uprv_min(properties.minimumSignificantDigits, dosMax); int32_t maxSig = uprv_min(properties.maximumSignificantDigits, dosMax); bool alwaysShowDecimal = properties.decimalSeparatorAlwaysShown; int32_t exponentDigits = uprv_min(properties.minimumExponentDigits, dosMax); bool exponentShowPlusSign = properties.exponentSignAlwaysShown; AutoAffixPatternProvider affixProvider(properties, status); // Prefixes sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_PREFIX)); int32_t afterPrefixPos = sb.length(); // Figure out the grouping sizes. if (!useGrouping) { grouping1 = 0; grouping2 = 0; } else if (grouping1 == grouping2) { grouping1 = 0; } int32_t groupingLength = grouping1 + grouping2 + 1; // Figure out the digits we need to put in the pattern. double roundingInterval = properties.roundingIncrement; UnicodeString digitsString; int32_t digitsStringScale = 0; if (maxSig != uprv_min(dosMax, -1)) { // Significant Digits. while (digitsString.length() < minSig) { digitsString.append(u'@'); } while (digitsString.length() < maxSig) { digitsString.append(u'#'); } } else if (roundingInterval != 0.0 && !ignoreRoundingIncrement(roundingInterval,maxFrac)) { // Rounding Interval. digitsStringScale = -roundingutils::doubleFractionLength(roundingInterval, nullptr); // TODO: Check for DoS here? DecimalQuantity incrementQuantity; incrementQuantity.setToDouble(roundingInterval); incrementQuantity.adjustMagnitude(-digitsStringScale); incrementQuantity.roundToMagnitude(0, kDefaultMode, status); UnicodeString str = incrementQuantity.toPlainString(); if (str.charAt(0) == u'-') { // TODO: Unsupported operation exception or fail silently? digitsString.append(str, 1, str.length() - 1); } else { digitsString.append(str); } } while (digitsString.length() + digitsStringScale < minInt) { digitsString.insert(0, u'0'); } while (-digitsStringScale < minFrac) { digitsString.append(u'0'); digitsStringScale--; } // Write the digits to the string builder int32_t m0 = uprv_max(groupingLength, digitsString.length() + digitsStringScale); m0 = (maxInt != dosMax) ? uprv_max(maxInt, m0) - 1 : m0 - 1; int32_t mN = (maxFrac != dosMax) ? uprv_min(-maxFrac, digitsStringScale) : digitsStringScale; for (int32_t magnitude = m0; magnitude >= mN; magnitude--) { int32_t di = digitsString.length() + digitsStringScale - magnitude - 1; if (di < 0 || di >= digitsString.length()) { sb.append(u'#'); } else { sb.append(digitsString.charAt(di)); } // Decimal separator if (magnitude == 0 && (alwaysShowDecimal || mN < 0)) { sb.append(u'.'); } if (!useGrouping) { continue; } // Least-significant grouping separator if (magnitude > 0 && magnitude == grouping1) { sb.append(u','); } // All other grouping separators if (magnitude > grouping1 && grouping2 > 0 && (magnitude - grouping1) % grouping2 == 0) { sb.append(u','); } } // Exponential notation if (exponentDigits != uprv_min(dosMax, -1)) { sb.append(u'E'); if (exponentShowPlusSign) { sb.append(u'+'); } for (int32_t i = 0; i < exponentDigits; i++) { sb.append(u'0'); } } // Suffixes int32_t beforeSuffixPos = sb.length(); sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_POS_SUFFIX)); // Resolve Padding if (paddingWidth > 0 && !paddingLocation.isNull()) { while (paddingWidth - sb.length() > 0) { sb.insert(afterPrefixPos, u'#'); beforeSuffixPos++; } int32_t addedLength; switch (paddingLocation.get(status)) { case PadPosition::UNUM_PAD_BEFORE_PREFIX: addedLength = escapePaddingString(paddingString, sb, 0, status); sb.insert(0, u'*'); afterPrefixPos += addedLength + 1; beforeSuffixPos += addedLength + 1; break; case PadPosition::UNUM_PAD_AFTER_PREFIX: addedLength = escapePaddingString(paddingString, sb, afterPrefixPos, status); sb.insert(afterPrefixPos, u'*'); afterPrefixPos += addedLength + 1; beforeSuffixPos += addedLength + 1; break; case PadPosition::UNUM_PAD_BEFORE_SUFFIX: escapePaddingString(paddingString, sb, beforeSuffixPos, status); sb.insert(beforeSuffixPos, u'*'); break; case PadPosition::UNUM_PAD_AFTER_SUFFIX: sb.append(u'*'); escapePaddingString(paddingString, sb, sb.length(), status); break; } if (U_FAILURE(status)) { return sb; } } // Negative affixes // Ignore if the negative prefix pattern is "-" and the negative suffix is empty if (affixProvider.get().hasNegativeSubpattern()) { sb.append(u';'); sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_PREFIX)); // Copy the positive digit format into the negative. // This is optional; the pattern is the same as if '#' were appended here instead. // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy. // See http://bugs.icu-project.org/trac/ticket/13707 UnicodeString copy(sb); sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos); sb.append(affixProvider.get().getString(AffixPatternProvider::AFFIX_NEG_SUFFIX)); } return sb; } int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex, UErrorCode& status) { (void) status; if (input.length() == 0) { input.setTo(kFallbackPaddingString, -1); } int startLength = output.length(); if (input.length() == 1) { if (input.compare(u"'", -1) == 0) { output.insert(startIndex, u"''", -1); } else { output.insert(startIndex, input); } } else { output.insert(startIndex, u'\''); int offset = 1; for (int i = 0; i < input.length(); i++) { // it's okay to deal in chars here because the quote mark is the only interesting thing. char16_t ch = input.charAt(i); if (ch == u'\'') { output.insert(startIndex + offset, u"''", -1); offset += 2; } else { output.insert(startIndex + offset, ch); offset += 1; } } output.insert(startIndex + offset, u'\''); } return output.length() - startLength; } UnicodeString PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols, bool toLocalized, UErrorCode& status) { // Construct a table of strings to be converted between localized and standard. static constexpr int32_t LEN = 21; UnicodeString table[LEN][2]; int standIdx = toLocalized ? 0 : 1; int localIdx = toLocalized ? 1 : 0; table[0][standIdx] = u"%"; table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); table[1][standIdx] = u"‰"; table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol); table[2][standIdx] = u"."; table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); table[3][standIdx] = u","; table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); table[4][standIdx] = u"-"; table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); table[5][standIdx] = u"+"; table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); table[6][standIdx] = u";"; table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol); table[7][standIdx] = u"@"; table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol); table[8][standIdx] = u"E"; table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol); table[9][standIdx] = u"*"; table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol); table[10][standIdx] = u"#"; table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol); for (int i = 0; i < 10; i++) { table[11 + i][standIdx] = u'0' + i; table[11 + i][localIdx] = symbols.getConstDigitSymbol(i); } // Special case: quotes are NOT allowed to be in any localIdx strings. // Substitute them with '’' instead. for (int32_t i = 0; i < LEN; i++) { table[i][localIdx].findAndReplace(u'\'', u'’'); } // Iterate through the string and convert. // State table: // 0 => base state // 1 => first char inside a quoted sequence in input and output string // 2 => inside a quoted sequence in input and output string // 3 => first char after a close quote in input string; // close quote still needs to be written to output string // 4 => base state in input string; inside quoted sequence in output string // 5 => first char inside a quoted sequence in input string; // inside quoted sequence in output string UnicodeString result; int state = 0; for (int offset = 0; offset < input.length(); offset++) { UChar ch = input.charAt(offset); // Handle a quote character (state shift) if (ch == u'\'') { if (state == 0) { result.append(u'\''); state = 1; continue; } else if (state == 1) { result.append(u'\''); state = 0; continue; } else if (state == 2) { state = 3; continue; } else if (state == 3) { result.append(u'\''); result.append(u'\''); state = 1; continue; } else if (state == 4) { state = 5; continue; } else { U_ASSERT(state == 5); result.append(u'\''); result.append(u'\''); state = 4; continue; } } if (state == 0 || state == 3 || state == 4) { for (auto& pair : table) { // Perform a greedy match on this symbol string UnicodeString temp = input.tempSubString(offset, pair[0].length()); if (temp == pair[0]) { // Skip ahead past this region for the next iteration offset += pair[0].length() - 1; if (state == 3 || state == 4) { result.append(u'\''); state = 0; } result.append(pair[1]); goto continue_outer; } } // No replacement found. Check if a special quote is necessary for (auto& pair : table) { UnicodeString temp = input.tempSubString(offset, pair[1].length()); if (temp == pair[1]) { if (state == 0) { result.append(u'\''); state = 4; } result.append(ch); goto continue_outer; } } // Still nothing. Copy the char verbatim. (Add a close quote if necessary) if (state == 3 || state == 4) { result.append(u'\''); state = 0; } result.append(ch); } else { U_ASSERT(state == 1 || state == 2 || state == 5); result.append(ch); state = 2; } continue_outer:; } // Resolve final quotes if (state == 3 || state == 4) { result.append(u'\''); state = 0; } if (state != 0) { // Malformed localized pattern: unterminated quote status = U_PATTERN_SYNTAX_ERROR; } return result; } void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix, PatternSignType patternSignType, StandardPlural::Form plural, bool perMilleReplacesPercent, UnicodeString& output) { // Should the output render '+' where '-' would normally appear in the pattern? bool plusReplacesMinusSign = (patternSignType == PATTERN_SIGN_TYPE_POS_SIGN) && !patternInfo.positiveHasPlusSign(); // Should we use the affix from the negative subpattern? // (If not, we will use the positive subpattern.) bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() && (patternSignType == PATTERN_SIGN_TYPE_NEG || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign)); // Resolve the flags for the affix pattern. int flags = 0; if (useNegativeAffixPattern) { flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN; } if (isPrefix) { flags |= AffixPatternProvider::AFFIX_PREFIX; } if (plural != StandardPlural::Form::COUNT) { U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural)); flags |= plural; } // Should we prepend a sign to the pattern? bool prependSign; if (!isPrefix || useNegativeAffixPattern) { prependSign = false; } else if (patternSignType == PATTERN_SIGN_TYPE_NEG) { prependSign = true; } else { prependSign = plusReplacesMinusSign; } // Compute the length of the affix pattern. int length = patternInfo.length(flags) + (prependSign ? 1 : 0); // Finally, set the result into the StringBuilder. output.remove(); for (int index = 0; index < length; index++) { char16_t candidate; if (prependSign && index == 0) { candidate = u'-'; } else if (prependSign) { candidate = patternInfo.charAt(flags, index - 1); } else { candidate = patternInfo.charAt(flags, index); } if (plusReplacesMinusSign && candidate == u'-') { candidate = u'+'; } if (perMilleReplacesPercent && candidate == u'%') { candidate = u'‰'; } output.append(candidate); } } PatternSignType PatternStringUtils::resolveSignDisplay(UNumberSignDisplay signDisplay, Signum signum) { switch (signDisplay) { case UNUM_SIGN_AUTO: case UNUM_SIGN_ACCOUNTING: switch (signum) { case SIGNUM_NEG: case SIGNUM_NEG_ZERO: return PATTERN_SIGN_TYPE_NEG; case SIGNUM_POS_ZERO: case SIGNUM_POS: return PATTERN_SIGN_TYPE_POS; default: break; } break; case UNUM_SIGN_ALWAYS: case UNUM_SIGN_ACCOUNTING_ALWAYS: switch (signum) { case SIGNUM_NEG: case SIGNUM_NEG_ZERO: return PATTERN_SIGN_TYPE_NEG; case SIGNUM_POS_ZERO: case SIGNUM_POS: return PATTERN_SIGN_TYPE_POS_SIGN; default: break; } break; case UNUM_SIGN_EXCEPT_ZERO: case UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO: switch (signum) { case SIGNUM_NEG: return PATTERN_SIGN_TYPE_NEG; case SIGNUM_NEG_ZERO: case SIGNUM_POS_ZERO: return PATTERN_SIGN_TYPE_POS; case SIGNUM_POS: return PATTERN_SIGN_TYPE_POS_SIGN; default: break; } break; case UNUM_SIGN_NEVER: return PATTERN_SIGN_TYPE_POS; default: break; } UPRV_UNREACHABLE; return PATTERN_SIGN_TYPE_POS; } #endif /* #if !UCONFIG_NO_FORMATTING */