// © 2018 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html #include "unicode/utypes.h" #if !UCONFIG_NO_FORMATTING #ifndef __SOURCE_NUMBER_SKELETONS_H__ #define __SOURCE_NUMBER_SKELETONS_H__ #include "number_types.h" #include "numparse_types.h" #include "unicode/ucharstrie.h" #include "string_segment.h" U_NAMESPACE_BEGIN namespace number { namespace impl { // Forward-declaration struct SeenMacroProps; // namespace for enums and entrypoint functions namespace skeleton { //////////////////////////////////////////////////////////////////////////////////////// // NOTE: For examples of how to add a new stem to the number skeleton parser, see: // // https://github.com/unicode-org/icu/commit/a2a7982216b2348070dc71093775ac7195793d73 // // and // // https://github.com/unicode-org/icu/commit/6fe86f3934a8a5701034f648a8f7c5087e84aa28 // //////////////////////////////////////////////////////////////////////////////////////// /** * While parsing a skeleton, this enum records what type of option we expect to find next. */ enum ParseState { // Section 0: We expect whitespace or a stem, but not an option: STATE_NULL, // Section 1: We might accept an option, but it is not required: STATE_SCIENTIFIC, STATE_FRACTION_PRECISION, // Section 2: An option is required: STATE_INCREMENT_PRECISION, STATE_MEASURE_UNIT, STATE_PER_MEASURE_UNIT, STATE_IDENTIFIER_UNIT, STATE_UNIT_USAGE, STATE_CURRENCY_UNIT, STATE_INTEGER_WIDTH, STATE_NUMBERING_SYSTEM, STATE_SCALE, }; /** * All possible stem literals have an entry in the StemEnum. The enum name is the kebab case stem * string literal written in upper snake case. * * @see StemToObject * @see #SERIALIZED_STEM_TRIE */ enum StemEnum { // Section 1: Stems that do not require an option: STEM_COMPACT_SHORT, STEM_COMPACT_LONG, STEM_SCIENTIFIC, STEM_ENGINEERING, STEM_NOTATION_SIMPLE, STEM_BASE_UNIT, STEM_PERCENT, STEM_PERMILLE, STEM_PERCENT_100, // concise-only STEM_PRECISION_INTEGER, STEM_PRECISION_UNLIMITED, STEM_PRECISION_CURRENCY_STANDARD, STEM_PRECISION_CURRENCY_CASH, STEM_ROUNDING_MODE_CEILING, STEM_ROUNDING_MODE_FLOOR, STEM_ROUNDING_MODE_DOWN, STEM_ROUNDING_MODE_UP, STEM_ROUNDING_MODE_HALF_EVEN, STEM_ROUNDING_MODE_HALF_DOWN, STEM_ROUNDING_MODE_HALF_UP, STEM_ROUNDING_MODE_UNNECESSARY, STEM_GROUP_OFF, STEM_GROUP_MIN2, STEM_GROUP_AUTO, STEM_GROUP_ON_ALIGNED, STEM_GROUP_THOUSANDS, STEM_LATIN, STEM_UNIT_WIDTH_NARROW, STEM_UNIT_WIDTH_SHORT, STEM_UNIT_WIDTH_FULL_NAME, STEM_UNIT_WIDTH_ISO_CODE, STEM_UNIT_WIDTH_FORMAL, STEM_UNIT_WIDTH_VARIANT, STEM_UNIT_WIDTH_HIDDEN, STEM_SIGN_AUTO, STEM_SIGN_ALWAYS, STEM_SIGN_NEVER, STEM_SIGN_ACCOUNTING, STEM_SIGN_ACCOUNTING_ALWAYS, STEM_SIGN_EXCEPT_ZERO, STEM_SIGN_ACCOUNTING_EXCEPT_ZERO, STEM_DECIMAL_AUTO, STEM_DECIMAL_ALWAYS, // Section 2: Stems that DO require an option: STEM_PRECISION_INCREMENT, STEM_MEASURE_UNIT, STEM_PER_MEASURE_UNIT, STEM_UNIT, STEM_UNIT_USAGE, STEM_CURRENCY, STEM_INTEGER_WIDTH, STEM_NUMBERING_SYSTEM, STEM_SCALE, }; /** Default wildcard char, accepted on input and printed in output */ constexpr char16_t kWildcardChar = u'*'; /** Alternative wildcard char, accept on input but not printed in output */ constexpr char16_t kAltWildcardChar = u'+'; /** Checks whether the char is a wildcard on input */ inline bool isWildcardChar(char16_t c) { return c == kWildcardChar || c == kAltWildcardChar; } /** * Creates a NumberFormatter corresponding to the given skeleton string. * * @param skeletonString * A number skeleton string, possibly not in its shortest form. * @return An UnlocalizedNumberFormatter with behavior defined by the given skeleton string. */ UnlocalizedNumberFormatter create( const UnicodeString& skeletonString, UParseError* perror, UErrorCode& status); /** * Create a skeleton string corresponding to the given NumberFormatter. * * @param macros * The NumberFormatter options object. * @return A skeleton string in normalized form. */ UnicodeString generate(const MacroProps& macros, UErrorCode& status); /** * Converts from a skeleton string to a MacroProps. This method contains the primary parse loop. * * Internal: use the create() endpoint instead of this function. */ MacroProps parseSkeleton(const UnicodeString& skeletonString, int32_t& errOffset, UErrorCode& status); /** * Given that the current segment represents a stem, parse it and save the result. * * @return The next state after parsing this stem, corresponding to what subset of options to expect. */ ParseState parseStem(const StringSegment& segment, const UCharsTrie& stemTrie, SeenMacroProps& seen, MacroProps& macros, UErrorCode& status); /** * Given that the current segment represents an option, parse it and save the result. * * @return The next state after parsing this option, corresponding to what subset of options to * expect next. */ ParseState parseOption(ParseState stem, const StringSegment& segment, MacroProps& macros, UErrorCode& status); } // namespace skeleton /** * Namespace for utility methods that convert from StemEnum to corresponding objects or enums. This * applies to only the "Section 1" stems, those that are well-defined without an option. */ namespace stem_to_object { Notation notation(skeleton::StemEnum stem); MeasureUnit unit(skeleton::StemEnum stem); Precision precision(skeleton::StemEnum stem); UNumberFormatRoundingMode roundingMode(skeleton::StemEnum stem); UNumberGroupingStrategy groupingStrategy(skeleton::StemEnum stem); UNumberUnitWidth unitWidth(skeleton::StemEnum stem); UNumberSignDisplay signDisplay(skeleton::StemEnum stem); UNumberDecimalSeparatorDisplay decimalSeparatorDisplay(skeleton::StemEnum stem); } // namespace stem_to_object /** * Namespace for utility methods that convert from enums to stem strings. More complex object conversions * take place in the object_to_stem_string namespace. */ namespace enum_to_stem_string { void roundingMode(UNumberFormatRoundingMode value, UnicodeString& sb); void groupingStrategy(UNumberGroupingStrategy value, UnicodeString& sb); void unitWidth(UNumberUnitWidth value, UnicodeString& sb); void signDisplay(UNumberSignDisplay value, UnicodeString& sb); void decimalSeparatorDisplay(UNumberDecimalSeparatorDisplay value, UnicodeString& sb); } // namespace enum_to_stem_string /** * Namespace for utility methods for processing stems and options that cannot be interpreted literally. */ namespace blueprint_helpers { /** @return Whether we successfully found and parsed an exponent width option. */ bool parseExponentWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void generateExponentWidthOption(int32_t minExponentDigits, UnicodeString& sb, UErrorCode& status); /** @return Whether we successfully found and parsed an exponent sign option. */ bool parseExponentSignOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void parseCurrencyOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void generateCurrencyOption(const CurrencyUnit& currency, UnicodeString& sb, UErrorCode& status); // "measure-unit/" is deprecated in favour of "unit/". void parseMeasureUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); // "per-measure-unit/" is deprecated in favour of "unit/". void parseMeasurePerUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); /** * Parses unit identifiers like "meter-per-second" and "foot-and-inch", as * specified via a "unit/" concise skeleton. */ void parseIdentifierUnitOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void parseUnitUsageOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void parseFractionStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void generateFractionStem(int32_t minFrac, int32_t maxFrac, UnicodeString& sb, UErrorCode& status); void parseDigitsStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void generateDigitsStem(int32_t minSig, int32_t maxSig, UnicodeString& sb, UErrorCode& status); void parseScientificStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); // Note: no generateScientificStem since this syntax was added later in ICU 67 void parseIntegerStem(const StringSegment& segment, MacroProps& macros, UErrorCode& status); // Note: no generateIntegerStem since this syntax was added later in ICU 67 /** @return Whether we successfully found and parsed a frac-sig option. */ bool parseFracSigOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void parseIncrementOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void generateIncrementOption(double increment, int32_t trailingZeros, UnicodeString& sb, UErrorCode& status); void parseIntegerWidthOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void generateIntegerWidthOption(int32_t minInt, int32_t maxInt, UnicodeString& sb, UErrorCode& status); void parseNumberingSystemOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void generateNumberingSystemOption(const NumberingSystem& ns, UnicodeString& sb, UErrorCode& status); void parseScaleOption(const StringSegment& segment, MacroProps& macros, UErrorCode& status); void generateScaleOption(int32_t magnitude, const DecNum* arbitrary, UnicodeString& sb, UErrorCode& status); } // namespace blueprint_helpers /** * Class for utility methods for generating a token corresponding to each macro-prop. Each method * returns whether or not a token was written to the string builder. * * This needs to be a class, not a namespace, so it can be friended. */ class GeneratorHelpers { public: /** * Main skeleton generator function. Appends the normalized skeleton for the MacroProps to the given * StringBuilder. * * Internal: use the create() endpoint instead of this function. */ static void generateSkeleton(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); private: static bool notation(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool unit(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool usage(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool precision(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool roundingMode(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool grouping(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool integerWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool symbols(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool unitWidth(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool sign(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool decimal(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); static bool scale(const MacroProps& macros, UnicodeString& sb, UErrorCode& status); }; /** * Struct for null-checking. * In Java, we can just check the object reference. In C++, we need a different method. */ struct SeenMacroProps { bool notation = false; bool unit = false; bool perUnit = false; bool usage = false; bool precision = false; bool roundingMode = false; bool grouper = false; bool padder = false; bool integerWidth = false; bool symbols = false; bool unitWidth = false; bool sign = false; bool decimal = false; bool scale = false; }; namespace { #define SKELETON_UCHAR_TO_CHAR(dest, src, start, end, status) (void)(dest); \ UPRV_BLOCK_MACRO_BEGIN { \ UErrorCode conversionStatus = U_ZERO_ERROR; \ (dest).appendInvariantChars({false, (src).getBuffer() + (start), (end) - (start)}, conversionStatus); \ if (conversionStatus == U_INVARIANT_CONVERSION_ERROR) { \ /* Don't propagate the invariant conversion error; it is a skeleton syntax error */ \ (status) = U_NUMBER_SKELETON_SYNTAX_ERROR; \ return; \ } else if (U_FAILURE(conversionStatus)) { \ (status) = conversionStatus; \ return; \ } \ } UPRV_BLOCK_MACRO_END } // namespace } // namespace impl } // namespace number U_NAMESPACE_END #endif //__SOURCE_NUMBER_SKELETONS_H__ #endif /* #if !UCONFIG_NO_FORMATTING */