messagepattern.h

Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *   Copyright (C) 2011-2012, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 *******************************************************************************
00006 *   file name:  messagepattern.h
00007 *   encoding:   US-ASCII
00008 *   tab size:   8 (not used)
00009 *   indentation:4
00010 *
00011 *   created on: 2011mar14
00012 *   created by: Markus W. Scherer
00013 */
00014 
00015 #ifndef __MESSAGEPATTERN_H__
00016 #define __MESSAGEPATTERN_H__
00017 
00023 #include "unicode/utypes.h"
00024 
00025 #if !UCONFIG_NO_FORMATTING
00026 
00027 #include "unicode/parseerr.h"
00028 #include "unicode/unistr.h"
00029 
/**
 * Apostrophe-handling mode of a MessagePattern.
 * A value of this type is passed to the MessagePattern(mode, errorCode)
 * constructor and to clearPatternAndSetApostropheMode(), and is read back
 * through getApostropheMode().
 * The enumerators have no explicit initializers, so they take the values 0 and 1.
 */
00066 enum UMessagePatternApostropheMode {
      // NOTE(review): by its name, doubling an apostrophe is optional in this
      // mode -- confirm the exact quoting rules against the parser implementation.
00078     UMSGPAT_APOS_DOUBLE_OPTIONAL,
      // NOTE(review): by its name, a literal apostrophe must always be doubled
      // in this mode -- confirm against the parser implementation.
00087     UMSGPAT_APOS_DOUBLE_REQUIRED
00088 };
/** Lets the enum be named without the "enum" keyword. */
00092 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
00093 
/**
 * Type tag stored in every MessagePattern::Part (see Part::getType()).
 * The enumerators have no explicit initializers, so they are numbered
 * consecutively starting at 0 in the order listed.
 * NOTE(review): the per-enumerator meanings below that are not tied to code in
 * this header are inferred from the names only -- confirm against the parser.
 */
00098 enum UMessagePatternPartType {
      // Presumably marks the start / limit of a (sub-)message.
00108     UMSGPAT_PART_TYPE_MSG_START,
00117     UMSGPAT_PART_TYPE_MSG_LIMIT,
      // Presumably pattern syntax to skip, a character to insert, and a
      // placeholder to be replaced by a number, respectively.
00125     UMSGPAT_PART_TYPE_SKIP_SYNTAX,
00132     UMSGPAT_PART_TYPE_INSERT_CHAR,
00140     UMSGPAT_PART_TYPE_REPLACE_NUMBER,
      // For ARG_START and ARG_LIMIT parts, Part::getArgType() returns the
      // part's value field cast to UMessagePatternArgType.
00151     UMSGPAT_PART_TYPE_ARG_START,
00158     UMSGPAT_PART_TYPE_ARG_LIMIT,
      // Presumably the components of an argument: number, name, type, style, selector.
00163     UMSGPAT_PART_TYPE_ARG_NUMBER,
00169     UMSGPAT_PART_TYPE_ARG_NAME,
00175     UMSGPAT_PART_TYPE_ARG_TYPE,
00181     UMSGPAT_PART_TYPE_ARG_STYLE,
00187     UMSGPAT_PART_TYPE_ARG_SELECTOR,
      // ARG_INT and ARG_DOUBLE are exactly the two types for which
      // MessagePattern::Part::hasNumericValue() returns true.
00194     UMSGPAT_PART_TYPE_ARG_INT,
00202     UMSGPAT_PART_TYPE_ARG_DOUBLE
00203 };
/** Lets the enum be named without the "enum" keyword. */
00207 typedef enum UMessagePatternPartType UMessagePatternPartType;
00208 
/**
 * Argument type of a pattern argument.
 * MessagePattern::Part::getArgType() returns one of these values: the part's
 * value field for ARG_START/ARG_LIMIT parts, UMSGPAT_ARG_TYPE_NONE otherwise.
 * The enumerators have no explicit initializers, so they are numbered
 * consecutively starting at 0 in the order listed.
 */
00217 enum UMessagePatternArgType {
      // Also the value Part::getArgType() returns for parts that are neither
      // ARG_START nor ARG_LIMIT.
00222     UMSGPAT_ARG_TYPE_NONE,
      // NOTE(review): SIMPLE/CHOICE/SELECT presumably correspond to the
      // parseSimpleStyle()/parseChoiceStyle()/parseSelectStyle() parsers -- confirm.
00228     UMSGPAT_ARG_TYPE_SIMPLE,
00234     UMSGPAT_ARG_TYPE_CHOICE,
      // PLURAL and SELECTORDINAL are the two values for which
      // UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE() evaluates to true.
00244     UMSGPAT_ARG_TYPE_PLURAL,
00249     UMSGPAT_ARG_TYPE_SELECT,
00255     UMSGPAT_ARG_TYPE_SELECTORDINAL
00256 };
/** Lets the enum be named without the "enum" keyword. */
00260 typedef enum UMessagePatternArgType UMessagePatternArgType;
00261 
/**
 * Evaluates to true when argType is UMSGPAT_ARG_TYPE_PLURAL or
 * UMSGPAT_ARG_TYPE_SELECTORDINAL, false otherwise.
 * The argument is parenthesized but may be evaluated twice, so do not pass
 * an expression with side effects.
 */
00267 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
00268     ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
00269 
/**
 * Negative sentinel constants for argument-name checks.
 * NOTE(review): presumably these are the non-number results of
 * MessagePattern::validateArgumentName() / parseArgNumber(), which both
 * return int32_t -- confirm against the implementation.
 */
00270 enum {
      // By its name: the argument name is not a number.
00276     UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
00277 
      // By its name: the argument name is not valid at all.
00285     UMSGPAT_ARG_NAME_NOT_VALID=-2
00286 };
00287 
/**
 * Sentinel double with the value -123456789.
 * NOTE(review): presumably what MessagePattern::getNumericValue() returns for
 * a part that has no numeric value -- confirm against the implementation.
 */
00294 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
00295 
00296 U_NAMESPACE_BEGIN
00297 
// Forward declarations only: MessagePattern stores pointers to these two list
// types (numericValuesList, partsList), so their full definitions are not
// needed in this header.
00298 class MessagePatternDoubleList;
00299 class MessagePatternPartsList;
00300 
/**
 * Parsed form of a message pattern string.
 *
 * As far as this header shows, an instance stores the pattern text (msg), an
 * apostrophe mode (aposMode), an array of Part records (parts, partsLength)
 * and an array of doubles (numericValues, numericValuesLength). The parse*()
 * functions and most helpers are only declared here; their behavior is
 * defined out of line.
 */
00357 class U_COMMON_API MessagePattern : public UObject {
00358 public:
          /**
           * Constructs an instance.
           * @param errorCode ICU error code, passed by reference.
           * NOTE(review): the initial state (empty pattern, default apostrophe
           * mode) is set in the implementation file -- confirm there.
           */
00367     MessagePattern(UErrorCode &errorCode);
00368 
          /**
           * Constructs an instance for a given apostrophe mode.
           * @param mode      apostrophe mode to use.
           * @param errorCode ICU error code, passed by reference.
           */
00378     MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
00379 
          /**
           * Constructs an instance from a pattern string.
           * @param pattern    the pattern text.
           * @param parseError optional parse-error details (pointer parameter).
           * @param errorCode  ICU error code, passed by reference.
           * NOTE(review): presumably parses the pattern immediately, like
           * parse() -- confirm in the implementation.
           */
00398     MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
00399 
          /** Copy constructor. */
00405     MessagePattern(const MessagePattern &other);
00406 
          /** Copy assignment. @return *this (by the reference return type). */
00413     MessagePattern &operator=(const MessagePattern &other);
00414 
          /** Destructor. */
00419     virtual ~MessagePattern();
00420 
          /**
           * Parses a pattern string into this object.
           * @param pattern    the pattern text.
           * @param parseError optional parse-error details (pointer parameter).
           * @param errorCode  ICU error code, passed by reference.
           * @return a reference to a MessagePattern (presumably *this, to allow chaining).
           */
00438     MessagePattern &parse(const UnicodeString &pattern,
00439                           UParseError *parseError, UErrorCode &errorCode);
00440 
          /**
           * Same signature as parse(). By its name the input is a choice-style
           * pattern -- NOTE(review): confirm in the implementation.
           */
00458     MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
00459                                      UParseError *parseError, UErrorCode &errorCode);
00460 
          /**
           * Same signature as parse(). By its name the input is a plural-style
           * pattern -- NOTE(review): confirm in the implementation.
           */
00478     MessagePattern &parsePluralStyle(const UnicodeString &pattern,
00479                                      UParseError *parseError, UErrorCode &errorCode);
00480 
          /**
           * Same signature as parse(). By its name the input is a select-style
           * pattern -- NOTE(review): confirm in the implementation.
           */
00498     MessagePattern &parseSelectStyle(const UnicodeString &pattern,
00499                                      UParseError *parseError, UErrorCode &errorCode);
00500 
          /** Clears this object (defined out of line). */
00506     void clear();
00507 
          /**
           * Calls clear() and then stores a new apostrophe mode.
           * @param mode the apostrophe mode to store.
           */
00514     void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
00515         clear();
00516         aposMode=mode;
00517     }
00518 
          /** Equality comparison (defined out of line). */
00524     UBool operator==(const MessagePattern &other) const;
00525 
          /** Inequality: the exact negation of operator==. */
00531     inline UBool operator!=(const MessagePattern &other) const {
00532         return !operator==(other);
00533     }
00534 
          /** @return a hash code for this object (defined out of line). */
00539     int32_t hashCode() const;
00540 
          /** @return the stored apostrophe mode. */
00545     UMessagePatternApostropheMode getApostropheMode() const {
00546         return aposMode;
00547     }
00548 
00549     // Java has package-private jdkAposMode() here.
00550     // In C++, this is declared in the MessageImpl class.
00551 
          /** @return a const reference to the stored pattern string (msg). */
00556     const UnicodeString &getPatternString() const {
00557         return msg;
00558     }
00559 
          /** @return the hasArgNames flag. */
00565     UBool hasNamedArguments() const {
00566         return hasArgNames;
00567     }
00568 
          /** @return the hasArgNumbers flag. */
00574     UBool hasNumberedArguments() const {
00575         return hasArgNumbers;
00576     }
00577 
          /**
           * Validates an argument name (static; defined out of line).
           * @param name the candidate argument name.
           * @return an int32_t result. NOTE(review): presumably the argument
           *         number, or UMSGPAT_ARG_NAME_NOT_NUMBER /
           *         UMSGPAT_ARG_NAME_NOT_VALID -- confirm in the implementation.
           */
00589     static int32_t validateArgumentName(const UnicodeString &name);
00590 
          /** @return a new UnicodeString (defined out of line). */
00601     UnicodeString autoQuoteApostropheDeep() const;
00602 
00603     class Part;
00604 
          /** @return the number of parts (partsLength). */
00611     int32_t countParts() const {
00612         return partsLength;
00613     }
00614 
          /**
           * @param i index into the parts array.
           * @return a const reference to parts[i]. No bounds check is performed,
           *         so the caller must pass an index below countParts().
           */
00621     const Part &getPart(int32_t i) const {
00622         return parts[i];
00623     }
00624 
          /** @return the type field of part i (same indexing rules as getPart()). */
00632     UMessagePatternPartType getPartType(int32_t i) const {
00633         return getPart(i).type;
00634     }
00635 
          /** @return the index field of the part at partIndex (same indexing rules as getPart()). */
00643     int32_t getPatternIndex(int32_t partIndex) const {
00644         return getPart(partIndex).index;
00645     }
00646 
          /**
           * @param part a part whose index/length select a range of msg.
           * @return msg.tempSubString(part.index, part.length).
           * NOTE(review): tempSubString() presumably yields a string that
           * aliases msg's buffer -- confirm its lifetime rules before keeping
           * the result beyond the life of this object.
           */
00654     UnicodeString getSubstring(const Part &part) const {
00655         return msg.tempSubString(part.index, part.length);
00656     }
00657 
          /**
           * @param part a part whose index/length select a range of msg.
           * @param s    the string to compare with.
           * @return true when msg.compare(part.index, part.length, s) is 0.
           */
00665     UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
00666         return 0==msg.compare(part.index, part.length, s);
00667     }
00668 
          /** @return a double for the given part (defined out of line). */
00675     double getNumericValue(const Part &part) const;
00676 
          /** @return a double for the part index pluralStart (defined out of line). */
00683     double getPluralOffset(int32_t pluralStart) const;
00684 
          /**
           * @param start index of a part.
           * @return the limitPartIndex stored in that part, or start itself when
           *         the stored value is smaller than start.
           */
00693     int32_t getLimitPartIndex(int32_t start) const {
00694         int32_t limit=getPart(start).limitPartIndex;
00695         if(limit<start) {
00696             return start;
00697         }
00698         return limit;
00699     }
00700 
          /**
           * One record of a parsed pattern: a type tag, a start index and
           * length into the pattern string, a small integer value, and a
           * limit-part index. All fields are private; MessagePattern is a
           * friend and accesses them directly.
           */
00708     class Part : public UMemory {
00709     public:
              /** Default constructor; leaves every field uninitialized. */
00714         Part() {}
00715 
              /** @return the type field. */
00721         UMessagePatternPartType getType() const {
00722             return type;
00723         }
00724 
              /** @return the index field. */
00730         int32_t getIndex() const {
00731             return index;
00732         }
00733 
              /** @return the length field, widened to int32_t. */
00740         int32_t getLength() const {
00741             return length;
00742         }
00743 
              /** @return index+length. */
00750         int32_t getLimit() const {
00751             return index+length;
00752         }
00753 
              /** @return the value field, widened to int32_t. */
00760         int32_t getValue() const {
00761             return value;
00762         }
00763 
              /**
               * @return the value field cast to UMessagePatternArgType when this
               *         part is an ARG_START or ARG_LIMIT part;
               *         UMSGPAT_ARG_TYPE_NONE for every other part type.
               */
00770         UMessagePatternArgType getArgType() const {
                  // The local is deliberately not called "type": that name would
                  // shadow the Part::type data member.
00771             UMessagePatternPartType partType=getType();
00772             if(partType==UMSGPAT_PART_TYPE_ARG_START || partType==UMSGPAT_PART_TYPE_ARG_LIMIT) {
00773                 return (UMessagePatternArgType)value;
00774             } else {
00775                 return UMSGPAT_ARG_TYPE_NONE;
00776             }
00777         }
00778 
              /**
               * @param type a part type.
               * @return true when type is ARG_INT or ARG_DOUBLE.
               */
00786         static UBool hasNumericValue(UMessagePatternPartType type) {
00787             return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
00788         }
00789 
              /** Equality comparison (defined out of line). */
00795         UBool operator==(const Part &other) const;
00796 
              /** Inequality: the exact negation of operator==. */
00802         inline UBool operator!=(const Part &other) const {
00803             return !operator==(other);
00804         }
00805 
              /**
               * @return a hash that combines type, index, length and value with
               *         successive multiplications by 37.
               */
00810         int32_t hashCode() const {
                  // Computed in uint32_t so the multiply/add chain wraps modulo
                  // 2^32 instead of overflowing a signed int (undefined behavior)
                  // when index is large. The result is unchanged wherever the
                  // signed computation did not overflow.
00811             return (int32_t)((((uint32_t)type*37+(uint32_t)index)*37+length)*37+value);
00812         }
00813 
00814     private:
00815         friend class MessagePattern;
00816 
              // Upper bounds that match the storage types of length (uint16_t)
              // and value (int16_t) below.
00817         static const int32_t MAX_LENGTH=0xffff;
00818         static const int32_t MAX_VALUE=0x7fff;
00819 
00820         // Some fields are not final because they are modified during pattern parsing.
00821         // After pattern parsing, the parts are effectively immutable.
00822         UMessagePatternPartType type;
00823         int32_t index;
00824         uint16_t length;
00825         int16_t value;
00826         int32_t limitPartIndex;
00827     };
00828 
00829 private:
          // Parsing internals: declared here, defined out of line. The int32_t
          // index parameters and return values are presumably offsets into msg
          // -- NOTE(review): confirm in the implementation.
00830     void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
00831 
00832     void postParse();
00833 
00834     int32_t parseMessage(int32_t index, int32_t msgStartLength,
00835                          int32_t nestingLevel, UMessagePatternArgType parentType,
00836                          UParseError *parseError, UErrorCode &errorCode);
00837 
00838     int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
00839                      UParseError *parseError, UErrorCode &errorCode);
00840 
00841     int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
00842 
00843     int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
00844                              UParseError *parseError, UErrorCode &errorCode);
00845 
00846     int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
00847                                      UParseError *parseError, UErrorCode &errorCode);
00848 
          // Static overload: works on an arbitrary string s over [start, limit).
00857     static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
00858 
          // Convenience overload: forwards to the static version with msg as the string.
00859     int32_t parseArgNumber(int32_t start, int32_t limit) {
00860         return parseArgNumber(msg, start, limit);
00861     }
00862 
00871     void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
00872                      UParseError *parseError, UErrorCode &errorCode);
00873 
00874     // Java has package-private appendReducedApostrophes() here.
00875     // In C++, this is declared in the MessageImpl class.
00876 
00877     int32_t skipWhiteSpace(int32_t index);
00878 
00879     int32_t skipIdentifier(int32_t index);
00880 
00885     int32_t skipDouble(int32_t index);
00886 
00887     static UBool isArgTypeChar(UChar32 c);
00888 
00889     UBool isChoice(int32_t index);
00890 
00891     UBool isPlural(int32_t index);
00892 
00893     UBool isSelect(int32_t index);
00894 
00895     UBool isOrdinal(int32_t index);
00896 
00901     UBool inMessageFormatPattern(int32_t nestingLevel);
00902 
00907     UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
00908 
00909     void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
00910                  int32_t value, UErrorCode &errorCode);
00911 
00912     void addLimitPart(int32_t start,
00913                       UMessagePatternPartType type, int32_t index, int32_t length,
00914                       int32_t value, UErrorCode &errorCode);
00915 
00916     void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
00917 
00918     void setParseError(UParseError *parseError, int32_t index);
00919 
00920     // No ICU "poor man's RTTI" for this class nor its subclasses.
00921     virtual UClassID getDynamicClassID() const;
00922 
00923     UBool init(UErrorCode &errorCode);
00924     UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
00925 
          // Apostrophe mode; read by getApostropheMode(), written by
          // clearPatternAndSetApostropheMode().
00926     UMessagePatternApostropheMode aposMode;
          // The pattern string; returned by getPatternString().
00927     UnicodeString msg;
00928     // ArrayList<Part> parts=new ArrayList<Part>();
00929     MessagePatternPartsList *partsList;
          // Part array indexed by getPart(); partsLength is what countParts() returns.
          // NOTE(review): presumably points into storage owned by partsList -- confirm.
00930     Part *parts;
00931     int32_t partsLength;
00932     // ArrayList<Double> numericValues;
00933     MessagePatternDoubleList *numericValuesList;
          // NOTE(review): presumably points into storage owned by numericValuesList -- confirm.
00934     double *numericValues;
00935     int32_t numericValuesLength;
          // Flags returned by hasNamedArguments() / hasNumberedArguments().
00936     UBool hasArgNames;
00937     UBool hasArgNumbers;
00938     UBool needsAutoQuoting;
00939 };
00940 
00941 U_NAMESPACE_END
00942 
00943 #endif  // !UCONFIG_NO_FORMATTING
00944 
00945 #endif  // __MESSAGEPATTERN_H__

Generated on 25 Nov 2014 for ICU 50.1.2 by  doxygen 1.4.7