00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef __NORMALIZER2_H__
00018 #define __NORMALIZER2_H__
00019
00025 #include "unicode/utypes.h"
00026
00027 #if !UCONFIG_NO_NORMALIZATION
00028
00029 #include "unicode/uniset.h"
00030 #include "unicode/unistr.h"
00031 #include "unicode/unorm2.h"
00032
00033 U_NAMESPACE_BEGIN
00034
/**
 * Abstract interface for Unicode normalization operations on UnicodeString
 * text and on single code points (UChar32).
 *
 * Most operations are pure virtual and must be supplied by a concrete
 * subclass. Instances are obtained through the static factory functions
 * below, which all return pointers to const objects.
 * NOTE(review): the const return type suggests callers are not expected to
 * delete the returned instances -- confirm against the implementation.
 */
00078 class U_COMMON_API Normalizer2 : public UObject {
00079 public:
/**
 * Destructor; defined out of line.
 * NOTE(review): not spelled "virtual" here; whether it is virtual depends on
 * the UObject base class, which is declared outside this file.
 */
00084 ~Normalizer2();
00085
// The five named-instance factories below are compiled out when
// U_HIDE_DRAFT_API is defined.
00086 #ifndef U_HIDE_DRAFT_API
00087
/**
 * Returns a Normalizer2 instance; by its name, for Unicode Normalization
 * Form C (NFC).
 * @param errorCode in/out error code; presumably follows the usual ICU
 *                  convention (no-op if already a failure) -- TODO confirm.
 * @return pointer to a const instance; presumably NULL on failure -- confirm.
 */
00098 static const Normalizer2 *
00099 getNFCInstance(UErrorCode &errorCode);
00100
/**
 * Returns a Normalizer2 instance; by its name, for Unicode Normalization
 * Form D (NFD).
 * @param errorCode in/out error code (ICU convention presumed -- confirm).
 * @return pointer to a const instance; presumably NULL on failure -- confirm.
 */
00112 static const Normalizer2 *
00113 getNFDInstance(UErrorCode &errorCode);
00114
/**
 * Returns a Normalizer2 instance; by its name, for Unicode Normalization
 * Form KC (NFKC).
 * @param errorCode in/out error code (ICU convention presumed -- confirm).
 * @return pointer to a const instance; presumably NULL on failure -- confirm.
 */
00126 static const Normalizer2 *
00127 getNFKCInstance(UErrorCode &errorCode);
00128
/**
 * Returns a Normalizer2 instance; by its name, for Unicode Normalization
 * Form KD (NFKD).
 * @param errorCode in/out error code (ICU convention presumed -- confirm).
 * @return pointer to a const instance; presumably NULL on failure -- confirm.
 */
00140 static const Normalizer2 *
00141 getNFKDInstance(UErrorCode &errorCode);
00142
/**
 * Returns a Normalizer2 instance; by its name, for the NFKC_Casefold
 * mapping.
 * @param errorCode in/out error code (ICU convention presumed -- confirm).
 * @return pointer to a const instance; presumably NULL on failure -- confirm.
 */
00154 static const Normalizer2 *
00155 getNFKCCasefoldInstance(UErrorCode &errorCode);
00156 #endif
00157
/**
 * General factory: returns a Normalizer2 instance selected by a package
 * name, a name and a mode. Unlike the named factories above, this one is
 * not guarded by U_HIDE_DRAFT_API.
 * @param packageName presumably identifies the data package to load from;
 *                    NULL handling is not visible here -- TODO confirm.
 * @param name        presumably the name of the normalization data -- confirm.
 * @param mode        UNormalization2Mode value selecting the operation mode.
 * @param errorCode   in/out error code (ICU convention presumed -- confirm).
 * @return pointer to a const instance; presumably NULL on failure -- confirm.
 */
00179 static const Normalizer2 *
00180 getInstance(const char *packageName,
00181 const char *name,
00182 UNormalization2Mode mode,
00183 UErrorCode &errorCode);
00184
/**
 * Convenience overload: normalizes src into a freshly constructed
 * UnicodeString and returns that string by value.
 * Delegates to the virtual three-argument normalize(); errorCode is passed
 * straight through, so any failure is reported by that overload.
 * @param src       source string
 * @param errorCode in/out error code, forwarded to the delegate
 * @return the local result string, whatever the delegate left in it
 */
00195 UnicodeString
00196 normalize(const UnicodeString &src, UErrorCode &errorCode) const {
00197 UnicodeString result;
00198 normalize(src, result, errorCode);
00199 return result;
00200 }
/**
 * Pure virtual: normalizes src, writing the output to dest.
 * @param src       source string
 * @param dest      destination string
 *                  NOTE(review): whether dest may alias src, and whether its
 *                  previous contents are replaced, is not visible here.
 * @param errorCode in/out error code
 * @return a UnicodeString reference; presumably dest -- confirm.
 */
00214 virtual UnicodeString &
00215 normalize(const UnicodeString &src,
00216 UnicodeString &dest,
00217 UErrorCode &errorCode) const = 0;
/**
 * Pure virtual: by its name, normalizes second and appends it to first.
 * @param first     string to be extended (modifiable)
 * @param second    string to append (read-only)
 * @param errorCode in/out error code
 * @return a UnicodeString reference; presumably first -- confirm.
 */
00232 virtual UnicodeString &
00233 normalizeSecondAndAppend(UnicodeString &first,
00234 const UnicodeString &second,
00235 UErrorCode &errorCode) const = 0;
/**
 * Pure virtual: appends second to first.
 * NOTE(review): by contrast with normalizeSecondAndAppend(), second is
 * presumably expected to be normalized already -- confirm.
 * @param first     string to be extended (modifiable)
 * @param second    string to append (read-only)
 * @param errorCode in/out error code
 * @return a UnicodeString reference; presumably first -- confirm.
 */
00250 virtual UnicodeString &
00251 append(UnicodeString &first,
00252 const UnicodeString &second,
00253 UErrorCode &errorCode) const = 0;
00254
/**
 * Pure virtual: retrieves the decomposition of code point c.
 * @param c             code point
 * @param decomposition output string
 * @return UBool; presumably TRUE when c has a decomposition and
 *         decomposition was set -- confirm.
 */
00268 virtual UBool
00269 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
00270
/**
 * Retrieves the "raw" decomposition of code point c.
 * Virtual but not pure: a base-class implementation exists outside this
 * file, so subclasses are not required to override it.
 * @param c             code point
 * @param decomposition output string
 * @return UBool; presumably TRUE when a raw decomposition exists -- confirm.
 */
00295 virtual UBool
00296 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
00297
/**
 * By its name, composes the pair of code points (a, b) into one code point.
 * Virtual but not pure: a base-class implementation exists outside this
 * file.
 * @param a first code point
 * @param b second code point
 * @return a UChar32; the value returned when the pair does not compose is
 *         not visible here -- TODO confirm.
 */
00313 virtual UChar32
00314 composePair(UChar32 a, UChar32 b) const;
00315
/**
 * Returns the combining class of code point c as an 8-bit value.
 * Virtual but not pure: a base-class implementation exists outside this
 * file.
 * @param c code point
 */
00324 virtual uint8_t
00325 getCombiningClass(UChar32 c) const;
00326
/**
 * Pure virtual: tests whether s is normalized.
 * @param s         input string
 * @param errorCode in/out error code
 * @return UBool result of the test
 */
00341 virtual UBool
00342 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00343
/**
 * Pure virtual: quick check of s, reported as a UNormalizationCheckResult
 * rather than a plain boolean.
 * @param s         input string
 * @param errorCode in/out error code
 */
00359 virtual UNormalizationCheckResult
00360 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00361
/**
 * Pure virtual: returns an int32_t; by its name, the end of the initial
 * span of s for which the quick check answers "yes".
 * NOTE(review): exact index semantics are not visible here -- confirm.
 * @param s         input string
 * @param errorCode in/out error code
 */
00384 virtual int32_t
00385 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
00386
/**
 * Pure virtual: tests whether code point c has a normalization boundary
 * before it.
 */
00400 virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
00401
/**
 * Pure virtual: tests whether code point c has a normalization boundary
 * after it.
 */
00416 virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
00417
/**
 * Pure virtual: tests whether code point c is "inert".
 * NOTE(review): presumably means c is unaffected by normalization and does
 * not interact with its neighbours -- confirm.
 */
00431 virtual UBool isInert(UChar32 c) const = 0;
00432
00433 private:
00434
// Declared private, so it is not callable through the public Normalizer2
// interface. NOTE(review): presumably overrides UObject::getDynamicClassID
// -- confirm against the UObject declaration.
00435 virtual UClassID getDynamicClassID() const;
00436 };
00437
/**
 * Normalizer2 implementation that pairs another Normalizer2 with a
 * UnicodeSet filter.
 *
 * Both the wrapped normalizer and the set are stored as references (members
 * norm2 and set), not copies, so the objects passed to the constructor must
 * stay alive and unmodified for as long as this object is in use.
 * NOTE(review): presumably normalization is applied only to text covered by
 * the filter set -- confirm in the implementation file.
 *
 * Every virtual member function declared below has the same signature as a
 * virtual member function of Normalizer2; the definitions are outside this
 * file.
 */
00449 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
00450 public:
/**
 * Binds this filter to an existing normalizer and set.
 * No copies are made: only references are kept, so both arguments must
 * outlive this object.
 * @param n2        the wrapped Normalizer2
 * @param filterSet the UnicodeSet used as the filter
 */
00461 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
00462 norm2(n2), set(filterSet) {}
00463
/**
 * Destructor; defined out of line.
 * The members are references, so nothing is owned through them.
 */
00468 ~FilteredNormalizer2();
00469
/**
 * Normalizes src into dest; same signature as Normalizer2::normalize().
 * @param src       source string
 * @param dest      destination string
 * @param errorCode in/out error code
 * @return a UnicodeString reference; presumably dest -- confirm.
 */
00483 virtual UnicodeString &
00484 normalize(const UnicodeString &src,
00485 UnicodeString &dest,
00486 UErrorCode &errorCode) const;
/**
 * Same signature as Normalizer2::normalizeSecondAndAppend().
 * @param first     string to be extended (modifiable)
 * @param second    string to append (read-only)
 * @param errorCode in/out error code
 * @return a UnicodeString reference; presumably first -- confirm.
 */
00501 virtual UnicodeString &
00502 normalizeSecondAndAppend(UnicodeString &first,
00503 const UnicodeString &second,
00504 UErrorCode &errorCode) const;
/**
 * Same signature as Normalizer2::append().
 * @param first     string to be extended (modifiable)
 * @param second    string to append (read-only)
 * @param errorCode in/out error code
 * @return a UnicodeString reference; presumably first -- confirm.
 */
00519 virtual UnicodeString &
00520 append(UnicodeString &first,
00521 const UnicodeString &second,
00522 UErrorCode &errorCode) const;
00523
/**
 * Same signature as Normalizer2::getDecomposition().
 * @param c             code point
 * @param decomposition output string
 * @return UBool; presumably TRUE when a decomposition was written -- confirm.
 */
00535 virtual UBool
00536 getDecomposition(UChar32 c, UnicodeString &decomposition) const;
00537
/**
 * Same signature as Normalizer2::getRawDecomposition().
 * @param c             code point
 * @param decomposition output string
 * @return UBool; presumably TRUE when a raw decomposition was written --
 *         confirm.
 */
00549 virtual UBool
00550 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
00551
/**
 * Same signature as Normalizer2::composePair().
 * @param a first code point
 * @param b second code point
 * @return a UChar32; the no-composition value is not visible here -- confirm.
 */
00562 virtual UChar32
00563 composePair(UChar32 a, UChar32 b) const;
00564
/**
 * Same signature as Normalizer2::getCombiningClass().
 * @param c code point
 * @return combining class as an 8-bit value
 */
00573 virtual uint8_t
00574 getCombiningClass(UChar32 c) const;
00575
/**
 * Same signature as Normalizer2::isNormalized().
 * @param s         input string
 * @param errorCode in/out error code
 */
00587 virtual UBool
00588 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
/**
 * Same signature as Normalizer2::quickCheck().
 * @param s         input string
 * @param errorCode in/out error code
 */
00600 virtual UNormalizationCheckResult
00601 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
/**
 * Same signature as Normalizer2::spanQuickCheckYes().
 * @param s         input string
 * @param errorCode in/out error code
 */
00613 virtual int32_t
00614 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
00615
/** Same signature as Normalizer2::hasBoundaryBefore(). */
00624 virtual UBool hasBoundaryBefore(UChar32 c) const;
00625
/** Same signature as Normalizer2::hasBoundaryAfter(). */
00634 virtual UBool hasBoundaryAfter(UChar32 c) const;
00635
/** Same signature as Normalizer2::isInert(). */
00643 virtual UBool isInert(UChar32 c) const;
00644 private:
// Internal, non-virtual overload of normalize() taking an extra
// USetSpanCondition argument.
// NOTE(review): presumably the worker behind the public normalize(), with
// spanCondition choosing how the first span is matched against the filter
// set -- confirm.
00645 UnicodeString &
00646 normalize(const UnicodeString &src,
00647 UnicodeString &dest,
00648 USetSpanCondition spanCondition,
00649 UErrorCode &errorCode) const;
00650
// Internal, non-virtual overload taking an extra doNormalize flag.
// NOTE(review): presumably shared by the public normalizeSecondAndAppend()
// and append(), with doNormalize selecting between the two -- confirm.
00651 UnicodeString &
00652 normalizeSecondAndAppend(UnicodeString &first,
00653 const UnicodeString &second,
00654 UBool doNormalize,
00655 UErrorCode &errorCode) const;
00656
// Wrapped normalizer and filter set. Held by reference: this object does
// not own or copy them, so it cannot be rebound after construction.
00657 const Normalizer2 &norm2;
00658 const UnicodeSet &set;
00659 };
00660
00661 U_NAMESPACE_END
00662
00663 #endif // !UCONFIG_NO_NORMALIZATION
00664 #endif // __NORMALIZER2_H__