00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #ifndef __UCHARSTRIE_H__
00016 #define __UCHARSTRIE_H__
00017
00024 #include "unicode/utypes.h"
00025 #include "unicode/unistr.h"
00026 #include "unicode/uobject.h"
00027 #include "unicode/ustringtrie.h"
00028
00029 U_NAMESPACE_BEGIN
00030
00031 class Appendable;
00032 class UCharsTrieBuilder;
00033 class UVector32;
00034
00048 class U_COMMON_API UCharsTrie : public UMemory {
00049 public:
00064 UCharsTrie(const UChar *trieUChars)
00065 : ownedArray_(NULL), uchars_(trieUChars),
00066 pos_(uchars_), remainingMatchLength_(-1) {}
00067
00072 ~UCharsTrie();
00073
00080 UCharsTrie(const UCharsTrie &other)
00081 : ownedArray_(NULL), uchars_(other.uchars_),
00082 pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
00083
00089 UCharsTrie &reset() {
00090 pos_=uchars_;
00091 remainingMatchLength_=-1;
00092 return *this;
00093 }
00094
00100 class State : public UMemory {
00101 public:
00106 State() { uchars=NULL; }
00107 private:
00108 friend class UCharsTrie;
00109
00110 const UChar *uchars;
00111 const UChar *pos;
00112 int32_t remainingMatchLength;
00113 };
00114
00122 const UCharsTrie &saveState(State &state) const {
00123 state.uchars=uchars_;
00124 state.pos=pos_;
00125 state.remainingMatchLength=remainingMatchLength_;
00126 return *this;
00127 }
00128
00139 UCharsTrie &resetToState(const State &state) {
00140 if(uchars_==state.uchars && uchars_!=NULL) {
00141 pos_=state.pos;
00142 remainingMatchLength_=state.remainingMatchLength;
00143 }
00144 return *this;
00145 }
00146
00153 UStringTrieResult current() const;
00154
00162 inline UStringTrieResult first(int32_t uchar) {
00163 remainingMatchLength_=-1;
00164 return nextImpl(uchars_, uchar);
00165 }
00166
00175 UStringTrieResult firstForCodePoint(UChar32 cp);
00176
00183 UStringTrieResult next(int32_t uchar);
00184
00192 UStringTrieResult nextForCodePoint(UChar32 cp);
00193
00209 UStringTrieResult next(const UChar *s, int32_t length);
00210
00220 inline int32_t getValue() const {
00221 const UChar *pos=pos_;
00222 int32_t leadUnit=*pos++;
00223
00224 return leadUnit&kValueIsFinal ?
00225 readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
00226 }
00227
00237 inline UBool hasUniqueValue(int32_t &uniqueValue) const {
00238 const UChar *pos=pos_;
00239
00240 return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue);
00241 }
00242
00250 int32_t getNextUChars(Appendable &out) const;
00251
00256 class U_COMMON_API Iterator : public UMemory {
00257 public:
00269 Iterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
00270
00282 Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
00283
00288 ~Iterator();
00289
00295 Iterator &reset();
00296
00301 UBool hasNext() const;
00302
00317 UBool next(UErrorCode &errorCode);
00318
00323 const UnicodeString &getString() const { return str_; }
00328 int32_t getValue() const { return value_; }
00329
00330 private:
00331 UBool truncateAndStop() {
00332 pos_=NULL;
00333 value_=-1;
00334 return TRUE;
00335 }
00336
00337 const UChar *branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode);
00338
00339 const UChar *uchars_;
00340 const UChar *pos_;
00341 const UChar *initialPos_;
00342 int32_t remainingMatchLength_;
00343 int32_t initialRemainingMatchLength_;
00344 UBool skipValue_;
00345
00346 UnicodeString str_;
00347 int32_t maxLength_;
00348 int32_t value_;
00349
00350
00351
00352
00353
00354
00355
00356
00357 UVector32 *stack_;
00358 };
00359
00360 private:
00361 friend class UCharsTrieBuilder;
00362
00369 UCharsTrie(UChar *adoptUChars, const UChar *trieUChars)
00370 : ownedArray_(adoptUChars), uchars_(trieUChars),
00371 pos_(uchars_), remainingMatchLength_(-1) {}
00372
00373
00374 UCharsTrie &operator=(const UCharsTrie &other);
00375
00376 inline void stop() {
00377 pos_=NULL;
00378 }
00379
00380
00381
00382 static inline int32_t readValue(const UChar *pos, int32_t leadUnit) {
00383 int32_t value;
00384 if(leadUnit<kMinTwoUnitValueLead) {
00385 value=leadUnit;
00386 } else if(leadUnit<kThreeUnitValueLead) {
00387 value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
00388 } else {
00389 value=(pos[0]<<16)|pos[1];
00390 }
00391 return value;
00392 }
00393 static inline const UChar *skipValue(const UChar *pos, int32_t leadUnit) {
00394 if(leadUnit>=kMinTwoUnitValueLead) {
00395 if(leadUnit<kThreeUnitValueLead) {
00396 ++pos;
00397 } else {
00398 pos+=2;
00399 }
00400 }
00401 return pos;
00402 }
00403 static inline const UChar *skipValue(const UChar *pos) {
00404 int32_t leadUnit=*pos++;
00405 return skipValue(pos, leadUnit&0x7fff);
00406 }
00407
00408 static inline int32_t readNodeValue(const UChar *pos, int32_t leadUnit) {
00409
00410 int32_t value;
00411 if(leadUnit<kMinTwoUnitNodeValueLead) {
00412 value=(leadUnit>>6)-1;
00413 } else if(leadUnit<kThreeUnitNodeValueLead) {
00414 value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
00415 } else {
00416 value=(pos[0]<<16)|pos[1];
00417 }
00418 return value;
00419 }
00420 static inline const UChar *skipNodeValue(const UChar *pos, int32_t leadUnit) {
00421
00422 if(leadUnit>=kMinTwoUnitNodeValueLead) {
00423 if(leadUnit<kThreeUnitNodeValueLead) {
00424 ++pos;
00425 } else {
00426 pos+=2;
00427 }
00428 }
00429 return pos;
00430 }
00431
00432 static inline const UChar *jumpByDelta(const UChar *pos) {
00433 int32_t delta=*pos++;
00434 if(delta>=kMinTwoUnitDeltaLead) {
00435 if(delta==kThreeUnitDeltaLead) {
00436 delta=(pos[0]<<16)|pos[1];
00437 pos+=2;
00438 } else {
00439 delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
00440 }
00441 }
00442 return pos+delta;
00443 }
00444
00445 static const UChar *skipDelta(const UChar *pos) {
00446 int32_t delta=*pos++;
00447 if(delta>=kMinTwoUnitDeltaLead) {
00448 if(delta==kThreeUnitDeltaLead) {
00449 pos+=2;
00450 } else {
00451 ++pos;
00452 }
00453 }
00454 return pos;
00455 }
00456
00457 static inline UStringTrieResult valueResult(int32_t node) {
00458 return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15));
00459 }
00460
00461
00462 UStringTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar);
00463
00464
00465 UStringTrieResult nextImpl(const UChar *pos, int32_t uchar);
00466
00467
00468
00469
00470 static const UChar *findUniqueValueFromBranch(const UChar *pos, int32_t length,
00471 UBool haveUniqueValue, int32_t &uniqueValue);
00472
00473
00474 static UBool findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue);
00475
00476
00477
00478 static void getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out);
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523 static const int32_t kMaxBranchLinearSubNodeLength=5;
00524
00525
00526 static const int32_t kMinLinearMatch=0x30;
00527 static const int32_t kMaxLinearMatchLength=0x10;
00528
00529
00530
00531
00532 static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;
00533 static const int32_t kNodeTypeMask=kMinValueLead-1;
00534
00535
00536 static const int32_t kValueIsFinal=0x8000;
00537
00538
00539 static const int32_t kMaxOneUnitValue=0x3fff;
00540
00541 static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1;
00542 static const int32_t kThreeUnitValueLead=0x7fff;
00543
00544 static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;
00545
00546
00547 static const int32_t kMaxOneUnitNodeValue=0xff;
00548 static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);
00549 static const int32_t kThreeUnitNodeValueLead=0x7fc0;
00550
00551 static const int32_t kMaxTwoUnitNodeValue=
00552 ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;
00553
00554
00555 static const int32_t kMaxOneUnitDelta=0xfbff;
00556 static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;
00557 static const int32_t kThreeUnitDeltaLead=0xffff;
00558
00559 static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;
00560
00561 UChar *ownedArray_;
00562
00563
00564 const UChar *uchars_;
00565
00566
00567
00568
00569 const UChar *pos_;
00570
00571 int32_t remainingMatchLength_;
00572 };
00573
00574 U_NAMESPACE_END
00575
00576 #endif // __UCHARSTRIE_H__