00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #ifndef __NGRAM_MODEL_INTERNAL_H__
00044 #define __NGRAM_MODEL_INTERNAL_H__
00045
00046 #include "ngram_model.h"
00047 #include "hash_table.h"
00048
/*
 * Body of the n-gram model object (struct ngram_model_s).
 *
 * The members fall into four groups: size/count bookkeeping, four 8-bit
 * descriptors, a log-math handle with the weighting values, and the
 * word-string / word-ID lookup data plus the class and function tables.
 * The meanings given below are inferred from member names and types only;
 * confirm them against the implementation before relying on them.
 */
00055 struct ngram_model_s {
/* presumably a reference count for shared ownership -- TODO confirm */
00056 int refcount;
/* presumably one count per n-gram order (1-grams, 2-grams, ...) -- TODO confirm */
00057 int32 *n_counts;
/* presumably the number of unigram/word slots currently allocated -- TODO confirm */
00058 int32 n_1g_alloc;
/* presumably the number of word strings actually in use -- TODO confirm */
00059 int32 n_words;
/* 8-bit value; presumably the order N of the model -- TODO confirm */
00061 uint8 n;
/* 8-bit value; presumably the number of entries reachable through `classes` */
00062 uint8 n_classes;
/* 8-bit value; NOTE(review): looks like a boolean "strings may be modified" flag */
00063 uint8 writable;
/* 8-bit value; bit meanings are not defined in this header */
00064 uint8 flags;
/* log-math handle; same type as the lmath argument of ngram_model_init() */
00066 logmath_t *lmath;
/*
 * lw / log_wip / log_uw parallel the lw, wip and uw parameters of
 * ngram_funcs_s.apply_weights; the log_ prefix suggests the int32 members
 * hold log-domain values -- TODO confirm.
 */
00067 float32 lw;
00068 int32 log_wip;
00069 int32 log_uw;
/* presumably log-domain values for a uniform distribution and its weight */
00070 int32 log_uniform;
00071 int32 log_uniform_weight;
/* presumably the log-domain representation of zero probability -- TODO confirm */
00072 int32 log_zero;
/* pointer to char pointers; presumably the word strings indexed by word ID */
00073 char **word_str;
/* hash table; presumably maps a word string to its word ID -- TODO confirm */
00074 hash_table_t *wid;
/* presumably scratch space for word-ID arrays -- TODO confirm */
00075 int32 *tmp_wids;
/* pointer to pointers to struct ngram_class_s (declared later in this header) */
00076 struct ngram_class_s **classes;
/* function-pointer table (struct ngram_funcs_s, declared later in this header) */
00077 struct ngram_funcs_s *funcs;
00078 };
00079
/*
 * Body of a word-class object (struct ngram_class_s).
 *
 * tag_wid and start_wid have the same names as two parameters of
 * ngram_class_new(). The remaining members are a probability array and a
 * table of struct ngram_hash_s entries. Meanings are inferred from names;
 * confirm against the implementation.
 */
00083 struct ngram_class_s {
/* presumably the word ID of the class tag itself -- TODO confirm */
00084 int32 tag_wid;
/* presumably the first word ID belonging to this class -- TODO confirm */
00085 int32 start_wid;
/* presumably the number of entries in prob1 -- TODO confirm */
00086 int32 n_words;
/* presumably one probability value per word in the class -- TODO confirm */
00087 int32 *prob1;
/*
 * Entry type declared inline; when compiled as C the tag
 * struct ngram_hash_s is visible at file scope.
 * `next` is an int32, not a pointer, so any chaining between entries is
 * by index -- NOTE(review): the index base and end marker are not shown here.
 */
00091 struct ngram_hash_s {
00092 int32 wid;
00093 int32 prob1;
00094 int32 next;
00095 } *nword_hash;
/* presumably the allocated size of nword_hash -- TODO confirm */
00096 int32 n_hash;
/* presumably the number of nword_hash entries occupied -- TODO confirm */
00097 int32 n_hash_inuse;
00098 };
00099
/*
 * Size constant; its use is not visible in this header.
 * NOTE(review): presumably the initial size of ngram_class_s.nword_hash.
 */
00100 #define NGRAM_HASH_SIZE 128
00101
/*
 * Word-ID bit layout implemented by the four macros below:
 *   bit  31     - set when the ID is a class word ID
 *   bits 24..30 - class ID (7 bits)
 *   bits 0..23  - base word ID (24 bits)
 */
/* Keep only the low 24 bits of wid (the base word ID). */
00102 #define NGRAM_BASEWID(wid) ((wid)&0xffffff)
/* Extract bits 24..30 of wid; the 0x7f mask discards the bit-31 flag. */
00103 #define NGRAM_CLASSID(wid) (((wid)>>24) & 0x7f)
/*
 * Combine wid and classid into a class word ID with bit 31 set.
 * Neither argument is masked, so a wid wider than 24 bits or a classid
 * wider than 7 bits will not round-trip through NGRAM_BASEWID /
 * NGRAM_CLASSID.
 */
00104 #define NGRAM_CLASSWID(wid,classid) (((classid)<<24) | 0x80000000 | (wid))
/* Nonzero when bit 31 is set; the result is 0x80000000, not 1. */
00105 #define NGRAM_IS_CLASSWID(wid) ((wid)&0x80000000)
00106
/*
 * Step constant; its use is not visible in this header.
 * NOTE(review): presumably the growth increment for unigram storage.
 */
00107 #define UG_ALLOC_STEP 10
00108
/*
 * Table of function pointers (struct ngram_funcs_s / ngram_funcs_t).
 *
 * ngram_model_s.funcs points at one of these and ngram_model_init()
 * takes one as its funcs argument. Each member's behaviour is defined by
 * whichever implementation fills in the table; the notes below are
 * inferred from member names and signatures only.
 */
00110 typedef struct ngram_funcs_s {
/* presumably releases implementation-specific resources of model */
00114 void (*free)(ngram_model_t *model);
/*
 * Takes three float32 weights; lw, wip and uw parallel the lw, log_wip
 * and log_uw members of ngram_model_s. Returns int
 * (NOTE(review): meaning of the return value not shown here).
 */
00118 int (*apply_weights)(ngram_model_t *model,
00119 float32 lw,
00120 float32 wip,
00121 float32 uw);
/*
 * Returns an int32 for word wid given n_hist entries in history; n_used
 * is an output pointer (presumably the n-gram order actually used).
 */
00125 int32 (*score)(ngram_model_t *model,
00126 int32 wid,
00127 int32 *history,
00128 int32 n_hist,
00129 int32 *n_used);
/*
 * Same signature as score; presumably returns the value without the
 * weights applied -- TODO confirm.
 */
00134 int32 (*raw_score)(ngram_model_t *model,
00135 int32 wid,
00136 int32 *history,
00137 int32 n_hist,
00138 int32 *n_used);
/* presumably adds a unigram for wid with weight lweight; returns int32 */
00150 int32 (*add_ug)(ngram_model_t *model,
00151 int32 wid, int32 lweight);
/* NOTE(review): what is flushed is not visible here; presumably cached state */
00155 void (*flush)(ngram_model_t *model);
00156
/* presumably returns an iterator positioned at the n-gram (history, wid) */
00160 ngram_iter_t * (*iter)(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist);
00161
/* presumably returns an iterator over all n-grams of the order selected by m */
00165 ngram_iter_t * (*mgrams)(ngram_model_t *model, int32 m);
00166
/* presumably returns an iterator over the successors of itor's current n-gram */
00170 ngram_iter_t * (*successors)(ngram_iter_t *itor);
00171
/*
 * Returns a pointer to const int32 (presumably the current word IDs) and
 * writes through out_score and out_bowt
 * (NOTE(review): bowt looks like "backoff weight" -- confirm).
 */
00175 int32 const * (*iter_get)(ngram_iter_t *itor,
00176 int32 *out_score,
00177 int32 *out_bowt);
00178
/* presumably advances itor; the end-of-iteration return value is not shown here */
00182 ngram_iter_t * (*iter_next)(ngram_iter_t *itor);
00183
/* presumably releases itor */
00187 void (*iter_free)(ngram_iter_t *itor);
00188 } ngram_funcs_t;
00189
/*
 * Body of the n-gram iterator object (struct ngram_iter_s).
 *
 * model, m and successor have the same names as the parameters of
 * ngram_iter_init(), which takes m and successor as plain int although
 * they are stored here as int16.
 */
00193 struct ngram_iter_s {
/* the model this iterator is associated with */
00194 ngram_model_t *model;
/* presumably storage for the word IDs of the current n-gram -- TODO confirm */
00195 int32 *wids;
/* presumably the order of the n-grams being iterated -- TODO confirm base */
00196 int16 m;
/* NOTE(review): looks like a boolean marking a successor iterator */
00197 int16 successor;
00198 };
00199
/*
 * One class definition (struct classdef_s / classdef_t): a list of word
 * strings with a float32 weight array and a count.
 * NOTE(review): presumably words and weights are parallel arrays of
 * n_words entries -- confirm against read_classdef_file().
 */
00203 typedef struct classdef_s {
00204 char **words;
00205 float32 *weights;
00206 int32 n_words;
00207 } classdef_t;
00208
/*
 * Initialise an n-gram model structure.
 *
 * The funcs and lmath parameters have the same types as the funcs and
 * lmath members of ngram_model_s. n and n_unigram are presumably the
 * model order and the initial unigram count -- TODO confirm.
 * Returns int32 (NOTE(review): success/failure convention not shown here).
 */
00212 int32
00213 ngram_model_init(ngram_model_t *model,
00214 ngram_funcs_t *funcs,
00215 logmath_t *lmath,
00216 int32 n, int32 n_unigram);
00217
/*
 * Model readers. All three take a configuration object, a file name and
 * a log-math handle, and return a pointer to an ngram_model_t.
 * NOTE(review): the names suggest ARPA text, "DMP" and 32-bit "DMP"
 * formats respectively; the failure return value and ownership of the
 * result are not shown here.
 */
00221 ngram_model_t *ngram_model_arpa_read(cmd_ln_t *config,
00222 const char *file_name,
00223 logmath_t *lmath);
00227 ngram_model_t *ngram_model_dmp_read(cmd_ln_t *config,
00228 const char *file_name,
00229 logmath_t *lmath);
00233 ngram_model_t *ngram_model_dmp32_read(cmd_ln_t *config,
00234 const char *file_name,
00235 logmath_t *lmath);
00236
/*
 * Model writers. Both take a model and a destination file name and
 * return int.
 * NOTE(review): presumably the counterparts of the arpa/dmp readers;
 * the return-value convention is not shown here.
 */
00240 int ngram_model_arpa_write(ngram_model_t *model,
00241 const char *file_name);
00245 int ngram_model_dmp_write(ngram_model_t *model,
00246 const char *file_name);
00247
/*
 * Takes a hash table and the name of a class definition file; returns
 * int32. NOTE(review): presumably fills `classes` with classdef_t
 * entries keyed by class name -- confirm against the implementation.
 */
00251 int32 read_classdef_file(hash_table_t *classes, const char *classdef_file);
00252
/* presumably releases a classdef_t and the arrays it holds -- TODO confirm */
00256 void classdef_free(classdef_t *classdef);
00257
/*
 * Returns a pointer to an ngram_class_t. tag_wid and start_wid have the
 * same names as members of struct ngram_class_s; classwords is a glist_t
 * (NOTE(review): element type of the list is not shown here).
 */
00261 ngram_class_t *ngram_class_new(ngram_model_t *model, int32 tag_wid,
00262 int32 start_wid, glist_t classwords);
00263
/* presumably releases an object obtained from ngram_class_new() */
00267 void ngram_class_free(ngram_class_t *lmclass);
00268
/*
 * Returns an int32 for word wid within lmclass; presumably its in-class
 * probability. NOTE(review): the result for a wid outside the class is
 * not shown here.
 */
00274 int32 ngram_class_prob(ngram_class_t *lmclass, int32 wid);
00275
/*
 * Initialise an iterator. The model, m and successor parameters have the
 * same names as members of struct ngram_iter_s, where m and successor
 * are stored as int16 although they are passed here as int.
 */
00279 void ngram_iter_init(ngram_iter_t *itor, ngram_model_t *model,
00280 int m, int successor);
00281
00282 #endif