00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00042 #include "ngram_model_set.h"
00043
00044 #include <err.h>
00045 #include <ckd_alloc.h>
00046 #include <strfuncs.h>
00047 #include <filename.h>
00048
00049 #include <string.h>
00050 #include <stdlib.h>
00051
/* Forward declaration of this module's function table; it is initialized
 * later in this file and handed to ngram_model_init() by build_widmap(). */
00052 static ngram_funcs_t ngram_model_set_funcs;
00053
/**
 * qsort() comparator for an array of C strings (each element is a char *).
 *
 * The string "<UNK>" sorts before every other string; all remaining
 * strings are ordered with strcmp().
 *
 * @param a Pointer to the first array element.
 * @param b Pointer to the second array element.
 * @return Negative, zero, or positive when *a sorts before, equal to,
 *         or after *b.
 */
static int
my_compare(const void *a, const void *b)
{
    const char *lhs = *(char * const *)a;
    const char *rhs = *(char * const *)b;
    int lhs_unk = (strcmp(lhs, "<UNK>") == 0);
    int rhs_unk = (strcmp(rhs, "<UNK>") == 0);

    /* Two "<UNK>" strings must compare equal; reporting "less than" for
     * both argument orders would hand qsort() an inconsistent ordering. */
    if (lhs_unk || rhs_unk)
        return rhs_unk - lhs_unk;

    return strcmp(lhs, rhs);
}
00065
00066 static void
00067 build_widmap(ngram_model_t *base, logmath_t *lmath, int32 n)
00068 {
00069 ngram_model_set_t *set = (ngram_model_set_t *)base;
00070 ngram_model_t **models = set->lms;
00071 hash_table_t *vocab;
00072 glist_t hlist;
00073 gnode_t *gn;
00074 int32 i;
00075
00076
00077 vocab = hash_table_new(models[0]->n_words, FALSE);
00078
00079 for (i = 0; i < set->n_models; ++i) {
00080 int32 j;
00081 for (j = 0; j < models[i]->n_words; ++j) {
00082
00083 (void)hash_table_enter_int32(vocab, models[i]->word_str[j], j);
00084 }
00085 }
00086
00087 if (hash_table_lookup(vocab, "<UNK>", NULL) != 0)
00088 (void)hash_table_enter_int32(vocab, "<UNK>", 0);
00089
00090 ngram_model_init(base, &ngram_model_set_funcs, lmath, n, hash_table_inuse(vocab));
00091 base->writable = FALSE;
00092 i = 0;
00093 hlist = hash_table_tolist(vocab, NULL);
00094 for (gn = hlist; gn; gn = gnode_next(gn)) {
00095 hash_entry_t *ent = gnode_ptr(gn);
00096 base->word_str[i++] = (char *)ent->key;
00097 }
00098 glist_free(hlist);
00099 qsort(base->word_str, base->n_words, sizeof(*base->word_str), my_compare);
00100
00101
00102 if (set->widmap)
00103 ckd_free_2d((void **)set->widmap);
00104 set->widmap = (int32 **) ckd_calloc_2d(base->n_words, set->n_models,
00105 sizeof(**set->widmap));
00106 for (i = 0; i < base->n_words; ++i) {
00107 int32 j;
00108
00109 (void)hash_table_enter_int32(base->wid, base->word_str[i], i);
00110
00111 for (j = 0; j < set->n_models; ++j) {
00112 set->widmap[i][j] = ngram_wid(models[j], base->word_str[i]);
00113
00114 }
00115
00116 }
00117 hash_table_free(vocab);
00118 }
00119
00120 ngram_model_t *
00121 ngram_model_set_init(cmd_ln_t *config,
00122 ngram_model_t **models,
00123 char **names,
00124 const float32 *weights,
00125 int32 n_models)
00126 {
00127 ngram_model_set_t *model;
00128 ngram_model_t *base;
00129 logmath_t *lmath;
00130 int32 i, n;
00131
00132 if (n_models == 0)
00133 return NULL;
00134
00135
00136
00137 lmath = models[0]->lmath;
00138 for (i = 1; i < n_models; ++i) {
00139 if (logmath_get_base(models[i]->lmath) != logmath_get_base(lmath)
00140 || logmath_get_shift(models[i]->lmath) != logmath_get_shift(lmath)) {
00141 E_ERROR("Log-math parameters don't match, will not create LM set\n");
00142 return NULL;
00143 }
00144 }
00145
00146
00147 model = ckd_calloc(1, sizeof(*model));
00148 base = &model->base;
00149 model->n_models = n_models;
00150 model->lms = ckd_calloc(n_models, sizeof(*model->lms));
00151 model->names = ckd_calloc(n_models, sizeof(*model->names));
00152
00153 model->lweights = ckd_calloc(n_models, sizeof(*model->lweights));
00154 {
00155 int32 uniform = logmath_log(lmath, 1.0/n_models);
00156 for (i = 0; i < n_models; ++i)
00157 model->lweights[i] = uniform;
00158 }
00159
00160 if (weights)
00161 model->cur = -1;
00162
00163 n = 0;
00164 for (i = 0; i < n_models; ++i) {
00165 model->lms[i] = models[i];
00166 model->names[i] = ckd_salloc(names[i]);
00167 if (weights)
00168 model->lweights[i] = logmath_log(lmath, weights[i]);
00169
00170 if (models[i]->n > n)
00171 n = models[i]->n;
00172 }
00173
00174 model->maphist = ckd_calloc(n - 1, sizeof(*model->maphist));
00175
00176
00177 build_widmap(base, lmath, n);
00178 return base;
00179 }
00180
00181 ngram_model_t *
00182 ngram_model_set_read(cmd_ln_t *config,
00183 const char *lmctlfile,
00184 logmath_t *lmath)
00185 {
00186 FILE *ctlfp;
00187 glist_t lms = NULL;
00188 glist_t lmnames = NULL;
00189 __BIGSTACKVARIABLE__ char str[1024];
00190 ngram_model_t *set = NULL;
00191 hash_table_t *classes;
00192 char *basedir, *c;
00193
00194
00195
00196 classes = hash_table_new(0, FALSE);
00197 if ((ctlfp = fopen(lmctlfile, "r")) == NULL) {
00198 E_ERROR_SYSTEM("Failed to open %s", lmctlfile);
00199 return NULL;
00200 }
00201
00202
00203
00204 if ((c = strrchr(lmctlfile, '/')) || (c = strrchr(lmctlfile, '\\'))) {
00205
00206 basedir = ckd_calloc(c - lmctlfile + 2, 1);
00207 memcpy(basedir, lmctlfile, c - lmctlfile + 1);
00208 }
00209 else {
00210 basedir = NULL;
00211 }
00212 E_INFO("Reading LM control file '%s'\n", lmctlfile);
00213 if (basedir)
00214 E_INFO("Will prepend '%s' to unqualified paths\n", basedir);
00215
00216 if (fscanf(ctlfp, "%1023s", str) == 1) {
00217 if (strcmp(str, "{") == 0) {
00218
00219 while ((fscanf(ctlfp, "%1023s", str) == 1)
00220 && (strcmp(str, "}") != 0)) {
00221 char *deffile;
00222 if (basedir && !path_is_absolute(str))
00223 deffile = string_join(basedir, str, NULL);
00224 else
00225 deffile = ckd_salloc(str);
00226 E_INFO("Reading classdef from '%s'\n", deffile);
00227 if (read_classdef_file(classes, deffile) < 0) {
00228 ckd_free(deffile);
00229 goto error_out;
00230 }
00231 ckd_free(deffile);
00232 }
00233
00234 if (strcmp(str, "}") != 0) {
00235 E_ERROR("Unexpected EOF in %s\n", lmctlfile);
00236 goto error_out;
00237 }
00238
00239
00240 if (fscanf(ctlfp, "%1023s", str) != 1)
00241 str[0] = '\0';
00242 }
00243 }
00244 else
00245 str[0] = '\0';
00246
00247
00248 while (str[0] != '\0') {
00249 char *lmfile;
00250 ngram_model_t *lm;
00251
00252 if (basedir && str[0] != '/' && str[0] != '\\')
00253 lmfile = string_join(basedir, str, NULL);
00254 else
00255 lmfile = ckd_salloc(str);
00256 E_INFO("Reading lm from '%s'\n", lmfile);
00257 lm = ngram_model_read(config, lmfile, NGRAM_AUTO, lmath);
00258 if (lm == NULL) {
00259 ckd_free(lmfile);
00260 goto error_out;
00261 }
00262 if (fscanf(ctlfp, "%1023s", str) != 1) {
00263 E_ERROR("LMname missing after LMFileName '%s'\n", lmfile);
00264 ckd_free(lmfile);
00265 goto error_out;
00266 }
00267 ckd_free(lmfile);
00268 lms = glist_add_ptr(lms, lm);
00269 lmnames = glist_add_ptr(lmnames, ckd_salloc(str));
00270
00271 if (fscanf(ctlfp, "%1023s", str) == 1) {
00272 if (strcmp(str, "{") == 0) {
00273
00274 while ((fscanf(ctlfp, "%1023s", str) == 1) &&
00275 (strcmp(str, "}") != 0)) {
00276 void *val;
00277 classdef_t *classdef;
00278
00279 if (hash_table_lookup(classes, str, &val) == -1) {
00280 E_ERROR("Unknown class %s in control file\n", str);
00281 goto error_out;
00282 }
00283 classdef = val;
00284 if (ngram_model_add_class(lm, str, 1.0,
00285 classdef->words, classdef->weights,
00286 classdef->n_words) < 0) {
00287 goto error_out;
00288 }
00289 E_INFO("Added class %s containing %d words\n",
00290 str, classdef->n_words);
00291 }
00292 if (strcmp(str, "}") != 0) {
00293 E_ERROR("Unexpected EOF in %s\n", lmctlfile);
00294 goto error_out;
00295 }
00296 if (fscanf(ctlfp, "%1023s", str) != 1)
00297 str[0] = '\0';
00298 }
00299 }
00300 else
00301 str[0] = '\0';
00302 }
00303 fclose(ctlfp);
00304
00305
00306
00307 lms = glist_reverse(lms);
00308 lmnames = glist_reverse(lmnames);
00309 {
00310 int32 n_models;
00311 ngram_model_t **lm_array;
00312 char **name_array;
00313 gnode_t *lm_node, *name_node;
00314 int32 i;
00315
00316 n_models = glist_count(lms);
00317 lm_array = ckd_calloc(n_models, sizeof(*lm_array));
00318 name_array = ckd_calloc(n_models, sizeof(*name_array));
00319 lm_node = lms;
00320 name_node = lmnames;
00321 for (i = 0; i < n_models; ++i) {
00322 lm_array[i] = gnode_ptr(lm_node);
00323 name_array[i] = gnode_ptr(name_node);
00324 lm_node = gnode_next(lm_node);
00325 name_node = gnode_next(name_node);
00326 }
00327 set = ngram_model_set_init(config, lm_array, name_array,
00328 NULL, n_models);
00329 ckd_free(lm_array);
00330 ckd_free(name_array);
00331 }
00332 error_out:
00333 {
00334 gnode_t *gn;
00335 glist_t hlist;
00336
00337 if (set == NULL) {
00338 for (gn = lms; gn; gn = gnode_next(gn)) {
00339 ngram_model_free(gnode_ptr(gn));
00340 }
00341 }
00342 glist_free(lms);
00343 for (gn = lmnames; gn; gn = gnode_next(gn)) {
00344 ckd_free(gnode_ptr(gn));
00345 }
00346 glist_free(lmnames);
00347 hlist = hash_table_tolist(classes, NULL);
00348 for (gn = hlist; gn; gn = gnode_next(gn)) {
00349 hash_entry_t *he = gnode_ptr(gn);
00350 ckd_free((char *)he->key);
00351 classdef_free(he->val);
00352 }
00353 glist_free(hlist);
00354 hash_table_free(classes);
00355 ckd_free(basedir);
00356 }
00357 return set;
00358 }
00359
00360 int32
00361 ngram_model_set_count(ngram_model_t *base)
00362 {
00363 ngram_model_set_t *set = (ngram_model_set_t *)base;
00364 return set->n_models;
00365 }
00366
00367 ngram_model_set_iter_t *
00368 ngram_model_set_iter(ngram_model_t *base)
00369 {
00370 ngram_model_set_t *set = (ngram_model_set_t *)base;
00371 ngram_model_set_iter_t *itor;
00372
00373 if (set == NULL || set->n_models == 0)
00374 return NULL;
00375 itor = ckd_calloc(1, sizeof(*itor));
00376 itor->set = set;
00377 return itor;
00378 }
00379
00380 ngram_model_set_iter_t *
00381 ngram_model_set_iter_next(ngram_model_set_iter_t *itor)
00382 {
00383 if (++itor->cur == itor->set->n_models) {
00384 ngram_model_set_iter_free(itor);
00385 return NULL;
00386 }
00387 return itor;
00388 }
00389
00390 void
00391 ngram_model_set_iter_free(ngram_model_set_iter_t *itor)
00392 {
00393 ckd_free(itor);
00394 }
00395
00396 ngram_model_t *
00397 ngram_model_set_iter_model(ngram_model_set_iter_t *itor,
00398 char const **lmname)
00399 {
00400 if (lmname) *lmname = itor->set->names[itor->cur];
00401 return itor->set->lms[itor->cur];
00402 }
00403
00404 ngram_model_t *
00405 ngram_model_set_lookup(ngram_model_t *base,
00406 const char *name)
00407 {
00408 ngram_model_set_t *set = (ngram_model_set_t *)base;
00409 int32 i;
00410
00411 if (name == NULL) {
00412 if (set->cur == -1)
00413 return NULL;
00414 else
00415 return set->lms[set->cur];
00416 }
00417
00418
00419 for (i = 0; i < set->n_models; ++i)
00420 if (0 == strcmp(set->names[i], name))
00421 break;
00422 if (i == set->n_models)
00423 return NULL;
00424 return set->lms[i];
00425 }
00426
00427 ngram_model_t *
00428 ngram_model_set_select(ngram_model_t *base,
00429 const char *name)
00430 {
00431 ngram_model_set_t *set = (ngram_model_set_t *)base;
00432 int32 i;
00433
00434
00435 for (i = 0; i < set->n_models; ++i)
00436 if (0 == strcmp(set->names[i], name))
00437 break;
00438 if (i == set->n_models)
00439 return NULL;
00440 set->cur = i;
00441 return set->lms[set->cur];
00442 }
00443
00444 const char *
00445 ngram_model_set_current(ngram_model_t *base)
00446 {
00447 ngram_model_set_t *set = (ngram_model_set_t *)base;
00448
00449 if (set->cur == -1)
00450 return NULL;
00451 else
00452 return set->names[set->cur];
00453 }
00454
00455 int32
00456 ngram_model_set_current_wid(ngram_model_t *base,
00457 int32 set_wid)
00458 {
00459 ngram_model_set_t *set = (ngram_model_set_t *)base;
00460
00461 if (set->cur == -1 || set_wid >= base->n_words)
00462 return NGRAM_INVALID_WID;
00463 else
00464 return set->widmap[set_wid][set->cur];
00465 }
00466
00467 int32
00468 ngram_model_set_known_wid(ngram_model_t *base,
00469 int32 set_wid)
00470 {
00471 ngram_model_set_t *set = (ngram_model_set_t *)base;
00472
00473 if (set_wid >= base->n_words)
00474 return FALSE;
00475 else if (set->cur == -1) {
00476 int32 i;
00477 for (i = 0; i < set->n_models; ++i) {
00478 if (set->widmap[set_wid][i] != ngram_unknown_wid(set->lms[i]))
00479 return TRUE;
00480 }
00481 return FALSE;
00482 }
00483 else
00484 return (set->widmap[set_wid][set->cur]
00485 != ngram_unknown_wid(set->lms[set->cur]));
00486 }
00487
00488 ngram_model_t *
00489 ngram_model_set_interp(ngram_model_t *base,
00490 const char **names,
00491 const float32 *weights)
00492 {
00493 ngram_model_set_t *set = (ngram_model_set_t *)base;
00494
00495
00496 if (names && weights) {
00497 int32 i, j;
00498
00499
00500 for (i = 0; i < set->n_models; ++i) {
00501 for (j = 0; j < set->n_models; ++j)
00502 if (0 == strcmp(names[i], set->names[j]))
00503 break;
00504 if (j == set->n_models) {
00505 E_ERROR("Unknown LM name %s\n", names[i]);
00506 return NULL;
00507 }
00508 set->lweights[j] = logmath_log(base->lmath, weights[i]);
00509 }
00510 }
00511 else if (weights) {
00512 memcpy(set->lweights, weights, set->n_models * sizeof(*set->lweights));
00513 }
00514
00515 set->cur = -1;
00516 return base;
00517 }
00518
00519 ngram_model_t *
00520 ngram_model_set_add(ngram_model_t *base,
00521 ngram_model_t *model,
00522 const char *name,
00523 float32 weight,
00524 int reuse_widmap)
00525
00526 {
00527 ngram_model_set_t *set = (ngram_model_set_t *)base;
00528 float32 fprob;
00529 int32 scale, i;
00530
00531
00532 ++set->n_models;
00533 set->lms = ckd_realloc(set->lms, set->n_models * sizeof(*set->lms));
00534 set->lms[set->n_models - 1] = model;
00535 set->names = ckd_realloc(set->names, set->n_models * sizeof(*set->names));
00536 set->names[set->n_models - 1] = ckd_salloc(name);
00537
00538 if (model->n > base->n) {
00539 base->n = model->n;
00540 set->maphist = ckd_realloc(set->maphist,
00541 (model->n - 1) * sizeof(*set->maphist));
00542 }
00543
00544
00545 fprob = weight * 1.0 / set->n_models;
00546 set->lweights = ckd_realloc(set->lweights,
00547 set->n_models * sizeof(*set->lweights));
00548 set->lweights[set->n_models - 1] = logmath_log(base->lmath, fprob);
00549
00550
00551
00552 scale = logmath_log(base->lmath, 1.0 - fprob);
00553 for (i = 0; i < set->n_models - 1; ++i)
00554 set->lweights[i] += scale;
00555
00556
00557 if (reuse_widmap) {
00558 int32 **new_widmap;
00559
00560
00561 new_widmap = (int32 **)ckd_calloc_2d(base->n_words, set->n_models,
00562 sizeof (**new_widmap));
00563 for (i = 0; i < base->n_words; ++i) {
00564
00565 memcpy(new_widmap[i], set->widmap[i],
00566 (set->n_models - 1) * sizeof(**new_widmap));
00567
00568 new_widmap[i][set->n_models-1] = ngram_wid(model, base->word_str[i]);
00569 }
00570 ckd_free_2d((void **)set->widmap);
00571 set->widmap = new_widmap;
00572 }
00573 else {
00574 build_widmap(base, base->lmath, base->n);
00575 }
00576 return model;
00577 }
00578
00579 ngram_model_t *
00580 ngram_model_set_remove(ngram_model_t *base,
00581 const char *name,
00582 int reuse_widmap)
00583 {
00584 ngram_model_set_t *set = (ngram_model_set_t *)base;
00585 ngram_model_t *submodel;
00586 int32 lmidx, scale, n, i;
00587 float32 fprob;
00588
00589 for (lmidx = 0; lmidx < set->n_models; ++lmidx)
00590 if (0 == strcmp(name, set->names[lmidx]))
00591 break;
00592 if (lmidx == set->n_models)
00593 return NULL;
00594 submodel = set->lms[lmidx];
00595
00596
00597
00598 fprob = logmath_exp(base->lmath, set->lweights[lmidx]);
00599 scale = logmath_log(base->lmath, 1.0 - fprob);
00600
00601
00602
00603 --set->n_models;
00604 n = 0;
00605 ckd_free(set->names[lmidx]);
00606 set->names[lmidx] = NULL;
00607 for (i = 0; i < set->n_models; ++i) {
00608 if (i >= lmidx) {
00609 set->lms[i] = set->lms[i+1];
00610 set->names[i] = set->names[i+1];
00611 set->lweights[i] = set->lweights[i+1];
00612 }
00613 set->lweights[i] -= scale;
00614 if (set->lms[i]->n > n)
00615 n = set->lms[i]->n;
00616 }
00617
00618 set->lms[set->n_models] = NULL;
00619 set->lweights[set->n_models] = base->log_zero;
00620
00621
00622
00623 if (reuse_widmap) {
00624
00625 for (i = 0; i < base->n_words; ++i) {
00626 memmove(set->widmap[i] + lmidx, set->widmap[i] + lmidx + 1,
00627 (set->n_models - lmidx) * sizeof(**set->widmap));
00628 }
00629 }
00630 else {
00631 build_widmap(base, base->lmath, n);
00632 }
00633 return submodel;
00634 }
00635
00636 void
00637 ngram_model_set_map_words(ngram_model_t *base,
00638 const char **words,
00639 int32 n_words)
00640 {
00641 ngram_model_set_t *set = (ngram_model_set_t *)base;
00642 int32 i;
00643
00644
00645 if (base->writable) {
00646 for (i = 0; i < base->n_words; ++i) {
00647 ckd_free(base->word_str[i]);
00648 }
00649 }
00650 ckd_free(base->word_str);
00651 ckd_free_2d((void **)set->widmap);
00652 base->writable = TRUE;
00653 base->n_words = base->n_1g_alloc = n_words;
00654 base->word_str = ckd_calloc(n_words, sizeof(*base->word_str));
00655 set->widmap = (int32 **)ckd_calloc_2d(n_words, set->n_models, sizeof(**set->widmap));
00656 hash_table_empty(base->wid);
00657 for (i = 0; i < n_words; ++i) {
00658 int32 j;
00659 base->word_str[i] = ckd_salloc(words[i]);
00660 (void)hash_table_enter_int32(base->wid, base->word_str[i], i);
00661 for (j = 0; j < set->n_models; ++j) {
00662 set->widmap[i][j] = ngram_wid(set->lms[j], base->word_str[i]);
00663 }
00664 }
00665 }
00666
00667 static int
00668 ngram_model_set_apply_weights(ngram_model_t *base, float32 lw,
00669 float32 wip, float32 uw)
00670 {
00671 ngram_model_set_t *set = (ngram_model_set_t *)base;
00672 int32 i;
00673
00674
00675 for (i = 0; i < set->n_models; ++i)
00676 ngram_model_apply_weights(set->lms[i], lw, wip, uw);
00677 return 0;
00678 }
00679
00680 static int32
00681 ngram_model_set_score(ngram_model_t *base, int32 wid,
00682 int32 *history, int32 n_hist,
00683 int32 *n_used)
00684 {
00685 ngram_model_set_t *set = (ngram_model_set_t *)base;
00686 int32 mapwid;
00687 int32 score;
00688 int32 i;
00689
00690
00691 if (n_hist > base->n - 1)
00692 n_hist = base->n - 1;
00693
00694
00695 if (set->cur == -1) {
00696 score = base->log_zero;
00697 for (i = 0; i < set->n_models; ++i) {
00698 int32 j;
00699
00700 mapwid = set->widmap[wid][i];
00701 for (j = 0; j < n_hist; ++j) {
00702 if (history[j] == NGRAM_INVALID_WID)
00703 set->maphist[j] = NGRAM_INVALID_WID;
00704 else
00705 set->maphist[j] = set->widmap[history[j]][i];
00706 }
00707 score = logmath_add(base->lmath, score,
00708 set->lweights[i] +
00709 ngram_ng_score(set->lms[i],
00710 mapwid, set->maphist, n_hist, n_used));
00711 }
00712 }
00713 else {
00714 int32 j;
00715
00716 mapwid = set->widmap[wid][set->cur];
00717 for (j = 0; j < n_hist; ++j) {
00718 if (history[j] == NGRAM_INVALID_WID)
00719 set->maphist[j] = NGRAM_INVALID_WID;
00720 else
00721 set->maphist[j] = set->widmap[history[j]][set->cur];
00722 }
00723 score = ngram_ng_score(set->lms[set->cur],
00724 mapwid, set->maphist, n_hist, n_used);
00725 }
00726
00727 return score;
00728 }
00729
00730 static int32
00731 ngram_model_set_raw_score(ngram_model_t *base, int32 wid,
00732 int32 *history, int32 n_hist,
00733 int32 *n_used)
00734 {
00735 ngram_model_set_t *set = (ngram_model_set_t *)base;
00736 int32 mapwid;
00737 int32 score;
00738 int32 i;
00739
00740
00741 if (n_hist > base->n - 1)
00742 n_hist = base->n - 1;
00743
00744
00745 if (set->cur == -1) {
00746 score = base->log_zero;
00747 for (i = 0; i < set->n_models; ++i) {
00748 int32 j;
00749
00750 mapwid = set->widmap[wid][i];
00751 for (j = 0; j < n_hist; ++j) {
00752 if (history[j] == NGRAM_INVALID_WID)
00753 set->maphist[j] = NGRAM_INVALID_WID;
00754 else
00755 set->maphist[j] = set->widmap[history[j]][i];
00756 }
00757 score = logmath_add(base->lmath, score,
00758 set->lweights[i] +
00759 ngram_ng_prob(set->lms[i],
00760 mapwid, set->maphist, n_hist, n_used));
00761 }
00762 }
00763 else {
00764 int32 j;
00765
00766 mapwid = set->widmap[wid][set->cur];
00767 for (j = 0; j < n_hist; ++j) {
00768 if (history[j] == NGRAM_INVALID_WID)
00769 set->maphist[j] = NGRAM_INVALID_WID;
00770 else
00771 set->maphist[j] = set->widmap[history[j]][set->cur];
00772 }
00773 score = ngram_ng_prob(set->lms[set->cur],
00774 mapwid, set->maphist, n_hist, n_used);
00775 }
00776
00777 return score;
00778 }
00779
00780 static int32
00781 ngram_model_set_add_ug(ngram_model_t *base,
00782 int32 wid, int32 lweight)
00783 {
00784 ngram_model_set_t *set = (ngram_model_set_t *)base;
00785 int32 *newwid;
00786 int32 i, prob;
00787
00788
00789
00790
00791 newwid = ckd_calloc(set->n_models, sizeof(*newwid));
00792 prob = base->log_zero;
00793 for (i = 0; i < set->n_models; ++i) {
00794 int32 wprob, n_hist;
00795
00796
00797 if (set->cur == -1 || set->cur == i) {
00798
00799 newwid[i] = ngram_wid(set->lms[i], base->word_str[wid]);
00800 if (newwid[i] == NGRAM_INVALID_WID) {
00801
00802 newwid[i] = ngram_model_add_word(set->lms[i], base->word_str[wid],
00803 logmath_exp(base->lmath, lweight));
00804 if (newwid[i] == NGRAM_INVALID_WID) {
00805 ckd_free(newwid);
00806 return base->log_zero;
00807 }
00808 }
00809
00810
00811 wprob = ngram_ng_prob(set->lms[i], newwid[i], NULL, 0, &n_hist);
00812 if (set->cur == i)
00813 prob = wprob;
00814 else if (set->cur == -1)
00815 prob = logmath_add(base->lmath, prob, set->lweights[i] + wprob);
00816 }
00817 else {
00818 newwid[i] = NGRAM_INVALID_WID;
00819 }
00820 }
00821
00822
00823
00824 set->widmap = ckd_realloc(set->widmap, base->n_words * sizeof(*set->widmap));
00825 set->widmap[0] = ckd_realloc(set->widmap[0],
00826 base->n_words
00827 * set->n_models
00828 * sizeof(**set->widmap));
00829 for (i = 0; i < base->n_words; ++i)
00830 set->widmap[i] = set->widmap[0] + i * set->n_models;
00831 memcpy(set->widmap[wid], newwid, set->n_models * sizeof(*newwid));
00832 ckd_free(newwid);
00833 return prob;
00834 }
00835
00836 static void
00837 ngram_model_set_free(ngram_model_t *base)
00838 {
00839 ngram_model_set_t *set = (ngram_model_set_t *)base;
00840 int32 i;
00841
00842 for (i = 0; i < set->n_models; ++i)
00843 ngram_model_free(set->lms[i]);
00844 ckd_free(set->lms);
00845 for (i = 0; i < set->n_models; ++i)
00846 ckd_free(set->names[i]);
00847 ckd_free(set->names);
00848 ckd_free(set->lweights);
00849 ckd_free(set->maphist);
00850 ckd_free_2d((void **)set->widmap);
00851 }
00852
00853 static void
00854 ngram_model_set_flush(ngram_model_t *base)
00855 {
00856 ngram_model_set_t *set = (ngram_model_set_t *)base;
00857 int32 i;
00858
00859 for (i = 0; i < set->n_models; ++i)
00860 ngram_model_flush(set->lms[i]);
00861 }
00862
/*
 * Function table for model sets; build_widmap() hands its address to
 * ngram_model_init().  The entries are positional.
 * NOTE(review): the slot each entry fills is fixed by the declaration of
 * ngram_funcs_t, which is not visible in this file - confirm the order
 * against that declaration before reordering or adding entries.
 */
00863 static ngram_funcs_t ngram_model_set_funcs = {
00864 ngram_model_set_free, /* frees submodels, names, weights, maphist, widmap */
00865 ngram_model_set_apply_weights, /* forwards lw/wip/uw to every submodel */
00866 ngram_model_set_score, /* ngram_ng_score() on the selected or all submodels */
00867 ngram_model_set_raw_score, /* same, using ngram_ng_prob() */
00868 ngram_model_set_add_ug, /* adds a unigram to the active submodel(s) */
00869 ngram_model_set_flush /* flushes every submodel */
00870 };