00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00042 #include <err.h>
00043
00044 #include "phone_loop_search.h"
00045
00046 static int phone_loop_search_start(ps_search_t *search);
00047 static int phone_loop_search_step(ps_search_t *search, int frame_idx);
00048 static int phone_loop_search_finish(ps_search_t *search);
00049 static int phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p);
00050 static void phone_loop_search_free(ps_search_t *search);
00051 static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score);
00052 static int32 phone_loop_search_prob(ps_search_t *search);
00053 static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score);
00054
00055 static ps_searchfuncs_t phone_loop_search_funcs = {
00056 "phone_loop",
00057 phone_loop_search_start,
00058 phone_loop_search_step,
00059 phone_loop_search_finish,
00060 phone_loop_search_reinit,
00061 phone_loop_search_free,
00062 NULL,
00063 phone_loop_search_hyp,
00064 phone_loop_search_prob,
00065 phone_loop_search_seg_iter,
00066 };
00067
00068 static int
00069 phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p)
00070 {
00071 phone_loop_search_t *pls = (phone_loop_search_t *)search;
00072 cmd_ln_t *config = ps_search_config(search);
00073 acmod_t *acmod = ps_search_acmod(search);
00074 int i;
00075
00076
00077 ps_search_base_reinit(search, dict, d2p);
00078
00079
00080 if (pls->hmmctx)
00081 hmm_context_free(pls->hmmctx);
00082 pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef),
00083 acmod->tmat->tp, NULL, acmod->mdef->sseq);
00084 if (pls->hmmctx == NULL)
00085 return -1;
00086
00087
00088 if (pls->phones) {
00089 for (i = 0; i < pls->n_phones; ++i)
00090 hmm_deinit((hmm_t *)&pls->phones[i]);
00091 ckd_free(pls->phones);
00092 }
00093 pls->n_phones = bin_mdef_n_ciphone(acmod->mdef);
00094 pls->phones = ckd_calloc(pls->n_phones, sizeof(*pls->phones));
00095 for (i = 0; i < pls->n_phones; ++i) {
00096 pls->phones[i].ciphone = i;
00097 hmm_init(pls->hmmctx, (hmm_t *)&pls->phones[i],
00098 FALSE,
00099 bin_mdef_pid2ssid(acmod->mdef, i),
00100 bin_mdef_pid2tmatid(acmod->mdef, i));
00101 }
00102 pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam"));
00103 pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam"));
00104 pls->pip = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pip"));
00105 E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n",
00106 pls->beam, pls->pbeam, pls->pip);
00107
00108 return 0;
00109 }
00110
00111 ps_search_t *
00112 phone_loop_search_init(cmd_ln_t *config,
00113 acmod_t *acmod,
00114 dict_t *dict)
00115 {
00116 phone_loop_search_t *pls;
00117
00118
00119 pls = ckd_calloc(1, sizeof(*pls));
00120 ps_search_init(ps_search_base(pls), &phone_loop_search_funcs,
00121 config, acmod, dict, NULL);
00122 phone_loop_search_reinit(ps_search_base(pls), ps_search_dict(pls),
00123 ps_search_dict2pid(pls));
00124
00125 return ps_search_base(pls);
00126 }
00127
00128 static void
00129 phone_loop_search_free_renorm(phone_loop_search_t *pls)
00130 {
00131 gnode_t *gn;
00132 for (gn = pls->renorm; gn; gn = gnode_next(gn))
00133 ckd_free(gnode_ptr(gn));
00134 glist_free(pls->renorm);
00135 pls->renorm = NULL;
00136 }
00137
00138 static void
00139 phone_loop_search_free(ps_search_t *search)
00140 {
00141 phone_loop_search_t *pls = (phone_loop_search_t *)search;
00142 int i;
00143
00144 ps_search_deinit(search);
00145 for (i = 0; i < pls->n_phones; ++i)
00146 hmm_deinit((hmm_t *)&pls->phones[i]);
00147 phone_loop_search_free_renorm(pls);
00148 ckd_free(pls->phones);
00149 hmm_context_free(pls->hmmctx);
00150 ckd_free(pls);
00151 }
00152
00153 static int
00154 phone_loop_search_start(ps_search_t *search)
00155 {
00156 phone_loop_search_t *pls = (phone_loop_search_t *)search;
00157 int i;
00158
00159
00160 for (i = 0; i < pls->n_phones; ++i) {
00161 hmm_t *hmm = (hmm_t *)&pls->phones[i];
00162 hmm_clear(hmm);
00163 hmm_enter(hmm, 0, -1, 0);
00164 }
00165 phone_loop_search_free_renorm(pls);
00166 pls->best_score = 0;
00167
00168 return 0;
00169 }
00170
00171 static void
00172 renormalize_hmms(phone_loop_search_t *pls, int frame_idx, int32 norm)
00173 {
00174 phone_loop_renorm_t *rn = ckd_calloc(1, sizeof(*rn));
00175 int i;
00176
00177 pls->renorm = glist_add_ptr(pls->renorm, rn);
00178 rn->frame_idx = frame_idx;
00179 rn->norm = norm;
00180
00181 for (i = 0; i < pls->n_phones; ++i) {
00182 hmm_normalize((hmm_t *)&pls->phones[i], norm);
00183 }
00184 }
00185
00186 static int32
00187 evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx)
00188 {
00189 int32 bs = WORST_SCORE;
00190 int i, bi;
00191
00192 hmm_context_set_senscore(pls->hmmctx, senscr);
00193
00194 bi = 0;
00195 for (i = 0; i < pls->n_phones; ++i) {
00196 hmm_t *hmm = (hmm_t *)&pls->phones[i];
00197 int32 score;
00198
00199 if (hmm_frame(hmm) < frame_idx)
00200 continue;
00201 score = hmm_vit_eval(hmm);
00202 if (score BETTER_THAN bs) {
00203 bs = score;
00204 bi = i;
00205 }
00206 }
00207 pls->best_score = bs;
00208
00209 for (i = 0; i < pls->n_phones; ++i) {
00210 hmm_t *hmm = (hmm_t *)&pls->phones[i];
00211 if (hmm_frame(hmm) < frame_idx)
00212 continue;
00213 if (hmm_bestscore(hmm) < bs + pls->beam)
00214 continue;
00215 }
00216
00217 return bs;
00218 }
00219
00220 static void
00221 prune_hmms(phone_loop_search_t *pls, int frame_idx)
00222 {
00223 int32 thresh = pls->best_score + pls->beam;
00224 int nf = frame_idx + 1;
00225 int i;
00226
00227
00228 for (i = 0; i < pls->n_phones; ++i) {
00229 hmm_t *hmm = (hmm_t *)&pls->phones[i];
00230
00231 if (hmm_frame(hmm) < frame_idx)
00232 continue;
00233
00234 if (hmm_bestscore(hmm) BETTER_THAN thresh) {
00235 hmm_frame(hmm) = nf;
00236 }
00237 else
00238 hmm_clear_scores(hmm);
00239 }
00240 }
00241
00242 static void
00243 phone_transition(phone_loop_search_t *pls, int frame_idx)
00244 {
00245 int32 thresh = pls->best_score + pls->pbeam;
00246 int nf = frame_idx + 1;
00247 int i;
00248
00249
00250
00251 for (i = 0; i < pls->n_phones; ++i) {
00252 hmm_t *hmm = (hmm_t *)&pls->phones[i];
00253 int32 newphone_score;
00254 int j;
00255
00256 if (hmm_frame(hmm) != nf)
00257 continue;
00258
00259 newphone_score = hmm_out_score(hmm) + pls->pip;
00260 if (newphone_score BETTER_THAN thresh) {
00261
00262 for (j = 0; j < pls->n_phones; ++j) {
00263 hmm_t *nhmm = (hmm_t *)&pls->phones[j];
00264
00265 if (hmm_frame(nhmm) < frame_idx
00266 || newphone_score BETTER_THAN hmm_in_score(nhmm)) {
00267 hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf);
00268 }
00269 }
00270 }
00271 }
00272 }
00273
00274 static int
00275 phone_loop_search_step(ps_search_t *search, int frame_idx)
00276 {
00277 phone_loop_search_t *pls = (phone_loop_search_t *)search;
00278 acmod_t *acmod = ps_search_acmod(search);
00279 int16 const *senscr;
00280 int i;
00281
00282
00283 if (!ps_search_acmod(pls)->compallsen)
00284 for (i = 0; i < pls->n_phones; ++i)
00285 acmod_activate_hmm(acmod, (hmm_t *)&pls->phones[i]);
00286
00287
00288 senscr = acmod_score(acmod, &frame_idx);
00289
00290
00291 if (pls->best_score + (2 * pls->beam) WORSE_THAN WORST_SCORE) {
00292 E_INFO("Renormalizing Scores at frame %d, best score %d\n",
00293 frame_idx, pls->best_score);
00294 renormalize_hmms(pls, frame_idx, pls->best_score);
00295 }
00296
00297
00298 pls->best_score = evaluate_hmms(pls, senscr, frame_idx);
00299
00300
00301 prune_hmms(pls, frame_idx);
00302
00303
00304 phone_transition(pls, frame_idx);
00305
00306 return 0;
00307 }
00308
00309 static int
00310 phone_loop_search_finish(ps_search_t *search)
00311 {
00312
00313 return 0;
00314 }
00315
00316 static char const *
00317 phone_loop_search_hyp(ps_search_t *search, int32 *out_score)
00318 {
00319 E_WARN("Hypotheses are not returned from phone loop search");
00320 return NULL;
00321 }
00322
00323 static int32
00324 phone_loop_search_prob(ps_search_t *search)
00325 {
00326
00327 E_WARN("Posterior probabilities are not returned from phone loop search");
00328 return 0;
00329 }
00330
00331 static ps_seg_t *
00332 phone_loop_search_seg_iter(ps_search_t *search, int32 *out_score)
00333 {
00334 E_WARN("Hypotheses are not returned from phone loop search");
00335 return NULL;
00336 }