• Main Page
  • Data Structures
  • Files
  • File List
  • Globals

src/libpocketsphinx/acmod.c

Go to the documentation of this file.
00001 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
00002 /* ====================================================================
00003  * Copyright (c) 2008 Carnegie Mellon University.  All rights
00004  * reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions
00008  * are met:
00009  *
00010  * 1. Redistributions of source code must retain the above copyright
00011  *    notice, this list of conditions and the following disclaimer. 
00012  *
00013  * 2. Redistributions in binary form must reproduce the above copyright
00014  *    notice, this list of conditions and the following disclaimer in
00015  *    the documentation and/or other materials provided with the
00016  *    distribution.
00017  *
00018  * This work was supported in part by funding from the Defense Advanced 
00019  * Research Projects Agency and the National Science Foundation of the 
00020  * United States of America, and the CMU Sphinx Speech Consortium.
00021  *
00022  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND 
00023  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 
00024  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
00025  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
00026  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00027  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
00028  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
00029  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
00030  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
00031  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
00032  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00033  *
00034  * ====================================================================
00035  *
00036  */
00037 
00038 
00044 /* System headers. */
00045 #include <assert.h>
00046 
00047 /* SphinxBase headers. */
00048 #include <prim_type.h>
00049 #include <err.h>
00050 #include <cmd_ln.h>
00051 #include <strfuncs.h>
00052 #include <string.h>
00053 #include <byteorder.h>
00054 #include <feat.h>
00055 
00056 /* Local headers. */
00057 #include "cmdln_macro.h"
00058 #include "acmod.h"
00059 #include "s2_semi_mgau.h"
00060 #include "ptm_mgau.h"
00061 #include "ms_mgau.h"
00062 
00063 /* Feature and front-end parameters that may be in feat.params */
00064 static const arg_t feat_defn[] = {
00065     waveform_to_cepstral_command_line_macro(),
00066     cepstral_to_feature_command_line_macro(),
00067     CMDLN_EMPTY_OPTION
00068 };
00069 
/*
 * Fallback when the build did not define WORDS_BIGENDIAN.
 * NOTE(review): the fallback value is 1, so every `if (!WORDS_BIGENDIAN)`
 * byte-swap branch in this file is disabled unless the build explicitly
 * defines the macro as 0 -- confirm this is the intended log-file byte
 * order before changing it.
 */
#if !defined(WORDS_BIGENDIAN)
#define WORDS_BIGENDIAN 1
#endif
00073 
00074 static int32 acmod_flags2list(acmod_t *acmod);
00075 static int32 acmod_process_mfcbuf(acmod_t *acmod);
00076 
00077 static int
00078 acmod_init_am(acmod_t *acmod)
00079 {
00080     char const *mdeffn, *tmatfn, *mllrfn;
00081 
00082     /* Read model definition. */
00083     if ((mdeffn = cmd_ln_str_r(acmod->config, "-mdef")) == NULL) {
00084         E_ERROR("Must specify -mdef or -hmm\n");
00085         return -1;
00086     }
00087 
00088     if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) {
00089         E_ERROR("Failed to read model definition from %s\n", mdeffn);
00090         return -1;
00091     }
00092 
00093     /* Read transition matrices. */
00094     if ((tmatfn = cmd_ln_str_r(acmod->config, "-tmat")) == NULL) {
00095         E_ERROR("No tmat file specified\n");
00096         return -1;
00097     }
00098     acmod->tmat = tmat_init(tmatfn, acmod->lmath,
00099                             cmd_ln_float32_r(acmod->config, "-tmatfloor"),
00100                             TRUE);
00101 
00102     /* Read the acoustic models. */
00103     if ((cmd_ln_str_r(acmod->config, "-mean") == NULL)
00104         || (cmd_ln_str_r(acmod->config, "-var") == NULL)
00105         || (cmd_ln_str_r(acmod->config, "-tmat") == NULL)) {
00106         E_ERROR("No mean/var/tmat files specified\n");
00107         return -1;
00108     }
00109 
00110     if (cmd_ln_str_r(acmod->config, "-senmgau")) {
00111         E_INFO("Using general multi-stream GMM computation\n");
00112         acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
00113         if (acmod->mgau == NULL)
00114             return -1;
00115     }
00116     else {
00117         E_INFO("Attempting to use SCHMM computation module\n");
00118         if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) {
00119             E_INFO("Attempting to use PTHMM computation module\n");
00120             if ((acmod->mgau = ptm_mgau_init(acmod)) == NULL) {
00121                 E_INFO("Falling back to general multi-stream GMM computation\n");
00122                 acmod->mgau = ms_mgau_init(acmod->config, acmod->lmath, acmod->mdef);
00123                 if (acmod->mgau == NULL)
00124                     return -1;
00125             }
00126         }
00127     }
00128 
00129     /* If there is an MLLR transform, apply it. */
00130     if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) {
00131         ps_mllr_t *mllr = ps_mllr_read(mllrfn);
00132         if (mllr == NULL)
00133             return -1;
00134         acmod_update_mllr(acmod, mllr);
00135     }
00136 
00137     return 0;
00138 }
00139 
00140 static int
00141 acmod_init_feat(acmod_t *acmod)
00142 {
00143     acmod->fcb = 
00144         feat_init(cmd_ln_str_r(acmod->config, "-feat"),
00145                   cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")),
00146                   cmd_ln_boolean_r(acmod->config, "-varnorm"),
00147                   agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")),
00148                   1, cmd_ln_int32_r(acmod->config, "-ceplen"));
00149     if (acmod->fcb == NULL)
00150         return -1;
00151 
00152     if (cmd_ln_str_r(acmod->config, "-lda")) {
00153         E_INFO("Reading linear feature transformation from %s\n",
00154                cmd_ln_str_r(acmod->config, "-lda"));
00155         if (feat_read_lda(acmod->fcb,
00156                           cmd_ln_str_r(acmod->config, "-lda"),
00157                           cmd_ln_int32_r(acmod->config, "-ldadim")) < 0)
00158             return -1;
00159     }
00160 
00161     if (cmd_ln_str_r(acmod->config, "-svspec")) {
00162         int32 **subvecs;
00163         E_INFO("Using subvector specification %s\n", 
00164                cmd_ln_str_r(acmod->config, "-svspec"));
00165         if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL)
00166             return -1;
00167         if ((feat_set_subvecs(acmod->fcb, subvecs)) < 0)
00168             return -1;
00169     }
00170 
00171     if (cmd_ln_exists_r(acmod->config, "-agcthresh")
00172         && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) {
00173         agc_set_threshold(acmod->fcb->agc_struct,
00174                           cmd_ln_float32_r(acmod->config, "-agcthresh"));
00175     }
00176 
00177     if (acmod->fcb->cmn_struct
00178         && cmd_ln_exists_r(acmod->config, "-cmninit")) {
00179         char *c, *cc, *vallist;
00180         int32 nvals;
00181 
00182         vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit"));
00183         c = vallist;
00184         nvals = 0;
00185         while (nvals < acmod->fcb->cmn_struct->veclen
00186                && (cc = strchr(c, ',')) != NULL) {
00187             *cc = '\0';
00188             acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
00189             c = cc + 1;
00190             ++nvals;
00191         }
00192         if (nvals < acmod->fcb->cmn_struct->veclen && *c != '\0') {
00193             acmod->fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof(c));
00194         }
00195         ckd_free(vallist);
00196     }
00197     return 0;
00198 }
00199 
00200 int
00201 acmod_fe_mismatch(acmod_t *acmod, fe_t *fe)
00202 {
00203     /* Output vector dimension needs to be the same. */
00204     if (cmd_ln_int32_r(acmod->config, "-ceplen") != fe_get_output_size(fe))
00205         return TRUE;
00206     /* Feature parameters need to be the same. */
00207     /* ... */
00208     return FALSE;
00209 }
00210 
00211 int
00212 acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb)
00213 {
00214     /* Feature type needs to be the same. */
00215     if (0 != strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb)))
00216         return TRUE;
00217     /* Input vector dimension needs to be the same. */
00218     if (cmd_ln_int32_r(acmod->config, "-ceplen") != feat_cepsize(fcb))
00219         return TRUE;
00220     /* FIXME: Need to check LDA and stuff too. */
00221     return FALSE;
00222 }
00223 
00224 acmod_t *
00225 acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb)
00226 {
00227     acmod_t *acmod;
00228     char const *featparams;
00229 
00230     acmod = ckd_calloc(1, sizeof(*acmod));
00231     acmod->config = config;
00232     acmod->lmath = lmath;
00233     acmod->state = ACMOD_IDLE;
00234 
00235     /* Look for feat.params in acoustic model dir. */
00236     if ((featparams = cmd_ln_str_r(acmod->config, "-featparams"))) {
00237         if (cmd_ln_parse_file_r(acmod->config, feat_defn, featparams, FALSE) != NULL) {
00238             E_INFO("Parsed model-specific feature parameters from %s\n", featparams);
00239         }
00240     }
00241 
00242     /* Initialize feature computation. */
00243     if (fe) {
00244         if (acmod_fe_mismatch(acmod, fe))
00245             goto error_out;
00246         fe_retain(fe);
00247         acmod->fe = fe;
00248     }
00249     else {
00250         /* Initialize a new front end. */
00251         cmd_ln_retain(config);
00252         acmod->fe = fe_init_auto_r(config);
00253         if (acmod->fe == NULL)
00254             goto error_out;
00255     }
00256     if (fcb) {
00257         if (acmod_feat_mismatch(acmod, fcb))
00258             goto error_out;
00259         feat_retain(fcb);
00260         acmod->fcb = fcb;
00261     }
00262     else {
00263         /* Initialize a new fcb. */
00264         if (acmod_init_feat(acmod) < 0)
00265             goto error_out;
00266     }
00267 
00268     /* Load acoustic model parameters. */
00269     if (acmod_init_am(acmod) < 0)
00270         goto error_out;
00271 
00272 
00273     /* The MFCC buffer needs to be at least as large as the dynamic
00274      * feature window.  */
00275     acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1;
00276     acmod->mfc_buf = (mfcc_t **)
00277         ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize,
00278                       sizeof(**acmod->mfc_buf));
00279 
00280     /* Feature buffer has to be at least as large as MFCC buffer. */
00281     acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(config, "-pl_window");
00282     acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc);
00283 
00284     /* Senone computation stuff. */
00285     acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
00286                                                      sizeof(*acmod->senone_scores));
00287     acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef));
00288     acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef),
00289                                                      sizeof(*acmod->senone_active));
00290     acmod->log_zero = logmath_get_zero(acmod->lmath);
00291     acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen");
00292     return acmod;
00293 
00294 error_out:
00295     acmod_free(acmod);
00296     return NULL;
00297 }
00298 
00299 void
00300 acmod_free(acmod_t *acmod)
00301 {
00302     if (acmod == NULL)
00303         return;
00304 
00305     feat_free(acmod->fcb);
00306     fe_free(acmod->fe);
00307 
00308     if (acmod->mfc_buf)
00309         ckd_free_2d((void **)acmod->mfc_buf);
00310     if (acmod->feat_buf)
00311         feat_array_free(acmod->feat_buf);
00312 
00313     if (acmod->mfcfh)
00314         fclose(acmod->mfcfh);
00315     if (acmod->rawfh)
00316         fclose(acmod->rawfh);
00317 
00318     ckd_free(acmod->senone_scores);
00319     ckd_free(acmod->senone_active_vec);
00320     ckd_free(acmod->senone_active);
00321 
00322     if (acmod->mdef)
00323         bin_mdef_free(acmod->mdef);
00324     if (acmod->tmat)
00325         tmat_free(acmod->tmat);
00326     if (acmod->mgau)
00327         ps_mgau_free(acmod->mgau);
00328     if (acmod->mllr)
00329         ps_mllr_free(acmod->mllr);
00330     
00331     ckd_free(acmod);
00332 }
00333 
00334 ps_mllr_t *
00335 acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr)
00336 {
00337     if (acmod->mllr)
00338         ps_mllr_free(acmod->mllr);
00339     acmod->mllr = mllr;
00340     ps_mgau_transform(acmod->mgau, mllr);
00341 
00342     return mllr;
00343 }
00344 
00345 int
00346 acmod_set_mfcfh(acmod_t *acmod, FILE *logfh)
00347 {
00348     int rv = 0;
00349 
00350     if (acmod->mfcfh)
00351         fclose(acmod->mfcfh);
00352     acmod->mfcfh = logfh;
00353     fwrite(&rv, 4, 1, acmod->mfcfh);
00354     return rv;
00355 }
00356 
00357 int
00358 acmod_set_rawfh(acmod_t *acmod, FILE *logfh)
00359 {
00360     if (acmod->rawfh)
00361         fclose(acmod->rawfh);
00362     acmod->rawfh = logfh;
00363     return 0;
00364 }
00365 
00366 void
00367 acmod_grow_feat_buf(acmod_t *acmod, int nfr)
00368 {
00369     mfcc_t ***new_feat_buf;
00370 
00371     new_feat_buf = feat_array_alloc(acmod->fcb, nfr);
00372     if (acmod->n_feat_frame || acmod->grow_feat) {
00373         memcpy(new_feat_buf[0][0], acmod->feat_buf[0][0],
00374                (acmod->n_feat_alloc
00375                 * feat_dimension(acmod->fcb)
00376                 * sizeof(***acmod->feat_buf)));
00377     }
00378     feat_array_free(acmod->feat_buf);
00379     acmod->feat_buf = new_feat_buf;
00380     acmod->n_feat_alloc = nfr;
00381 }
00382 
00383 int
00384 acmod_set_grow(acmod_t *acmod, int grow_feat)
00385 {
00386     int tmp = acmod->grow_feat;
00387     acmod->grow_feat = grow_feat;
00388 
00389     /* Expand feat_buf to a reasonable size to start with. */
00390     if (grow_feat && acmod->n_feat_alloc < 128)
00391         acmod_grow_feat_buf(acmod, 128);
00392 
00393     return tmp;
00394 }
00395 
00396 int
00397 acmod_start_utt(acmod_t *acmod)
00398 {
00399     fe_start_utt(acmod->fe);
00400     acmod->state = ACMOD_STARTED;
00401     acmod->n_mfc_frame = 0;
00402     acmod->n_feat_frame = 0;
00403     acmod->mfc_outidx = 0;
00404     acmod->feat_outidx = 0;
00405     acmod->output_frame = 0;
00406     acmod->senscr_frame = -1;
00407     acmod->n_senone_active = 0;
00408     acmod->mgau->frame_idx = 0;
00409     return 0;
00410 }
00411 
00412 int
00413 acmod_end_utt(acmod_t *acmod)
00414 {
00415     int32 nfr = 0;
00416 
00417     acmod->state = ACMOD_ENDED;
00418     if (acmod->n_mfc_frame < acmod->n_mfc_alloc) {
00419         int inptr;
00420         /* Where to start writing them (circular buffer) */
00421         inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
00422         /* nfr is always either zero or one. */
00423         fe_end_utt(acmod->fe, acmod->mfc_buf[inptr], &nfr);
00424         acmod->n_mfc_frame += nfr;
00425         /* Process whatever's left, and any leadout. */
00426         if (nfr)
00427             nfr = acmod_process_mfcbuf(acmod);
00428     }
00429     if (acmod->mfcfh) {
00430         int32 outlen, rv;
00431         outlen = (ftell(acmod->mfcfh) - 4) / 4;
00432         if (!WORDS_BIGENDIAN)
00433             SWAP_INT32(&outlen);
00434         /* Try to seek and write */
00435         if ((rv = fseek(acmod->mfcfh, 0, SEEK_SET)) == 0) {
00436             fwrite(&outlen, 4, 1, acmod->mfcfh);
00437         }
00438         fclose(acmod->mfcfh);
00439         acmod->mfcfh = NULL;
00440     }
00441     if (acmod->rawfh) {
00442         fclose(acmod->rawfh);
00443         acmod->rawfh = NULL;
00444     }
00445 
00446     return nfr;
00447 }
00448 
00449 static int
00450 acmod_log_mfc(acmod_t *acmod,
00451               mfcc_t **cep, int n_frames)
00452 {
00453     int i, n;
00454     int32 *ptr = (int32 *)cep[0];
00455 
00456     n = n_frames * feat_cepsize(acmod->fcb);
00457     /* Swap bytes. */
00458     if (!WORDS_BIGENDIAN) {
00459         for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
00460             SWAP_INT32(ptr + i);
00461         }
00462     }
00463     /* Write features. */
00464     if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != n) {
00465         E_ERROR_SYSTEM("Failed to write %d values to log file", n);
00466     }
00467 
00468     /* Swap them back. */
00469     if (!WORDS_BIGENDIAN) {
00470         for (i = 0; i < (n * sizeof(mfcc_t)); ++i) {
00471             SWAP_INT32(ptr + i);
00472         }
00473     }
00474     return 0;
00475 }
00476 
00477 static int
00478 acmod_process_full_cep(acmod_t *acmod,
00479                        mfcc_t ***inout_cep,
00480                        int *inout_n_frames)
00481 {
00482     int32 nfr;
00483 
00484     /* Write to log file. */
00485     if (acmod->mfcfh)
00486         acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
00487 
00488     /* Resize feat_buf to fit. */
00489     if (acmod->n_feat_alloc < *inout_n_frames) {
00490         feat_array_free(acmod->feat_buf);
00491         acmod->feat_buf = feat_array_alloc(acmod->fcb, *inout_n_frames);
00492         acmod->n_feat_alloc = *inout_n_frames;
00493         acmod->n_feat_frame = 0;
00494         acmod->feat_outidx = 0;
00495     }
00496     /* Make dynamic features. */
00497     nfr = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, inout_n_frames,
00498                                TRUE, TRUE, acmod->feat_buf);
00499     acmod->n_feat_frame = nfr;
00500     assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
00501     *inout_cep += *inout_n_frames;
00502     *inout_n_frames = 0;
00503     return nfr;
00504 }
00505 
00506 static int
00507 acmod_process_full_raw(acmod_t *acmod,
00508                        int16 const **inout_raw,
00509                        size_t *inout_n_samps)
00510 {
00511     int32 nfr, ntail;
00512     mfcc_t **cepptr;
00513 
00514     /* Write to logging file if any. */
00515     if (acmod->rawfh)
00516         fwrite(*inout_raw, 2, *inout_n_samps, acmod->rawfh);
00517     /* Resize mfc_buf to fit. */
00518     if (fe_process_frames(acmod->fe, NULL, inout_n_samps, NULL, &nfr) < 0)
00519         return -1;
00520     if (acmod->n_mfc_alloc < nfr + 1) {
00521         ckd_free_2d(acmod->mfc_buf);
00522         acmod->mfc_buf = ckd_calloc_2d(nfr + 1, fe_get_output_size(acmod->fe),
00523                                        sizeof(**acmod->mfc_buf));
00524         acmod->n_mfc_alloc = nfr + 1;
00525     }
00526     acmod->n_mfc_frame = 0;
00527     acmod->mfc_outidx = 0;
00528     fe_start_utt(acmod->fe);
00529     if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
00530                           acmod->mfc_buf, &nfr) < 0)
00531         return -1;
00532     fe_end_utt(acmod->fe, acmod->mfc_buf[nfr], &ntail);
00533     nfr += ntail;
00534 
00535     cepptr = acmod->mfc_buf;
00536     nfr = acmod_process_full_cep(acmod, &cepptr, &nfr);
00537     acmod->n_mfc_frame = 0;
00538     return nfr;
00539 }
00540 
00544 static int32
00545 acmod_process_mfcbuf(acmod_t *acmod)
00546 {
00547     mfcc_t **mfcptr;
00548     int32 ncep;
00549 
00550     ncep = acmod->n_mfc_frame;
00551     /* Also do this in two parts because of the circular mfc_buf. */
00552     if (acmod->mfc_outidx + ncep > acmod->n_mfc_alloc) {
00553         int32 ncep1 = acmod->n_mfc_alloc - acmod->mfc_outidx;
00554         int saved_state = acmod->state;
00555 
00556         /* Make sure we don't end the utterance here. */
00557         if (acmod->state == ACMOD_ENDED)
00558             acmod->state = ACMOD_PROCESSING;
00559         mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
00560         ncep1 = acmod_process_cep(acmod, &mfcptr, &ncep1, FALSE);
00561         /* It's possible that not all available frames were filled. */
00562         ncep -= ncep1;
00563         acmod->n_mfc_frame -= ncep1;
00564         acmod->mfc_outidx += ncep1;
00565         acmod->mfc_outidx %= acmod->n_mfc_alloc;
00566         /* Restore original state (could this really be the end) */
00567         acmod->state = saved_state;
00568     }
00569     mfcptr = acmod->mfc_buf + acmod->mfc_outidx;
00570     ncep = acmod_process_cep(acmod, &mfcptr, &ncep, FALSE);
00571     acmod->n_mfc_frame -= ncep;
00572     acmod->mfc_outidx += ncep;
00573     acmod->mfc_outidx %= acmod->n_mfc_alloc;
00574     return ncep;
00575 }
00576 
00577 int
00578 acmod_process_raw(acmod_t *acmod,
00579                   int16 const **inout_raw,
00580                   size_t *inout_n_samps,
00581                   int full_utt)
00582 {
00583     int32 ncep;
00584 
00585     /* If this is a full utterance, process it all at once. */
00586     if (full_utt)
00587         return acmod_process_full_raw(acmod, inout_raw, inout_n_samps);
00588 
00589     /* Append MFCCs to the end of any that are previously in there
00590      * (in practice, there will probably be none) */
00591     if (inout_n_samps && *inout_n_samps) {
00592         int16 const *prev_audio_inptr = *inout_raw;
00593         int inptr;
00594 
00595         /* Total number of frames available. */
00596         ncep = acmod->n_mfc_alloc - acmod->n_mfc_frame;
00597         /* Where to start writing them (circular buffer) */
00598         inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc;
00599 
00600         /* Write them in two (or more) parts if there is wraparound. */
00601         while (inptr + ncep > acmod->n_mfc_alloc) {
00602             int32 ncep1 = acmod->n_mfc_alloc - inptr;
00603             if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
00604                                   acmod->mfc_buf + inptr, &ncep1) < 0)
00605                 return -1;
00606             /* Write to logging file if any. */
00607             if (acmod->rawfh) {
00608                 fwrite(prev_audio_inptr, 2,
00609                        *inout_raw - prev_audio_inptr,
00610                        acmod->rawfh);
00611                 prev_audio_inptr = *inout_raw;
00612             }
00613             /* ncep1 now contains the number of frames actually
00614              * processed.  This is a good thing, but it means we
00615              * actually still might have some room left at the end of
00616              * the buffer, hence the while loop.  Unfortunately it
00617              * also means that in the case where we are really
00618              * actually done, we need to get out totally, hence the
00619              * goto. */
00620             acmod->n_mfc_frame += ncep1;
00621             ncep -= ncep1;
00622             inptr += ncep1;
00623             inptr %= acmod->n_mfc_alloc;
00624             if (ncep1 == 0)
00625                 goto alldone;
00626         }
00627         assert(inptr + ncep <= acmod->n_mfc_alloc);
00628         if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps,
00629                               acmod->mfc_buf + inptr, &ncep) < 0)
00630             return -1;
00631         /* Write to logging file if any. */
00632         if (acmod->rawfh) {
00633             fwrite(prev_audio_inptr, 2,
00634                    *inout_raw - prev_audio_inptr, acmod->rawfh);
00635             prev_audio_inptr = *inout_raw;
00636         }
00637         acmod->n_mfc_frame += ncep;
00638     alldone:
00639         ;
00640     }
00641 
00642     /* Hand things off to acmod_process_cep. */
00643     return acmod_process_mfcbuf(acmod);
00644 }
00645 
00646 int
00647 acmod_process_cep(acmod_t *acmod,
00648                   mfcc_t ***inout_cep,
00649                   int *inout_n_frames,
00650                   int full_utt)
00651 {
00652     int32 nfeat, ncep, inptr;
00653     int orig_n_frames;
00654 
00655     /* If this is a full utterance, process it all at once. */
00656     if (full_utt)
00657         return acmod_process_full_cep(acmod, inout_cep, inout_n_frames);
00658 
00659     /* Write to log file. */
00660     if (acmod->mfcfh)
00661         acmod_log_mfc(acmod, *inout_cep, *inout_n_frames);
00662 
00663     /* Maximum number of frames we're going to generate. */
00664     orig_n_frames = ncep = nfeat = *inout_n_frames;
00665 
00666     /* FIXME: This behaviour isn't guaranteed... */
00667     if (acmod->state == ACMOD_ENDED)
00668         nfeat += feat_window_size(acmod->fcb);
00669     else if (acmod->state == ACMOD_STARTED)
00670         nfeat -= feat_window_size(acmod->fcb);
00671 
00672     /* Clamp number of features to fit available space. */
00673     if (nfeat > acmod->n_feat_alloc - acmod->n_feat_frame) {
00674         /* Grow it as needed - we have to grow it at the end of an
00675          * utterance because we can't return a short read there. */
00676         if (acmod->grow_feat || acmod->state == ACMOD_ENDED)
00677             acmod_grow_feat_buf(acmod, acmod->n_feat_alloc + nfeat);
00678         else
00679             ncep -= (nfeat - (acmod->n_feat_alloc - acmod->n_feat_frame));
00680     }
00681 
00682     /* Where to start writing in the feature buffer. */
00683     if (acmod->grow_feat) {
00684         /* Grow to avoid wraparound if grow_feat == TRUE. */
00685         inptr = acmod->feat_outidx + acmod->n_feat_frame;
00686         while (inptr + nfeat > acmod->n_feat_alloc)
00687             acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
00688     }
00689     else {
00690         inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
00691     }
00692 
00693     /* Write them in two parts if there is wraparound. */
00694     if (inptr + nfeat > acmod->n_feat_alloc) {
00695         int32 ncep1 = acmod->n_feat_alloc - inptr;
00696         int saved_state = acmod->state;
00697 
00698         /* Make sure we don't end the utterance here. */
00699         if (acmod->state == ACMOD_ENDED)
00700             acmod->state = ACMOD_PROCESSING;
00701         nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
00702                                      &ncep1,
00703                                      (acmod->state == ACMOD_STARTED),
00704                                      (acmod->state == ACMOD_ENDED),
00705                                      acmod->feat_buf + inptr);
00706         if (nfeat < 0)
00707             return -1;
00708         /* Move the output feature pointer forward. */
00709         acmod->n_feat_frame += nfeat;
00710         assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
00711         inptr += nfeat;
00712         inptr %= acmod->n_feat_alloc;
00713         /* Move the input feature pointers forward. */
00714         *inout_n_frames -= ncep1;
00715         *inout_cep += ncep1;
00716         ncep -= ncep1;
00717         /* Restore original state (could this really be the end) */
00718         acmod->state = saved_state;
00719     }
00720 
00721     nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep,
00722                                  &ncep,
00723                                  (acmod->state == ACMOD_STARTED),
00724                                  (acmod->state == ACMOD_ENDED),
00725                                  acmod->feat_buf + inptr);
00726     if (nfeat < 0)
00727         return -1;
00728     acmod->n_feat_frame += nfeat;
00729     assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
00730     /* Move the input feature pointers forward. */
00731     *inout_n_frames -= ncep;
00732     *inout_cep += ncep;
00733     if (acmod->state == ACMOD_STARTED)
00734         acmod->state = ACMOD_PROCESSING;
00735     return orig_n_frames - *inout_n_frames;
00736 }
00737 
00738 int
00739 acmod_process_feat(acmod_t *acmod,
00740                    mfcc_t **feat)
00741 {
00742     int i, inptr;
00743 
00744     if (acmod->n_feat_frame == acmod->n_feat_alloc) {
00745         if (acmod->grow_feat)
00746             acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2);
00747         else
00748             return 0;
00749     }
00750 
00751     inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc;
00752     for (i = 0; i < feat_dimension1(acmod->fcb); ++i)
00753         memcpy(acmod->feat_buf[inptr][i],
00754                feat[i], feat_dimension2(acmod->fcb, i) * sizeof(**feat));
00755     ++acmod->n_feat_frame;
00756     assert(acmod->n_feat_frame <= acmod->n_feat_alloc);
00757 
00758     return 1;
00759 }
00760 
00761 int
00762 acmod_rewind(acmod_t *acmod)
00763 {
00764     /* If the feature buffer is circular, this is not possible. */
00765     if (acmod->output_frame > acmod->n_feat_alloc)
00766         return -1;
00767 
00768     /* Frames consumed + frames available */
00769     acmod->n_feat_frame = acmod->output_frame + acmod->n_feat_frame;
00770 
00771     /* Reset output pointers. */
00772     acmod->feat_outidx = 0;
00773     acmod->output_frame = 0;
00774     acmod->senscr_frame = -1;
00775     acmod->mgau->frame_idx = 0;
00776 
00777     return 0;
00778 }
00779 
00780 int
00781 acmod_advance(acmod_t *acmod)
00782 {
00783     /* Advance the output pointers. */
00784     if (++acmod->feat_outidx == acmod->n_feat_alloc)
00785         acmod->feat_outidx = 0;
00786     --acmod->n_feat_frame;
00787     ++acmod->mgau->frame_idx;
00788 
00789     return ++acmod->output_frame;
00790 }
00791 
00792 int16 const *
00793 acmod_score(acmod_t *acmod,
00794             int *inout_frame_idx)
00795 {
00796     int frame_idx, feat_idx, n_backfr;
00797 
00798     /* Calculate the absolute frame index to be scored. */
00799     if (inout_frame_idx == NULL)
00800         frame_idx = acmod->output_frame;
00801     else if (*inout_frame_idx < 0)
00802         frame_idx = acmod->output_frame + 1 + *inout_frame_idx;
00803     else
00804         frame_idx = *inout_frame_idx;
00805 
00806     /* Check to make sure features are available for the requested frame index. */
00807     n_backfr = acmod->n_feat_alloc - acmod->n_feat_frame;
00808     if (frame_idx < 0 || acmod->output_frame - frame_idx > n_backfr) {
00809         E_ERROR("Frame %d outside queue of %d frames, %d alloc (%d > %d), cannot score\n",
00810                 frame_idx, acmod->n_feat_frame, acmod->n_feat_alloc,
00811                 acmod->output_frame - frame_idx, n_backfr);
00812         return NULL;
00813     }
00814 
00815     /* If all senones are being computed then we can reuse existing scores. */
00816     if (acmod->compallsen && frame_idx == acmod->senscr_frame)
00817         return acmod->senone_scores;
00818 
00819     /* Build active senone list. */
00820     acmod_flags2list(acmod);
00821 
00822     /* Get the index in feat_buf of the frame to be scored. */
00823     feat_idx = ((acmod->feat_outidx + frame_idx - acmod->output_frame)
00824                 % acmod->n_feat_alloc);
00825     if (feat_idx < 0) feat_idx += acmod->n_feat_alloc;
00826 
00827     /* Generate scores for the next available frame */
00828     ps_mgau_frame_eval(acmod->mgau,
00829                        acmod->senone_scores,
00830                        acmod->senone_active,
00831                        acmod->n_senone_active,
00832                        acmod->feat_buf[feat_idx],
00833                        frame_idx,
00834                        acmod->compallsen);
00835 
00836     if (inout_frame_idx)
00837         *inout_frame_idx = frame_idx;
00838     acmod->senscr_frame = frame_idx;
00839 
00840     return acmod->senone_scores;
00841 }
00842 
00843 int
00844 acmod_best_score(acmod_t *acmod, int *out_best_senid)
00845 {
00846     int i, best;
00847 
00848     best = WORST_SCORE;
00849     if (acmod->compallsen) {
00850         for (i = 0; i < bin_mdef_n_sen(acmod->mdef); ++i) {
00851             if (acmod->senone_scores[i] BETTER_THAN best) {
00852                 best = acmod->senone_scores[i];
00853                 *out_best_senid = i;
00854             }
00855         }
00856     }
00857     else {
00858         int16 *senscr;
00859         senscr = acmod->senone_scores;
00860         for (i = 0; i < acmod->n_senone_active; ++i) {
00861             senscr += acmod->senone_active[i];
00862             if (*senscr BETTER_THAN best) {
00863                 best = *senscr;
00864                 *out_best_senid = i;
00865             }
00866         }
00867     }
00868     return best;
00869 }
00870 
00871 
00872 void
00873 acmod_clear_active(acmod_t *acmod)
00874 {
00875     bitvec_clear_all(acmod->senone_active_vec, bin_mdef_n_sen(acmod->mdef));
00876     acmod->n_senone_active = 0;
00877 }
00878 
/*
 * Set the active flag in (a)->senone_active_vec for the senone of state i
 * of multiplex HMM h, unless hmm_mpx_ssid() for that state is BAD_SSID.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement: a
 * bare "if" here would capture the "else" of any unbraced if/else that
 * the macro is used in.
 */
#define MPX_BITVEC_SET(a,h,i)                                   \
    do {                                                        \
        if (hmm_mpx_ssid((h),(i)) != BAD_SSID)                  \
            bitvec_set((a)->senone_active_vec,                  \
                       hmm_mpx_senid((h),(i)));                 \
    } while (0)
/*
 * Set the active flag in (a)->senone_active_vec for the senone of state i
 * of non-multiplex HMM h.
 */
#define NONMPX_BITVEC_SET(a,h,i)                                        \
    bitvec_set((a)->senone_active_vec,                                  \
               hmm_nonmpx_senid((h),(i)))
00885 
00886 void
00887 acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm)
00888 {
00889     int i;
00890 
00891     if (hmm_is_mpx(hmm)) {
00892         switch (hmm_n_emit_state(hmm)) {
00893         case 5:
00894             MPX_BITVEC_SET(acmod, hmm, 4);
00895             MPX_BITVEC_SET(acmod, hmm, 3);
00896         case 3:
00897             MPX_BITVEC_SET(acmod, hmm, 2);
00898             MPX_BITVEC_SET(acmod, hmm, 1);
00899             MPX_BITVEC_SET(acmod, hmm, 0);
00900             break;
00901         default:
00902             for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
00903                 MPX_BITVEC_SET(acmod, hmm, i);
00904             }
00905         }
00906     }
00907     else {
00908         switch (hmm_n_emit_state(hmm)) {
00909         case 5:
00910             NONMPX_BITVEC_SET(acmod, hmm, 4);
00911             NONMPX_BITVEC_SET(acmod, hmm, 3);
00912         case 3:
00913             NONMPX_BITVEC_SET(acmod, hmm, 2);
00914             NONMPX_BITVEC_SET(acmod, hmm, 1);
00915             NONMPX_BITVEC_SET(acmod, hmm, 0);
00916             break;
00917         default:
00918             for (i = 0; i < hmm_n_emit_state(hmm); ++i) {
00919                 NONMPX_BITVEC_SET(acmod, hmm, i);
00920             }
00921         }
00922     }
00923 }
00924 
00925 static int32
00926 acmod_flags2list(acmod_t *acmod)
00927 {
00928     int32 w, l, n, b, total_dists, total_words, extra_bits;
00929     bitvec_t *flagptr;
00930 
00931     total_dists = bin_mdef_n_sen(acmod->mdef);
00932     if (acmod->compallsen) {
00933         acmod->n_senone_active = total_dists;
00934         return total_dists;
00935     }
00936     total_words = total_dists / BITVEC_BITS;
00937     extra_bits = total_dists % BITVEC_BITS;
00938     w = n = l = 0;
00939     for (flagptr = acmod->senone_active_vec; w < total_words; ++w, ++flagptr) {
00940         if (*flagptr == 0)
00941             continue;
00942         for (b = 0; b < BITVEC_BITS; ++b) {
00943             if (*flagptr & (1UL << b)) {
00944                 int32 sen = w * BITVEC_BITS + b;
00945                 int32 delta = sen - l;
00946                 /* Handle excessive deltas "lossily" by adding a few
00947                    extra senones to bridge the gap. */
00948                 while (delta > 255) {
00949                     acmod->senone_active[n++] = 255;
00950                     delta -= 255;
00951                 }
00952                 acmod->senone_active[n++] = delta;
00953                 l = sen;
00954             }
00955         }
00956     }
00957 
00958     for (b = 0; b < extra_bits; ++b) {
00959         if (*flagptr & (1UL << b)) {
00960             int32 sen = w * BITVEC_BITS + b;
00961             int32 delta = sen - l;
00962             /* Handle excessive deltas "lossily" by adding a few
00963                extra senones to bridge the gap. */
00964             while (delta > 255) {
00965                 acmod->senone_active[n++] = 255;
00966                 delta -= 255;
00967             }
00968             acmod->senone_active[n++] = delta;
00969             l = sen;
00970         }
00971     }
00972 
00973     acmod->n_senone_active = n;
00974     E_DEBUG(1, ("acmod_flags2list: %d active in frame %d\n",
00975                 acmod->n_senone_active, acmod->output_frame));
00976     return n;
00977 }

Generated on Tue Aug 17 2010 for PocketSphinx by  doxygen 1.7.1