diff --git a/src/april_model.c b/src/april_model.c index fb9d164..de4b6c3 100644 --- a/src/april_model.c +++ b/src/april_model.c @@ -93,6 +93,9 @@ AprilASRModel aam_create_model(const char *model_path) { //aam->fbank_opts.snip_edges = aam->params.snip_edges; aam->fbank_opts.snip_edges = true; + aam->fbank_opts.remove_dc_offset = true; + aam->fbank_opts.preemph_coeff = 0.97f; + ASSERT_OR_FREE_AAM_AND_RETURN_NULL(aam, aam->x_dim[0] == aam->params.batch_size); ASSERT_OR_FREE_AAM_AND_RETURN_NULL(aam, aam->x_dim[1] == aam->fbank_opts.pull_segment_count); ASSERT_OR_FREE_AAM_AND_RETURN_NULL(aam, aam->x_dim[2] == aam->fbank_opts.num_bins); diff --git a/src/fbank.c b/src/fbank.c index 890e58a..dbca3f5 100644 --- a/src/fbank.c +++ b/src/fbank.c @@ -185,6 +185,7 @@ void fbank_accept_waveform(OnlineFBank fbank, float *wave, size_t wave_count) { sonicReadFloatFromStream(fbank->sonic_stream, wave, wave_count); } + float preemph_coeff = fbank->opts.preemph_coeff; for(ssize_t i=0;; i++) { if((fbank->temp_segment_avail + 1) > fbank->temp_segments_y){ LOG_WARNING("fbank ran out of space. Please call fbank_pull_segments. 
Can't eat wave"); @@ -228,12 +229,33 @@ void fbank_accept_waveform(OnlineFBank fbank, float *wave, size_t wave_count) { ssize_t wave_idx = start_idx + j; if(wave_idx < 0){ ssize_t ll_idx = fbank->prev_leftover_count + wave_idx; - fbank->data[j] = fbank->prev_leftover[ll_idx] * fbank->window[j]; + fbank->data[j] = fbank->prev_leftover[ll_idx]; } else { - fbank->data[j] = wave[start_idx + j] * fbank->window[j]; + fbank->data[j] = wave[start_idx + j]; } } + // Not included: dither + + // Apply remove dc offset + if(fbank->opts.remove_dc_offset) { + float sum = 0; + for(int j=0; j<fbank->padded_window_size; j++) sum += fbank->data[j]; + float mean = sum / fbank->padded_window_size; + for(int j=0; j<fbank->padded_window_size; j++) fbank->data[j] -= mean; + } + + // Apply preemphasize + if(preemph_coeff > 0.0f) { + for(int j=fbank->padded_window_size-1; j>0; --j) + fbank->data[j] -= preemph_coeff * fbank->data[j - 1]; + fbank->data[0] -= preemph_coeff * fbank->data[0]; + } + + // Apply window function + for(int j=0; j<fbank->padded_window_size; j++) + fbank->data[j] *= fbank->window[j]; + double *dptr = fbank->data; double *rptr = fbank->ret; memcpy((char *)(rptr+1), dptr, fbank->padded_window_size * sizeof(double)); diff --git a/src/fbank.h b/src/fbank.h index 0139c8f..5942b96 100644 --- a/src/fbank.h +++ b/src/fbank.h @@ -60,6 +60,9 @@ typedef struct FBankOptions { // If false, speed feature will be unavailable bool use_sonic; + + bool remove_dc_offset; // true + float preemph_coeff; // 0.97 } FBankOptions; OnlineFBank make_fbank(FBankOptions opts);