Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2026 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include "options.h"
35 : #ifdef DEBUGGING
36 : #include "debug.h"
37 : #include <string.h>
38 : #endif
39 : #include "cnst.h"
40 : #include "ivas_cnst.h"
41 : #include "rom_enc.h"
42 : #include "rom_com.h"
43 : #include "prot.h"
44 : #include "ivas_prot.h"
45 : #include "wmc_auto.h"
46 : #include <math.h>
47 :
48 :
49 : /*---------------------------------------------------------------*
50 : * Local constants
51 : *---------------------------------------------------------------*/
52 :
53 : #define SCE_SMC_THR 16000
54 :
55 :
56 : /*-------------------------------------------------------------------*
57 : * Local function prototypes
58 : *--------------------------------------------------------------------*/
59 :
60 : static void calculate_energy_buffer( CPE_ENC_HANDLE hCPE, float enerBuffer_dft[], const int16_t no_channels, const int32_t input_Fs );
61 :
62 :
63 : /*-------------------------------------------------------------------*
64 : * pre_proc_front_ivas()
65 : *
66 : * Front Pre-processing for IVAS
67 : * (resampling, spectral analysis, LP analysis, VAD, OL pitch calculation, classification)
68 : *--------------------------------------------------------------------*/
69 :
70 : #ifdef FIX_2344_ALIGN_PREPROC
71 3102575 : void pre_proc_front_ivas(
72 : #else
73 : ivas_error pre_proc_front_ivas(
74 : #endif
75 : SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */
76 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
77 : const int32_t element_brate, /* i : SCE/CPE element bitrate */
78 : const int16_t nb_bits_metadata, /* i : number of metadata bits */
79 : const int16_t input_frame, /* i : frame length */
80 : const int16_t n, /* i : channel number */
81 : float old_inp_12k8[], /* o : buffer of old input signal */
82 : float old_inp_16k[], /* o : buffer of old input signal @16kHz */
83 : float *ener, /* o : residual energy from Levinson-Durbin */
84 : float *relE, /* o : frame relative energy */
85 : float A[NB_SUBFR16k * ( M + 1 )], /* o : A(z) unquantized for the 4 subframes */
86 : float Aw[NB_SUBFR16k * ( M + 1 )], /* o : weighted A(z) unquantized for subframes */
87 : float epsP[M + 1], /* o : LP prediction errors */
88 : float lsp_new[M], /* o : LSPs at the end of the frame */
89 : float lsp_mid[M], /* o : LSPs in the middle of the frame */
90 : int16_t *vad_hover_flag, /* o : VAD hangover flag */
91 : int16_t *attack_flag, /* o : flag signaling attack */
92 : float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: real buffer */
93 : float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: imag buffer */
94 : float old_wsp[], /* o : weighted input signal buffer */
95 : float pitch_fr[NB_SUBFR], /* o : fractional pitch values */
96 : float voicing_fr[NB_SUBFR], /* o : fractional pitch gains */
97 : int16_t *loc_harm, /* o : harmonicity flag */
98 : float *cor_map_sum, /* o : speech/music clasif. parameter */
99 : int16_t *vad_flag_dtx, /* o : HE-SAD flag with additional DTX HO */
100 : float enerBuffer[CLDFB_NO_CHANNELS_MAX], /* o : energy buffer */
101 : float fft_buff[2 * L_FFT], /* o : FFT buffer */
102 : const float tdm_A_PCh[M + 1], /* i : unq. LP coeff. of primary channel */
103 : const float tdm_lsp_new_PCh[M], /* i : unq. LSPs of primary channel */
104 : const float currFlatness, /* i : flatness parameter */
105 : const int16_t tdm_ratio_idx, /* i : Current Ratio_L index */
106 : float fr_bands_LR[][2 * NB_BANDS], /* i : energy in frequency bands */
107 : const float Etot_LR[], /* i : total energy Left & Right channel */
108 : float lf_E_LR[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels */
109 : const int16_t localVAD_HE_SAD_LR[], /* i : HE-SAD flag without hangover, LR channels */
110 : float band_energies_LR[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN */
111 : const int16_t flag_16k_smc, /* i : flag to indicate if the OL SMC is run at 16 kHz */
112 : const int16_t front_vad_flag, /* i : front-VAD flag to overwrite VAD decision */
113 : const int16_t force_front_vad, /* i : flag to force VAD decision */
114 : const int16_t front_vad_dtx_flag, /* i : front-VAD DTX flag to overwrite VAD decision*/
115 : const IVAS_FORMAT ivas_format, /* i : IVAS format */
116 : const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */
117 : const int32_t last_ivas_total_brate, /* i : last IVAS total bitrate */
118 : const int32_t ivas_total_brate /* i : IVAS total bitrate - for setting the DTX */
119 : )
120 : {
121 : float *inp_12k8, *new_inp_12k8; /* pointers to current frame and new data */
122 : float *wsp; /* weighted input signal buffer */
123 : float Etot; /* total energy */
124 : float fr_bands[2 * NB_BANDS]; /* energy in frequency bands */
125 : float lf_E[2 * VOIC_BINS]; /* per bin spectrum energy in lf */
126 : float tmpN[NB_BANDS]; /* Temporary noise update */
127 : float tmpE[NB_BANDS]; /* Temporary averaged energy of 2 sf. */
128 : float tmpN_LR[CPE_CHANNELS][NB_BANDS]; /* Temporary noise update */
129 : float tmpE_LR[CPE_CHANNELS][NB_BANDS]; /* Temporary averaged energy of 2 sf. */
130 : float cor_map_sum_LR[CPE_CHANNELS]; /* speech/music clasif. parameter */
131 : float non_staX_LR; /* non-stationarity for sp/mus classifier */
132 : float ncharX_LR; /* noise character for sp/mus classifier */
133 : float sp_div_LR; /* spectral diversity feature */
134 : float S_map_LR[L_FFT / 2]; /* short-term correlation map */
135 : float corr_shiftL; /* correlation shift */
136 : float corr_shiftR; /* correlation shift */
137 : int16_t loc_harmLR[CPE_CHANNELS]; /* harmonicity flag */
138 : int16_t lr_vad_enabled; /* LR VAD indicator */
139 : float ee[2]; /* Spectral tilt */
140 : float corr_shift; /* correlation shift */
141 : float sp_div, PS[128]; /* speech/music clasif. parameters */
142 : int16_t L_look; /* length of look-ahead */
143 : float snr_sum_he; /* HE SAD parameters */
144 : float hp_E[2]; /* Energy in HF */
145 : int16_t flag_spitch;
146 : int16_t high_lpn_flag;
147 : float lsf_new[M];
148 : float band_energies[2 * NB_BANDS]; /* energy in critical bands without minimum noise floor E_MIN */
149 : int16_t localVAD_HE_SAD;
150 : float non_staX;
151 : float stab_fac;
152 : int16_t alw_pitch_lag_12k8[2];
153 : float alw_voicing[2];
154 : int16_t last_core_orig;
155 : float dummy;
156 : float mem_decim_dummy[2 * L_FILT_MAX]; /* dummy decimation filter memory */
157 : float S_map[L_FFT / 2];
158 : int16_t i, lMemRecalc, lMemRecalc_12k8;
159 : int16_t smc_dec;
160 : float ncharX, dE1X;
161 : Encoder_State *st;
162 : float *signal_in;
163 : int16_t element_mode;
164 : int32_t input_Fs, last_element_brate;
165 : int16_t *tdm_SM_last_clas, tmpS;
166 : float *res_cod_SNR_M, tmpF[STEREO_DFT_BAND_MAX];
167 : STEREO_CLASSIF_HANDLE hStereoClassif;
168 : float temp1F_icatdmResampBuf[L_FILT_MAX]; /* temp buffers for ICA TDM resamplers */
169 : int16_t old_pitch1; /* previous frame OL pitch[1] @12.8 kHz */
170 : int16_t LR_localVAD;
171 : #ifndef FIX_2344_ALIGN_PREPROC
172 : ivas_error error;
173 : #endif
174 :
175 3102575 : push_wmops( "pre_proc_front" );
176 :
177 : /*------------------------------------------------------------------*
178 : * Initialization
179 : *------------------------------------------------------------------*/
180 :
181 : #ifndef FIX_2344_ALIGN_PREPROC
182 : error = IVAS_ERR_OK;
183 : #endif
184 3102575 : tmpS = 0;
185 3102575 : tdm_SM_last_clas = &tmpS;
186 3102575 : set_f( tmpF, 0, STEREO_DFT_BAND_MAX );
187 3102575 : res_cod_SNR_M = tmpF;
188 :
189 3102575 : LR_localVAD = 0;
190 :
191 3102575 : if ( hSCE != NULL )
192 : {
193 732130 : st = hSCE->hCoreCoder[n];
194 732130 : signal_in = hSCE->hCoreCoder[n]->input;
195 732130 : element_mode = IVAS_SCE;
196 732130 : last_element_brate = hSCE->last_element_brate;
197 732130 : hStereoClassif = NULL;
198 732130 : lr_vad_enabled = 0;
199 : }
200 : else /* CPE */
201 : {
202 2370445 : st = hCPE->hCoreCoder[n];
203 2370445 : signal_in = hCPE->hCoreCoder[n]->input;
204 2370445 : element_mode = hCPE->element_mode;
205 2370445 : last_element_brate = hCPE->last_element_brate;
206 2370445 : hStereoClassif = hCPE->hStereoClassif;
207 2370445 : lr_vad_enabled = 0;
208 2370445 : if ( hCPE->hFrontVad[0] != NULL && hCPE->element_mode != IVAS_CPE_MDCT )
209 : {
210 37618 : lr_vad_enabled = 1;
211 : }
212 :
213 2370445 : if ( lr_vad_enabled && n == 0 )
214 : {
215 : /* Combine localVAD and vad_flag from LR processing */
216 35818 : LR_localVAD = hCPE->hCoreCoder[0]->localVAD || hCPE->hCoreCoder[1]->localVAD;
217 : }
218 :
219 2370445 : if ( hCPE->hStereoTD != NULL )
220 : {
221 9182 : tdm_SM_last_clas = &hCPE->hStereoTD->tdm_SM_last_clas[n];
222 9182 : mvs2s( hCPE->hStereoTD->tdm_SM_last_clas, hCPE->hStereoTD->tdm_SM_last2_clas, CPE_CHANNELS );
223 : }
224 :
225 2370445 : if ( hCPE->hStereoDft != NULL )
226 : {
227 134625 : res_cod_SNR_M = hCPE->hStereoDft->res_cod_SNR_M;
228 : }
229 : }
230 :
231 : #ifdef DEBUG_MODE_INFO
232 : if ( !( hCPE != NULL && hCPE->hStereoTD != NULL && n > 0 ) )
233 : {
234 : /* for TD stereo only write out first channel. The existence of a second channel can vary, this is just easier to handle */
235 : int16_t tmp_dmx_in[L_FRAME48k];
236 : mvr2s( signal_in - NS2SA( st->input_Fs, ACELP_LOOK_NS ), tmp_dmx_in, input_frame );
237 : dbgwrite( tmp_dmx_in, sizeof( int16_t ), input_frame, 1, strcat( fname( debug_dir, "ivas_input_dmx", 0, n + 1, ENC ), ".pcm" ) );
238 : }
239 : #endif
240 :
241 3102575 : lMemRecalc_12k8 = 0;
242 3102575 : lMemRecalc = 0;
243 3102575 : if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
244 : {
245 2235820 : lMemRecalc = NS2SA( st->input_Fs, L_MEM_RECALC_NS );
246 2235820 : lMemRecalc_12k8 = NS2SA( INT_FS_12k8, L_MEM_RECALC_NS );
247 : }
248 :
249 3102575 : input_Fs = st->input_Fs;
250 :
251 3102575 : localVAD_HE_SAD = 0;
252 3102575 : snr_sum_he = 0;
253 :
254 3102575 : corr_shiftL = 0;
255 3102575 : corr_shiftR = 0;
256 :
257 3102575 : if ( hSCE != NULL )
258 : {
259 732130 : *vad_hover_flag = 0;
260 : }
261 3102575 : st->sp_aud_decision1 = 0;
262 3102575 : st->sp_aud_decision2 = 0;
263 3102575 : st->coder_type = GENERIC;
264 3102575 : if ( st->hGSCEnc != NULL )
265 : {
266 875937 : st->hGSCEnc->noise_lev = NOISE_LEVEL_SP0;
267 : }
268 3102575 : *attack_flag = 0;
269 :
270 3102575 : if ( st->Opt_SC_VBR )
271 : {
272 0 : st->hSC_VBR->bump_up = 0;
273 0 : st->hSC_VBR->ppp_mode = 0;
274 0 : st->hSC_VBR->nelp_mode = 0;
275 0 : st->hSC_VBR->avoid_HQ_VBR_NB = 0;
276 : }
277 :
278 3102575 : L_look = L_LOOK_12k8; /* lookahead at 12.8kHz */
279 :
280 3102575 : new_inp_12k8 = old_inp_12k8 + L_INP_MEM; /* pointer to new samples of the input signal in 12.8kHz core */
281 3102575 : inp_12k8 = new_inp_12k8 - L_look; /* pointer to the current frame of input signal in 12.8kHz core */
282 :
283 3102575 : if ( element_mode != IVAS_CPE_DFT )
284 : {
285 2967950 : new_inp_12k8 -= L_FILT;
286 : }
287 :
288 3102575 : if ( element_mode == IVAS_CPE_DFT )
289 : {
290 134625 : mvr2r( st->old_inp_12k8, old_inp_12k8, L_INP_MEM - STEREO_DFT_OVL_12k8 );
291 : }
292 2967950 : else if ( element_mode == IVAS_CPE_TD )
293 : {
294 9182 : mvr2r( st->old_inp_12k8, old_inp_12k8, L_INP_MEM - lMemRecalc_12k8 - L_FILT );
295 : }
296 : else
297 : {
298 2958768 : mvr2r( st->old_inp_12k8, old_inp_12k8, L_INP_MEM - L_FILT );
299 : }
300 :
301 3102575 : mvr2r( st->old_wsp, old_wsp, L_WSP_MEM );
302 3102575 : wsp = old_wsp + L_WSP_MEM; /* pointer to the current frame of weighted signal in 12.8kHz core */
303 :
304 3102575 : st->rf_mode = st->Opt_RF_ON;
305 :
306 3102575 : last_core_orig = st->last_core;
307 :
308 : /*--------------------------------------------------------------*
309 : * energy analysis
310 : *---------------------------------------------------------------*/
311 :
312 3102575 : if ( element_mode == IVAS_SCE || ( element_mode == IVAS_CPE_MDCT && st->Opt_DTX_ON ) )
313 : {
314 790006 : analysisCldfbEncoder( st, signal_in, input_frame, realBuffer, imagBuffer, enerBuffer );
315 : }
316 2312569 : else if ( ( element_mode == IVAS_CPE_TD && st->idchan == 0 ) || ( st->idchan == 1 && st->tdm_LRTD_flag ) )
317 : {
318 : /* cldfb analysis only for pri. channel */
319 8994 : analysisCldfbEncoder( st, signal_in - NS2SA( input_Fs, L_MEM_RECALC_TBE_NS ), input_frame, realBuffer, imagBuffer, enerBuffer );
320 : }
321 2303575 : else if ( element_mode == IVAS_CPE_DFT )
322 : {
323 134625 : calculate_energy_buffer( hCPE, enerBuffer, st->cldfbAnaEnc->no_channels, input_Fs );
324 : }
325 : else
326 : {
327 2168950 : set_f( enerBuffer, 0, CLDFB_NO_CHANNELS_MAX );
328 : }
329 :
330 : /*----------------------------------------------------------------*
331 : * Change the sampling frequency to 12.8 kHz
332 : * (if not available from downsampled DMX)
333 : *----------------------------------------------------------------*/
334 :
335 3102575 : if ( element_mode == IVAS_SCE )
336 : {
337 732130 : modify_Fs( signal_in, input_frame, input_Fs, new_inp_12k8, INT_FS_12k8, st->mem_decim, ( st->max_bwidth == NB ) );
338 :
339 732130 : mvr2r( st->mem_decim, mem_decim_dummy, 2 * L_FILT_MAX );
340 732130 : set_f( temp1F_icatdmResampBuf, 0, L_FILT_MAX );
341 732130 : modify_Fs( temp1F_icatdmResampBuf, NS2SA( st->input_Fs, DELAY_FIR_RESAMPL_NS ), input_Fs, new_inp_12k8 + L_FRAME, INT_FS_12k8, mem_decim_dummy, 0 );
342 : }
343 2370445 : else if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
344 : {
345 : /* reconstruct past segment of the Secondary channel input signal when switching from DFT stereo */
346 2235820 : if ( hCPE->last_element_mode == IVAS_CPE_DFT && st->idchan == 1 )
347 : {
348 2912 : int16_t length_inp = NS2SA( input_Fs, L_MEM_RECALC_SCH_NS );
349 2912 : int16_t length_12k8 = NS2SA( INT_FS_12k8, L_MEM_RECALC_SCH_NS );
350 :
351 2912 : modify_Fs( signal_in - lMemRecalc - length_inp, length_inp, input_Fs, new_inp_12k8 - lMemRecalc_12k8 - length_12k8, INT_FS_12k8, st->mem_decim, 0 );
352 : }
353 :
354 2235820 : modify_Fs( signal_in - lMemRecalc, input_frame, input_Fs, new_inp_12k8 - lMemRecalc_12k8, INT_FS_12k8, st->mem_decim, ( st->max_bwidth == NB ) );
355 2235820 : mvr2r( st->mem_decim, mem_decim_dummy, 2 * L_FILT_MAX );
356 :
357 2235820 : if ( lMemRecalc > 0 )
358 : {
359 2235820 : modify_Fs( signal_in + input_frame - lMemRecalc, lMemRecalc, input_Fs, new_inp_12k8 + L_FRAME - lMemRecalc_12k8, INT_FS_12k8, mem_decim_dummy, ( st->max_bwidth == NB ) );
360 : }
361 2235820 : set_f( temp1F_icatdmResampBuf, 0, L_FILT_MAX );
362 2235820 : modify_Fs( temp1F_icatdmResampBuf, NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), input_Fs, new_inp_12k8 + L_FRAME, INT_FS_12k8, mem_decim_dummy, 0 );
363 : }
364 : else /* DFT stereo */
365 : {
366 : /* update the FIR resampling filter memory, needed for switching to time-domain (FIR) resampling */
367 134625 : mvr2r( signal_in + input_frame - NS2SA( input_Fs, L_MEM_RECALC_NS ) - 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), st->mem_decim, 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ) );
368 : }
369 :
370 : /* save input resampled at 12.8kHz, non-preemphasised */
371 3102575 : if ( element_mode == IVAS_CPE_DFT )
372 : {
373 134625 : mvr2r( new_inp_12k8 - STEREO_DFT_OVL_12k8, st->buf_speech_enc + L_FRAME32k - STEREO_DFT_OVL_12k8, L_FRAME + STEREO_DFT_OVL_12k8 );
374 : }
375 2967950 : else if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
376 : {
377 2235820 : mvr2r( new_inp_12k8 - lMemRecalc_12k8, st->buf_speech_enc + L_FRAME32k - lMemRecalc_12k8 - L_FILT, L_FRAME + lMemRecalc_12k8 + L_FILT );
378 : }
379 : else
380 : {
381 732130 : mvr2r( new_inp_12k8, st->buf_speech_enc + L_FRAME32k, L_FRAME );
382 : }
383 :
384 : /*------------------------------------------------------------------*
385 : * Perform fixed preemphasis (12.8 kHz signal) through 1 - g*z^-1
386 : *-----------------------------------------------------------------*/
387 :
388 3102575 : if ( element_mode == IVAS_CPE_DFT )
389 : {
390 134625 : mvr2r( new_inp_12k8 - STEREO_DFT_OVL_12k8 + L_FRAME, st->inp_12k8_mem_stereo_sw, STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT ); /* memory for TD/DFT stereo switching */
391 :
392 134625 : st->mem_preemph = st->mem_preemph_DFT;
393 134625 : st->mem_preemph_DFT = old_inp_12k8[L_INP_MEM - STEREO_DFT_OVL_12k8 + L_FRAME - 1];
394 :
395 134625 : preemph( new_inp_12k8 - STEREO_DFT_OVL_12k8, PREEMPH_FAC, L_FRAME, &st->mem_preemph );
396 134625 : dummy = st->mem_preemph;
397 134625 : preemph( new_inp_12k8 - STEREO_DFT_OVL_12k8 + L_FRAME, PREEMPH_FAC, STEREO_DFT_OVL_12k8, &dummy );
398 : }
399 2967950 : else if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
400 : {
401 2235820 : if ( st->idchan == 0 )
402 : {
403 1117910 : if ( hCPE->last_element_mode == IVAS_CPE_DFT )
404 : {
405 2912 : st->mem_preemph = st->mem_preemph_DFT;
406 2912 : mvr2r( st->inp_12k8_mem_stereo_sw, new_inp_12k8 - L_MEM_RECALC_12K8 - ( STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT ), STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT );
407 2912 : preemph( new_inp_12k8 - L_MEM_RECALC_12K8 - ( STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT ), PREEMPH_FAC, STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT, &st->mem_preemph );
408 : }
409 :
410 1117910 : st->mem_preemph_DFT = old_inp_12k8[L_INP_MEM - STEREO_DFT_OVL_12k8 + L_FRAME - 1]; /* == inp_12k8[L_FRAME-1] */
411 : }
412 :
413 : /* preemphasise past segment of the Secondary channel input signal when switching from DFT stereo */
414 2235820 : if ( hCPE->last_element_mode == IVAS_CPE_DFT && st->idchan == 1 )
415 : {
416 2912 : int16_t length_12k8 = NS2SA( INT_FS_12k8, L_MEM_RECALC_SCH_NS );
417 2912 : preemph( new_inp_12k8 - lMemRecalc_12k8 - length_12k8, PREEMPH_FAC, length_12k8, &st->mem_preemph );
418 : }
419 :
420 2235820 : preemph( new_inp_12k8 - lMemRecalc_12k8, PREEMPH_FAC, L_FRAME, &st->mem_preemph );
421 2235820 : dummy = st->mem_preemph;
422 2235820 : preemph( new_inp_12k8 - lMemRecalc_12k8 + L_FRAME, PREEMPH_FAC, lMemRecalc_12k8 + L_FILT, &dummy );
423 : }
424 : else /* IVAS_SCE */
425 : {
426 732130 : preemph( new_inp_12k8, PREEMPH_FAC, L_FRAME, &st->mem_preemph );
427 732130 : dummy = st->mem_preemph;
428 732130 : preemph( new_inp_12k8 + L_FRAME, PREEMPH_FAC, L_FILT, &dummy );
429 : }
430 :
431 : /*-------------------------------------------------------------------------*
432 : * Spectral analysis
433 : *--------------------------------------------------------------------------*/
434 :
435 3102575 : analy_sp( element_mode, hCPE, input_Fs, inp_12k8, st->Bin_E, st->Bin_E_old, fr_bands, lf_E, &Etot, st->min_band, st->max_band, band_energies, PS, fft_buff );
436 :
437 3102575 : if ( hStereoClassif != NULL )
438 : {
439 2370445 : if ( st->lp_speech - Etot > 25 )
440 : {
441 468852 : hStereoClassif->silence_flag = 2;
442 : }
443 : else
444 : {
445 1901593 : hStereoClassif->silence_flag = hStereoClassif->silence_flag - 1;
446 : }
447 2370445 : hStereoClassif->silence_flag = max( 0, hStereoClassif->silence_flag );
448 : }
449 :
450 : /*----------------------------------------------------------------*
451 : * SAD (1-signal, 0-noise)
452 : *----------------------------------------------------------------*/
453 :
454 3102575 : noise_est_pre( Etot, st->ini_frame, st->hNoiseEst, st->idchan, element_mode, hCPE != NULL ? hCPE->last_element_mode : element_mode );
455 :
456 3102575 : if ( element_mode == IVAS_CPE_TD && ( ( abs( hCPE->hStereoTD->tdm_last_ratio_idx - tdm_ratio_idx ) > 5 && st->idchan == 1 ) || abs( hCPE->hStereoTD->tdm_last_inst_ratio_idx - hCPE->hStereoTD->tdm_inst_ratio_idx ) > 10 ) )
457 : {
458 671 : st->ini_frame = 1;
459 : }
460 :
461 3102575 : st->vad_flag = wb_vad( st, fr_bands, &i, &i, &i, &snr_sum_he, &localVAD_HE_SAD, &( st->flag_noisy_speech_snr ), NULL, NULL, -1000.0f, -1000.0f );
462 :
463 :
464 3102575 : if ( force_front_vad == 1 || front_vad_flag == 1 )
465 : {
466 : /* overwrite VAD decision with front-VAD decision if external VAD is set to 1*/
467 32381 : st->vad_flag = front_vad_flag;
468 32381 : st->localVAD = front_vad_flag;
469 : }
470 3102575 : if ( ( hCPE != NULL && !( lr_vad_enabled && st->idchan == 0 ) ) || hSCE != NULL )
471 : {
472 3066757 : *vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, vad_hover_flag, NULL, NULL, NULL );
473 : }
474 : else
475 : {
476 : /* This only applies to st->idchan==0 now */
477 : /* Add down mix stereo activity to LR vad_flag_dtx */
478 35818 : *vad_flag_dtx = *vad_flag_dtx || st->vad_flag;
479 :
480 :
481 : /* Determine hangover flag status based on LR localVAD and downmix localVAD */
482 35818 : *vad_hover_flag = *vad_flag_dtx && !( LR_localVAD || st->localVAD );
483 : }
484 :
485 3102575 : if ( force_front_vad == 1 || front_vad_dtx_flag == 1 )
486 : {
487 : /* overwrite VAD decision with front-VAD decision if external VAD is set to 1*/
488 15255 : *vad_flag_dtx = front_vad_dtx_flag;
489 : }
490 :
491 : /*----------------------------------------------------------------*
492 : * NB/WB/SWB/FB bandwidth detector
493 : *----------------------------------------------------------------*/
494 :
495 3102575 : if ( st->idchan == 0 && element_mode != IVAS_CPE_MDCT )
496 : {
497 871346 : bw_detect( st, st->input, NULL, enerBuffer, ivas_format, 0 );
498 : }
499 :
500 3102575 : if ( element_mode != IVAS_CPE_MDCT ) /* in MDCT stereo, set_bw_stereo() is used instead */
501 : {
502 875937 : set_bw( element_mode, element_brate, st, MODE1 );
503 : }
504 :
505 : /* set the BW of the TD secondary channel in LRTD mode same as BW of the primary channel (only at higher bitrates) */
506 3102575 : if ( st->idchan == 1 && element_mode == IVAS_CPE_TD && st->tdm_LRTD_flag == 1 && st->bits_frame_channel >= IVAS_16k4 / FRAMES_PER_SEC )
507 : {
508 4403 : st->bwidth = hCPE->hCoreCoder[0]->bwidth;
509 : }
510 :
511 : /*----------------------------------------------------------------*
512 : * Noise energy down-ward update and total noise energy estimation
513 : * Long-term energies and relative frame energy updates
514 : * Correlation correction as a function of total noise level
515 : *----------------------------------------------------------------*/
516 :
517 3102575 : noise_est_down( fr_bands, st->hNoiseEst->bckr, tmpN, tmpE, st->min_band, st->max_band, &st->hNoiseEst->totalNoise, Etot, &st->hNoiseEst->Etot_last, &st->hNoiseEst->Etot_v_h2 );
518 :
519 3102575 : if ( lr_vad_enabled && st->idchan == 0 )
520 : {
521 35818 : noise_est_down( fr_bands_LR[0], hCPE->hFrontVad[0]->hNoiseEst->bckr, tmpN_LR[0], tmpE_LR[0], st->min_band, st->max_band, &hCPE->hFrontVad[0]->hNoiseEst->totalNoise, Etot_LR[0], &hCPE->hFrontVad[0]->hNoiseEst->Etot_last, &hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2 );
522 35818 : noise_est_down( fr_bands_LR[1], hCPE->hFrontVad[1]->hNoiseEst->bckr, tmpN_LR[1], tmpE_LR[1], st->min_band, st->max_band, &hCPE->hFrontVad[1]->hNoiseEst->totalNoise, Etot_LR[1], &hCPE->hFrontVad[1]->hNoiseEst->Etot_last, &hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2 );
523 35818 : corr_shiftL = correlation_shift( hCPE->hFrontVad[0]->hNoiseEst->totalNoise );
524 35818 : corr_shiftR = correlation_shift( hCPE->hFrontVad[1]->hNoiseEst->totalNoise );
525 : }
526 :
527 3102575 : *relE = Etot - st->lp_speech;
528 :
529 3102575 : corr_shift = correlation_shift( st->hNoiseEst->totalNoise );
530 :
531 : /*----------------------------------------------------------------*
532 : * FD-CNG Noise Estimator
533 : *----------------------------------------------------------------*/
534 :
535 3102575 : if ( st->hFdCngEnc != NULL )
536 : {
537 212017 : resetFdCngEnc( st );
538 :
539 212017 : if ( st->idchan == 0 || element_mode == IVAS_CPE_MDCT )
540 : {
541 211967 : if ( element_mode == IVAS_CPE_TD && lr_vad_enabled && band_energies_LR != NULL )
542 : {
543 1800 : perform_noise_estimation_enc( band_energies_LR, enerBuffer, st->hFdCngEnc, input_Fs, hCPE );
544 : }
545 : else
546 : {
547 210167 : perform_noise_estimation_enc( band_energies, enerBuffer, st->hFdCngEnc, input_Fs, hCPE );
548 : }
549 : }
550 : }
551 :
552 : /*-----------------------------------------------------------------*
553 : * Select SID or FRAME_NO_DATA frame if DTX enabled
554 : *-----------------------------------------------------------------*/
555 :
556 3102575 : if ( hCPE != NULL && element_mode != IVAS_CPE_DFT && element_mode != IVAS_CPE_MDCT )
557 : {
558 9182 : *vad_flag_dtx = 1;
559 : }
560 :
561 3102575 : if ( st->Opt_DTX_ON == 1 && *vad_flag_dtx == 0 && element_mode == IVAS_CPE_DFT && element_brate <= ACELP_16k40 && hCPE->hStereoDft->hConfig->force_mono_transmission == 1 ) /* force LP_CNG usage for MASA DTX when mono tranmission */
562 : {
563 266 : st->cng_type = LP_CNG;
564 : }
565 :
566 3102575 : dtx( st, last_ivas_total_brate, ivas_total_brate, *vad_flag_dtx, inp_12k8 );
567 :
568 3102575 : if ( hCPE != NULL && hCPE->hStereoDft != NULL && st->core_brate == SID_2k40 )
569 : {
570 : /* Add another period of expected xcorr updates */
571 1636 : hCPE->hStereoDft->expectedNumUpdates += st->hDtxEnc->max_SID;
572 : }
573 :
574 : /*----------------------------------------------------------------*
575 : * Adjust FD-CNG Noise Estimator
576 : *----------------------------------------------------------------*/
577 :
578 3102575 : if ( st->hFdCngEnc != NULL && ( st->ini_frame == 0 || last_element_brate != element_brate || st->last_bwidth != st->bwidth ) )
579 : {
580 : int32_t total_brate;
581 :
582 6889 : total_brate = ( element_mode == IVAS_SCE ) ? st->total_brate : st->bits_frame_nominal * FRAMES_PER_SEC;
583 6889 : configureFdCngEnc( st->hFdCngEnc, max( st->input_bwidth, WB ), total_brate );
584 6889 : if ( hCPE != NULL )
585 : {
586 1452 : st->hFdCngEnc->hFdCngCom->CngBitrate = hCPE->element_brate - 1;
587 : }
588 : }
589 :
590 3102575 : if ( st->hFdCngEnc != NULL && st->Opt_DTX_ON )
591 : {
592 212017 : AdjustFirstSID( st );
593 : }
594 :
595 : /*----------------------------------------------------------------*
596 : * LP analysis
597 : *----------------------------------------------------------------*/
598 :
599 3102575 : alw_pitch_lag_12k8[0] = st->old_pitch_la;
600 3102575 : alw_pitch_lag_12k8[1] = st->old_pitch_la;
601 3102575 : alw_voicing[0] = st->voicing[2];
602 3102575 : alw_voicing[1] = st->voicing[2];
603 :
604 3102575 : i = 0;
605 3102575 : if ( element_mode == IVAS_CPE_TD && st->idchan == 1 && hCPE->hStereoTD->tdm_low_rate_mode == 1 )
606 : {
607 0 : i = 1;
608 : }
609 :
610 3102575 : analy_lp( inp_12k8, L_FRAME, L_look, ener, A, epsP, lsp_new, lsp_mid, st->lsp_old1, alw_pitch_lag_12k8, alw_voicing, INT_FS_12k8, i );
611 :
612 3102575 : lsp2lsf( lsp_new, lsf_new, M, INT_FS_12k8 );
613 3102575 : stab_fac = lsf_stab( lsf_new, st->lsf_old1, 0, L_FRAME );
614 3102575 : mvr2r( lsf_new, st->lsf_old1, M );
615 :
616 3102575 : if ( element_mode == IVAS_CPE_TD && st->idchan == 1 )
617 : {
618 : /*----------------------------------------------------------------*
619 : * Comparison of the LP coefficients to determine if it is possible
620 : * to reuse the primary channel LP coefficients in the secondary channel
621 : *----------------------------------------------------------------*/
622 :
623 4591 : hCPE->hStereoTD->tdm_lp_reuse_flag = tdm_lp_comparison( hCPE->hStereoTD, hCPE->hStereoClassif, st, inp_12k8, tdm_A_PCh, A, M, tdm_lsp_new_PCh, lsp_new, L_FRAME, element_brate - nb_bits_metadata * FRAMES_PER_SEC );
624 : }
625 :
626 : /*----------------------------------------------------------------*
627 : * Compute weighted input (for OL pitch analysis)
628 : * OL pitch analysis
629 : * stable high pitch detection
630 : * 1/4 pitch precision improvement
631 : *----------------------------------------------------------------*/
632 :
633 3102575 : find_wsp( L_FRAME, L_SUBFR, NB_SUBFR, A, Aw, inp_12k8, TILT_FAC, wsp, &st->mem_wsp, GAMMA1, L_look );
634 :
635 3102575 : if ( st->vad_flag == 0 )
636 : {
637 : /* reset the OL pitch tracker memories during inactive frames */
638 396490 : pitch_ol_init( &st->old_thres, &st->old_pitch, &st->delta_pit, &st->old_corr );
639 : }
640 :
641 3102575 : old_pitch1 = st->pitch[1];
642 :
643 3102575 : pitch_ol( st->pitch, st->voicing, &st->old_pitch, &st->old_corr, corr_shift, &st->old_thres, &st->delta_pit, st->old_wsp2, wsp, st->mem_decim2, *relE, L_look, st->clas, st->input_bwidth, st->Opt_SC_VBR );
644 :
645 : /* Updates for adaptive lag window memory */
646 3102575 : st->old_pitch_la = st->pitch[2];
647 :
648 : /* Detection of very short stable pitch period */
649 3102575 : StableHighPitchDetect( &flag_spitch, st->pitch, st->voicing, st->Bin_E, wsp, st->localVAD, &st->voicing_sm, &st->voicing0_sm, &st->LF_EnergyRatio_sm, &st->predecision_flag, &st->diff_sm, &st->energy_sm );
650 :
651 : /* 1/4 pitch precision improvement */
652 3102575 : if ( element_brate <= IVAS_32k )
653 : {
654 629240 : pitch_ol2( PIT_MIN_EXTEND, st->pitch[0], &pitch_fr[0], &voicing_fr[0], 0, wsp, 7 );
655 629240 : pitch_ol2( PIT_MIN_EXTEND, st->pitch[0], &pitch_fr[1], &voicing_fr[1], L_SUBFR, wsp, 7 );
656 629240 : pitch_ol2( PIT_MIN_EXTEND, st->pitch[1], &pitch_fr[2], &voicing_fr[2], 2 * L_SUBFR, wsp, 7 );
657 629240 : pitch_ol2( PIT_MIN_EXTEND, st->pitch[1], &pitch_fr[3], &voicing_fr[3], 3 * L_SUBFR, wsp, 7 );
658 : }
659 : else
660 : {
661 2473335 : pitch_fr[0] = st->pitch[0];
662 2473335 : pitch_fr[1] = st->pitch[0];
663 2473335 : pitch_fr[2] = st->pitch[1];
664 2473335 : pitch_fr[3] = st->pitch[1];
665 :
666 2473335 : voicing_fr[0] = st->voicing[0];
667 2473335 : voicing_fr[1] = st->voicing[0];
668 2473335 : voicing_fr[2] = st->voicing[1];
669 2473335 : voicing_fr[3] = st->voicing[1];
670 : }
671 :
672 : /*------------------------------------------------------------------*
673 : * Update estimated noise energy and voicing cut-off frequency
674 : *-----------------------------------------------------------------*/
675 :
676 3102575 : noise_est( st, old_pitch1, tmpN, epsP, Etot, *relE, corr_shift, tmpE, fr_bands, cor_map_sum, &ncharX, &sp_div,
677 3102575 : &non_staX, loc_harm, lf_E, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp, &dummy /*sp_floor*/, S_map, hStereoClassif, NULL, st->ini_frame );
678 :
679 3102575 : if ( lr_vad_enabled && st->idchan == 0 )
680 : {
681 : /* Run noise_est for Left and Right channel */
682 35818 : *loc_harmLR = *loc_harm;
683 35818 : noise_est( st, old_pitch1, tmpN_LR[0], epsP, Etot_LR[0], Etot_LR[0] - hCPE->hFrontVad[0]->lp_speech, corr_shiftL, tmpE_LR[0], fr_bands_LR[0], &cor_map_sum_LR[0], &ncharX_LR, &sp_div_LR,
684 35818 : &non_staX_LR, loc_harmLR, lf_E_LR[0], &hCPE->hFrontVad[0]->hNoiseEst->harm_cor_cnt, hCPE->hFrontVad[0]->hNoiseEst->Etot_l_lp, &dummy, S_map_LR, NULL, hCPE->hFrontVad[0], hCPE->hFrontVad[0]->ini_frame );
685 :
686 : /* Note: the index [0] in the last argument is intended, the ini_frame counter is only maintained in the zero-th channel's VAD handle */
687 35818 : noise_est( st, old_pitch1, tmpN_LR[1], epsP, Etot_LR[1], Etot_LR[1] - hCPE->hFrontVad[1]->lp_speech, corr_shiftR, tmpE_LR[1], fr_bands_LR[1], &cor_map_sum_LR[1], &ncharX_LR, &sp_div_LR,
688 35818 : &non_staX_LR, loc_harmLR, lf_E_LR[1], &hCPE->hFrontVad[1]->hNoiseEst->harm_cor_cnt, hCPE->hFrontVad[1]->hNoiseEst->Etot_l_lp, &dummy, S_map_LR, NULL, hCPE->hFrontVad[1], hCPE->hFrontVad[0]->ini_frame );
689 : }
690 :
691 : /*------------------------------------------------------------------*
692 : * Update parameters used in the VAD and DTX
693 : *-----------------------------------------------------------------*/
694 :
695 3102575 : vad_param_updt( st, corr_shift, corr_shift, A, old_pitch1, NULL, 1 );
696 :
697 3102575 : if ( lr_vad_enabled && st->idchan == 0 )
698 : {
699 35818 : vad_param_updt( st, corr_shiftL, corr_shiftR, A, old_pitch1, &hCPE->hFrontVad[0], CPE_CHANNELS );
700 : }
701 :
702 : /*-----------------------------------------------------------------*
703 : * Find spectral tilt
704 : * UC and VC frame selection
705 : *-----------------------------------------------------------------*/
706 :
707 3102575 : find_tilt( fr_bands, st->hNoiseEst->bckr, ee, st->pitch, st->voicing, lf_E, corr_shift, st->input_bwidth, st->max_band, hp_E, MODE1, &( st->bckr_tilt_lt ), st->Opt_SC_VBR );
708 :
709 3102575 : st->coder_type = find_uv( st, pitch_fr, voicing_fr, inp_12k8, ee, &dE1X, corr_shift, *relE, Etot, hp_E, &flag_spitch, last_core_orig, hStereoClassif );
710 :
711 : /*-----------------------------------------------------------------*
712 : * channel aware mode configuration *
713 : *-----------------------------------------------------------------*/
714 :
715 3102575 : st->rf_mode = 0;
716 3102575 : st->rf_target_bits_write = 0;
717 :
718 : /*-----------------------------------------------------------------*
719 : * Signal classification for FEC
720 : * TC frame selection
721 : *-----------------------------------------------------------------*/
722 :
723 3102575 : st->clas = signal_clas( st, inp_12k8, ee, *relE, L_look, tdm_SM_last_clas );
724 :
725 3102575 : select_TC( MODE1, st->tc_cnt, &st->coder_type, st->localVAD );
726 :
727 3102575 : if ( st->Opt_SC_VBR )
728 : {
729 0 : st->hSC_VBR->Local_VAD = st->localVAD;
730 : }
731 :
732 : /*-----------------------------------------------------------------*
733 : * Collect stereo classifier features
734 : *-----------------------------------------------------------------*/
735 :
736 3102575 : if ( hStereoClassif != NULL )
737 : {
738 2370445 : stereo_classifier_features( hStereoClassif, st->idchan, element_mode, localVAD_HE_SAD, lsf_new, epsP, st->pitch, st->voicing, *cor_map_sum, non_staX, sp_div, st->clas );
739 : }
740 :
741 : /*----------------------------------------------------------------*
742 : * 1st stage speech/music classification (GMM model)
743 : *----------------------------------------------------------------*/
744 :
745 3102575 : smc_dec = ivas_smc_gmm( st, hStereoClassif, localVAD_HE_SAD, Etot, lsp_new, *cor_map_sum, epsP, PS, non_staX, *relE, &high_lpn_flag, flag_spitch );
746 :
747 : #ifdef DEBUGGING
748 : if ( st->idchan == 0 )
749 : {
750 : if ( st->force == FORCE_SPEECH )
751 : {
752 : /* enforce speech */
753 : st->sp_aud_decision0 = 0;
754 : }
755 : else if ( st->force == FORCE_MUSIC )
756 : {
757 : /* enforce music */
758 : st->sp_aud_decision0 = 1;
759 : }
760 : }
761 : #endif
762 :
763 : /*----------------------------------------------------------------*
764 : * VAD energy updates
765 : * Update of old per-band energy spectrum
766 : *----------------------------------------------------------------*/
767 :
768 3102575 : long_enr( st, Etot, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL );
769 :
770 3102575 : mvr2r( fr_bands + NB_BANDS, st->hNoiseEst->enrO, NB_BANDS );
771 :
772 3102575 : if ( lr_vad_enabled && st->idchan == 0 )
773 : {
774 35818 : long_enr( st, -1, localVAD_HE_SAD, high_lpn_flag, hCPE->hFrontVad, CPE_CHANNELS, localVAD_HE_SAD_LR, Etot_LR );
775 :
776 35818 : mvr2r( fr_bands_LR[0] + NB_BANDS, hCPE->hFrontVad[0]->hNoiseEst->enrO, NB_BANDS );
777 35818 : mvr2r( fr_bands_LR[1] + NB_BANDS, hCPE->hFrontVad[1]->hNoiseEst->enrO, NB_BANDS );
778 : }
779 :
780 : /*----------------------------------------------------------------*
781 : * SNR-based speech/music classification
782 : * AC frame selection
783 : *----------------------------------------------------------------*/
784 :
785 3102575 : st->GSC_IVAS_mode = 0;
786 3102575 : if ( st->idchan == 1 && element_mode == IVAS_CPE_TD )
787 : {
788 : /* No speech/music classification in the secondary channel of TD stereo */
789 4591 : st->sp_aud_decision1 = 0;
790 4591 : st->sp_aud_decision2 = 0;
791 :
792 4591 : st->GSC_noisy_speech = 0;
793 4591 : if ( st->hGSCEnc != NULL )
794 : {
795 4591 : st->hGSCEnc->noise_lev = NOISE_LEVEL_SP3;
796 : }
797 : }
798 3097984 : else if ( element_mode != IVAS_CPE_MDCT )
799 : {
800 : /* SNR-based speech/music classification */
801 871346 : if ( ( element_mode >= IVAS_CPE_DFT && element_brate >= IVAS_24k4 ) || ( element_mode == IVAS_SCE && element_brate >= SCE_SMC_THR ) )
802 : {
803 699845 : if ( ivas_format == SBA_FORMAT && st->core_brate != FRAME_NO_DATA && st->last_core_brate == FRAME_NO_DATA && st->sba_br_sw_while_no_data )
804 : {
805 0 : SetModeIndex( st, st->bits_frame_nominal * FRAMES_PER_SEC, element_mode, MCT_flag );
806 0 : st->sba_br_sw_while_no_data = 0;
807 : }
808 699845 : else if ( ivas_format == SBA_FORMAT && st->core_brate == FRAME_NO_DATA && element_brate != last_element_brate )
809 : {
810 0 : st->sba_br_sw_while_no_data = 1;
811 : }
812 :
813 699845 : if ( flag_16k_smc )
814 : {
815 : /* Compute core-coder buffers at internal sampling rate */
816 : #ifdef FIX_2344_ALIGN_PREPROC
817 31514 : ivas_compute_core_buffers( st, NULL, old_inp_16k, NULL, input_frame, IVAS_SCE /*last_element_mode*/, INT_FS_16k /*sr_core_tmp*/, ener, A, Aw, epsP, lsp_new, lsp_mid );
818 : #else
819 : error = ivas_compute_core_buffers( st, NULL, old_inp_16k, NULL, input_frame, IVAS_SCE /*last_element_mode*/, INT_FS_16k /*sr_core_tmp*/, ener, A, Aw, epsP, lsp_new, lsp_mid );
820 : if ( error != IVAS_ERR_OK )
821 : {
822 : return error;
823 : }
824 : #endif
825 :
826 31514 : smc_dec = ivas_acelp_tcx20_switching( st, st->speech_enc, st->wspeech_enc, non_staX, pitch_fr, voicing_fr, currFlatness, lsp_mid, stab_fac, res_cod_SNR_M, flag_16k_smc );
827 : }
828 : else
829 : {
830 668331 : smc_dec = ivas_acelp_tcx20_switching( st, inp_12k8, wsp, non_staX, pitch_fr, voicing_fr, currFlatness, lsp_mid, stab_fac, res_cod_SNR_M, flag_16k_smc );
831 : }
832 : }
833 : /* Switch to ACELP for non-harmonic transient signals */
834 171501 : else if ( ( ( element_mode >= IVAS_CPE_DFT && element_brate <= IVAS_16k4 ) || ( element_mode == IVAS_SCE && element_brate < SCE_SMC_THR ) ) && ( loc_harm[0] != 1 ) && smc_dec == MUSIC )
835 : {
836 16057 : if ( element_mode == IVAS_SCE )
837 : {
838 9926 : if ( transient_analysis( st->hTranDet, st->hNoiseEst->cor_map, st->hNoiseEst->multi_harm_limit ) )
839 : {
840 745 : smc_dec = SPEECH;
841 : }
842 : }
843 6131 : else if ( element_mode == IVAS_CPE_DFT )
844 : {
845 18393 : for ( i = 0; i < CPE_CHANNELS; i++ )
846 : {
847 12262 : if ( smc_dec != SPEECH && transient_analysis( hCPE->hCoreCoder[i]->hTranDet, st->hNoiseEst->cor_map, st->hNoiseEst->multi_harm_limit ) )
848 : {
849 472 : smc_dec = SPEECH; /* overwrite initial music decision, initial SPEECH_MUSIC never changed */
850 : }
851 : }
852 : }
853 : }
854 :
855 : /* 2nd stage speech/music classification (ACELP/GSC/TCX core selection) */
856 871346 : ivas_smc_mode_selection( st, element_brate, smc_dec, *relE, Etot, attack_flag, inp_12k8, S_map, flag_spitch );
857 : }
858 :
859 : /*----------------------------------------------------------------*
860 : * Final VAD correction (when HE-SAD is used instead of the normal VAD,
861 : * rewrite the VAD flag by VAD flag with DTX hangover for further processing)
862 : *----------------------------------------------------------------*/
863 :
864 3102575 : if ( st->Opt_DTX_ON && element_mode != IVAS_CPE_DFT )
865 : {
866 179749 : st->vad_flag = *vad_flag_dtx;
867 : }
868 :
869 : /*-----------------------------------------------------------------*
870 : * Update old input signal buffer
871 : *-----------------------------------------------------------------*/
872 :
873 3102575 : mvr2r( &old_inp_12k8[L_FRAME], st->old_inp_12k8, L_INP_MEM );
874 :
875 3102575 : pop_wmops();
876 : #ifdef FIX_2344_ALIGN_PREPROC
877 3102575 : return;
878 : #else
879 : return error;
880 : #endif
881 : }
882 :
883 :
884 : /*-------------------------------------------------------------------*
885 : * calculate_energy_buffer()
886 : *
887 : * calculate DFT-based energies
888 : *--------------------------------------------------------------------*/
889 :
890 134625 : static void calculate_energy_buffer(
891 : CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */
892 : float enerBuffer_dft[], /* o : energy buffer */
893 : const int16_t no_channels, /* i : no. of used CLDFB channels */
894 : const int32_t input_Fs /* i : input sampling rate */
895 : )
896 : {
897 : int16_t i, j;
898 : float *pDFT_DMX, *p_nrg_DMX;
899 : float nrg_DMX[CLDFB_NO_CHANNELS_MAX];
900 : float band_res_dft, chan_width_f, chan_width_bins;
901 : int16_t start, stop;
902 :
903 134625 : band_res_dft = ( (float) input_Fs ) / hCPE->hStereoDft->NFFT;
904 134625 : chan_width_f = 24000.f / CLDFB_NO_CHANNELS_MAX;
905 134625 : chan_width_bins = chan_width_f / band_res_dft;
906 :
907 134625 : set_f( nrg_DMX, 0, CLDFB_NO_CHANNELS_MAX );
908 :
909 134625 : pDFT_DMX = hCPE->hStereoDft->DFT[0];
910 134625 : start = 1;
911 134625 : p_nrg_DMX = nrg_DMX;
912 :
913 134625 : *p_nrg_DMX += (float) ( pDFT_DMX[0] * pDFT_DMX[0] );
914 7003545 : for ( i = 0; i < no_channels; i++ )
915 : {
916 6868920 : stop = (int16_t) ( ( i + 1 ) * chan_width_bins + 0.5f );
917 116637015 : for ( j = start; j < stop; j++ )
918 : {
919 109768095 : *p_nrg_DMX += (float) ( pDFT_DMX[2 * j] * pDFT_DMX[2 * j] + pDFT_DMX[2 * j + 1] * pDFT_DMX[2 * j + 1] );
920 : }
921 6868920 : start = stop;
922 6868920 : p_nrg_DMX++;
923 : }
924 :
925 7003545 : for ( i = 0; i < no_channels; i++ ) /* Consider only used channels, dependent on Fs */
926 : {
927 6868920 : enerBuffer_dft[i] = nrg_DMX[i] / 3;
928 : }
929 :
930 : /* Set remaining entries of enerBuffer to zero */
931 1343205 : for ( ; i < CLDFB_NO_CHANNELS_MAX; i++ )
932 : {
933 1208580 : enerBuffer_dft[i] = 0.f;
934 : }
935 :
936 134625 : return;
937 : }
|