Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include "options.h"
35 : #ifdef DEBUGGING
36 : #include "debug.h"
37 : #include <string.h>
38 : #endif
39 : #include "cnst.h"
40 : #include "ivas_cnst.h"
41 : #include "rom_enc.h"
42 : #include "rom_com.h"
43 : #include "prot.h"
44 : #include "ivas_prot.h"
45 : #include "wmc_auto.h"
46 : #include <math.h>
47 :
48 :
49 : /*---------------------------------------------------------------*
50 : * Local constants
51 : *---------------------------------------------------------------*/
52 :
53 : #define SCE_SMC_THR 16000
54 :
55 :
56 : /*-------------------------------------------------------------------*
57 : * Local function prototypes
58 : *--------------------------------------------------------------------*/
59 :
60 : static void calculate_energy_buffer( CPE_ENC_HANDLE hCPE, float enerBuffer_dft[], const int16_t no_channels, const int32_t input_Fs );
61 :
62 :
63 : /*-------------------------------------------------------------------*
64 : * pre_proc_front_ivas()
65 : *
66 : * Front Pre-processing for IVAS
67 : * (resampling, spectral analysis, LP analysis, VAD, OL pitch calculation, classification)
68 : *--------------------------------------------------------------------*/
69 :
70 1132944 : ivas_error pre_proc_front_ivas(
71 : SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */
72 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
73 : const int32_t element_brate, /* i : SCE/CPE element bitrate */
74 : const int16_t nb_bits_metadata, /* i : number of metadata bits */
75 : const int16_t input_frame, /* i : frame length */
76 : const int16_t n, /* i : channel number */
77 : float old_inp_12k8[], /* o : buffer of old input signal */
78 : float old_inp_16k[], /* o : buffer of old input signal @16kHz */
79 : float *ener, /* o : residual energy from Levinson-Durbin */
80 : float *relE, /* o : frame relative energy */
81 : float A[NB_SUBFR16k * ( M + 1 )], /* o : A(z) unquantized for the 4 subframes */
82 : float Aw[NB_SUBFR16k * ( M + 1 )], /* o : weighted A(z) unquantized for subframes */
83 : float epsP[M + 1], /* o : LP prediction errors */
84 : float lsp_new[M], /* o : LSPs at the end of the frame */
85 : float lsp_mid[M], /* o : LSPs in the middle of the frame */
86 : int16_t *vad_hover_flag, /* o : VAD hangover flag */
87 : int16_t *attack_flag, /* o : flag signaling attack */
88 : float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: real buffer */
89 : float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: imag buffer */
90 : float old_wsp[], /* o : weighted input signal buffer */
91 : float pitch_fr[NB_SUBFR], /* o : fractional pitch values */
92 : float voicing_fr[NB_SUBFR], /* o : fractional pitch gains */
93 : int16_t *loc_harm, /* o : harmonicity flag */
94 : float *cor_map_sum, /* o : speech/music clasif. parameter */
95 : int16_t *vad_flag_dtx, /* o : HE-SAD flag with additional DTX HO */
96 : float enerBuffer[CLDFB_NO_CHANNELS_MAX], /* o : energy buffer */
97 : float fft_buff[2 * L_FFT], /* o : FFT buffer */
98 : const float tdm_A_PCh[M + 1], /* i : unq. LP coeff. of primary channel */
99 : const float tdm_lsp_new_PCh[M], /* i : unq. LSPs of primary channel */
100 : const float currFlatness, /* i : flatness parameter */
101 : const int16_t tdm_ratio_idx, /* i : Current Ratio_L index */
102 : float fr_bands_LR[][2 * NB_BANDS], /* i : energy in frequency bands */
103 : const float Etot_LR[], /* i : total energy Left & Right channel */
104 : float lf_E_LR[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels */
105 : const int16_t localVAD_HE_SAD_LR[], /* i : HE-SAD flag without hangover, LR channels */
106 : float band_energies_LR[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN */
107 : const int16_t flag_16k_smc, /* i : flag to indicate if the OL SMC is run at 16 kHz */
108 : const int16_t front_vad_flag, /* i : front-VAD flag to overwrite VAD decision */
109 : const int16_t force_front_vad, /* i : flag to force VAD decision */
110 : const int16_t front_vad_dtx_flag, /* i : front-VAD DTX flag to overwrite VAD decision*/
111 : const IVAS_FORMAT ivas_format, /* i : IVAS format */
112 : const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */
113 : const int32_t last_ivas_total_brate, /* i : last IVAS total bitrate */
114 : const int32_t ivas_total_brate /* i : IVAS total bitrate - for setting the DTX */
115 : )
116 : {
117 : float *inp_12k8, *new_inp_12k8; /* pointers to current frame and new data */
118 : float *wsp; /* weighted input signal buffer */
119 : float Etot; /* total energy */
120 : float fr_bands[2 * NB_BANDS]; /* energy in frequency bands */
121 : float lf_E[2 * VOIC_BINS]; /* per bin spectrum energy in lf */
122 : float tmpN[NB_BANDS]; /* Temporary noise update */
123 : float tmpE[NB_BANDS]; /* Temporary averaged energy of 2 sf. */
124 : float tmpN_LR[CPE_CHANNELS][NB_BANDS]; /* Temporary noise update */
125 : float tmpE_LR[CPE_CHANNELS][NB_BANDS]; /* Temporary averaged energy of 2 sf. */
126 : float cor_map_sum_LR[CPE_CHANNELS]; /* speech/music clasif. parameter */
127 : float non_staX_LR; /* non-stationarity for sp/mus classifier */
128 : float ncharX_LR; /* noise character for sp/mus classifier */
129 : float sp_div_LR; /* spectral diversity feature */
130 : float S_map_LR[L_FFT / 2]; /* short-term correlation map */
131 : float corr_shiftL; /* correlation shift */
132 : float corr_shiftR; /* correlation shift */
133 : int16_t loc_harmLR[CPE_CHANNELS]; /* harmonicity flag */
134 : int16_t lr_vad_enabled; /* LR VAD indicator */
135 : float ee[2]; /* Spectral tilt */
136 : float corr_shift; /* correlation shift */
137 : float sp_div, PS[128]; /* speech/music clasif. parameters */
138 : int16_t L_look; /* length of look-ahead */
139 : float snr_sum_he; /* HE SAD parameters */
140 : float hp_E[2]; /* Energy in HF */
141 : int16_t flag_spitch;
142 : int16_t high_lpn_flag;
143 : float lsf_new[M];
144 : float band_energies[2 * NB_BANDS]; /* energy in critical bands without minimum noise floor E_MIN */
145 : int16_t localVAD_HE_SAD;
146 : float non_staX;
147 : float stab_fac;
148 : int16_t alw_pitch_lag_12k8[2];
149 : float alw_voicing[2];
150 : int16_t last_core_orig;
151 : float dummy;
152 : float mem_decim_dummy[2 * L_FILT_MAX]; /* dummy decimation filter memory */
153 : float S_map[L_FFT / 2];
154 : int16_t i, lMemRecalc, lMemRecalc_12k8;
155 : int16_t smc_dec;
156 : float ncharX, dE1X;
157 : Encoder_State *st;
158 : float *signal_in;
159 : int16_t element_mode;
160 : int32_t input_Fs, last_element_brate;
161 : int16_t *tdm_SM_last_clas, tmpS;
162 : float *res_cod_SNR_M, tmpF[STEREO_DFT_BAND_MAX];
163 : STEREO_CLASSIF_HANDLE hStereoClassif;
164 : float temp1F_icatdmResampBuf[L_FILT_MAX]; /* temp buffers for ICA TDM resamplers */
165 : int16_t old_pitch1; /* previous frame OL pitch[1] @12.8 kHz */
166 : int16_t LR_localVAD;
167 : ivas_error error;
168 :
169 1132944 : push_wmops( "pre_proc_front" );
170 :
171 : /*------------------------------------------------------------------*
172 : * Initialization
173 : *------------------------------------------------------------------*/
174 :
175 1132944 : error = IVAS_ERR_OK;
176 :
177 1132944 : tmpS = 0;
178 1132944 : tdm_SM_last_clas = &tmpS;
179 1132944 : set_f( tmpF, 0, STEREO_DFT_BAND_MAX );
180 1132944 : res_cod_SNR_M = tmpF;
181 :
182 1132944 : LR_localVAD = 0;
183 :
184 1132944 : if ( hSCE != NULL )
185 : {
186 350913 : st = hSCE->hCoreCoder[n];
187 350913 : signal_in = hSCE->hCoreCoder[n]->input;
188 350913 : element_mode = IVAS_SCE;
189 350913 : last_element_brate = hSCE->last_element_brate;
190 350913 : hStereoClassif = NULL;
191 350913 : lr_vad_enabled = 0;
192 : }
193 : else /* CPE */
194 : {
195 782031 : st = hCPE->hCoreCoder[n];
196 782031 : signal_in = hCPE->hCoreCoder[n]->input;
197 782031 : element_mode = hCPE->element_mode;
198 782031 : last_element_brate = hCPE->last_element_brate;
199 782031 : hStereoClassif = hCPE->hStereoClassif;
200 782031 : lr_vad_enabled = 0;
201 782031 : if ( hCPE->hFrontVad[0] != NULL && hCPE->element_mode != IVAS_CPE_MDCT )
202 : {
203 33633 : lr_vad_enabled = 1;
204 : }
205 :
206 782031 : if ( lr_vad_enabled && n == 0 )
207 : {
208 : /* Combine localVAD and vad_flag from LR processing */
209 31968 : LR_localVAD = hCPE->hCoreCoder[0]->localVAD || hCPE->hCoreCoder[1]->localVAD;
210 : }
211 :
212 782031 : if ( hCPE->hStereoTD != NULL )
213 : {
214 7582 : tdm_SM_last_clas = &hCPE->hStereoTD->tdm_SM_last_clas[n];
215 7582 : mvs2s( hCPE->hStereoTD->tdm_SM_last_clas, hCPE->hStereoTD->tdm_SM_last2_clas, CPE_CHANNELS );
216 : }
217 :
218 782031 : if ( hCPE->hStereoDft != NULL )
219 : {
220 59679 : res_cod_SNR_M = hCPE->hStereoDft->res_cod_SNR_M;
221 : }
222 : }
223 :
224 : #ifdef DEBUG_MODE_INFO
225 : if ( !( hCPE != NULL && hCPE->hStereoTD != NULL && n > 0 ) )
226 : {
227 : /* for TD stereo only write out first channel. The existence of a second channel can vary, this is just easier to handle */
228 : int16_t tmp_dmx_in[L_FRAME48k];
229 : mvr2s( signal_in - NS2SA( st->input_Fs, ACELP_LOOK_NS ), tmp_dmx_in, input_frame );
230 : dbgwrite( tmp_dmx_in, sizeof( int16_t ), input_frame, 1, strcat( fname( debug_dir, "ivas_input_dmx", 0, n + 1, ENC ), ".pcm" ) );
231 : }
232 : #endif
233 :
234 1132944 : lMemRecalc_12k8 = 0;
235 1132944 : lMemRecalc = 0;
236 1132944 : if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
237 : {
238 722352 : lMemRecalc = NS2SA( st->input_Fs, L_MEM_RECALC_NS );
239 722352 : lMemRecalc_12k8 = NS2SA( INT_FS_12k8, L_MEM_RECALC_NS );
240 : }
241 :
242 1132944 : input_Fs = st->input_Fs;
243 :
244 1132944 : localVAD_HE_SAD = 0;
245 1132944 : snr_sum_he = 0;
246 :
247 1132944 : corr_shiftL = 0;
248 1132944 : corr_shiftR = 0;
249 :
250 1132944 : if ( hSCE != NULL )
251 : {
252 350913 : *vad_hover_flag = 0;
253 : }
254 1132944 : st->sp_aud_decision1 = 0;
255 1132944 : st->sp_aud_decision2 = 0;
256 1132944 : st->coder_type = GENERIC;
257 1132944 : if ( st->hGSCEnc != NULL )
258 : {
259 418174 : st->hGSCEnc->noise_lev = NOISE_LEVEL_SP0;
260 : }
261 1132944 : *attack_flag = 0;
262 :
263 1132944 : if ( st->Opt_SC_VBR )
264 : {
265 0 : st->hSC_VBR->bump_up = 0;
266 0 : st->hSC_VBR->ppp_mode = 0;
267 0 : st->hSC_VBR->nelp_mode = 0;
268 0 : st->hSC_VBR->avoid_HQ_VBR_NB = 0;
269 : }
270 :
271 1132944 : L_look = L_LOOK_12k8; /* lookahead at 12.8kHz */
272 :
273 1132944 : new_inp_12k8 = old_inp_12k8 + L_INP_MEM; /* pointer to new samples of the input signal in 12.8kHz core */
274 1132944 : inp_12k8 = new_inp_12k8 - L_look; /* pointer to the current frame of input signal in 12.8kHz core */
275 :
276 1132944 : if ( element_mode != IVAS_CPE_DFT )
277 : {
278 1073265 : new_inp_12k8 -= L_FILT;
279 : }
280 :
281 1132944 : if ( element_mode == IVAS_CPE_DFT )
282 : {
283 59679 : mvr2r( st->old_inp_12k8, old_inp_12k8, L_INP_MEM - STEREO_DFT_OVL_12k8 );
284 : }
285 1073265 : else if ( element_mode == IVAS_CPE_TD )
286 : {
287 7582 : mvr2r( st->old_inp_12k8, old_inp_12k8, L_INP_MEM - lMemRecalc_12k8 - L_FILT );
288 : }
289 : else
290 : {
291 1065683 : mvr2r( st->old_inp_12k8, old_inp_12k8, L_INP_MEM - L_FILT );
292 : }
293 :
294 1132944 : mvr2r( st->old_wsp, old_wsp, L_WSP_MEM );
295 1132944 : wsp = old_wsp + L_WSP_MEM; /* pointer to the current frame of weighted signal in 12.8kHz core */
296 :
297 1132944 : st->rf_mode = st->Opt_RF_ON;
298 :
299 1132944 : last_core_orig = st->last_core;
300 :
301 : /*--------------------------------------------------------------*
302 : * energy analysis
303 : *---------------------------------------------------------------*/
304 :
305 1132944 : if ( element_mode == IVAS_SCE || ( element_mode == IVAS_CPE_MDCT && st->Opt_DTX_ON ) )
306 : {
307 387369 : analysisCldfbEncoder( st, signal_in, input_frame, realBuffer, imagBuffer, enerBuffer );
308 : }
309 745575 : else if ( ( element_mode == IVAS_CPE_TD && st->idchan == 0 ) || ( st->idchan == 1 && st->tdm_LRTD_flag ) )
310 : {
311 : /* cldfb analysis only for pri. channel */
312 7456 : analysisCldfbEncoder( st, signal_in - NS2SA( input_Fs, L_MEM_RECALC_TBE_NS ), input_frame, realBuffer, imagBuffer, enerBuffer );
313 : }
314 738119 : else if ( element_mode == IVAS_CPE_DFT )
315 : {
316 59679 : calculate_energy_buffer( hCPE, enerBuffer, st->cldfbAnaEnc->no_channels, input_Fs );
317 : }
318 : else
319 : {
320 678440 : set_f( enerBuffer, 0, CLDFB_NO_CHANNELS_MAX );
321 : }
322 :
323 : /*----------------------------------------------------------------*
324 : * Change the sampling frequency to 12.8 kHz
325 : * (if not available from downsampled DMX)
326 : *----------------------------------------------------------------*/
327 :
328 1132944 : if ( element_mode == IVAS_SCE )
329 : {
330 350913 : modify_Fs( signal_in, input_frame, input_Fs, new_inp_12k8, INT_FS_12k8, st->mem_decim, ( st->max_bwidth == NB ) );
331 :
332 350913 : mvr2r( st->mem_decim, mem_decim_dummy, 2 * L_FILT_MAX );
333 350913 : set_f( temp1F_icatdmResampBuf, 0, L_FILT_MAX );
334 350913 : modify_Fs( temp1F_icatdmResampBuf, NS2SA( st->input_Fs, DELAY_FIR_RESAMPL_NS ), input_Fs, new_inp_12k8 + L_FRAME, INT_FS_12k8, mem_decim_dummy, 0 );
335 : }
336 782031 : else if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
337 : {
338 : /* reconstruct past segment of the Secondary channel input signal when switching from DFT stereo */
339 722352 : if ( hCPE->last_element_mode == IVAS_CPE_DFT && st->idchan == 1 )
340 : {
341 797 : int16_t length_inp = NS2SA( input_Fs, L_MEM_RECALC_SCH_NS );
342 797 : int16_t length_12k8 = NS2SA( INT_FS_12k8, L_MEM_RECALC_SCH_NS );
343 :
344 797 : modify_Fs( signal_in - lMemRecalc - length_inp, length_inp, input_Fs, new_inp_12k8 - lMemRecalc_12k8 - length_12k8, INT_FS_12k8, st->mem_decim, 0 );
345 : }
346 :
347 722352 : modify_Fs( signal_in - lMemRecalc, input_frame, input_Fs, new_inp_12k8 - lMemRecalc_12k8, INT_FS_12k8, st->mem_decim, ( st->max_bwidth == NB ) );
348 722352 : mvr2r( st->mem_decim, mem_decim_dummy, 2 * L_FILT_MAX );
349 :
350 722352 : if ( lMemRecalc > 0 )
351 : {
352 722352 : modify_Fs( signal_in + input_frame - lMemRecalc, lMemRecalc, input_Fs, new_inp_12k8 + L_FRAME - lMemRecalc_12k8, INT_FS_12k8, mem_decim_dummy, ( st->max_bwidth == NB ) );
353 : }
354 722352 : set_f( temp1F_icatdmResampBuf, 0, L_FILT_MAX );
355 722352 : modify_Fs( temp1F_icatdmResampBuf, NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), input_Fs, new_inp_12k8 + L_FRAME, INT_FS_12k8, mem_decim_dummy, 0 );
356 : }
357 : else /* DFT stereo */
358 : {
359 : /* update the FIR resampling filter memory, needed for switching to time-domain (FIR) resampling */
360 59679 : mvr2r( signal_in + input_frame - NS2SA( input_Fs, L_MEM_RECALC_NS ) - 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), st->mem_decim, 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ) );
361 : }
362 :
363 : /* save input resampled at 12.8kHz, non-preemphasised */
364 1132944 : if ( element_mode == IVAS_CPE_DFT )
365 : {
366 59679 : mvr2r( new_inp_12k8 - STEREO_DFT_OVL_12k8, st->buf_speech_enc + L_FRAME32k - STEREO_DFT_OVL_12k8, L_FRAME + STEREO_DFT_OVL_12k8 );
367 : }
368 1073265 : else if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
369 : {
370 722352 : mvr2r( new_inp_12k8 - lMemRecalc_12k8, st->buf_speech_enc + L_FRAME32k - lMemRecalc_12k8 - L_FILT, L_FRAME + lMemRecalc_12k8 + L_FILT );
371 : }
372 : else
373 : {
374 350913 : mvr2r( new_inp_12k8, st->buf_speech_enc + L_FRAME32k, L_FRAME );
375 : }
376 :
377 : /*------------------------------------------------------------------*
378 : * Perform fixed preemphasis (12.8 kHz signal) through 1 - g*z^-1
379 : *-----------------------------------------------------------------*/
380 :
381 1132944 : if ( element_mode == IVAS_CPE_DFT )
382 : {
383 59679 : mvr2r( new_inp_12k8 - STEREO_DFT_OVL_12k8 + L_FRAME, st->inp_12k8_mem_stereo_sw, STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT ); /* memory for TD/DFT stereo switching */
384 :
385 59679 : st->mem_preemph = st->mem_preemph_DFT;
386 59679 : st->mem_preemph_DFT = old_inp_12k8[L_INP_MEM - STEREO_DFT_OVL_12k8 + L_FRAME - 1];
387 :
388 59679 : preemph( new_inp_12k8 - STEREO_DFT_OVL_12k8, PREEMPH_FAC, L_FRAME, &st->mem_preemph );
389 59679 : dummy = st->mem_preemph;
390 59679 : preemph( new_inp_12k8 - STEREO_DFT_OVL_12k8 + L_FRAME, PREEMPH_FAC, STEREO_DFT_OVL_12k8, &dummy );
391 : }
392 1073265 : else if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
393 : {
394 722352 : if ( st->idchan == 0 )
395 : {
396 361176 : if ( hCPE->last_element_mode == IVAS_CPE_DFT )
397 : {
398 797 : st->mem_preemph = st->mem_preemph_DFT;
399 797 : mvr2r( st->inp_12k8_mem_stereo_sw, new_inp_12k8 - L_MEM_RECALC_12K8 - ( STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT ), STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT );
400 797 : preemph( new_inp_12k8 - L_MEM_RECALC_12K8 - ( STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT ), PREEMPH_FAC, STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT, &st->mem_preemph );
401 : }
402 :
403 361176 : st->mem_preemph_DFT = old_inp_12k8[L_INP_MEM - STEREO_DFT_OVL_12k8 + L_FRAME - 1]; /* == inp_12k8[L_FRAME-1] */
404 : }
405 :
406 : /* preemphasise past segment of the Secondary channel input signal when switching from DFT stereo */
407 722352 : if ( hCPE->last_element_mode == IVAS_CPE_DFT && st->idchan == 1 )
408 : {
409 797 : int16_t length_12k8 = NS2SA( INT_FS_12k8, L_MEM_RECALC_SCH_NS );
410 797 : preemph( new_inp_12k8 - lMemRecalc_12k8 - length_12k8, PREEMPH_FAC, length_12k8, &st->mem_preemph );
411 : }
412 :
413 722352 : preemph( new_inp_12k8 - lMemRecalc_12k8, PREEMPH_FAC, L_FRAME, &st->mem_preemph );
414 722352 : dummy = st->mem_preemph;
415 722352 : preemph( new_inp_12k8 - lMemRecalc_12k8 + L_FRAME, PREEMPH_FAC, lMemRecalc_12k8 + L_FILT, &dummy );
416 : }
417 : else /* IVAS_SCE or IVAS_CPE_MDCT */
418 : {
419 350913 : preemph( new_inp_12k8, PREEMPH_FAC, L_FRAME, &st->mem_preemph );
420 350913 : dummy = st->mem_preemph;
421 350913 : preemph( new_inp_12k8 + L_FRAME, PREEMPH_FAC, L_FILT, &dummy );
422 : }
423 :
424 : /*-------------------------------------------------------------------------*
425 : * Spectral analysis
426 : *--------------------------------------------------------------------------*/
427 :
428 1132944 : analy_sp( element_mode, hCPE, input_Fs, inp_12k8, st->Bin_E, st->Bin_E_old, fr_bands, lf_E, &Etot, st->min_band, st->max_band, band_energies, PS, fft_buff );
429 :
430 1132944 : if ( hStereoClassif != NULL )
431 : {
432 782031 : if ( st->lp_speech - Etot > 25 )
433 : {
434 139333 : hStereoClassif->silence_flag = 2;
435 : }
436 : else
437 : {
438 642698 : hStereoClassif->silence_flag = hStereoClassif->silence_flag - 1;
439 : }
440 782031 : hStereoClassif->silence_flag = max( 0, hStereoClassif->silence_flag );
441 : }
442 :
443 : /*----------------------------------------------------------------*
444 : * SAD (1-signal, 0-noise)
445 : *----------------------------------------------------------------*/
446 :
447 1132944 : noise_est_pre( Etot, st->ini_frame, st->hNoiseEst, st->idchan, element_mode, hCPE != NULL ? hCPE->last_element_mode : element_mode );
448 :
449 1132944 : if ( element_mode == IVAS_CPE_TD && ( ( abs( hCPE->hStereoTD->tdm_last_ratio_idx - tdm_ratio_idx ) > 5 && st->idchan == 1 ) || abs( hCPE->hStereoTD->tdm_last_inst_ratio_idx - hCPE->hStereoTD->tdm_inst_ratio_idx ) > 10 ) )
450 : {
451 555 : st->ini_frame = 1;
452 : }
453 :
454 1132944 : st->vad_flag = wb_vad( st, fr_bands, &i, &i, &i, &snr_sum_he, &localVAD_HE_SAD, &( st->flag_noisy_speech_snr ), NULL, NULL, -1000.0f, -1000.0f );
455 :
456 :
457 1132944 : if ( force_front_vad == 1 || front_vad_flag == 1 )
458 : {
459 : /* overwrite VAD decision with front-VAD decision if external VAD is set to 1*/
460 20632 : st->vad_flag = front_vad_flag;
461 20632 : st->localVAD = front_vad_flag;
462 : }
463 1132944 : if ( ( hCPE != NULL && !( lr_vad_enabled && st->idchan == 0 ) ) || hSCE != NULL )
464 : {
465 1100976 : *vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, vad_hover_flag, NULL, NULL, NULL );
466 : }
467 : else
468 : {
469 : /* This only applies to st->idchan==0 now */
470 : /* Add down mix stereo activity to LR vad_flag_dtx */
471 31968 : *vad_flag_dtx = *vad_flag_dtx || st->vad_flag;
472 :
473 :
474 : /* Determine hangover flag status based on LR localVAD and downmix localVAD */
475 31968 : *vad_hover_flag = *vad_flag_dtx && !( LR_localVAD || st->localVAD );
476 : }
477 :
478 1132944 : if ( force_front_vad == 1 || front_vad_dtx_flag == 1 )
479 : {
480 : /* overwrite VAD decision with front-VAD decision if external VAD is set to 1*/
481 11580 : *vad_flag_dtx = front_vad_dtx_flag;
482 : }
483 :
484 : /*----------------------------------------------------------------*
485 : * NB/WB/SWB/FB bandwidth detector
486 : *----------------------------------------------------------------*/
487 :
488 1132944 : if ( st->idchan == 0 && element_mode != IVAS_CPE_MDCT )
489 : {
490 414383 : bw_detect( st, st->input, NULL, enerBuffer, ivas_format, 0 );
491 : }
492 :
493 1132944 : if ( element_mode != IVAS_CPE_MDCT ) /* in MDCT stereo, set_bw_stereo() is used instead */
494 : {
495 418174 : set_bw( element_mode, element_brate, st, MODE1 );
496 : }
497 :
498 : /* set the BW of the TD secondary channel in LRTD mode same as BW of the primary channel (only at higher bitrates) */
499 1132944 : if ( st->idchan == 1 && element_mode == IVAS_CPE_TD && st->tdm_LRTD_flag == 1 && st->bits_frame_channel >= IVAS_16k4 / FRAMES_PER_SEC )
500 : {
501 3665 : st->bwidth = hCPE->hCoreCoder[0]->bwidth;
502 : }
503 :
504 : /*----------------------------------------------------------------*
505 : * Noise energy down-ward update and total noise energy estimation
506 : * Long-term energies and relative frame energy updates
507 : * Correlation correction as a function of total noise level
508 : *----------------------------------------------------------------*/
509 :
510 1132944 : noise_est_down( fr_bands, st->hNoiseEst->bckr, tmpN, tmpE, st->min_band, st->max_band, &st->hNoiseEst->totalNoise, Etot, &st->hNoiseEst->Etot_last, &st->hNoiseEst->Etot_v_h2 );
511 :
512 1132944 : if ( lr_vad_enabled && st->idchan == 0 )
513 : {
514 31968 : noise_est_down( fr_bands_LR[0], hCPE->hFrontVad[0]->hNoiseEst->bckr, tmpN_LR[0], tmpE_LR[0], st->min_band, st->max_band, &hCPE->hFrontVad[0]->hNoiseEst->totalNoise, Etot_LR[0], &hCPE->hFrontVad[0]->hNoiseEst->Etot_last, &hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2 );
515 31968 : noise_est_down( fr_bands_LR[1], hCPE->hFrontVad[1]->hNoiseEst->bckr, tmpN_LR[1], tmpE_LR[1], st->min_band, st->max_band, &hCPE->hFrontVad[1]->hNoiseEst->totalNoise, Etot_LR[1], &hCPE->hFrontVad[1]->hNoiseEst->Etot_last, &hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2 );
516 31968 : corr_shiftL = correlation_shift( hCPE->hFrontVad[0]->hNoiseEst->totalNoise );
517 31968 : corr_shiftR = correlation_shift( hCPE->hFrontVad[1]->hNoiseEst->totalNoise );
518 : }
519 :
520 1132944 : *relE = Etot - st->lp_speech;
521 :
522 1132944 : corr_shift = correlation_shift( st->hNoiseEst->totalNoise );
523 :
524 : /*----------------------------------------------------------------*
525 : * FD-CNG Noise Estimator
526 : *----------------------------------------------------------------*/
527 :
528 1132944 : if ( st->hFdCngEnc != NULL )
529 : {
530 121162 : resetFdCngEnc( st );
531 :
532 121162 : if ( st->idchan == 0 || element_mode == IVAS_CPE_MDCT )
533 : {
534 121132 : if ( element_mode == IVAS_CPE_TD && lr_vad_enabled && band_energies_LR != NULL )
535 : {
536 1665 : perform_noise_estimation_enc( band_energies_LR, enerBuffer, st->hFdCngEnc, input_Fs, hCPE );
537 : }
538 : else
539 : {
540 119467 : perform_noise_estimation_enc( band_energies, enerBuffer, st->hFdCngEnc, input_Fs, hCPE );
541 : }
542 : }
543 : }
544 :
545 : /*-----------------------------------------------------------------*
546 : * Select SID or FRAME_NO_DATA frame if DTX enabled
547 : *-----------------------------------------------------------------*/
548 :
549 1132944 : if ( hCPE != NULL && element_mode != IVAS_CPE_DFT && element_mode != IVAS_CPE_MDCT )
550 : {
551 7582 : *vad_flag_dtx = 1;
552 : }
553 :
554 1132944 : if ( st->Opt_DTX_ON == 1 && *vad_flag_dtx == 0 && element_mode == IVAS_CPE_DFT && element_brate <= ACELP_16k40 && hCPE->hStereoDft->hConfig->force_mono_transmission == 1 ) /* force LP_CNG usage for MASA DTX when mono tranmission */
555 : {
556 266 : st->cng_type = LP_CNG;
557 : }
558 :
559 1132944 : dtx( st, last_ivas_total_brate, ivas_total_brate, *vad_flag_dtx, inp_12k8 );
560 :
561 1132944 : if ( hCPE != NULL && hCPE->hStereoDft != NULL && st->core_brate == SID_2k40 )
562 : {
563 : /* Add another period of expected xcorr updates */
564 1519 : hCPE->hStereoDft->expectedNumUpdates += st->hDtxEnc->max_SID;
565 : }
566 :
567 : /*----------------------------------------------------------------*
568 : * Adjust FD-CNG Noise Estimator
569 : *----------------------------------------------------------------*/
570 :
571 1132944 : if ( st->hFdCngEnc != NULL && ( st->ini_frame == 0 || last_element_brate != element_brate || st->last_bwidth != st->bwidth ) )
572 : {
573 : int32_t total_brate;
574 :
575 4255 : total_brate = ( element_mode == IVAS_SCE ) ? st->total_brate : st->bits_frame_nominal * FRAMES_PER_SEC;
576 4255 : configureFdCngEnc( st->hFdCngEnc, max( st->input_bwidth, WB ), total_brate );
577 4255 : if ( hCPE != NULL )
578 : {
579 763 : st->hFdCngEnc->hFdCngCom->CngBitrate = hCPE->element_brate - 1;
580 : }
581 : }
582 :
583 1132944 : if ( st->hFdCngEnc != NULL && st->Opt_DTX_ON )
584 : {
585 121162 : AdjustFirstSID( st );
586 : }
587 :
588 : /*----------------------------------------------------------------*
589 : * LP analysis
590 : *----------------------------------------------------------------*/
591 :
592 1132944 : alw_pitch_lag_12k8[0] = st->old_pitch_la;
593 1132944 : alw_pitch_lag_12k8[1] = st->old_pitch_la;
594 1132944 : alw_voicing[0] = st->voicing[2];
595 1132944 : alw_voicing[1] = st->voicing[2];
596 :
597 1132944 : i = 0;
598 1132944 : if ( element_mode == IVAS_CPE_TD && st->idchan == 1 && hCPE->hStereoTD->tdm_low_rate_mode == 1 )
599 : {
600 0 : i = 1;
601 : }
602 :
603 1132944 : analy_lp( inp_12k8, L_FRAME, L_look, ener, A, epsP, lsp_new, lsp_mid, st->lsp_old1, alw_pitch_lag_12k8, alw_voicing, INT_FS_12k8, i );
604 :
605 1132944 : lsp2lsf( lsp_new, lsf_new, M, INT_FS_12k8 );
606 1132944 : stab_fac = lsf_stab( lsf_new, st->lsf_old1, 0, L_FRAME );
607 1132944 : mvr2r( lsf_new, st->lsf_old1, M );
608 :
609 1132944 : if ( element_mode == IVAS_CPE_TD && st->idchan == 1 )
610 : {
611 : /*----------------------------------------------------------------*
612 : * Comparison of the LP coefficients to determine if it is possible
613 : * to reuse the primary channel LP coefficients in the secondary channel
614 : *----------------------------------------------------------------*/
615 :
616 3791 : hCPE->hStereoTD->tdm_lp_reuse_flag = tdm_lp_comparison( hCPE->hStereoTD, hCPE->hStereoClassif, st, inp_12k8, tdm_A_PCh, A, M, tdm_lsp_new_PCh, lsp_new, L_FRAME, element_brate - nb_bits_metadata * FRAMES_PER_SEC );
617 : }
618 :
619 : /*----------------------------------------------------------------*
620 : * Compute weighted input (for OL pitch analysis)
621 : * OL pitch analysis
622 : * stable high pitch detection
623 : * 1/4 pitch precision improvement
624 : *----------------------------------------------------------------*/
625 :
626 1132944 : find_wsp( L_FRAME, L_SUBFR, NB_SUBFR, A, Aw, inp_12k8, TILT_FAC, wsp, &st->mem_wsp, GAMMA1, L_look );
627 :
628 1132944 : if ( st->vad_flag == 0 )
629 : {
630 : /* reset the OL pitch tracker memories during inactive frames */
631 149216 : pitch_ol_init( &st->old_thres, &st->old_pitch, &st->delta_pit, &st->old_corr );
632 : }
633 :
634 1132944 : old_pitch1 = st->pitch[1];
635 :
636 1132944 : pitch_ol( st->pitch, st->voicing, &st->old_pitch, &st->old_corr, corr_shift, &st->old_thres, &st->delta_pit, st->old_wsp2, wsp, st->mem_decim2, *relE, L_look, st->clas, st->input_bwidth, st->Opt_SC_VBR );
637 :
638 : /* Updates for adaptive lag window memory */
639 1132944 : st->old_pitch_la = st->pitch[2];
640 :
641 : /* Detection of very short stable pitch period */
642 1132944 : StableHighPitchDetect( &flag_spitch, st->pitch, st->voicing, st->Bin_E, wsp, st->localVAD, &st->voicing_sm, &st->voicing0_sm, &st->LF_EnergyRatio_sm, &st->predecision_flag, &st->diff_sm, &st->energy_sm );
643 :
644 : /* 1/4 pitch precision improvement */
645 1132944 : if ( element_brate <= IVAS_32k )
646 : {
647 294726 : pitch_ol2( PIT_MIN_EXTEND, st->pitch[0], &pitch_fr[0], &voicing_fr[0], 0, wsp, 7 );
648 294726 : pitch_ol2( PIT_MIN_EXTEND, st->pitch[0], &pitch_fr[1], &voicing_fr[1], L_SUBFR, wsp, 7 );
649 294726 : pitch_ol2( PIT_MIN_EXTEND, st->pitch[1], &pitch_fr[2], &voicing_fr[2], 2 * L_SUBFR, wsp, 7 );
650 294726 : pitch_ol2( PIT_MIN_EXTEND, st->pitch[1], &pitch_fr[3], &voicing_fr[3], 3 * L_SUBFR, wsp, 7 );
651 : }
652 : else
653 : {
654 838218 : pitch_fr[0] = st->pitch[0];
655 838218 : pitch_fr[1] = st->pitch[0];
656 838218 : pitch_fr[2] = st->pitch[1];
657 838218 : pitch_fr[3] = st->pitch[1];
658 :
659 838218 : voicing_fr[0] = st->voicing[0];
660 838218 : voicing_fr[1] = st->voicing[0];
661 838218 : voicing_fr[2] = st->voicing[1];
662 838218 : voicing_fr[3] = st->voicing[1];
663 : }
664 :
665 : /*------------------------------------------------------------------*
666 : * Update estimated noise energy and voicing cut-off frequency
667 : *-----------------------------------------------------------------*/
668 :
669 1132944 : noise_est( st, old_pitch1, tmpN, epsP, Etot, *relE, corr_shift, tmpE, fr_bands, cor_map_sum, &ncharX, &sp_div,
670 1132944 : &non_staX, loc_harm, lf_E, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp, &dummy /*sp_floor*/, S_map, hStereoClassif, NULL, st->ini_frame );
671 :
672 1132944 : if ( lr_vad_enabled && st->idchan == 0 )
673 : {
674 : /* Run noise_est for Left and Right channel */
675 31968 : *loc_harmLR = *loc_harm;
676 31968 : noise_est( st, old_pitch1, tmpN_LR[0], epsP, Etot_LR[0], Etot_LR[0] - hCPE->hFrontVad[0]->lp_speech, corr_shiftL, tmpE_LR[0], fr_bands_LR[0], &cor_map_sum_LR[0], &ncharX_LR, &sp_div_LR,
677 31968 : &non_staX_LR, loc_harmLR, lf_E_LR[0], &hCPE->hFrontVad[0]->hNoiseEst->harm_cor_cnt, hCPE->hFrontVad[0]->hNoiseEst->Etot_l_lp, &dummy, S_map_LR, NULL, hCPE->hFrontVad[0], hCPE->hFrontVad[0]->ini_frame );
678 :
679 : /* Note: the index [0] in the last argument is intended, the ini_frame counter is only maintained in the zero-th channel's VAD handle */
680 31968 : noise_est( st, old_pitch1, tmpN_LR[1], epsP, Etot_LR[1], Etot_LR[1] - hCPE->hFrontVad[1]->lp_speech, corr_shiftR, tmpE_LR[1], fr_bands_LR[1], &cor_map_sum_LR[1], &ncharX_LR, &sp_div_LR,
681 31968 : &non_staX_LR, loc_harmLR, lf_E_LR[1], &hCPE->hFrontVad[1]->hNoiseEst->harm_cor_cnt, hCPE->hFrontVad[1]->hNoiseEst->Etot_l_lp, &dummy, S_map_LR, NULL, hCPE->hFrontVad[1], hCPE->hFrontVad[0]->ini_frame );
682 : }
683 :
684 : /*------------------------------------------------------------------*
685 : * Update parameters used in the VAD and DTX
686 : *-----------------------------------------------------------------*/
687 :
688 1132944 : vad_param_updt( st, corr_shift, corr_shift, A, old_pitch1, NULL, 1 );
689 :
690 1132944 : if ( lr_vad_enabled && st->idchan == 0 )
691 : {
692 31968 : vad_param_updt( st, corr_shiftL, corr_shiftR, A, old_pitch1, &hCPE->hFrontVad[0], CPE_CHANNELS );
693 : }
694 :
695 : /*-----------------------------------------------------------------*
696 : * Find spectral tilt
697 : * UC and VC frame selection
698 : *-----------------------------------------------------------------*/
699 :
700 1132944 : find_tilt( fr_bands, st->hNoiseEst->bckr, ee, st->pitch, st->voicing, lf_E, corr_shift, st->input_bwidth, st->max_band, hp_E, MODE1, &( st->bckr_tilt_lt ), st->Opt_SC_VBR );
701 :
702 1132944 : st->coder_type = find_uv( st, pitch_fr, voicing_fr, inp_12k8, ee, &dE1X, corr_shift, *relE, Etot, hp_E, &flag_spitch, last_core_orig, hStereoClassif );
703 :
704 : /*-----------------------------------------------------------------*
705 : * channel aware mode configuration *
706 : *-----------------------------------------------------------------*/
707 :
708 1132944 : st->rf_mode = 0;
709 1132944 : st->rf_target_bits_write = 0;
710 :
711 : /*-----------------------------------------------------------------*
712 : * Signal classification for FEC
713 : * TC frame selection
714 : *-----------------------------------------------------------------*/
715 :
716 1132944 : st->clas = signal_clas( st, inp_12k8, ee, *relE, L_look, tdm_SM_last_clas );
717 :
718 1132944 : select_TC( MODE1, st->tc_cnt, &st->coder_type, st->localVAD );
719 :
720 1132944 : if ( st->Opt_SC_VBR )
721 : {
722 0 : st->hSC_VBR->Local_VAD = st->localVAD;
723 : }
724 :
725 : /*-----------------------------------------------------------------*
726 : * Collect stereo classifier features
727 : *-----------------------------------------------------------------*/
728 :
729 1132944 : if ( hStereoClassif != NULL )
730 : {
731 782031 : stereo_classifier_features( hStereoClassif, st->idchan, element_mode, localVAD_HE_SAD, lsf_new, epsP, st->pitch, st->voicing, *cor_map_sum, non_staX, sp_div, st->clas );
732 : }
733 :
734 : /*----------------------------------------------------------------*
735 : * 1st stage speech/music classification (GMM model)
736 : *----------------------------------------------------------------*/
737 :
738 1132944 : smc_dec = ivas_smc_gmm( st, hStereoClassif, localVAD_HE_SAD, Etot, lsp_new, *cor_map_sum, epsP, PS, non_staX, *relE, &high_lpn_flag, flag_spitch );
739 :
740 : #ifdef DEBUGGING
741 : if ( st->idchan == 0 )
742 : {
743 : if ( st->force == FORCE_SPEECH )
744 : {
745 : /* enforce speech */
746 : st->sp_aud_decision0 = 0;
747 : }
748 : else if ( st->force == FORCE_MUSIC )
749 : {
750 : /* enforce music */
751 : st->sp_aud_decision0 = 1;
752 : }
753 : }
754 : #endif
755 :
756 : /*----------------------------------------------------------------*
757 : * VAD energy updates
758 : * Update of old per-band energy spectrum
759 : *----------------------------------------------------------------*/
760 :
761 1132944 : long_enr( st, Etot, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL );
762 :
763 1132944 : mvr2r( fr_bands + NB_BANDS, st->hNoiseEst->enrO, NB_BANDS );
764 :
765 1132944 : if ( lr_vad_enabled && st->idchan == 0 )
766 : {
767 31968 : long_enr( st, -1, localVAD_HE_SAD, high_lpn_flag, hCPE->hFrontVad, CPE_CHANNELS, localVAD_HE_SAD_LR, Etot_LR );
768 :
769 31968 : mvr2r( fr_bands_LR[0] + NB_BANDS, hCPE->hFrontVad[0]->hNoiseEst->enrO, NB_BANDS );
770 31968 : mvr2r( fr_bands_LR[1] + NB_BANDS, hCPE->hFrontVad[1]->hNoiseEst->enrO, NB_BANDS );
771 : }
772 :
773 : /*----------------------------------------------------------------*
774 : * SNR-based speech/music classification
775 : * AC frame selection
776 : *----------------------------------------------------------------*/
777 :
778 1132944 : st->GSC_IVAS_mode = 0;
779 1132944 : if ( st->idchan == 1 && element_mode == IVAS_CPE_TD )
780 : {
781 : /* No speech/music classification in the secondary channel of TD stereo */
782 3791 : st->sp_aud_decision1 = 0;
783 3791 : st->sp_aud_decision2 = 0;
784 :
785 3791 : st->GSC_noisy_speech = 0;
786 3791 : if ( st->hGSCEnc != NULL )
787 : {
788 3791 : st->hGSCEnc->noise_lev = NOISE_LEVEL_SP3;
789 : }
790 : }
791 1129153 : else if ( element_mode != IVAS_CPE_MDCT )
792 : {
793 : /* SNR-based speech/music classification */
794 414383 : if ( ( element_mode >= IVAS_CPE_DFT && element_brate >= IVAS_24k4 ) || ( element_mode == IVAS_SCE && element_brate >= SCE_SMC_THR ) )
795 : {
796 340560 : if ( ivas_format == SBA_FORMAT && st->core_brate != FRAME_NO_DATA && st->last_core_brate == FRAME_NO_DATA && st->sba_br_sw_while_no_data )
797 : {
798 0 : SetModeIndex( st, st->bits_frame_nominal * FRAMES_PER_SEC, element_mode, MCT_flag );
799 0 : st->sba_br_sw_while_no_data = 0;
800 : }
801 340560 : else if ( ivas_format == SBA_FORMAT && st->core_brate == FRAME_NO_DATA && element_brate != last_element_brate )
802 : {
803 0 : st->sba_br_sw_while_no_data = 1;
804 : }
805 :
806 340560 : if ( flag_16k_smc )
807 : {
808 : /* Compute core-coder buffers at internal sampling rate */
809 26562 : error = ivas_compute_core_buffers( st, NULL, old_inp_16k, NULL, input_frame, IVAS_SCE /*last_element_mode*/, INT_FS_16k /*sr_core_tmp*/, ener, A, Aw, epsP, lsp_new, lsp_mid );
810 26562 : if ( error != IVAS_ERR_OK )
811 : {
812 0 : return error;
813 : }
814 :
815 26562 : smc_dec = ivas_acelp_tcx20_switching( st, st->speech_enc, st->wspeech_enc, non_staX, pitch_fr, voicing_fr, currFlatness, lsp_mid, stab_fac, res_cod_SNR_M, flag_16k_smc );
816 : }
817 : else
818 : {
819 313998 : smc_dec = ivas_acelp_tcx20_switching( st, inp_12k8, wsp, non_staX, pitch_fr, voicing_fr, currFlatness, lsp_mid, stab_fac, res_cod_SNR_M, flag_16k_smc );
820 : }
821 : }
822 : /* Switch to ACELP for non-harmonic transient signals */
823 73823 : else if ( ( ( element_mode >= IVAS_CPE_DFT && element_brate <= IVAS_16k4 ) || ( element_mode == IVAS_SCE && element_brate < SCE_SMC_THR ) ) && ( loc_harm[0] != 1 ) && smc_dec == MUSIC )
824 : {
825 7909 : if ( element_mode == IVAS_SCE )
826 : {
827 4886 : if ( transient_analysis( st->hTranDet, st->hNoiseEst->cor_map, st->hNoiseEst->multi_harm_limit ) )
828 : {
829 376 : smc_dec = SPEECH;
830 : }
831 : }
832 3023 : else if ( element_mode == IVAS_CPE_DFT )
833 : {
834 9069 : for ( i = 0; i < CPE_CHANNELS; i++ )
835 : {
836 6046 : if ( smc_dec != SPEECH && transient_analysis( hCPE->hCoreCoder[i]->hTranDet, st->hNoiseEst->cor_map, st->hNoiseEst->multi_harm_limit ) )
837 : {
838 54 : smc_dec = SPEECH; /* overwrite initial music decision, initial SPEECH_MUSIC never changed */
839 : }
840 : }
841 : }
842 : }
843 :
844 : /* 2nd stage speech/music classification (ACELP/GSC/TCX core selection) */
845 414383 : ivas_smc_mode_selection( st, element_brate, smc_dec, *relE, Etot, attack_flag, inp_12k8, S_map, flag_spitch );
846 : }
847 :
848 : /*----------------------------------------------------------------*
849 : * Final VAD correction (when HE-SAD is used instead of the normal VAD,
850 : * rewrite the VAD flag by VAD flag with DTX hangover for further processing)
851 : *----------------------------------------------------------------*/
852 :
853 1132944 : if ( st->Opt_DTX_ON && element_mode != IVAS_CPE_DFT )
854 : {
855 92494 : st->vad_flag = *vad_flag_dtx;
856 : }
857 :
858 : /*-----------------------------------------------------------------*
859 : * Update old input signal buffer
860 : *-----------------------------------------------------------------*/
861 :
862 1132944 : mvr2r( &old_inp_12k8[L_FRAME], st->old_inp_12k8, L_INP_MEM );
863 :
864 1132944 : pop_wmops();
865 1132944 : return error;
866 : }
867 :
868 :
869 : /*-------------------------------------------------------------------*
870 : * calculate_energy_buffer()
871 : *
872 : * calculate DFT-based energies
873 : *--------------------------------------------------------------------*/
874 :
875 59679 : static void calculate_energy_buffer(
876 : CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */
877 : float enerBuffer_dft[], /* o : energy buffer */
878 : const int16_t no_channels, /* i : no. of used CLDFB channels */
879 : const int32_t input_Fs /* i : input sampling rate */
880 : )
881 : {
882 : int16_t i, j;
883 : float *pDFT_DMX, *p_nrg_DMX;
884 : float nrg_DMX[CLDFB_NO_CHANNELS_MAX];
885 : float band_res_dft, chan_width_f, chan_width_bins;
886 : int16_t start, stop;
887 :
888 59679 : band_res_dft = ( (float) input_Fs ) / hCPE->hStereoDft->NFFT;
889 59679 : chan_width_f = 24000.f / CLDFB_NO_CHANNELS_MAX;
890 59679 : chan_width_bins = chan_width_f / band_res_dft;
891 :
892 59679 : set_f( nrg_DMX, 0, CLDFB_NO_CHANNELS_MAX );
893 :
894 59679 : pDFT_DMX = hCPE->hStereoDft->DFT[0];
895 59679 : start = 1;
896 59679 : p_nrg_DMX = nrg_DMX;
897 :
898 59679 : *p_nrg_DMX += (float) ( pDFT_DMX[0] * pDFT_DMX[0] );
899 2633379 : for ( i = 0; i < no_channels; i++ )
900 : {
901 2573700 : stop = (int16_t) ( ( i + 1 ) * chan_width_bins + 0.5f );
902 43693221 : for ( j = start; j < stop; j++ )
903 : {
904 41119521 : *p_nrg_DMX += (float) ( pDFT_DMX[2 * j] * pDFT_DMX[2 * j] + pDFT_DMX[2 * j + 1] * pDFT_DMX[2 * j + 1] );
905 : }
906 2573700 : start = stop;
907 2573700 : p_nrg_DMX++;
908 : }
909 :
910 2633379 : for ( i = 0; i < no_channels; i++ ) /* Consider only used channels, dependent on Fs */
911 : {
912 2573700 : enerBuffer_dft[i] = nrg_DMX[i] / 3;
913 : }
914 :
915 : /* Set remaining entries of enerBuffer to zero */
916 1066719 : for ( ; i < CLDFB_NO_CHANNELS_MAX; i++ )
917 : {
918 1007040 : enerBuffer_dft[i] = 0.f;
919 : }
920 :
921 59679 : return;
922 : }
|