Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2026 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include "options.h"
35 : #ifdef DEBUGGING
36 : #include "debug.h"
37 : #include <string.h>
38 : #endif
39 : #include "cnst.h"
40 : #include "ivas_cnst.h"
41 : #include "rom_enc.h"
42 : #include "rom_com.h"
43 : #include "prot.h"
44 : #include "ivas_prot.h"
45 : #include "wmc_auto.h"
46 : #include <math.h>
47 :
48 :
49 : /*---------------------------------------------------------------*
50 : * Local constants
51 : *---------------------------------------------------------------*/
52 :
53 : #define SCE_SMC_THR 16000 /* NOTE(review): not referenced in the visible part of this file; presumably a threshold tied to the SCE speech/music classifier (SMC) - confirm its unit and where it is used */
54 :
55 :
56 : /*-------------------------------------------------------------------*
57 : * Local function prototypes
58 : *--------------------------------------------------------------------*/
59 :
60 : static void calculate_energy_buffer( CPE_ENC_HANDLE hCPE, float enerBuffer_dft[], const int16_t no_channels, const int32_t input_Fs ); /* file-local helper; pre_proc_front_ivas() calls it in the IVAS_CPE_DFT branch of the energy analysis, passing enerBuffer, the CLDFB analysis channel count and the input sampling rate (presumably it fills enerBuffer_dft - definition not visible here, confirm) */
61 :
62 :
63 : /*-------------------------------------------------------------------*
64 : * pre_proc_front_ivas()
65 : *
66 : * Front Pre-processing for IVAS
67 : * (resampling, spectral analysis, LP analysis, VAD, OL pitch calculation, classification)
68 : *--------------------------------------------------------------------*/
69 :
70 17850223 : void pre_proc_front_ivas(
71 : SCE_ENC_HANDLE hSCE, /* i/o: SCE encoder structure */
72 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
73 : const int32_t element_brate, /* i : SCE/CPE element bitrate */
74 : const int16_t nb_bits_metadata, /* i : number of metadata bits */
75 : const int16_t input_frame, /* i : frame length */
76 : const int16_t n, /* i : channel number */
77 : float old_inp_12k8[], /* o : buffer of old input signal */
78 : float old_inp_16k[], /* o : buffer of old input signal @16kHz */
79 : float *ener, /* o : residual energy from Levinson-Durbin */
80 : float *relE, /* o : frame relative energy */
81 : float A[NB_SUBFR16k * ( M + 1 )], /* o : A(z) unquantized for the 4 subframes */
82 : float Aw[NB_SUBFR16k * ( M + 1 )], /* o : weighted A(z) unquantized for subframes */
83 : float epsP[M + 1], /* o : LP prediction errors */
84 : float lsp_new[M], /* o : LSPs at the end of the frame */
85 : float lsp_mid[M], /* o : LSPs in the middle of the frame */
86 : int16_t *vad_hover_flag, /* o : VAD hangover flag */
87 : int16_t *attack_flag, /* o : flag signaling attack */
88 : float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: real buffer */
89 : float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i/o: imag buffer */
90 : float old_wsp[], /* o : weighted input signal buffer */
91 : float pitch_fr[NB_SUBFR], /* o : fractional pitch values */
92 : float voicing_fr[NB_SUBFR], /* o : fractional pitch gains */
93 : int16_t *loc_harm, /* o : harmonicity flag */
94 : float *cor_map_sum, /* o : speech/music clasif. parameter */
95 : int16_t *vad_flag_dtx, /* o : HE-SAD flag with additional DTX HO */
96 : float enerBuffer[CLDFB_NO_CHANNELS_MAX], /* o : energy buffer */
97 : float fft_buff[2 * L_FFT], /* o : FFT buffer */
98 : const float tdm_A_PCh[M + 1], /* i : unq. LP coeff. of primary channel */
99 : const float tdm_lsp_new_PCh[M], /* i : unq. LSPs of primary channel */
100 : const float currFlatness, /* i : flatness parameter */
101 : const int16_t tdm_ratio_idx, /* i : Current Ratio_L index */
102 : float fr_bands_LR[][2 * NB_BANDS], /* i : energy in frequency bands */
103 : const float Etot_LR[], /* i : total energy Left & Right channel */
104 : float lf_E_LR[][2 * VOIC_BINS], /* i : per bin spectrum energy in lf, LR channels */
105 : const int16_t localVAD_HE_SAD_LR[], /* i : HE-SAD flag without hangover, LR channels */
106 : float band_energies_LR[2 * NB_BANDS], /* o : energy in critical bands without minimum noise floor E_MIN */
107 : const int16_t flag_16k_smc, /* i : flag to indicate if the OL SMC is run at 16 kHz */
108 : const int16_t front_vad_flag, /* i : front-VAD flag to overwrite VAD decision */
109 : const int16_t force_front_vad, /* i : flag to force VAD decision */
110 : const int16_t front_vad_dtx_flag, /* i : front-VAD DTX flag to overwrite VAD decision*/
111 : const IVAS_FORMAT ivas_format, /* i : IVAS format */
112 : const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */
113 : const int32_t last_ivas_total_brate, /* i : last IVAS total bitrate */
114 : const int32_t ivas_total_brate /* i : IVAS total bitrate - for setting the DTX */
115 : )
116 : {
117 : float *inp_12k8, *new_inp_12k8; /* pointers to current frame and new data */
118 : float *wsp; /* weighted input signal buffer */
119 : float Etot; /* total energy */
120 : float fr_bands[2 * NB_BANDS]; /* energy in frequency bands */
121 : float lf_E[2 * VOIC_BINS]; /* per bin spectrum energy in lf */
122 : float tmpN[NB_BANDS]; /* Temporary noise update */
123 : float tmpE[NB_BANDS]; /* Temporary averaged energy of 2 sf. */
124 : float tmpN_LR[CPE_CHANNELS][NB_BANDS]; /* Temporary noise update */
125 : float tmpE_LR[CPE_CHANNELS][NB_BANDS]; /* Temporary averaged energy of 2 sf. */
126 : float cor_map_sum_LR[CPE_CHANNELS]; /* speech/music clasif. parameter */
127 : float non_staX_LR; /* non-stationarity for sp/mus classifier */
128 : float ncharX_LR; /* noise character for sp/mus classifier */
129 : float sp_div_LR; /* spectral diversity feature */
130 : float S_map_LR[L_FFT / 2]; /* short-term correlation map */
131 : float corr_shiftL; /* correlation shift */
132 : float corr_shiftR; /* correlation shift */
133 : int16_t loc_harmLR[CPE_CHANNELS]; /* harmonicity flag */
134 : int16_t lr_vad_enabled; /* LR VAD indicator */
135 : float ee[2]; /* Spectral tilt */
136 : float corr_shift; /* correlation shift */
137 : float sp_div, PS[128]; /* speech/music clasif. parameters */
138 : int16_t L_look; /* length of look-ahead */
139 : float snr_sum_he; /* HE SAD parameters */
140 : float hp_E[2]; /* Energy in HF */
141 : int16_t flag_spitch;
142 : int16_t high_lpn_flag;
143 : float lsf_new[M];
144 : float band_energies[2 * NB_BANDS]; /* energy in critical bands without minimum noise floor E_MIN */
145 : int16_t localVAD_HE_SAD;
146 : float non_staX;
147 : float stab_fac;
148 : int16_t alw_pitch_lag_12k8[2];
149 : float alw_voicing[2];
150 : int16_t last_core_orig;
151 : float dummy;
152 : float mem_decim_dummy[2 * L_FILT_MAX]; /* dummy decimation filter memory */
153 : float S_map[L_FFT / 2];
154 : int16_t i, lMemRecalc, lMemRecalc_12k8;
155 : int16_t smc_dec;
156 : float ncharX, dE1X;
157 : Encoder_State *st;
158 : float *signal_in;
159 : int16_t element_mode;
160 : int32_t input_Fs, last_element_brate;
161 : int16_t *tdm_SM_last_clas, tmpS;
162 : float *res_cod_SNR_M, tmpF[STEREO_DFT_BAND_MAX];
163 : STEREO_CLASSIF_HANDLE hStereoClassif;
164 : float temp1F_icatdmResampBuf[L_FILT_MAX]; /* temp buffers for ICA TDM resamplers */
165 : int16_t old_pitch1; /* previous frame OL pitch[1] @12.8 kHz */
166 : int16_t LR_localVAD;
167 :
168 17850223 : push_wmops( "pre_proc_front" );
169 :
170 : /*------------------------------------------------------------------*
171 : * Initialization
172 : *------------------------------------------------------------------*/
173 :
174 17850223 : tmpS = 0;
175 17850223 : tdm_SM_last_clas = &tmpS;
176 17850223 : set_f( tmpF, 0, STEREO_DFT_BAND_MAX );
177 17850223 : res_cod_SNR_M = tmpF;
178 :
179 17850223 : LR_localVAD = 0;
180 :
181 17850223 : if ( hSCE != NULL )
182 : {
183 4106334 : st = hSCE->hCoreCoder[n];
184 4106334 : signal_in = hSCE->hCoreCoder[n]->input;
185 4106334 : element_mode = IVAS_SCE;
186 4106334 : last_element_brate = hSCE->last_element_brate;
187 4106334 : hStereoClassif = NULL;
188 4106334 : lr_vad_enabled = 0;
189 : }
190 : else /* CPE */
191 : {
192 13743889 : st = hCPE->hCoreCoder[n];
193 13743889 : signal_in = hCPE->hCoreCoder[n]->input;
194 13743889 : element_mode = hCPE->element_mode;
195 13743889 : last_element_brate = hCPE->last_element_brate;
196 13743889 : hStereoClassif = hCPE->hStereoClassif;
197 13743889 : lr_vad_enabled = 0;
198 13743889 : if ( hCPE->hFrontVad[0] != NULL && hCPE->element_mode != IVAS_CPE_MDCT )
199 : {
200 359726 : lr_vad_enabled = 1;
201 : }
202 :
203 13743889 : if ( lr_vad_enabled && n == 0 )
204 : {
205 : /* Combine localVAD and vad_flag from LR processing */
206 350716 : LR_localVAD = hCPE->hCoreCoder[0]->localVAD || hCPE->hCoreCoder[1]->localVAD;
207 : }
208 :
209 13743889 : if ( hCPE->hStereoTD != NULL )
210 : {
211 62354 : tdm_SM_last_clas = &hCPE->hStereoTD->tdm_SM_last_clas[n];
212 62354 : mvs2s( hCPE->hStereoTD->tdm_SM_last_clas, hCPE->hStereoTD->tdm_SM_last2_clas, CPE_CHANNELS );
213 : }
214 :
215 13743889 : if ( hCPE->hStereoDft != NULL )
216 : {
217 912789 : res_cod_SNR_M = hCPE->hStereoDft->res_cod_SNR_M;
218 : }
219 : }
220 :
221 : #ifdef DEBUG_MODE_INFO
222 : if ( !( hCPE != NULL && hCPE->hStereoTD != NULL && n > 0 ) )
223 : {
224 : /* for TD stereo only write out first channel. The existence of a second channel can vary, this is just easier to handle */
225 : int16_t tmp_dmx_in[L_FRAME48k];
226 : mvr2s( signal_in - NS2SA( st->input_Fs, ACELP_LOOK_NS ), tmp_dmx_in, input_frame );
227 : dbgwrite( tmp_dmx_in, sizeof( int16_t ), input_frame, 1, strcat( fname( debug_dir, "ivas_input_dmx", 0, n + 1, ENC ), ".pcm" ) );
228 : }
229 : #endif
230 :
231 17850223 : lMemRecalc_12k8 = 0;
232 17850223 : lMemRecalc = 0;
233 17850223 : if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
234 : {
235 12831100 : lMemRecalc = NS2SA( st->input_Fs, L_MEM_RECALC_NS );
236 12831100 : lMemRecalc_12k8 = NS2SA( INT_FS_12k8, L_MEM_RECALC_NS );
237 : }
238 :
239 17850223 : input_Fs = st->input_Fs;
240 :
241 17850223 : localVAD_HE_SAD = 0;
242 17850223 : snr_sum_he = 0;
243 :
244 17850223 : corr_shiftL = 0;
245 17850223 : corr_shiftR = 0;
246 :
247 17850223 : if ( hSCE != NULL )
248 : {
249 4106334 : *vad_hover_flag = 0;
250 : }
251 17850223 : st->sp_aud_decision1 = 0;
252 17850223 : st->sp_aud_decision2 = 0;
253 17850223 : st->coder_type = GENERIC;
254 17850223 : if ( st->hGSCEnc != NULL )
255 : {
256 5081477 : st->hGSCEnc->noise_lev = NOISE_LEVEL_SP0;
257 : }
258 17850223 : *attack_flag = 0;
259 :
260 17850223 : if ( st->Opt_SC_VBR )
261 : {
262 0 : st->hSC_VBR->bump_up = 0;
263 0 : st->hSC_VBR->ppp_mode = 0;
264 0 : st->hSC_VBR->nelp_mode = 0;
265 0 : st->hSC_VBR->avoid_HQ_VBR_NB = 0;
266 : }
267 :
268 17850223 : L_look = L_LOOK_12k8; /* lookahead at 12.8kHz */
269 :
270 17850223 : new_inp_12k8 = old_inp_12k8 + L_INP_MEM; /* pointer to new samples of the input signal in 12.8kHz core */
271 17850223 : inp_12k8 = new_inp_12k8 - L_look; /* pointer to the current frame of input signal in 12.8kHz core */
272 :
273 17850223 : if ( element_mode != IVAS_CPE_DFT )
274 : {
275 16937434 : new_inp_12k8 -= L_FILT;
276 : }
277 :
278 17850223 : if ( element_mode == IVAS_CPE_DFT )
279 : {
280 912789 : mvr2r( st->old_inp_12k8, old_inp_12k8, L_INP_MEM - STEREO_DFT_OVL_12k8 );
281 : }
282 16937434 : else if ( element_mode == IVAS_CPE_TD )
283 : {
284 62354 : mvr2r( st->old_inp_12k8, old_inp_12k8, L_INP_MEM - lMemRecalc_12k8 - L_FILT );
285 : }
286 : else
287 : {
288 16875080 : mvr2r( st->old_inp_12k8, old_inp_12k8, L_INP_MEM - L_FILT );
289 : }
290 :
291 17850223 : mvr2r( st->old_wsp, old_wsp, L_WSP_MEM );
292 17850223 : wsp = old_wsp + L_WSP_MEM; /* pointer to the current frame of weighted signal in 12.8kHz core */
293 :
294 17850223 : st->rf_mode = st->Opt_RF_ON;
295 :
296 17850223 : last_core_orig = st->last_core;
297 :
298 : /*--------------------------------------------------------------*
299 : * energy analysis
300 : *---------------------------------------------------------------*/
301 :
302 17850223 : if ( element_mode == IVAS_SCE || ( element_mode == IVAS_CPE_MDCT && st->Opt_DTX_ON ) )
303 : {
304 4851490 : analysisCldfbEncoder( st, signal_in, input_frame, realBuffer, imagBuffer, enerBuffer );
305 : }
306 12998733 : else if ( ( element_mode == IVAS_CPE_TD && st->idchan == 0 ) || ( st->idchan == 1 && st->tdm_LRTD_flag ) )
307 : {
308 : /* cldfb analysis only for pri. channel */
309 60053 : analysisCldfbEncoder( st, signal_in - NS2SA( input_Fs, L_MEM_RECALC_TBE_NS ), input_frame, realBuffer, imagBuffer, enerBuffer );
310 : }
311 12938680 : else if ( element_mode == IVAS_CPE_DFT )
312 : {
313 912789 : calculate_energy_buffer( hCPE, enerBuffer, st->cldfbAnaEnc->no_channels, input_Fs );
314 : }
315 : else
316 : {
317 12025891 : set_f( enerBuffer, 0, CLDFB_NO_CHANNELS_MAX );
318 : }
319 :
320 : /*----------------------------------------------------------------*
321 : * Change the sampling frequency to 12.8 kHz
322 : * (if not available from downsampled DMX)
323 : *----------------------------------------------------------------*/
324 :
325 17850223 : if ( element_mode == IVAS_SCE )
326 : {
327 4106334 : modify_Fs( signal_in, input_frame, input_Fs, new_inp_12k8, INT_FS_12k8, st->mem_decim, ( st->max_bwidth == NB ) );
328 :
329 4106334 : mvr2r( st->mem_decim, mem_decim_dummy, 2 * L_FILT_MAX );
330 4106334 : set_f( temp1F_icatdmResampBuf, 0, L_FILT_MAX );
331 4106334 : modify_Fs( temp1F_icatdmResampBuf, NS2SA( st->input_Fs, DELAY_FIR_RESAMPL_NS ), input_Fs, new_inp_12k8 + L_FRAME, INT_FS_12k8, mem_decim_dummy, 0 );
332 : }
333 13743889 : else if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
334 : {
335 : /* reconstruct past segment of the Secondary channel input signal when switching from DFT stereo */
336 12831100 : if ( hCPE->last_element_mode == IVAS_CPE_DFT && st->idchan == 1 )
337 : {
338 21992 : int16_t length_inp = NS2SA( input_Fs, L_MEM_RECALC_SCH_NS );
339 21992 : int16_t length_12k8 = NS2SA( INT_FS_12k8, L_MEM_RECALC_SCH_NS );
340 :
341 21992 : modify_Fs( signal_in - lMemRecalc - length_inp, length_inp, input_Fs, new_inp_12k8 - lMemRecalc_12k8 - length_12k8, INT_FS_12k8, st->mem_decim, 0 );
342 : }
343 :
344 12831100 : modify_Fs( signal_in - lMemRecalc, input_frame, input_Fs, new_inp_12k8 - lMemRecalc_12k8, INT_FS_12k8, st->mem_decim, ( st->max_bwidth == NB ) );
345 12831100 : mvr2r( st->mem_decim, mem_decim_dummy, 2 * L_FILT_MAX );
346 :
347 12831100 : if ( lMemRecalc > 0 )
348 : {
349 12831100 : modify_Fs( signal_in + input_frame - lMemRecalc, lMemRecalc, input_Fs, new_inp_12k8 + L_FRAME - lMemRecalc_12k8, INT_FS_12k8, mem_decim_dummy, ( st->max_bwidth == NB ) );
350 : }
351 12831100 : set_f( temp1F_icatdmResampBuf, 0, L_FILT_MAX );
352 12831100 : modify_Fs( temp1F_icatdmResampBuf, NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), input_Fs, new_inp_12k8 + L_FRAME, INT_FS_12k8, mem_decim_dummy, 0 );
353 : }
354 : else /* DFT stereo */
355 : {
356 : /* update the FIR resampling filter memory, needed for switching to time-domain (FIR) resampling */
357 912789 : mvr2r( signal_in + input_frame - NS2SA( input_Fs, L_MEM_RECALC_NS ) - 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), st->mem_decim, 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ) );
358 : }
359 :
360 : /* save input resampled at 12.8kHz, non-preemphasised */
361 17850223 : if ( element_mode == IVAS_CPE_DFT )
362 : {
363 912789 : mvr2r( new_inp_12k8 - STEREO_DFT_OVL_12k8, st->buf_speech_enc + L_FRAME32k - STEREO_DFT_OVL_12k8, L_FRAME + STEREO_DFT_OVL_12k8 );
364 : }
365 16937434 : else if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
366 : {
367 12831100 : mvr2r( new_inp_12k8 - lMemRecalc_12k8, st->buf_speech_enc + L_FRAME32k - lMemRecalc_12k8 - L_FILT, L_FRAME + lMemRecalc_12k8 + L_FILT );
368 : }
369 : else
370 : {
371 4106334 : mvr2r( new_inp_12k8, st->buf_speech_enc + L_FRAME32k, L_FRAME );
372 : }
373 :
374 : /*------------------------------------------------------------------*
375 : * Perform fixed preemphasis (12.8 kHz signal) through 1 - g*z^-1
376 : *-----------------------------------------------------------------*/
377 :
378 17850223 : if ( element_mode == IVAS_CPE_DFT )
379 : {
380 912789 : mvr2r( new_inp_12k8 - STEREO_DFT_OVL_12k8 + L_FRAME, st->inp_12k8_mem_stereo_sw, STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT ); /* memory for TD/DFT stereo switching */
381 :
382 912789 : st->mem_preemph = st->mem_preemph_DFT;
383 912789 : st->mem_preemph_DFT = old_inp_12k8[L_INP_MEM - STEREO_DFT_OVL_12k8 + L_FRAME - 1];
384 :
385 912789 : preemph( new_inp_12k8 - STEREO_DFT_OVL_12k8, PREEMPH_FAC, L_FRAME, &st->mem_preemph );
386 912789 : dummy = st->mem_preemph;
387 912789 : preemph( new_inp_12k8 - STEREO_DFT_OVL_12k8 + L_FRAME, PREEMPH_FAC, STEREO_DFT_OVL_12k8, &dummy );
388 : }
389 16937434 : else if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
390 : {
391 12831100 : if ( st->idchan == 0 )
392 : {
393 6415550 : if ( hCPE->last_element_mode == IVAS_CPE_DFT )
394 : {
395 21992 : st->mem_preemph = st->mem_preemph_DFT;
396 21992 : mvr2r( st->inp_12k8_mem_stereo_sw, new_inp_12k8 - L_MEM_RECALC_12K8 - ( STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT ), STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT );
397 21992 : preemph( new_inp_12k8 - L_MEM_RECALC_12K8 - ( STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT ), PREEMPH_FAC, STEREO_DFT_OVL_12k8 - L_MEM_RECALC_12K8 - L_FILT, &st->mem_preemph );
398 : }
399 :
400 6415550 : st->mem_preemph_DFT = old_inp_12k8[L_INP_MEM - STEREO_DFT_OVL_12k8 + L_FRAME - 1]; /* == inp_12k8[L_FRAME-1] */
401 : }
402 :
403 : /* preemphasise past segment of the Secondary channel input signal when switching from DFT stereo */
404 12831100 : if ( hCPE->last_element_mode == IVAS_CPE_DFT && st->idchan == 1 )
405 : {
406 21992 : int16_t length_12k8 = NS2SA( INT_FS_12k8, L_MEM_RECALC_SCH_NS );
407 21992 : preemph( new_inp_12k8 - lMemRecalc_12k8 - length_12k8, PREEMPH_FAC, length_12k8, &st->mem_preemph );
408 : }
409 :
410 12831100 : preemph( new_inp_12k8 - lMemRecalc_12k8, PREEMPH_FAC, L_FRAME, &st->mem_preemph );
411 12831100 : dummy = st->mem_preemph;
412 12831100 : preemph( new_inp_12k8 - lMemRecalc_12k8 + L_FRAME, PREEMPH_FAC, lMemRecalc_12k8 + L_FILT, &dummy );
413 : }
414 : else /* IVAS_SCE or IVAS_CPE_MDCT */
415 : {
416 4106334 : preemph( new_inp_12k8, PREEMPH_FAC, L_FRAME, &st->mem_preemph );
417 4106334 : dummy = st->mem_preemph;
418 4106334 : preemph( new_inp_12k8 + L_FRAME, PREEMPH_FAC, L_FILT, &dummy );
419 : }
420 :
421 : /*-------------------------------------------------------------------------*
422 : * Spectral analysis
423 : *--------------------------------------------------------------------------*/
424 :
425 17850223 : analy_sp( element_mode, hCPE, input_Fs, inp_12k8, st->Bin_E, st->Bin_E_old, fr_bands, lf_E, &Etot, st->min_band, st->max_band, band_energies, PS, fft_buff );
426 :
427 17850223 : if ( hStereoClassif != NULL )
428 : {
429 13743889 : if ( st->lp_speech - Etot > 25 )
430 : {
431 2260928 : hStereoClassif->silence_flag = 2;
432 : }
433 : else
434 : {
435 11482961 : hStereoClassif->silence_flag = hStereoClassif->silence_flag - 1;
436 : }
437 13743889 : hStereoClassif->silence_flag = max( 0, hStereoClassif->silence_flag );
438 : }
439 :
440 : /*----------------------------------------------------------------*
441 : * SAD (1-signal, 0-noise)
442 : *----------------------------------------------------------------*/
443 :
444 17850223 : noise_est_pre( Etot, st->ini_frame, st->hNoiseEst, st->idchan, element_mode, hCPE != NULL ? hCPE->last_element_mode : element_mode );
445 :
446 17850223 : if ( element_mode == IVAS_CPE_TD && ( ( abs( hCPE->hStereoTD->tdm_last_ratio_idx - tdm_ratio_idx ) > 5 && st->idchan == 1 ) || abs( hCPE->hStereoTD->tdm_last_inst_ratio_idx - hCPE->hStereoTD->tdm_inst_ratio_idx ) > 10 ) )
447 : {
448 2249 : st->ini_frame = 1;
449 : }
450 :
451 17850223 : st->vad_flag = wb_vad( st, fr_bands, &i, &i, &i, &snr_sum_he, &localVAD_HE_SAD, &( st->flag_noisy_speech_snr ), NULL, NULL, -1000.0f, -1000.0f );
452 :
453 :
454 17850223 : if ( force_front_vad == 1 || front_vad_flag == 1 )
455 : {
456 : /* overwrite VAD decision with front-VAD decision if external VAD is set to 1*/
457 796565 : st->vad_flag = front_vad_flag;
458 796565 : st->localVAD = front_vad_flag;
459 : }
460 17850223 : if ( ( hCPE != NULL && !( lr_vad_enabled && st->idchan == 0 ) ) || hSCE != NULL )
461 : {
462 17499507 : *vad_flag_dtx = dtx_hangover_addition( st, st->vad_flag, st->lp_speech - st->lp_noise, 0, vad_hover_flag, NULL, NULL, NULL );
463 : }
464 : else
465 : {
466 : /* This only applies to st->idchan==0 now */
467 : /* Add down mix stereo activity to LR vad_flag_dtx */
468 350716 : *vad_flag_dtx = *vad_flag_dtx || st->vad_flag;
469 :
470 :
471 : /* Determine hangover flag status based on LR localVAD and downmix localVAD */
472 350716 : *vad_hover_flag = *vad_flag_dtx && !( LR_localVAD || st->localVAD );
473 : }
474 :
475 17850223 : if ( force_front_vad == 1 || front_vad_dtx_flag == 1 )
476 : {
477 : /* overwrite VAD decision with front-VAD decision if external VAD is set to 1*/
478 369655 : *vad_flag_dtx = front_vad_dtx_flag;
479 : }
480 :
481 : /*----------------------------------------------------------------*
482 : * NB/WB/SWB/FB bandwidth detector
483 : *----------------------------------------------------------------*/
484 :
485 17850223 : if ( st->idchan == 0 && element_mode != IVAS_CPE_MDCT )
486 : {
487 5050300 : bw_detect( st, st->input, NULL, enerBuffer, ivas_format, 0 );
488 : }
489 :
490 17850223 : if ( element_mode != IVAS_CPE_MDCT ) /* in MDCT stereo, set_bw_stereo() is used instead */
491 : {
492 5081477 : set_bw( element_mode, element_brate, st, MODE1 );
493 : }
494 :
495 : /* set the BW of the TD secondary channel in LRTD mode same as BW of the primary channel (only at higher bitrates) */
496 17850223 : if ( st->idchan == 1 && element_mode == IVAS_CPE_TD && st->tdm_LRTD_flag == 1 && st->bits_frame_channel >= IVAS_16k4 / FRAMES_PER_SEC )
497 : {
498 28873 : st->bwidth = hCPE->hCoreCoder[0]->bwidth;
499 : }
500 :
501 : /*----------------------------------------------------------------*
502 : * Noise energy down-ward update and total noise energy estimation
503 : * Long-term energies and relative frame energy updates
504 : * Correlation correction as a function of total noise level
505 : *----------------------------------------------------------------*/
506 :
507 17850223 : noise_est_down( fr_bands, st->hNoiseEst->bckr, tmpN, tmpE, st->min_band, st->max_band, &st->hNoiseEst->totalNoise, Etot, &st->hNoiseEst->Etot_last, &st->hNoiseEst->Etot_v_h2 );
508 :
509 17850223 : if ( lr_vad_enabled && st->idchan == 0 )
510 : {
511 350716 : noise_est_down( fr_bands_LR[0], hCPE->hFrontVad[0]->hNoiseEst->bckr, tmpN_LR[0], tmpE_LR[0], st->min_band, st->max_band, &hCPE->hFrontVad[0]->hNoiseEst->totalNoise, Etot_LR[0], &hCPE->hFrontVad[0]->hNoiseEst->Etot_last, &hCPE->hFrontVad[0]->hNoiseEst->Etot_v_h2 );
512 350716 : noise_est_down( fr_bands_LR[1], hCPE->hFrontVad[1]->hNoiseEst->bckr, tmpN_LR[1], tmpE_LR[1], st->min_band, st->max_band, &hCPE->hFrontVad[1]->hNoiseEst->totalNoise, Etot_LR[1], &hCPE->hFrontVad[1]->hNoiseEst->Etot_last, &hCPE->hFrontVad[1]->hNoiseEst->Etot_v_h2 );
513 350716 : corr_shiftL = correlation_shift( hCPE->hFrontVad[0]->hNoiseEst->totalNoise );
514 350716 : corr_shiftR = correlation_shift( hCPE->hFrontVad[1]->hNoiseEst->totalNoise );
515 : }
516 :
517 17850223 : *relE = Etot - st->lp_speech;
518 :
519 17850223 : corr_shift = correlation_shift( st->hNoiseEst->totalNoise );
520 :
521 : /*----------------------------------------------------------------*
522 : * FD-CNG Noise Estimator
523 : *----------------------------------------------------------------*/
524 :
525 17850223 : if ( st->hFdCngEnc != NULL )
526 : {
527 1931259 : resetFdCngEnc( st );
528 :
529 1931259 : if ( st->idchan == 0 || element_mode == IVAS_CPE_MDCT )
530 : {
531 1931079 : if ( element_mode == IVAS_CPE_TD && lr_vad_enabled && band_energies_LR != NULL )
532 : {
533 9010 : perform_noise_estimation_enc( band_energies_LR, enerBuffer, st->hFdCngEnc, input_Fs, hCPE );
534 : }
535 : else
536 : {
537 1922069 : perform_noise_estimation_enc( band_energies, enerBuffer, st->hFdCngEnc, input_Fs, hCPE );
538 : }
539 : }
540 : }
541 :
542 : /*-----------------------------------------------------------------*
543 : * Select SID or FRAME_NO_DATA frame if DTX enabled
544 : *-----------------------------------------------------------------*/
545 :
546 17850223 : if ( hCPE != NULL && element_mode != IVAS_CPE_DFT && element_mode != IVAS_CPE_MDCT )
547 : {
548 62354 : *vad_flag_dtx = 1;
549 : }
550 :
551 17850223 : if ( st->Opt_DTX_ON == 1 && *vad_flag_dtx == 0 && element_mode == IVAS_CPE_DFT && element_brate <= ACELP_16k40 && hCPE->hStereoDft->hConfig->force_mono_transmission == 1 ) /* force LP_CNG usage for MASA DTX when mono tranmission */
552 : {
553 1256 : st->cng_type = LP_CNG;
554 : }
555 :
556 17850223 : dtx( st, last_ivas_total_brate, ivas_total_brate, *vad_flag_dtx, inp_12k8 );
557 :
558 17850223 : if ( hCPE != NULL && hCPE->hStereoDft != NULL && st->core_brate == SID_2k40 )
559 : {
560 : /* Add another period of expected xcorr updates */
561 6410 : hCPE->hStereoDft->expectedNumUpdates += st->hDtxEnc->max_SID;
562 : }
563 :
564 : /*----------------------------------------------------------------*
565 : * Adjust FD-CNG Noise Estimator
566 : *----------------------------------------------------------------*/
567 :
568 17850223 : if ( st->hFdCngEnc != NULL && ( st->ini_frame == 0 || last_element_brate != element_brate || st->last_bwidth != st->bwidth ) )
569 : {
570 : int32_t total_brate;
571 :
572 106551 : total_brate = ( element_mode == IVAS_SCE ) ? st->total_brate : st->bits_frame_nominal * FRAMES_PER_SEC;
573 106551 : configureFdCngEnc( st->hFdCngEnc, max( st->input_bwidth, WB ), total_brate );
574 106551 : if ( hCPE != NULL )
575 : {
576 11916 : st->hFdCngEnc->hFdCngCom->CngBitrate = hCPE->element_brate - 1;
577 : }
578 : }
579 :
580 17850223 : if ( st->hFdCngEnc != NULL && st->Opt_DTX_ON )
581 : {
582 1931259 : AdjustFirstSID( st );
583 : }
584 :
585 : /*----------------------------------------------------------------*
586 : * LP analysis
587 : *----------------------------------------------------------------*/
588 :
589 17850223 : alw_pitch_lag_12k8[0] = st->old_pitch_la;
590 17850223 : alw_pitch_lag_12k8[1] = st->old_pitch_la;
591 17850223 : alw_voicing[0] = st->voicing[2];
592 17850223 : alw_voicing[1] = st->voicing[2];
593 :
594 17850223 : i = 0;
595 17850223 : if ( element_mode == IVAS_CPE_TD && st->idchan == 1 && hCPE->hStereoTD->tdm_low_rate_mode == 1 )
596 : {
597 0 : i = 1;
598 : }
599 :
600 17850223 : analy_lp( inp_12k8, L_FRAME, L_look, ener, A, epsP, lsp_new, lsp_mid, st->lsp_old1, alw_pitch_lag_12k8, alw_voicing, INT_FS_12k8, i );
601 :
602 17850223 : lsp2lsf( lsp_new, lsf_new, M, INT_FS_12k8 );
603 17850223 : stab_fac = lsf_stab( lsf_new, st->lsf_old1, 0, L_FRAME );
604 17850223 : mvr2r( lsf_new, st->lsf_old1, M );
605 :
606 17850223 : if ( element_mode == IVAS_CPE_TD && st->idchan == 1 )
607 : {
608 : /*----------------------------------------------------------------*
609 : * Comparison of the LP coefficients to determine if it is possible
610 : * to reuse the primary channel LP coefficients in the secondary channel
611 : *----------------------------------------------------------------*/
612 :
613 31177 : hCPE->hStereoTD->tdm_lp_reuse_flag = tdm_lp_comparison( hCPE->hStereoTD, hCPE->hStereoClassif, st, inp_12k8, tdm_A_PCh, A, M, tdm_lsp_new_PCh, lsp_new, L_FRAME, element_brate - nb_bits_metadata * FRAMES_PER_SEC );
614 : }
615 :
616 : /*----------------------------------------------------------------*
617 : * Compute weighted input (for OL pitch analysis)
618 : * OL pitch analysis
619 : * stable high pitch detection
620 : * 1/4 pitch precision improvement
621 : *----------------------------------------------------------------*/
622 :
623 17850223 : find_wsp( L_FRAME, L_SUBFR, NB_SUBFR, A, Aw, inp_12k8, TILT_FAC, wsp, &st->mem_wsp, GAMMA1, L_look );
624 :
625 17850223 : if ( st->vad_flag == 0 )
626 : {
627 : /* reset the OL pitch tracker memories during inactive frames */
628 3163819 : pitch_ol_init( &st->old_thres, &st->old_pitch, &st->delta_pit, &st->old_corr );
629 : }
630 :
631 17850223 : old_pitch1 = st->pitch[1];
632 :
633 17850223 : pitch_ol( st->pitch, st->voicing, &st->old_pitch, &st->old_corr, corr_shift, &st->old_thres, &st->delta_pit, st->old_wsp2, wsp, st->mem_decim2, *relE, L_look, st->clas, st->input_bwidth, st->Opt_SC_VBR );
634 :
635 : /* Updates for adaptive lag window memory */
636 17850223 : st->old_pitch_la = st->pitch[2];
637 :
638 : /* Detection of very short stable pitch period */
639 17850223 : StableHighPitchDetect( &flag_spitch, st->pitch, st->voicing, st->Bin_E, wsp, st->localVAD, &st->voicing_sm, &st->voicing0_sm, &st->LF_EnergyRatio_sm, &st->predecision_flag, &st->diff_sm, &st->energy_sm );
640 :
641 : /* 1/4 pitch precision improvement */
642 17850223 : if ( element_brate <= IVAS_32k )
643 : {
644 3894909 : pitch_ol2( PIT_MIN_EXTEND, st->pitch[0], &pitch_fr[0], &voicing_fr[0], 0, wsp, 7 );
645 3894909 : pitch_ol2( PIT_MIN_EXTEND, st->pitch[0], &pitch_fr[1], &voicing_fr[1], L_SUBFR, wsp, 7 );
646 3894909 : pitch_ol2( PIT_MIN_EXTEND, st->pitch[1], &pitch_fr[2], &voicing_fr[2], 2 * L_SUBFR, wsp, 7 );
647 3894909 : pitch_ol2( PIT_MIN_EXTEND, st->pitch[1], &pitch_fr[3], &voicing_fr[3], 3 * L_SUBFR, wsp, 7 );
648 : }
649 : else
650 : {
651 13955314 : pitch_fr[0] = st->pitch[0];
652 13955314 : pitch_fr[1] = st->pitch[0];
653 13955314 : pitch_fr[2] = st->pitch[1];
654 13955314 : pitch_fr[3] = st->pitch[1];
655 :
656 13955314 : voicing_fr[0] = st->voicing[0];
657 13955314 : voicing_fr[1] = st->voicing[0];
658 13955314 : voicing_fr[2] = st->voicing[1];
659 13955314 : voicing_fr[3] = st->voicing[1];
660 : }
661 :
662 : /*------------------------------------------------------------------*
663 : * Update estimated noise energy and voicing cut-off frequency
664 : *-----------------------------------------------------------------*/
665 :
666 17850223 : noise_est( st, old_pitch1, tmpN, epsP, Etot, *relE, corr_shift, tmpE, fr_bands, cor_map_sum, &ncharX, &sp_div,
667 17850223 : &non_staX, loc_harm, lf_E, &st->hNoiseEst->harm_cor_cnt, st->hNoiseEst->Etot_l_lp, &dummy /*sp_floor*/, S_map, hStereoClassif, NULL, st->ini_frame );
668 :
669 17850223 : if ( lr_vad_enabled && st->idchan == 0 )
670 : {
671 : /* Run noise_est for Left and Right channel */
672 350716 : *loc_harmLR = *loc_harm;
673 350716 : noise_est( st, old_pitch1, tmpN_LR[0], epsP, Etot_LR[0], Etot_LR[0] - hCPE->hFrontVad[0]->lp_speech, corr_shiftL, tmpE_LR[0], fr_bands_LR[0], &cor_map_sum_LR[0], &ncharX_LR, &sp_div_LR,
674 350716 : &non_staX_LR, loc_harmLR, lf_E_LR[0], &hCPE->hFrontVad[0]->hNoiseEst->harm_cor_cnt, hCPE->hFrontVad[0]->hNoiseEst->Etot_l_lp, &dummy, S_map_LR, NULL, hCPE->hFrontVad[0], hCPE->hFrontVad[0]->ini_frame );
675 :
676 : /* Note: the index [0] in the last argument is intended, the ini_frame counter is only maintained in the zero-th channel's VAD handle */
677 350716 : noise_est( st, old_pitch1, tmpN_LR[1], epsP, Etot_LR[1], Etot_LR[1] - hCPE->hFrontVad[1]->lp_speech, corr_shiftR, tmpE_LR[1], fr_bands_LR[1], &cor_map_sum_LR[1], &ncharX_LR, &sp_div_LR,
678 350716 : &non_staX_LR, loc_harmLR, lf_E_LR[1], &hCPE->hFrontVad[1]->hNoiseEst->harm_cor_cnt, hCPE->hFrontVad[1]->hNoiseEst->Etot_l_lp, &dummy, S_map_LR, NULL, hCPE->hFrontVad[1], hCPE->hFrontVad[0]->ini_frame );
679 : }
680 :
681 : /*------------------------------------------------------------------*
682 : * Update parameters used in the VAD and DTX
683 : *-----------------------------------------------------------------*/
684 :
685 17850223 : vad_param_updt( st, corr_shift, corr_shift, A, old_pitch1, NULL, 1 );
686 :
687 17850223 : if ( lr_vad_enabled && st->idchan == 0 )
688 : {
689 350716 : vad_param_updt( st, corr_shiftL, corr_shiftR, A, old_pitch1, &hCPE->hFrontVad[0], CPE_CHANNELS );
690 : }
691 :
692 : /*-----------------------------------------------------------------*
693 : * Find spectral tilt
694 : * UC and VC frame selection
695 : *-----------------------------------------------------------------*/
696 :
697 17850223 : find_tilt( fr_bands, st->hNoiseEst->bckr, ee, st->pitch, st->voicing, lf_E, corr_shift, st->input_bwidth, st->max_band, hp_E, MODE1, &( st->bckr_tilt_lt ), st->Opt_SC_VBR );
698 :
699 17850223 : st->coder_type = find_uv( st, pitch_fr, voicing_fr, inp_12k8, ee, &dE1X, corr_shift, *relE, Etot, hp_E, &flag_spitch, last_core_orig, hStereoClassif );
700 :
701 : /*-----------------------------------------------------------------*
702 : * channel aware mode configuration *
703 : *-----------------------------------------------------------------*/
704 :
705 17850223 : st->rf_mode = 0;
706 17850223 : st->rf_target_bits_write = 0;
707 :
708 : /*-----------------------------------------------------------------*
709 : * Signal classification for FEC
710 : * TC frame selection
711 : *-----------------------------------------------------------------*/
712 :
713 17850223 : st->clas = signal_clas( st, inp_12k8, ee, *relE, L_look, tdm_SM_last_clas );
714 :
715 17850223 : select_TC( MODE1, st->tc_cnt, &st->coder_type, st->localVAD );
716 :
717 17850223 : if ( st->Opt_SC_VBR )
718 : {
719 0 : st->hSC_VBR->Local_VAD = st->localVAD;
720 : }
721 :
722 : /*-----------------------------------------------------------------*
723 : * Collect stereo classifier features
724 : *-----------------------------------------------------------------*/
725 :
726 17850223 : if ( hStereoClassif != NULL )
727 : {
728 13743889 : stereo_classifier_features( hStereoClassif, st->idchan, element_mode, localVAD_HE_SAD, lsf_new, epsP, st->pitch, st->voicing, *cor_map_sum, non_staX, sp_div, st->clas );
729 : }
730 :
731 : /*----------------------------------------------------------------*
732 : * 1st stage speech/music classification (GMM model)
733 : *----------------------------------------------------------------*/
734 :
735 17850223 : smc_dec = ivas_smc_gmm( st, hStereoClassif, localVAD_HE_SAD, Etot, lsp_new, *cor_map_sum, epsP, PS, non_staX, *relE, &high_lpn_flag, flag_spitch );
736 :
737 : #ifdef DEBUGGING
738 : if ( st->idchan == 0 )
739 : {
740 : if ( st->force == FORCE_SPEECH )
741 : {
742 : /* enforce speech */
743 : st->sp_aud_decision0 = 0;
744 : }
745 : else if ( st->force == FORCE_MUSIC )
746 : {
747 : /* enforce music */
748 : st->sp_aud_decision0 = 1;
749 : }
750 : }
751 : #endif
752 :
753 : /*----------------------------------------------------------------*
754 : * VAD energy updates
755 : * Update of old per-band energy spectrum
756 : *----------------------------------------------------------------*/
757 :
758 17850223 : long_enr( st, Etot, localVAD_HE_SAD, high_lpn_flag, NULL, 1, NULL, NULL );
759 :
760 17850223 : mvr2r( fr_bands + NB_BANDS, st->hNoiseEst->enrO, NB_BANDS );
761 :
762 17850223 : if ( lr_vad_enabled && st->idchan == 0 )
763 : {
764 350716 : long_enr( st, -1, localVAD_HE_SAD, high_lpn_flag, hCPE->hFrontVad, CPE_CHANNELS, localVAD_HE_SAD_LR, Etot_LR );
765 :
766 350716 : mvr2r( fr_bands_LR[0] + NB_BANDS, hCPE->hFrontVad[0]->hNoiseEst->enrO, NB_BANDS );
767 350716 : mvr2r( fr_bands_LR[1] + NB_BANDS, hCPE->hFrontVad[1]->hNoiseEst->enrO, NB_BANDS );
768 : }
769 :
770 : /*----------------------------------------------------------------*
771 : * SNR-based speech/music classification
772 : * AC frame selection
773 : *----------------------------------------------------------------*/
774 :
775 17850223 : st->GSC_IVAS_mode = 0;
776 17850223 : if ( st->idchan == 1 && element_mode == IVAS_CPE_TD )
777 : {
778 : /* No speech/music classification in the secondary channel of TD stereo */
779 31177 : st->sp_aud_decision1 = 0;
780 31177 : st->sp_aud_decision2 = 0;
781 :
782 31177 : st->GSC_noisy_speech = 0;
783 31177 : if ( st->hGSCEnc != NULL )
784 : {
785 31177 : st->hGSCEnc->noise_lev = NOISE_LEVEL_SP3;
786 : }
787 : }
788 17819046 : else if ( element_mode != IVAS_CPE_MDCT )
789 : {
790 : /* SNR-based speech/music classification */
791 5050300 : if ( ( element_mode >= IVAS_CPE_DFT && element_brate >= IVAS_24k4 ) || ( element_mode == IVAS_SCE && element_brate >= SCE_SMC_THR ) )
792 : {
793 4116678 : if ( ivas_format == SBA_FORMAT && st->core_brate != FRAME_NO_DATA && st->last_core_brate == FRAME_NO_DATA && st->sba_br_sw_while_no_data )
794 : {
795 7 : SetModeIndex( st, st->bits_frame_nominal * FRAMES_PER_SEC, element_mode, MCT_flag );
796 7 : st->sba_br_sw_while_no_data = 0;
797 : }
798 4116671 : else if ( ivas_format == SBA_FORMAT && st->core_brate == FRAME_NO_DATA && element_brate != last_element_brate )
799 : {
800 7 : st->sba_br_sw_while_no_data = 1;
801 : }
802 :
803 4116678 : if ( flag_16k_smc )
804 : {
805 : /* Compute core-coder buffers at internal sampling rate */
806 623173 : ivas_compute_core_buffers( st, NULL, old_inp_16k, NULL, input_frame, IVAS_SCE /*last_element_mode*/, INT_FS_16k /*sr_core_tmp*/, ener, A, Aw, epsP, lsp_new, lsp_mid );
807 :
808 623173 : smc_dec = ivas_acelp_tcx20_switching( st, st->speech_enc, st->wspeech_enc, non_staX, pitch_fr, voicing_fr, currFlatness, lsp_mid, stab_fac, res_cod_SNR_M, flag_16k_smc );
809 : }
810 : else
811 : {
812 3493505 : smc_dec = ivas_acelp_tcx20_switching( st, inp_12k8, wsp, non_staX, pitch_fr, voicing_fr, currFlatness, lsp_mid, stab_fac, res_cod_SNR_M, flag_16k_smc );
813 : }
814 : }
815 : /* Switch to ACELP for non-harmonic transient signals */
816 933622 : else if ( ( ( element_mode >= IVAS_CPE_DFT && element_brate <= IVAS_16k4 ) || ( element_mode == IVAS_SCE && element_brate < SCE_SMC_THR ) ) && ( loc_harm[0] != 1 ) && smc_dec == MUSIC )
817 : {
818 90756 : if ( element_mode == IVAS_SCE )
819 : {
820 55651 : if ( transient_analysis( st->hTranDet, st->hNoiseEst->cor_map, st->hNoiseEst->multi_harm_limit ) )
821 : {
822 3839 : smc_dec = SPEECH;
823 : }
824 : }
825 35105 : else if ( element_mode == IVAS_CPE_DFT )
826 : {
827 105306 : for ( i = 0; i < CPE_CHANNELS; i++ )
828 : {
829 70204 : if ( smc_dec != SPEECH && transient_analysis( hCPE->hCoreCoder[i]->hTranDet, st->hNoiseEst->cor_map, st->hNoiseEst->multi_harm_limit ) )
830 : {
831 1480 : smc_dec = SPEECH; /* overwrite initial music decision, initial SPEECH_MUSIC never changed */
832 : }
833 : }
834 : }
835 : }
836 :
837 : /* 2nd stage speech/music classification (ACELP/GSC/TCX core selection) */
838 5050300 : ivas_smc_mode_selection( st, element_brate, smc_dec, *relE, Etot, attack_flag, inp_12k8, S_map, flag_spitch );
839 : }
840 :
841 : /*----------------------------------------------------------------*
842 : * Final VAD correction (when HE-SAD is used instead of the normal VAD,
843 : * rewrite the VAD flag by VAD flag with DTX hangover for further processing)
844 : *----------------------------------------------------------------*/
845 :
846 17850223 : if ( st->Opt_DTX_ON && element_mode != IVAS_CPE_DFT )
847 : {
848 1598383 : st->vad_flag = *vad_flag_dtx;
849 : }
850 :
851 : /*-----------------------------------------------------------------*
852 : * Update old input signal buffer
853 : *-----------------------------------------------------------------*/
854 :
855 17850223 : mvr2r( &old_inp_12k8[L_FRAME], st->old_inp_12k8, L_INP_MEM );
856 :
857 17850223 : pop_wmops();
858 17850223 : return;
859 : }
860 :
861 :
862 : /*-------------------------------------------------------------------*
863 : * calculate_energy_buffer()
864 : *
865 : * calculate DFT-based energies
866 : *--------------------------------------------------------------------*/
867 :
868 912789 : static void calculate_energy_buffer(
869 : CPE_ENC_HANDLE hCPE, /* i : CPE encoder structure */
870 : float enerBuffer_dft[], /* o : energy buffer */
871 : const int16_t no_channels, /* i : no. of used CLDFB channels */
872 : const int32_t input_Fs /* i : input sampling rate */
873 : )
874 : {
875 : int16_t i, j;
876 : float *pDFT_DMX, *p_nrg_DMX;
877 : float nrg_DMX[CLDFB_NO_CHANNELS_MAX];
878 : float band_res_dft, chan_width_f, chan_width_bins;
879 : int16_t start, stop;
880 :
881 912789 : band_res_dft = ( (float) input_Fs ) / hCPE->hStereoDft->NFFT;
882 912789 : chan_width_f = 24000.f / CLDFB_NO_CHANNELS_MAX;
883 912789 : chan_width_bins = chan_width_f / band_res_dft;
884 :
885 912789 : set_f( nrg_DMX, 0, CLDFB_NO_CHANNELS_MAX );
886 :
887 912789 : pDFT_DMX = hCPE->hStereoDft->DFT[0];
888 912789 : start = 1;
889 912789 : p_nrg_DMX = nrg_DMX;
890 :
891 912789 : *p_nrg_DMX += (float) ( pDFT_DMX[0] * pDFT_DMX[0] );
892 43353449 : for ( i = 0; i < no_channels; i++ )
893 : {
894 42440660 : stop = (int16_t) ( ( i + 1 ) * chan_width_bins + 0.5f );
895 720578431 : for ( j = start; j < stop; j++ )
896 : {
897 678137771 : *p_nrg_DMX += (float) ( pDFT_DMX[2 * j] * pDFT_DMX[2 * j] + pDFT_DMX[2 * j + 1] * pDFT_DMX[2 * j + 1] );
898 : }
899 42440660 : start = stop;
900 42440660 : p_nrg_DMX++;
901 : }
902 :
903 43353449 : for ( i = 0; i < no_channels; i++ ) /* Consider only used channels, dependent on Fs */
904 : {
905 42440660 : enerBuffer_dft[i] = nrg_DMX[i] / 3;
906 : }
907 :
908 : /* Set remaining entries of enerBuffer to zero */
909 13239469 : for ( ; i < CLDFB_NO_CHANNELS_MAX; i++ )
910 : {
911 12326680 : enerBuffer_dft[i] = 0.f;
912 : }
913 :
914 912789 : return;
915 : }
|