Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <assert.h>
34 : #include <stdint.h>
35 : #include "options.h"
36 : #include <math.h>
37 : #include "cnst.h"
38 : #include "rom_enc.h"
39 : #include "rom_com.h"
40 : #include "prot.h"
41 : #include "ivas_prot.h"
42 : #include "ivas_cnst.h"
43 : #include "ivas_rom_com.h"
44 : #include "ivas_rom_enc.h"
45 : #ifdef DEBUGGING
46 : #include "debug.h"
47 : #endif
48 : #include "wmc_auto.h"
49 :
50 :
51 : /*-------------------------------------------------------------------------
52 : * Local constants
53 : *------------------------------------------------------------------------*/
54 :
55 : #define LP_GCC_PHAT_UP 0.9f /* LP filter coefficient, going up */
56 : #define LP_GCC_PHAT_DOWN 0.1f /* LP filter coefficient, going down */
57 : #define ITD_CNT_MAX 2 /* Minimum number of consecutive ITD estimates for ITD hangover */
58 : #define ITD_HO_GCC_PHAT_MAX 0.6f /* LP GCC PHAT value which gives zero hangover */
59 : #define ITD_HO_GCC_PHAT_MIN 0.3f /* LP GCC PHAT value which gives ITD_HO_MAX ITD hangover frames */
60 : #define ITD_HO_MAX 6 /* Maximum number of ITD hangover frames */
61 : #define ITD_HO_GCC_PHAT_INCL ( -( ITD_HO_MAX - 1 ) / ( ITD_HO_GCC_PHAT_MAX - ITD_HO_GCC_PHAT_MIN ) ) /* slope of the line that maps ITD_HO_GCC_PHAT_MIN to ITD_HO_MAX and ITD_HO_GCC_PHAT_MAX to 1 */
62 : #define ITD_HO_GCC_PHAT_OFFS ( -ITD_HO_GCC_PHAT_INCL * ITD_HO_GCC_PHAT_MAX + 1 ) /* offset of that line */
63 : #define SFM_PROD_GRP 4 /* maximum grouping of products for calculating SFM in ITD estimation */
64 : #define B_DENOM 0.083333333333333f /* 1/12; scales the summed per-band energy ratios in calc_mean_E_ratio() */
65 : #define L_SAMPLES 20 /* length of one search segment in peak_detect() */
66 : #define SUBDIV ( 2 * STEREO_DFT_ITD_MAX_ANA / L_SAMPLES ) /* number of search segments over the 2 * STEREO_DFT_ITD_MAX_ANA lag range */
67 : #define DENOM 0.05f /* scales the sum of the segment maxima in peak_detect(); presumably 1 / SUBDIV - TODO confirm */
68 :
69 : #define XSPEC_ALPHA ( 1.f / 32 )
70 : #define CORR_FILT 0.8f
71 : #define CORR_RESET_FRAMES_MAX 20 /* the smoothed cross spectrum is reset in hangover once resetFrames exceeds this value */
72 :
73 : #define ITD_VAD_NOISE_INIT_FRAMES 30 /* value at which vad_frm_cnt stops counting in itd_vad_background_update() */
74 : #define ITD_VAD_THRSHOLD 0.001f /* an ms_snr below this value gives vad_flag_itd = 0 */
75 : #define ITD_VAD_MS_SNR_UPDATE_THRESH 15.0f /* after the init phase the noise estimate is only smoothed when ms_snr is below this value */
76 : #define HIGHT_SNR_VOICE_TH 10000.0f
77 : #define MAX_ITD_VAD_HANGOVER 10
78 :
79 : #define XCORR_LB_NUM_BANDS 3
80 : #define XCORR_LB_BAND_WIDTH 8
81 :
82 : #define ITD_MAX_MDCT 80
83 :
84 :
85 : /*-------------------------------------------------------------------------
86 : * set_band_limits()
87 : *
88 : * configure bands as used in DFT Stereo
89 : *------------------------------------------------------------------------*/
90 :
91 140390 : static void set_band_limits(
92 : int16_t *nbands,
93 : int16_t band_limits[STEREO_DFT_BAND_MAX + 1],
94 : int16_t NFFT )
95 : {
96 140390 : band_limits[0] = 1;
97 140390 : *nbands = 0;
98 1756905 : while ( band_limits[( *nbands )++] < NFFT / 2 )
99 : {
100 1616515 : band_limits[*nbands] = (int16_t) round_f( dft_band_limits_erb4[*nbands] * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) );
101 : }
102 140390 : ( *nbands )--;
103 140390 : band_limits[*nbands] = NFFT / 2; /*Nyquist Freq*/
104 :
105 140390 : return;
106 : }
107 :
108 :
109 : /*-------------------------------------------------------------------------
110 : * stereo_dft_hybrid_ITD_flag()
111 : *
112 : * Get the hybrid ITD flag
113 : *------------------------------------------------------------------------*/
114 :
115 745226 : void stereo_dft_hybrid_ITD_flag(
116 : STEREO_DFT_CONFIG_DATA_HANDLE hConfig, /* o : DFT stereo configuration */
117 : const int32_t input_Fs, /* i : CPE element sampling rate */
118 : const int16_t hybrid_itd_max /* i : flag for hybrid ITD for very large ITDs */
119 : )
120 : {
121 745226 : if ( hConfig != NULL )
122 : {
123 745226 : if ( hConfig->res_cod_mode || ( hConfig->ada_wb_res_cod_mode && input_Fs == 16000 ) || ( hybrid_itd_max == 1 ) )
124 : {
125 236675 : hConfig->hybrid_itd_flag = 1;
126 : }
127 : else
128 : {
129 508551 : hConfig->hybrid_itd_flag = 0;
130 : }
131 : }
132 : else
133 : {
134 0 : assert( 0 && "Stereo Dft Config Data Handle is uninitialized" );
135 : }
136 :
137 745226 : return;
138 : }
139 :
140 :
141 : /*-------------------------------------------------------------------------
142 : * stereo_dft_quantize_itd()
143 : *
144 : * Quantize the ITD
145 : *------------------------------------------------------------------------*/
146 :
147 868212 : static void stereo_dft_quantize_itd(
148 : const int16_t in,
149 : float *out,
150 : const int32_t input_Fs,
151 : int16_t *ind )
152 : {
153 : int16_t itd;
154 :
155 868212 : itd = (int16_t) ( sign( in ) * 0.5f + in );
156 :
157 : /*Limit ITD*/
158 868212 : if ( ( ABSVAL( itd ) > STEREO_DFT_ITD_MAX ) || ( ABSVAL( itd ) < STEREO_DFT_ITD_MIN ) )
159 : {
160 664187 : itd = 0;
161 : }
162 : else
163 : {
164 204025 : *ind = ( ( itd < 0 ) << ( STEREO_DFT_ITD_NBITS - 1 ) ) + ABSVAL( itd ) - STEREO_DFT_ITD_MIN;
165 : }
166 :
167 : /*Convert back @ fs*/
168 868212 : *out = (float) ( itd * input_Fs ) / ( (float) ( STEREO_DFT_ITD_FS ) );
169 :
170 868212 : return;
171 : }
172 :
173 :
174 : /*-------------------------------------------------------------------------
175 : * itd_vad_ms_snr_calc()
176 : *
177 : *
178 : *-------------------------------------------------------------------------*/
179 :
180 868212 : static float itd_vad_ms_snr_calc(
181 : float E_band_n[STEREO_DFT_ITD_VAD_BAND_NUM],
182 : float *Spd,
183 : float *E_band )
184 : {
185 : float snr[STEREO_DFT_ITD_VAD_BAND_NUM];
186 : float msnr[STEREO_DFT_ITD_VAD_BAND_NUM];
187 : float ms_snr;
188 : int16_t i, j;
189 :
190 18232452 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
191 : {
192 17364240 : E_band[i] = 0;
193 290851020 : for ( j = itd_vad_band_tbl[i]; j < itd_vad_band_tbl[i + 1]; j++ )
194 : {
195 273486780 : E_band[i] += Spd[j];
196 : }
197 17364240 : E_band[i] = E_band[i] / ( itd_vad_band_tbl[i + 1] - itd_vad_band_tbl[i] );
198 : }
199 :
200 868212 : ms_snr = 0;
201 18232452 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
202 : {
203 17364240 : snr[i] = E_band[i] / E_band_n[i];
204 17364240 : if ( snr[i] < 1 )
205 : {
206 1655980 : snr[i] = 1;
207 : }
208 17364240 : msnr[i] = snr[i] - 1.0f;
209 17364240 : if ( msnr[i] < 6 )
210 : {
211 2995545 : msnr[i] = powf( msnr[i], 2 ) / 6.0f;
212 : }
213 17364240 : ms_snr += msnr[i];
214 : }
215 :
216 868212 : return ( ms_snr );
217 : }
218 :
219 :
220 : /*-------------------------------------------------------------------------
221 : * itd_vad_background_update()
222 : *
223 : *
224 : *-------------------------------------------------------------------------*/
225 :
226 868212 : static void itd_vad_background_update(
227 : float E_band_n[STEREO_DFT_ITD_VAD_BAND_NUM],
228 : int16_t *vad_frm_cnt,
229 : const float ms_snr,
230 : float *E_band )
231 : {
232 : int16_t i;
233 : float energy;
234 :
235 868212 : energy = 0.0f;
236 18232452 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
237 : {
238 17364240 : energy += E_band[i] / (float) STEREO_DFT_ITD_VAD_BAND_NUM;
239 : }
240 :
241 868212 : if ( *vad_frm_cnt < ITD_VAD_NOISE_INIT_FRAMES )
242 : {
243 118428 : ( *vad_frm_cnt )++;
244 : }
245 :
246 868212 : if ( ( *vad_frm_cnt < ITD_VAD_NOISE_INIT_FRAMES ) && energy < 40000000 )
247 : {
248 352506 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
249 : {
250 : /* using the init values as frame (-1) values */
251 335720 : E_band_n[i] = ( E_band_n[i] * ( (float) ( *vad_frm_cnt ) ) + E_band[i] ) / ( (float) ( *vad_frm_cnt + 1 ) );
252 : }
253 : }
254 : else
255 : {
256 851426 : if ( ms_snr < ITD_VAD_MS_SNR_UPDATE_THRESH )
257 : {
258 951762 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
259 : {
260 906440 : E_band_n[i] = 0.96f * E_band_n[i] + 0.04f * E_band[i];
261 906440 : if ( E_band_n[i] < 1.0f )
262 : {
263 54712 : E_band_n[i] = 1.0f;
264 : }
265 : }
266 : }
267 : }
268 :
269 868212 : return;
270 : }
271 :
272 : /*-------------------------------------------------------------------------
273 : * stereo_dft_enc_itd_vad()
274 : *
275 : *
276 : *-------------------------------------------------------------------------*/
277 :
278 868212 : static int16_t stereo_dft_enc_itd_vad(
279 : float E_band_n[STEREO_DFT_ITD_VAD_BAND_NUM],
280 : int16_t *vad_frm_cnt,
281 : float *Spd_L,
282 : float *Spd_R,
283 : float *mssnr )
284 : {
285 : int16_t i;
286 : float E_band[STEREO_DFT_ITD_VAD_BAND_NUM];
287 : int16_t vad_flag_itd;
288 :
289 : float Spd[STEREO_DFT_N_16k_ENC / 2 + 1];
290 :
291 : /* Spd is later only used starting at itd_vad_band_tbl[0], so only compute values starting from there */
292 : /* -> this avoids uninitialized values in Spd_L and Spd_R at index 0 to be used */
293 274354992 : for ( i = itd_vad_band_tbl[0]; i < STEREO_DFT_N_16k_ENC / 2; i++ )
294 : {
295 273486780 : Spd[i] = 0.5f * ( Spd_L[i] + Spd_R[i] );
296 : }
297 :
298 868212 : *mssnr = itd_vad_ms_snr_calc( E_band_n, Spd, E_band );
299 :
300 868212 : itd_vad_background_update( E_band_n, vad_frm_cnt, *mssnr, E_band );
301 :
302 868212 : if ( *mssnr < ITD_VAD_THRSHOLD )
303 : {
304 28714 : vad_flag_itd = 0;
305 : }
306 : else
307 : {
308 839498 : vad_flag_itd = 1;
309 : }
310 :
311 868212 : return ( vad_flag_itd );
312 : }
313 :
314 :
315 : /*-------------------------------------------------------------------------
316 : * calc_mean_E_ratio()
317 : *
318 : * calculates mean energy of main-to-background signal ratio
319 : *-------------------------------------------------------------------------*/
320 :
321 868212 : static float calc_mean_E_ratio(
322 : ITD_DATA_HANDLE hItd,
323 : int16_t nbands,
324 : int16_t band_limits[],
325 : const float sfm,
326 : const float nrg_L[STEREO_DFT_N_32k_ENC / 2],
327 : const float nrg_R[STEREO_DFT_N_32k_ENC / 2],
328 : float *total_mEr )
329 : {
330 : float sum_xcorr[2];
331 : float Sxcorr;
332 : float Er[STEREO_DFT_BAND_MAX], fi[STEREO_DFT_BAND_MAX], a, acorr;
333 : float sum_nrg_L, sum_nrg_R;
334 : int16_t b, i;
335 : float sum_Er;
336 : float total_fi;
337 : float grand_nrg_L, grand_nrg_R, grand_sum_xcorr_real, grand_sum_xcorr_img;
338 :
339 868212 : grand_nrg_L = 0.0f;
340 868212 : grand_nrg_R = 0.0f;
341 868212 : grand_sum_xcorr_real = 0.0f;
342 868212 : grand_sum_xcorr_img = 0.0f;
343 :
344 : /*take bands up to 32kHz bandwidth as ITD is always calculated at 32kHz sampling rate*/
345 868212 : nbands -= ( band_limits[nbands] > STEREO_DFT_N_32k_ENC / 2 );
346 :
347 868212 : sum_Er = 0;
348 9035806 : for ( b = 0; b < nbands; b++ )
349 : {
350 : /*reset buffers*/
351 8167594 : sum_xcorr[0] = 0.f;
352 8167594 : sum_xcorr[1] = 0.f;
353 8167594 : sum_nrg_L = 0.f;
354 8167594 : sum_nrg_R = 0.f;
355 :
356 501084982 : for ( i = band_limits[b]; i < min( band_limits[b + 1], STEREO_DFT_N_32k_ENC / 2 ); i++ )
357 : {
358 492917388 : sum_xcorr[0] += hItd->xcorr_smooth[2 * i];
359 492917388 : sum_xcorr[1] += hItd->xcorr_smooth[2 * i + 1];
360 492917388 : sum_nrg_L += nrg_L[i];
361 492917388 : sum_nrg_R += nrg_R[i];
362 : }
363 :
364 8167594 : Sxcorr = sum_xcorr[0] * sum_xcorr[0] + sum_xcorr[1] * sum_xcorr[1];
365 8167594 : hItd->acorr_L[b] = ( 1.f - sfm ) * hItd->acorr_L[b] + sfm * sum_nrg_L;
366 8167594 : hItd->acorr_R[b] = ( 1.f - sfm ) * hItd->acorr_R[b] + sfm * sum_nrg_R;
367 :
368 8167594 : a = hItd->acorr_L[b] - hItd->acorr_R[b];
369 8167594 : acorr = hItd->acorr_L[b] + hItd->acorr_R[b];
370 8167594 : fi[b] = sqrtf( a * a + 4 * Sxcorr );
371 8167594 : Er[b] = ( acorr + fi[b] ) / ( acorr - fi[b] + EPSILON );
372 8167594 : sum_Er += Er[b];
373 :
374 8167594 : grand_nrg_L += sum_nrg_L;
375 8167594 : grand_nrg_R += sum_nrg_R;
376 8167594 : grand_sum_xcorr_real += sum_xcorr[0];
377 8167594 : grand_sum_xcorr_img += sum_xcorr[1];
378 : }
379 :
380 868212 : Sxcorr = grand_sum_xcorr_real * grand_sum_xcorr_real + grand_sum_xcorr_img * grand_sum_xcorr_img;
381 868212 : a = grand_nrg_L - grand_nrg_R;
382 868212 : acorr = grand_nrg_L + grand_nrg_R;
383 868212 : total_fi = sqrtf( a * a + 4 * Sxcorr );
384 868212 : *total_mEr = ( acorr + total_fi ) / ( acorr - total_fi + EPSILON );
385 :
386 868212 : return ( sum_Er * B_DENOM );
387 : }
388 :
389 :
390 : /*-------------------------------------------------------------------------
391 : * resetEstimates()
392 : *
393 : * resets long term estimates to initial values.
394 : *-------------------------------------------------------------------------*/
395 :
396 0 : static void resetEstimates(
397 : ITD_DATA_HANDLE hItd )
398 : {
399 0 : set_zero( hItd->xcorr_smooth, STEREO_DFT_N_32k_ENC );
400 0 : set_zero( hItd->acorr_L, STEREO_DFT_BAND_MAX );
401 0 : set_zero( hItd->acorr_R, STEREO_DFT_BAND_MAX );
402 0 : hItd->cohSNR = 15;
403 :
404 0 : return;
405 : }
406 :
407 : /*-------------------------------------------------------------------------
408 : * td_sm_filter()
409 : *
410 : * time-domain smoothing filter for smoothing the cross-correlation vector
411 : *-------------------------------------------------------------------------*/
412 :
413 620604 : static void td_sm_filter(
414 : float *x,
415 : float *y,
416 : const int16_t L )
417 : {
418 : int16_t i;
419 : float tmp_x[STEREO_DFT_N_32k_ENC + 1];
420 : float a0, a1;
421 :
422 620604 : set_f( tmp_x, 0, STEREO_DFT_N_32k_ENC + 1 );
423 620604 : mvr2r( x, tmp_x, L );
424 :
425 620604 : a0 = 0.5f;
426 620604 : a1 = 0.25f;
427 :
428 620604 : y[0] = a0 * tmp_x[0] + a1 * x[1];
429 248862204 : for ( i = 1; i < L; i++ )
430 : {
431 248241600 : y[i] = a1 * tmp_x[i + 1] + a0 * tmp_x[i] + a1 * tmp_x[i - 1];
432 : }
433 :
434 620604 : return;
435 : }
436 :
437 : /*-------------------------------------------------------------------------
438 : * peak_detect()
439 : *
440 : * function for calculating the threshold for peak detection of the
441 : * cross-correlation vector
442 : *-------------------------------------------------------------------------*/
443 :
444 868212 : static float peak_detect(
445 : float *xcorr_itd,
446 : float *max_max,
447 : int16_t *index,
448 : int16_t *zero_itd_flag,
449 : const float snr,
450 : const int16_t vad,
451 : float *second_max,
452 : int16_t *second_max_lag,
453 : const float prev_itd,
454 : const int16_t flag_noisy_speech_snr,
455 : const int16_t detected_itd_flag,
456 : float *prev_max,
457 : int16_t *prev_index,
458 : float *prev_avg_max,
459 : float *total_max )
460 : {
461 : int16_t i;
462 : float tmp_max[SUBDIV], tmp_xcorr_itd[2 * STEREO_DFT_ITD_MAX_ANA + 1], tmp_xcorr_itd_sm[2 * STEREO_DFT_ITD_MAX_ANA + 1];
463 : int16_t index_subd[SUBDIV], ind;
464 : float avg_max, max_low, max_high, sum_max, tmp_max_max;
465 : float thres_diff;
466 : float wfac;
467 : int16_t d, i1, i2;
468 :
469 868212 : wfac = 2.5f;
470 868212 : if ( snr > 50.f )
471 : {
472 574557 : wfac = 3.f;
473 : }
474 :
475 : /*detect maxima outside the [-5, 5] ms boundaries */
476 868212 : maximum( xcorr_itd, STEREO_DFT_N_32k_ENC / 2 - STEREO_DFT_ITD_MAX_ANA, &max_low );
477 868212 : maximum( xcorr_itd + STEREO_DFT_N_32k_ENC / 2 + STEREO_DFT_ITD_MAX_ANA + 1, STEREO_DFT_N_32k_ENC / 2 - STEREO_DFT_ITD_MAX_ANA - 1, &max_high );
478 :
479 : /* create temp buffer that includes xcorr within [-5, 5] ms */
480 868212 : mvr2r( xcorr_itd + STEREO_DFT_N_32k_ENC / 2 - STEREO_DFT_ITD_MAX_ANA, tmp_xcorr_itd, 2 * STEREO_DFT_ITD_MAX_ANA + 1 );
481 :
482 868212 : *index = maximumAbs( tmp_xcorr_itd, 2 * STEREO_DFT_ITD_MAX_ANA + 1, max_max );
483 868212 : *total_max = *max_max;
484 :
485 868212 : d = max( 2, (int16_t) round_f( fabsf( prev_itd ) / 16.f ) );
486 868212 : i1 = max( 0, (int16_t) prev_itd + STEREO_DFT_ITD_MAX_ANA - d );
487 868212 : i2 = min( 2 * STEREO_DFT_ITD_MAX_ANA, (int16_t) prev_itd + STEREO_DFT_ITD_MAX_ANA + d );
488 868212 : *second_max_lag = maximumAbs( tmp_xcorr_itd + i1, i2 - i1 + 1, second_max );
489 868212 : *second_max_lag += i1;
490 :
491 :
492 : /*if maximum is out of boundaries signal zero itd OR maximum value is negative*/
493 868212 : *zero_itd_flag = ( *max_max < max_low || *max_max < max_high );
494 :
495 868212 : if ( *zero_itd_flag )
496 : {
497 16953 : return 0;
498 : }
499 : else
500 : {
501 851259 : sum_max = 0;
502 :
503 851259 : if ( snr >= 25.f )
504 : {
505 : /*apply smoothing filter*/
506 620604 : td_sm_filter( tmp_xcorr_itd, tmp_xcorr_itd_sm, 2 * STEREO_DFT_ITD_MAX_ANA + 1 );
507 :
508 : /*subdivide the area of interest and look for local maxima*/
509 12412080 : for ( i = 0; i < SUBDIV - 1; i++ )
510 : {
511 11791476 : index_subd[i] = maximumAbs( &tmp_xcorr_itd_sm[i * L_SAMPLES], L_SAMPLES, &tmp_max[i] );
512 11791476 : sum_max += tmp_max[i];
513 : }
514 :
515 620604 : index_subd[i] = maximumAbs( &tmp_xcorr_itd_sm[i * L_SAMPLES], L_SAMPLES + 1, &tmp_max[i] );
516 620604 : sum_max += tmp_max[i];
517 :
518 620604 : ind = maximumAbs( tmp_max, SUBDIV, &tmp_max_max );
519 :
520 : /*final position of maxmimum*/
521 620604 : *index = index_subd[ind] + ind * L_SAMPLES;
522 620604 : *max_max = tmp_max_max;
523 : /*calculate average of all maxima to determine the threshold*/
524 620604 : avg_max = sum_max * DENOM;
525 :
526 620604 : d = max( 2, (int16_t) round_f( fabsf( prev_itd ) / 16.f ) );
527 620604 : i1 = max( 0, (int16_t) prev_itd + STEREO_DFT_ITD_MAX_ANA - d );
528 620604 : i2 = min( 2 * STEREO_DFT_ITD_MAX_ANA, (int16_t) prev_itd + STEREO_DFT_ITD_MAX_ANA + d );
529 620604 : *second_max_lag = maximumAbs( tmp_xcorr_itd_sm + i1, i2 - i1 + 1, second_max );
530 620604 : *second_max_lag += i1;
531 : }
532 : else
533 : {
534 : /*determine weight for threshold depending on snr value*/
535 230655 : if ( snr <= 20.f && snr > 15.f )
536 : {
537 80022 : wfac = snr * 0.1f + 0.5f;
538 : }
539 : else
540 : {
541 150633 : wfac = 2.5f;
542 : }
543 :
544 4613100 : for ( i = 0; i < SUBDIV - 1; i++ )
545 : {
546 4382445 : index_subd[i] = maximumAbs( &tmp_xcorr_itd[i * L_SAMPLES], L_SAMPLES, &tmp_max[i] );
547 4382445 : sum_max += tmp_max[i];
548 : }
549 :
550 230655 : index_subd[i] = maximumAbs( &tmp_xcorr_itd[i * L_SAMPLES], L_SAMPLES + 1, &tmp_max[i] );
551 230655 : sum_max += tmp_max[i];
552 :
553 : /*calculate average of all maxima to determine the threshold*/
554 230655 : avg_max = sum_max * DENOM;
555 : }
556 :
557 : /*relax threshold if threshold is very close to max: when 7<snr<=15 and (thres-max)<0.05 or 15<snr<30 and (thres-max)<0.01*/
558 :
559 851259 : thres_diff = wfac * avg_max - *max_max;
560 :
561 851259 : if ( vad && thres_diff > 0.f && ( ( thres_diff < 0.05f && ( snr <= 15 && snr > 7.f ) ) || ( thres_diff < 0.01f && ( snr > 15.f && snr < 30.f ) ) ) )
562 : {
563 7081 : wfac = 2.0f;
564 : }
565 :
566 851259 : if ( flag_noisy_speech_snr == 1 )
567 : {
568 234732 : if ( vad == 0 )
569 : {
570 73635 : wfac = 2.5f;
571 : }
572 161097 : else if ( detected_itd_flag == 0 && *max_max > 1.5f * avg_max && *prev_max > 1.5f * *prev_avg_max && abs( *index - *prev_index ) <= 2 )
573 : {
574 319 : wfac = 1.5f;
575 : }
576 : else
577 : {
578 160778 : wfac = 2.0f;
579 : }
580 : }
581 851259 : *prev_max = *max_max;
582 851259 : *prev_avg_max = avg_max;
583 851259 : *prev_index = *index;
584 :
585 851259 : return ( wfac * avg_max );
586 : }
587 : }
588 :
589 :
590 : /*-------------------------------------------------------------------------
591 : * Compute stereo parameter: ITD
592 : * ITD: Interchannel Time Difference
593 : *------------------------------------------------------------------------*/
594 :
595 868212 : void stereo_dft_enc_compute_itd(
596 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
597 : float *DFT_L,
598 : float *DFT_R,
599 : const int16_t k_offset,
600 : const int16_t input_frame,
601 : const int16_t vad_flag_dtx[],
602 : const int16_t vad_hover_flag[],
603 : float *bin_nrgL,
604 : float *bin_nrgR )
605 : {
606 : int16_t i, j;
607 : STEREO_DFT_ENC_DATA_HANDLE hStereoDft;
608 : ITD_DATA_HANDLE hItd;
609 : float *pDFT_L, *pDFT_R;
610 : float abs_L, abs_R, prod_L, prod_R, sum_abs_L, sum_abs_R;
611 : float log_prod_L, log_prod_R;
612 : float sum_nrg_L, sum_nrg_R;
613 : float sfm_L, sfm_R;
614 : float xcorr[STEREO_DFT_N_32k_ENC];
615 : int16_t itd, itd_td;
616 : float xcorr_itd[STEREO_DFT_N_32k_ENC];
617 : float tmpf1, tmpf2, tmpf3;
618 : float thres, alpha;
619 : int16_t index;
620 : float xcorr_max, sum_nrg_L_lb, par_L[XCORR_LB_NUM_BANDS], par_L_avrg, sum_nrg_L_tmp;
621 : float xcorr_lb[STEREO_DFT_XCORR_LB_MAX];
622 : float num_cor, den_cor_cur, den_cor_prev, cor_lb_avrg;
623 : float cor_lb[XCORR_LB_NUM_BANDS];
624 : float Spd_L[STEREO_DFT_N_32k_ENC / 2 + 1];
625 : float Spd_R[STEREO_DFT_N_32k_ENC / 2 + 1];
626 : int16_t vad_flag_itd;
627 : float mssnr;
628 : int16_t itd_cal_flag;
629 : int16_t NFFT, NFFT_mid;
630 : int16_t zero_itd;
631 : float mEr;
632 : float cohSNR;
633 : float *pNrgL, *pNrgR;
634 : float second_max;
635 : int16_t second_max_lag;
636 : int16_t fc_condition_1, fc_condition_2, fc_condition_3, fc_condition_4, fc_condition_5, fc_condition_6_a, fc_condition_6_b, fc_condition_6_c;
637 : int16_t fc_condition_1234;
638 : int16_t split, shift, flag_noisy_speech_snr;
639 : float gcc_phat[2 * XTALK_PHAT_LEN + 1];
640 : float grand_dot_prod_real, grand_dot_prod_img;
641 : float xcorr_abs[STEREO_DFT_N_32k_ENC], sum_xcorr, prod_LL, prod_RR, total_mEr, total_max;
642 : STEREO_CLASSIF_HANDLE hStereoClassif;
643 : const float *dft_trigo32k;
644 : float trigo_enc[STEREO_DFT_N_32k_ENC / 2 + 1];
645 : float cng_xcorr_filt;
646 : float sum_nrg_delta;
647 : int16_t prev_itd_max;
648 : int16_t itd_max_flip;
649 :
650 868212 : if ( hCPE->element_mode == IVAS_CPE_DFT )
651 : {
652 727822 : hStereoDft = hCPE->hStereoDft;
653 727822 : hItd = hCPE->hStereoDft->hItd;
654 727822 : NFFT = min( STEREO_DFT_N_32k_ENC, hStereoDft->NFFT );
655 727822 : dft_trigo32k = hStereoDft->dft_trigo_32k;
656 : }
657 : else
658 : {
659 140390 : hStereoDft = NULL;
660 140390 : hItd = hCPE->hStereoMdct->hItd;
661 140390 : NFFT = min( STEREO_DFT_N_32k_ENC, hCPE->hStereoMdct->hDft_ana->NFFT );
662 140390 : dft_trigo32k = hCPE->hStereoMdct->hDft_ana->dft_trigo_32k;
663 : }
664 868212 : hStereoClassif = hCPE->hStereoClassif;
665 :
666 278696052 : for ( i = 0; i < STEREO_DFT_N_32k_ENC / 4; i++ )
667 : {
668 277827840 : trigo_enc[i] = dft_trigo32k[i];
669 277827840 : trigo_enc[STEREO_DFT_N_32k_ENC / 2 - i] = dft_trigo32k[i];
670 : }
671 868212 : trigo_enc[STEREO_DFT_N_32k_ENC / 4] = dft_trigo32k[STEREO_DFT_N_32k_ENC / 4];
672 :
673 868212 : flag_noisy_speech_snr = hCPE->hCoreCoder[0]->flag_noisy_speech_snr; /* flag from the previous frame */
674 :
675 : /* initializations to avoid compilation warnings */
676 868212 : sum_nrg_L = 0.0f;
677 868212 : sum_nrg_R = 0.0f;
678 868212 : sum_nrg_L_lb = 0.0f;
679 868212 : mssnr = 0.0f;
680 868212 : sfm_L = 0.0f;
681 :
682 :
683 868212 : NFFT_mid = (int16_t) ( ( min( STEREO_DFT_N_16k_ENC, NFFT ) ) * 0.5f );
684 :
685 868212 : pDFT_L = DFT_L;
686 868212 : pDFT_R = DFT_R;
687 868212 : pNrgL = bin_nrgL;
688 868212 : pNrgR = bin_nrgR;
689 868212 : xcorr[0] = 0.f;
690 868212 : xcorr[1] = 0.f;
691 868212 : log_prod_L = logf( max( FLT_MIN, ABSVAL( pDFT_L[0] ) ) );
692 868212 : log_prod_R = logf( max( FLT_MIN, ABSVAL( pDFT_R[0] ) ) );
693 868212 : prod_L = 1.0f;
694 868212 : prod_R = 1.0f;
695 868212 : sum_nrg_L = pDFT_L[0] * pDFT_L[0] + FLT_MIN;
696 868212 : sum_nrg_R = pDFT_R[0] * pDFT_R[0] + FLT_MIN;
697 868212 : sum_abs_L = ABSVAL( pDFT_L[0] ) + EPSILON;
698 868212 : sum_abs_R = ABSVAL( pDFT_R[0] ) + EPSILON;
699 868212 : xcorr_lb[0] = pDFT_L[0] * pDFT_L[0] + EPSILON;
700 868212 : sum_nrg_L_lb = xcorr_lb[0];
701 868212 : prod_LL = 1.0f;
702 868212 : prod_RR = 1.0f;
703 868212 : grand_dot_prod_real = EPSILON;
704 868212 : grand_dot_prod_img = EPSILON;
705 :
706 277827840 : for ( i = 1, j = 0; i < NFFT_mid; i++, j++ )
707 : {
708 276959628 : xcorr[2 * i] = pDFT_L[2 * i] * pDFT_R[2 * i] + pDFT_L[2 * i + 1] * pDFT_R[2 * i + 1];
709 276959628 : xcorr[2 * i + 1] = pDFT_L[2 * i + 1] * pDFT_R[2 * i] - pDFT_L[2 * i] * pDFT_R[2 * i + 1];
710 :
711 276959628 : pNrgL[i] = pDFT_L[2 * i] * pDFT_L[2 * i] + pDFT_L[2 * i + 1] * pDFT_L[2 * i + 1];
712 276959628 : pNrgR[i] = pDFT_R[2 * i] * pDFT_R[2 * i] + pDFT_R[2 * i + 1] * pDFT_R[2 * i + 1];
713 :
714 276959628 : Spd_L[i] = pNrgL[i];
715 276959628 : Spd_R[i] = pNrgR[i];
716 :
717 276959628 : abs_L = sqrtf( pNrgL[i] );
718 276959628 : abs_R = sqrtf( pNrgR[i] );
719 :
720 276959628 : sum_nrg_L += pNrgL[i];
721 276959628 : sum_nrg_R += pNrgR[i];
722 :
723 276959628 : sum_abs_L += abs_L;
724 276959628 : sum_abs_R += abs_R;
725 276959628 : prod_L *= abs_L;
726 276959628 : prod_R *= abs_R;
727 :
728 276959628 : grand_dot_prod_real += xcorr[2 * i];
729 276959628 : grand_dot_prod_img += xcorr[2 * i + 1];
730 276959628 : xcorr_abs[i] = sqrtf( xcorr[2 * i] * xcorr[2 * i] + xcorr[2 * i + 1] * xcorr[2 * i + 1] );
731 :
732 276959628 : prod_LL = prod_L;
733 276959628 : prod_RR = prod_R;
734 :
735 276959628 : if ( j == SFM_PROD_GRP || i == NFFT_mid - 1 )
736 : {
737 69456960 : prod_L = max( FLT_MIN, prod_L );
738 69456960 : prod_R = max( FLT_MIN, prod_R );
739 69456960 : log_prod_L += logf( prod_L );
740 69456960 : log_prod_R += logf( prod_R );
741 69456960 : prod_L = 1;
742 69456960 : prod_R = 1;
743 69456960 : j = 0;
744 : }
745 : }
746 :
747 : /* collect UNCLR classifier parameters */
748 : {
749 : float IPD, d_IPD, g_IPD, g_ILD, angle_rot, g_side, g_pred, abs_L_R, grand_nrg_DMX;
750 :
751 868212 : if ( hCPE->last_element_mode != IVAS_CPE_DFT )
752 : {
753 157489 : hStereoClassif->prev_g_IPD = 0.5f;
754 157489 : hStereoClassif->prev_IPD = 0.0f;
755 : }
756 :
757 :
758 868212 : abs_L_R = sqrtf( grand_dot_prod_real * grand_dot_prod_real + grand_dot_prod_img * grand_dot_prod_img );
759 868212 : grand_nrg_DMX = sum_nrg_L + sum_nrg_R + 2 * abs_L_R;
760 :
761 868212 : g_ILD = sqrtf( sum_nrg_L / ( sum_nrg_R + 1.0f ) );
762 868212 : g_ILD = fabsf( ( g_ILD - 1 ) / ( g_ILD + 1 ) );
763 868212 : hStereoClassif->unclr_fv[E_gainILD] = g_ILD;
764 868212 : hStereoClassif->xtalk_fv[E_gainILD] = g_ILD;
765 :
766 868212 : IPD = atan2f( grand_dot_prod_img, grand_dot_prod_real );
767 868212 : hStereoClassif->unclr_fv[E_IPD] = IPD;
768 868212 : hStereoClassif->xtalk_fv[E_IPD] = IPD;
769 :
770 868212 : d_IPD = fabsf( IPD - hStereoClassif->prev_IPD );
771 868212 : hStereoClassif->unclr_fv[E_d_IPD] = IPD; /* VM: need to replace IPD by d_IPD and re-train the UNCLR classifier for DFT stereo */
772 868212 : hStereoClassif->xtalk_fv[E_d_IPD] = d_IPD;
773 868212 : hStereoClassif->prev_IPD = IPD;
774 :
775 868212 : g_IPD = ( sum_nrg_L + sum_nrg_R + 2 * grand_dot_prod_real ) / grand_nrg_DMX;
776 868212 : if ( g_IPD >= 1.0f )
777 : {
778 125436 : g_IPD = hStereoClassif->prev_g_IPD;
779 : }
780 : else
781 : {
782 742776 : hStereoClassif->prev_g_IPD = g_IPD;
783 : }
784 868212 : g_IPD = logf( 1.0f - g_IPD );
785 868212 : hStereoClassif->unclr_fv[E_gainIPD] = g_IPD;
786 868212 : hStereoClassif->xtalk_fv[E_gainIPD] = g_IPD;
787 :
788 868212 : if ( sum_nrg_L >= sum_nrg_R )
789 : {
790 461351 : sum_nrg_delta = max( sum_nrg_L - sum_nrg_R, 1.0f );
791 : }
792 : else
793 : {
794 406861 : sum_nrg_delta = min( sum_nrg_L - sum_nrg_R, -1.0f );
795 : }
796 868212 : angle_rot = fabsf( atanf( 2.0f * ( grand_dot_prod_real ) / sum_nrg_delta ) );
797 868212 : hStereoClassif->unclr_fv[E_angle_rot] = angle_rot;
798 868212 : hStereoClassif->xtalk_fv[E_angle_rot] = angle_rot;
799 :
800 868212 : g_side = fabsf( sum_nrg_L - sum_nrg_R ) / ( grand_nrg_DMX );
801 868212 : g_side = max( 0.01f, min( g_side, 0.99f ) );
802 868212 : hStereoClassif->unclr_fv[E_g_side] = g_side;
803 868212 : hStereoClassif->xtalk_fv[E_g_side] = g_side;
804 :
805 868212 : g_pred = logf( max( 0, ( ( 1 - g_side ) * sum_nrg_L + ( 1 + g_side ) * sum_nrg_R - 2 * abs_L_R ) ) + 1.0f );
806 868212 : g_pred = max( 14.0f, g_pred );
807 868212 : hStereoClassif->unclr_fv[E_g_pred] = g_pred;
808 868212 : hStereoClassif->xtalk_fv[E_g_pred] = g_pred;
809 : }
810 :
811 868212 : mvr2r( &Spd_L[1], &xcorr_lb[1], STEREO_DFT_XCORR_LB_MAX - 1 );
812 868212 : sum_nrg_L_lb = sum_nrg_L_lb + sum_f( &Spd_L[1], 11 );
813 :
814 868212 : vad_flag_itd = stereo_dft_enc_itd_vad( hItd->E_band_n, &( hItd->vad_frm_cnt ), Spd_L, Spd_R, &mssnr );
815 :
816 868212 : vad_flag_itd = vad_flag_itd && vad_flag_dtx[0];
817 :
818 868212 : if ( sum_nrg_L < EPSILON )
819 : {
820 14378 : sfm_L = 0;
821 : }
822 : else
823 : {
824 853834 : sfm_L = expf( log_prod_L / ( NFFT_mid ) ) / ( sum_abs_L / ( NFFT_mid ) );
825 : }
826 :
827 868212 : if ( sum_nrg_R < EPSILON )
828 : {
829 16790 : sfm_R = 0;
830 : }
831 : else
832 : {
833 851422 : sfm_R = expf( log_prod_R / ( NFFT_mid ) ) / ( sum_abs_R / ( NFFT_mid ) );
834 : }
835 :
836 868212 : if ( sfm_R > sfm_L )
837 : {
838 407383 : sfm_L = sfm_R;
839 : }
840 868212 : if ( hCPE->element_mode == IVAS_CPE_DFT )
841 : {
842 727822 : hStereoDft->sfm = sfm_L;
843 : }
844 :
845 221566132 : for ( ; i < NFFT / 2; i++ )
846 : {
847 220697920 : xcorr[2 * i] = pDFT_L[2 * i] * pDFT_R[2 * i] + pDFT_L[2 * i + 1] * pDFT_R[2 * i + 1];
848 220697920 : xcorr[2 * i + 1] = pDFT_L[2 * i + 1] * pDFT_R[2 * i] - pDFT_L[2 * i] * pDFT_R[2 * i + 1];
849 :
850 220697920 : pNrgL[i] = pDFT_L[2 * i] * pDFT_L[2 * i] + pDFT_L[2 * i + 1] * pDFT_L[2 * i + 1];
851 220697920 : pNrgR[i] = pDFT_R[2 * i] * pDFT_R[2 * i] + pDFT_R[2 * i + 1] * pDFT_R[2 * i + 1];
852 : /* Calculate L and R energy power spectrum */
853 220697920 : Spd_L[i] = pNrgL[i];
854 220697920 : Spd_R[i] = pNrgR[i];
855 : }
856 :
857 57998132 : for ( ; i < STEREO_DFT_N_32k_ENC / 2; i++ )
858 : {
859 57129920 : xcorr[2 * i] = 0.f;
860 57129920 : xcorr[2 * i + 1] = 0.f;
861 : }
862 :
863 868212 : hItd->xcorr_smooth[0] = 0.f;
864 868212 : hItd->xcorr_smooth[1] = 0.f;
865 868212 : xcorr[0] = sign( hItd->xcorr_smooth[0] );
866 868212 : xcorr[1] = sign( hItd->xcorr_smooth[1] );
867 :
868 :
869 868212 : if ( hCPE->element_mode == IVAS_CPE_DFT && ( hItd->td_itd[k_offset] - hItd->td_itd[k_offset - 1] ) )
870 : {
871 : float alphaD, c, s, c1, s1, ctmp, vtmp;
872 : #ifdef NONBE_FIX_NONBE_BETWEEN_OPTIMIZATION_LEVELS_2
873 : volatile float alphaD_tmp;
874 : #endif
875 5854 : alphaD = -2.f * EVS_PI * ( (float) hItd->td_itd[k_offset] - hItd->td_itd[k_offset - 1] ) / hStereoDft->NFFT;
876 : #ifdef NONBE_FIX_NONBE_BETWEEN_OPTIMIZATION_LEVELS_2
877 5854 : alphaD_tmp = alphaD;
878 5854 : c1 = cosf( alphaD_tmp );
879 5854 : s1 = sinf( alphaD_tmp );
880 : #else
881 : c1 = cosf( alphaD );
882 : s1 = sinf( alphaD );
883 : #endif
884 5854 : c = 1.f; /* cos(0) */
885 5854 : s = 0.f; /* sin(0) */
886 :
887 3513920 : for ( i = 1; i < NFFT / 2; i++ )
888 : {
889 3508066 : ctmp = c;
890 3508066 : c = c * c1 - s * s1;
891 3508066 : s = ctmp * s1 + s * c1;
892 3508066 : vtmp = hItd->xcorr_smooth[2 * i] * c - hItd->xcorr_smooth[2 * i + 1] * s;
893 3508066 : hItd->xcorr_smooth[2 * i + 1] = hItd->xcorr_smooth[2 * i] * s + hItd->xcorr_smooth[2 * i + 1] * c;
894 3508066 : hItd->xcorr_smooth[2 * i] = vtmp;
895 : }
896 : }
897 :
898 868212 : tmpf3 = 2.f;
899 868212 : if ( flag_noisy_speech_snr )
900 : {
901 236307 : alpha = -0.8f;
902 : }
903 : else
904 : {
905 631905 : alpha = -1.0f;
906 : }
907 :
908 868212 : if ( hCPE->hCoreCoder[0]->Opt_DTX_ON && hCPE->element_mode == IVAS_CPE_DFT )
909 : {
910 291386 : if ( hCPE->hFrontVad[0] != NULL )
911 : {
912 : /* Determine if we are in hangover */
913 291386 : if ( vad_hover_flag[0] && vad_hover_flag[1] )
914 : {
915 : /* Determine if we are in the first DTX hangover frame (also triggers for VAD hangover frame) */
916 4897 : if ( hStereoDft->resetFrames > CORR_RESET_FRAMES_MAX )
917 : {
918 : /* Reset cross spectrum when there is hangover */
919 826 : set_f( hStereoDft->xspec_smooth, 0.0f, STEREO_DFT_N_32k_ENC );
920 826 : hStereoDft->resetFrames = 0;
921 826 : hStereoDft->currentNumUpdates = 0;
922 : /* Expected minimum number of updates including first SID */
923 826 : hStereoDft->expectedNumUpdates = 1 + min( hCPE->hFrontVad[0]->rem_dtx_ho, hCPE->hFrontVad[1]->rem_dtx_ho );
924 : }
925 4071 : else if ( hStereoDft->currentNumUpdates >= hStereoDft->expectedNumUpdates )
926 : {
927 1283 : hStereoDft->expectedNumUpdates += 1 + min( hCPE->hFrontVad[0]->rem_dtx_ho, hCPE->hFrontVad[1]->rem_dtx_ho );
928 : }
929 4897 : if ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates == 0 )
930 : {
931 0 : cng_xcorr_filt = max( CORR_FILT, sfm_L );
932 : }
933 : else
934 : {
935 4897 : cng_xcorr_filt = max( min( CORR_FILT, 10.0f * CORR_FILT / ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates ) ), sfm_L );
936 : }
937 4897 : hStereoDft->currentNumUpdates++;
938 2780480 : for ( i = 1; i < NFFT / 2; i++ )
939 : {
940 : /* Low pass filter cross L/R power spectrum */
941 2775583 : hStereoDft->xspec_smooth[2 * i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->xspec_smooth[2 * i] + cng_xcorr_filt * xcorr[2 * i];
942 2775583 : hStereoDft->xspec_smooth[2 * i + 1] = ( 1.f - cng_xcorr_filt ) * hStereoDft->xspec_smooth[2 * i + 1] + cng_xcorr_filt * xcorr[2 * i + 1];
943 :
944 : /* Low pass filter L/R power spectrum */
945 : /* Calculate coherence as cross spectral density divided by L*R power spectrum */
946 2775583 : hStereoDft->Spd_L_smooth[i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->Spd_L_smooth[i] + cng_xcorr_filt * Spd_L[i];
947 2775583 : hStereoDft->Spd_R_smooth[i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->Spd_R_smooth[i] + cng_xcorr_filt * Spd_R[i];
948 : }
949 : }
950 286489 : else if ( vad_flag_dtx[0] == 0 )
951 : {
952 27497 : hStereoDft->resetFrames = 0;
953 : }
954 : else
955 : {
956 258992 : if ( hStereoDft->resetFrames < CORR_RESET_FRAMES_MAX + 1 )
957 : {
958 34770 : hStereoDft->resetFrames++;
959 : }
960 258992 : if ( !vad_hover_flag[0] && !vad_hover_flag[1] )
961 : {
962 251086 : hStereoDft->expectedNumUpdates = hStereoDft->currentNumUpdates;
963 : }
964 : }
965 : }
966 291386 : if ( ( vad_flag_dtx[0] == 0 ) || ( hCPE->hFrontVad[0] == NULL && ( hCPE->hCoreCoder[0]->last_core_brate == SID_2k40 || hCPE->hCoreCoder[0]->last_core_brate == FRAME_NO_DATA ) ) || hCPE->hStereoCng->first_SID_after_TD )
967 : {
968 46348 : if ( vad_flag_dtx[0] == 0 )
969 : {
970 : /* expectedNumUpdates updated after call to dtx() in SID frames */
971 27497 : if ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates == 0 )
972 : {
973 20 : cng_xcorr_filt = max( CORR_FILT, sfm_L );
974 : }
975 : else
976 : {
977 27477 : cng_xcorr_filt = max( min( CORR_FILT, 10.0f * CORR_FILT / ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates ) ), sfm_L );
978 : }
979 27497 : hStereoDft->currentNumUpdates++;
980 27497 : hStereoDft->sfm = cng_xcorr_filt;
981 : }
982 : else /* use sfm for active frames */
983 : {
984 18851 : cng_xcorr_filt = sfm_L;
985 : }
986 :
987 : /* Copy state of xspec_smooth to xcorr_smooth in first CNG frame */
988 46348 : if ( hCPE->hStereoCng->cng_counter == 0 && vad_flag_dtx[0] == 0 )
989 : {
990 775 : mvr2r( hStereoDft->xspec_smooth, hItd->xcorr_smooth, NFFT );
991 : }
992 27424320 : for ( i = 1; i < NFFT / 2; i++ )
993 : {
994 : /* Low pass filter cross L/R power spectrum */
995 27377972 : hStereoDft->xspec_smooth[2 * i] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->xspec_smooth[2 * i] + XSPEC_ALPHA * xcorr[2 * i];
996 27377972 : hStereoDft->xspec_smooth[2 * i + 1] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->xspec_smooth[2 * i + 1] + XSPEC_ALPHA * xcorr[2 * i + 1];
997 27377972 : hItd->xcorr_smooth[2 * i] = ( 1.f - cng_xcorr_filt ) * hItd->xcorr_smooth[2 * i] + cng_xcorr_filt * xcorr[2 * i];
998 27377972 : hItd->xcorr_smooth[2 * i + 1] = ( 1.f - cng_xcorr_filt ) * hItd->xcorr_smooth[2 * i + 1] + cng_xcorr_filt * xcorr[2 * i + 1];
999 27377972 : tmpf1 = sqrtf( hItd->xcorr_smooth[i * 2] * hItd->xcorr_smooth[i * 2] + hItd->xcorr_smooth[i * 2 + 1] * hItd->xcorr_smooth[i * 2 + 1] );
1000 27377972 : tmpf1 += EPSILON;
1001 27377972 : tmpf2 = tmpf1;
1002 27377972 : tmpf1 = powf( tmpf1, alpha );
1003 27377972 : tmpf3 += tmpf2 * tmpf1;
1004 27377972 : xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
1005 27377972 : xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
1006 :
1007 : /* Low pass filter L/R power spectrum */
1008 : /* Calculate coherence as cross spectral density divided by L*R power spectrum */
1009 27377972 : hStereoDft->Spd_L_smooth[i] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->Spd_L_smooth[i] + XSPEC_ALPHA * Spd_L[i];
1010 27377972 : hStereoDft->Spd_R_smooth[i] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->Spd_R_smooth[i] + XSPEC_ALPHA * Spd_R[i];
1011 : }
1012 : }
1013 : else
1014 : {
1015 139422080 : for ( i = 1; i < NFFT / 2; i++ )
1016 : {
1017 139177042 : hItd->xcorr_smooth[2 * i] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i] + sfm_L * xcorr[2 * i];
1018 139177042 : hItd->xcorr_smooth[2 * i + 1] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i + 1] + sfm_L * xcorr[2 * i + 1];
1019 139177042 : tmpf1 = sqrtf( hItd->xcorr_smooth[i * 2] * hItd->xcorr_smooth[i * 2] + hItd->xcorr_smooth[i * 2 + 1] * hItd->xcorr_smooth[i * 2 + 1] );
1020 139177042 : tmpf1 += EPSILON;
1021 139177042 : tmpf2 = tmpf1;
1022 139177042 : tmpf1 = powf( tmpf1, alpha );
1023 139177042 : tmpf3 += tmpf2 * tmpf1;
1024 139177042 : xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
1025 139177042 : xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
1026 : }
1027 : }
1028 : }
1029 : else
1030 : {
1031 331679360 : for ( i = 1; i < NFFT / 2; i++ )
1032 : {
1033 331102534 : hItd->xcorr_smooth[2 * i] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i] + sfm_L * xcorr[2 * i];
1034 331102534 : hItd->xcorr_smooth[2 * i + 1] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i + 1] + sfm_L * xcorr[2 * i + 1];
1035 331102534 : tmpf1 = sqrtf( hItd->xcorr_smooth[i * 2] * hItd->xcorr_smooth[i * 2] + hItd->xcorr_smooth[i * 2 + 1] * hItd->xcorr_smooth[i * 2 + 1] );
1036 331102534 : tmpf1 += EPSILON;
1037 331102534 : tmpf2 = tmpf1;
1038 331102534 : tmpf1 = powf( tmpf1, alpha );
1039 331102534 : tmpf3 += tmpf2 * tmpf1;
1040 331102534 : xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
1041 331102534 : xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
1042 : }
1043 : }
1044 868212 : tmpf1 = (float) ( NFFT / 2 + 1 ) / tmpf3;
1045 997919732 : for ( i = 0; i < NFFT; i++ )
1046 : {
1047 997051520 : xcorr[i] *= tmpf1;
1048 : }
1049 : /*calculate mean E ratio of main to background signal for cohSNR*/
1050 868212 : if ( hCPE->element_mode == IVAS_CPE_DFT )
1051 : {
1052 727822 : mEr = calc_mean_E_ratio( hItd, hStereoDft->nbands, hStereoDft->band_limits, sfm_L, pNrgL, pNrgR, &total_mEr );
1053 : }
1054 : else
1055 : {
1056 : int16_t nbands;
1057 : int16_t band_limits[STEREO_DFT_BAND_MAX + 1];
1058 :
1059 140390 : set_s( band_limits, 0, STEREO_DFT_BAND_MAX + 1 );
1060 140390 : set_band_limits( &nbands, band_limits, hCPE->hStereoMdct->hDft_ana->NFFT );
1061 140390 : mEr = calc_mean_E_ratio( hItd, nbands, band_limits, sfm_L, pNrgL, pNrgR, &total_mEr );
1062 : }
1063 :
1064 : /*calculate total cohSNR for frame in dB*/
1065 868212 : if ( mEr > 1.0f )
1066 : {
1067 837527 : cohSNR = 20 * log10f( mEr );
1068 : }
1069 : else
1070 : {
1071 30685 : cohSNR = 0;
1072 : }
1073 :
1074 : /* collect UNCLR classifier parameters */
1075 : {
1076 : float es_em, d_prodL_prodR;
1077 :
1078 868212 : if ( total_mEr < 1.0f )
1079 : {
1080 248132 : hStereoClassif->unclr_fv[E_cohSNR] = 0;
1081 : }
1082 : else
1083 : {
1084 620080 : hStereoClassif->unclr_fv[E_cohSNR] = 20 * log10f( total_mEr );
1085 : }
1086 :
1087 868212 : es_em = fabsf( sum_nrg_L - sum_nrg_R ) / ( sum_nrg_L + sum_nrg_R + 1e-5f );
1088 868212 : hStereoClassif->unclr_fv[E_es_em] = es_em;
1089 868212 : hStereoClassif->xtalk_fv[E_es_em] = es_em;
1090 :
1091 868212 : d_prodL_prodR = logf( max( prod_LL, prod_RR ) / ( min( prod_LL, prod_RR ) + 1e-5f ) + 1.0f );
1092 868212 : hStereoClassif->unclr_fv[E_d_prodL_prodR] = d_prodL_prodR;
1093 868212 : hStereoClassif->xtalk_fv[E_d_prodL_prodR] = d_prodL_prodR;
1094 :
1095 868212 : sum_xcorr = 0.0f;
1096 277827840 : for ( i = 1; i < NFFT_mid; i++ )
1097 : {
1098 276959628 : xcorr_abs[i] = logf( xcorr_abs[i] / ( sum_nrg_L + sum_nrg_R + 1e-5f ) + 1e-5f );
1099 276959628 : sum_xcorr += xcorr_abs[i];
1100 : }
1101 :
1102 868212 : hStereoClassif->unclr_fv[E_sum_xcorr] = sum_xcorr;
1103 868212 : hStereoClassif->xtalk_fv[E_sum_xcorr] = sum_xcorr;
1104 :
1105 : #if defined( DEBUG_MODE_DFT ) || defined( DEBUG_MODE_TD )
1106 : dbgwrite( &hStereoClassif->unclr_fv[E_cohSNR], sizeof( float ), 1, 1, "res/stereo_clf_cohSNR.x" );
1107 : dbgwrite( &hStereoClassif->unclr_fv[E_es_em], sizeof( float ), 1, 1, "res/stereo_clf_es_em.x" );
1108 : dbgwrite( &hStereoClassif->unclr_fv[E_d_prodL_prodR], sizeof( float ), 1, 1, "res/stereo_clf_d_prodL_prodR.x" );
1109 : dbgwrite( &hStereoClassif->unclr_fv[E_sum_xcorr], sizeof( float ), 1, 1, "res/stereo_clf_sum_xcorr.x" );
1110 : #endif
1111 : }
1112 :
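1113 : /* reset estimates when silence is detected; NOTE(review): the condition below compares the 0/1 result of a logical AND against EPSILON, not the energies themselves - confirm intent */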
1114 868212 : if ( ( sum_nrg_L && sum_nrg_R ) < EPSILON )
1115 : {
1116 0 : resetEstimates( hItd );
1117 : }
1118 :
1119 : /*smooth cohSNR with time */
1120 868212 : if ( ( hItd->cohSNR - cohSNR ) < 10.0f )
1121 : {
1122 346252 : tmpf1 = max( 0.05f, min( 0.25f, sfm_L * 0.5f ) );
1123 346252 : hItd->cohSNR = ( 1.f - tmpf1 ) * hItd->cohSNR + tmpf1 * cohSNR;
1124 : }
1125 : else
1126 : {
1127 521960 : hItd->cohSNR = hItd->cohSNR - 0.05f;
1128 : }
1129 :
1130 868212 : cohSNR = hItd->cohSNR;
1131 :
1132 868212 : rfft( xcorr, trigo_enc, STEREO_DFT_N_32k_ENC, +1 );
1133 :
1134 868212 : itd_td = hItd->td_itd_32k[k_offset]; /* This ITD always operates at 32kHz*/
1135 868212 : shift = ( STEREO_DFT_N_32k_ENC / 2 - itd_td ) % STEREO_DFT_N_32k_ENC;
1136 868212 : split = STEREO_DFT_N_32k_ENC - shift;
1137 :
1138 868212 : mvr2r( &xcorr[0], &xcorr_itd[shift], split );
1139 868212 : mvr2r( &xcorr[split], &xcorr_itd[0], shift );
1140 :
1141 868212 : mvr2r( &xcorr_itd[STEREO_DFT_N_32k_ENC / 2 - XTALK_PHAT_LEN], gcc_phat, 2 * XTALK_PHAT_LEN + 1 );
1142 :
1143 : #ifdef DEBUG_MODE_DFT
1144 : dbgwrite( gcc_phat, sizeof( float ), 2 * XTALK_PHAT_LEN + 1, 1, "res/gcc_phat" );
1145 : #endif
1146 :
1147 868212 : thres = peak_detect( xcorr_itd, &tmpf1, &index, &zero_itd, cohSNR, hCPE->hCoreCoder[0]->vad_flag, &second_max, &second_max_lag, hItd->prev_itd, flag_noisy_speech_snr, hItd->detected_itd_flag, &hItd->prev_max, &hItd->prev_index, &hItd->prev_avg_max, &total_max );
1148 :
1149 868212 : hStereoClassif->ave_ener_L = sum_nrg_L / ( NFFT_mid * NFFT_mid );
1150 868212 : hStereoClassif->ave_ener_R = sum_nrg_R / ( NFFT_mid * NFFT_mid );
1151 :
1152 868212 : if ( hCPE->hCoreCoder[0]->input_Fs == 16000 )
1153 : {
1154 178531 : total_max *= 2.0f;
1155 : }
1156 868212 : hStereoClassif->unclr_fv[E_xcorr_itd_value] = total_max;
1157 868212 : hStereoClassif->xtalk_fv[E_xcorr_itd_value] = total_max;
1158 :
1159 : #if defined( DEBUG_MODE_DFT ) || defined( DEBUG_MODE_TD )
1160 : {
1161 : int16_t stmp = index - STEREO_DFT_ITD_MAX_ANA;
1162 : dbgwrite( &stmp, sizeof( int16_t ), 1, 1, "res/raw_itd.x" );
1163 : }
1164 : dbgwrite( &hStereoClassif->unclr_fv[E_xcorr_itd_value], sizeof( float ), 1, 1, "res/stereo_clf_raw_itd.x" );
1165 : #endif
1166 :
1167 : #ifdef DEBUG_MODE_DFT
1168 : {
1169 : int16_t tmp;
1170 :
1171 : tmp = (int16_t) ( tmpf1 * 100.f / thres );
1172 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_thres0.pcm" );
1173 : }
1174 : #endif
1175 : /* for tonal music items, increase the threshold by a factor of up to 2 */
1176 868212 : if ( hCPE->hCoreCoder[0]->sp_aud_decision0 && ( index - STEREO_DFT_ITD_MAX_ANA ) != hItd->prev_itd && !flag_noisy_speech_snr && hCPE->hCoreCoder[0]->vad_flag && tmpf1 < 0.3 )
1177 : {
1178 35526 : thres *= 1.0f + 1.f * min( 1.f, max( 0.f, ( -1.0f * sfm_L + 0.5f ) / ( 0.5f - 0.2f ) ) );
1179 : }
1180 :
1181 : #ifdef DEBUG_MODE_DFT
1182 : {
1183 : int16_t tmp;
1184 :
1185 : tmp = (int16_t) ( sfm_L * 100.f );
1186 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_sfm.pcm" );
1187 : tmp = (int16_t) ( tmpf1 * 100.f / thres );
1188 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_thres.pcm" );
1189 : }
1190 : #endif
1191 :
1192 868212 : itd_cal_flag = 0;
1193 : /*smooth threshold value depending on sfm for music items*/
1194 868212 : if ( hCPE->hCoreCoder[0]->ini_frame == 0 || hCPE->last_element_mode != IVAS_CPE_DFT || !hCPE->hCoreCoder[0]->sp_aud_decision0 || flag_noisy_speech_snr || cohSNR < 20 )
1195 : {
1196 653025 : hItd->itd_thres = thres;
1197 : }
1198 : else
1199 : {
1200 215187 : hItd->itd_thres = ( 1.0f - sfm_L ) * hItd->itd_thres + sfm_L * thres;
1201 : }
1202 :
1203 868212 : if ( flag_noisy_speech_snr == 0 && hCPE->hCoreCoder[0]->vad_flag == 1 && hItd->detected_itd_flag == 0 && ( hItd->currFlatness < 1.5f || hCPE->hCoreCoder[0]->sp_aud_decision0 == 1 ) )
1204 : {
1205 73640 : hItd->itd_thres *= 1.5f;
1206 : }
1207 868212 : if ( hCPE->hCoreCoder[0]->vad_flag == 0 || hItd->detected_itd_flag == 0 )
1208 : {
1209 200264 : hItd->itd_tracking = 0;
1210 : }
1211 667948 : else if ( abs( hItd->prev_itd ) > 2 )
1212 : {
1213 149441 : hItd->itd_tracking = 1;
1214 : }
1215 :
1216 868212 : if ( hItd->itd_tracking == 1 && ( second_max > hItd->itd_thres || tmpf1 - second_max < min( tmpf1 * 0.5f, 0.2f ) ) )
1217 : {
1218 255422 : index = second_max_lag;
1219 : }
1220 :
1221 868212 : if ( hItd->itd_tracking == 1 && abs( hItd->prev_itd - ( index - STEREO_DFT_ITD_MAX_ANA ) ) <= max( 2, (int16_t) round_f( abs( hItd->prev_itd ) / 16.f ) ) )
1222 : {
1223 255422 : hItd->itd_thres *= 0.75f;
1224 : }
1225 :
1226 868212 : if ( tmpf1 > hItd->itd_thres && !zero_itd )
1227 : {
1228 : /* LP filter GCC PHAT peak to follow peak envelope */
1229 739529 : if ( tmpf1 > hItd->lp_phat_peak )
1230 : {
1231 236362 : alpha = LP_GCC_PHAT_UP;
1232 : }
1233 : else
1234 : {
1235 503167 : alpha = LP_GCC_PHAT_DOWN;
1236 : }
1237 739529 : hItd->lp_phat_peak = alpha * tmpf1 + ( 1 - alpha ) * hItd->lp_phat_peak;
1238 739529 : hItd->itd_cnt++;
1239 739529 : if ( hItd->itd_cnt > ITD_CNT_MAX || hItd->itd_hangover > 0 )
1240 : {
1241 : /* If max count is reached, or if an ITD candidate is found during hangover,
1242 : set itd_cnt = ITD_CNT_MAX to ensure hangover is applied */
1243 707582 : hItd->itd_cnt = ITD_CNT_MAX;
1244 : }
1245 739529 : hItd->itd_hangover = 0;
1246 :
1247 739529 : itd = index - STEREO_DFT_ITD_MAX_ANA;
1248 739529 : hItd->itd_nonzero_cnt = 0; /* (1+0+9) <= hItd->itd_nonzero_cnt <= (1+6+3) */
1249 739529 : itd_cal_flag = 1; /* Indicates P>T case */
1250 739529 : hItd->valid_itd_cnt = hItd->itd_cnt; /* Store last non-zero value (when P>T) before reset */
1251 739529 : hItd->detected_itd_flag = 1;
1252 : }
1253 : else
1254 : {
1255 : /* Set prev_itd hangover period */
1256 128683 : if ( hItd->itd_cnt == ITD_CNT_MAX )
1257 : {
1258 6118 : hItd->itd_hangover = max( 0, min( ITD_HO_MAX, (int16_t) ( hItd->lp_phat_peak * ITD_HO_GCC_PHAT_INCL + ITD_HO_GCC_PHAT_OFFS ) ) );
1259 : }
1260 :
1261 128683 : if ( hItd->itd_hangover > 0 )
1262 : {
1263 18033 : itd = hItd->prev_itd;
1264 18033 : if ( hItd->itd_nonzero_cnt < MAX_ITD_VAD_HANGOVER )
1265 : {
1266 13526 : hItd->itd_nonzero_cnt++;
1267 : }
1268 18033 : hItd->itd_hangover--;
1269 18033 : hItd->detected_itd_flag = 1;
1270 : }
1271 : else
1272 : {
1273 110650 : itd = 0;
1274 110650 : hItd->detected_itd_flag = 0;
1275 : }
1276 :
1277 : /* Reset */
1278 128683 : hItd->itd_cnt = 0;
1279 128683 : hItd->lp_phat_peak = 0.0f;
1280 : }
1281 :
1282 : {
1283 : /* stereo Xtalk classifier */
1284 868212 : xtalk_classifier_dft( hCPE, itd, gcc_phat );
1285 : }
1286 :
1287 : /*avoid enabling ITD fine control for music*/
1288 868212 : if ( !hCPE->hCoreCoder[0]->sp_aud_decision0 || flag_noisy_speech_snr || cohSNR < 20 )
1289 : {
1290 : /* ITD fine control based on VAD and correlation parameters */
1291 615905 : cor_lb_avrg = 0.0f;
1292 615905 : par_L_avrg = 0.0f;
1293 2463620 : for ( i = 0; i < XCORR_LB_NUM_BANDS; i++ )
1294 : {
1295 1847715 : num_cor = xcorr_lb[i * XCORR_LB_BAND_WIDTH] * hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH];
1296 1847715 : den_cor_cur = xcorr_lb[i * XCORR_LB_BAND_WIDTH] * xcorr_lb[i * XCORR_LB_BAND_WIDTH] + 1.0f;
1297 1847715 : den_cor_prev = hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH] * hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH] + 1.0f;
1298 1847715 : xcorr_max = xcorr_lb[i * XCORR_LB_BAND_WIDTH];
1299 1847715 : sum_nrg_L_tmp = xcorr_lb[i * XCORR_LB_BAND_WIDTH];
1300 14781720 : for ( j = 1; j < XCORR_LB_BAND_WIDTH; j++ )
1301 : {
1302 12934005 : num_cor += xcorr_lb[i * XCORR_LB_BAND_WIDTH + j] * hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1303 12934005 : den_cor_cur += xcorr_lb[i * XCORR_LB_BAND_WIDTH + j] * xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1304 12934005 : den_cor_prev += hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH + j] * hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1305 12934005 : if ( xcorr_lb[i * XCORR_LB_BAND_WIDTH + j] > xcorr_max )
1306 : {
1307 3420389 : xcorr_max = xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1308 : }
1309 12934005 : sum_nrg_L_tmp += xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1310 : }
1311 1847715 : cor_lb[i] = num_cor / ( sqrtf( den_cor_cur ) * sqrtf( den_cor_prev ) );
1312 1847715 : cor_lb_avrg += cor_lb[i];
1313 1847715 : par_L[i] = xcorr_max / ( sum_nrg_L_tmp + FLT_MIN );
1314 1847715 : par_L_avrg += par_L[i];
1315 : }
1316 615905 : cor_lb_avrg /= XCORR_LB_NUM_BANDS;
1317 615905 : par_L_avrg /= XCORR_LB_NUM_BANDS;
1318 :
1319 : /*Breakdown of fine-control conditions */
1320 615905 : fc_condition_1 = abs( hItd->prev_itd ) > 0.2f * abs( itd );
1321 615905 : fc_condition_2 = cor_lb_avrg > 0.85f;
1322 615905 : fc_condition_3 = ( cor_lb_avrg > 0.7f && ( cor_lb[0] > 0.9f || cor_lb[1] > 0.9f || cor_lb[2] > 0.9f ) && hItd->prev_sum_nrg_L_lb > 0.5f * sum_nrg_L_lb && hItd->prev_sum_nrg_L_lb < 2.0f * sum_nrg_L_lb );
1323 615905 : fc_condition_4 = par_L_avrg > 0.6f;
1324 615905 : fc_condition_5 = hItd->prev_itd != 0;
1325 615905 : fc_condition_6_a = itd * hItd->prev_itd < 0; /* ITD sign change */
1326 615905 : fc_condition_6_b = itd * hItd->prev_itd == 0; /* ITD jump to zero */
1327 615905 : fc_condition_6_c = abs( itd - hItd->prev_itd ) > 0.5f * ( ( abs( itd ) > abs( hItd->prev_itd ) ) ? abs( itd ) : abs( hItd->prev_itd ) ); /* Magnitude of the ITD jump */
1328 :
1329 : /* Combining conditions 1,2,3,4 */
1330 615905 : fc_condition_1234 = fc_condition_1 && ( fc_condition_2 || fc_condition_3 || fc_condition_4 );
1331 :
1332 615905 : if ( ( fc_condition_1234 && ( ( fc_condition_5 && fc_condition_6_b ) || fc_condition_6_c ) ) || ( fc_condition_1234 && fc_condition_6_a ) )
1333 : {
1334 6261 : itd = hItd->prev_itd;
1335 :
1336 6261 : if ( hItd->itd_nonzero_cnt < MAX_ITD_VAD_HANGOVER )
1337 : {
1338 5532 : hItd->itd_nonzero_cnt++;
1339 : }
1340 6261 : hItd->detected_itd_flag = 1;
1341 : }
1342 :
1343 : /* stop the fine control when inactive or very high mssnr is detected*/
1344 615905 : if ( mssnr < 6e-7f * HIGHT_SNR_VOICE_TH || mssnr > 200 * HIGHT_SNR_VOICE_TH )
1345 : {
1346 250986 : hItd->itd_nonzero_cnt = MAX_ITD_VAD_HANGOVER;
1347 : }
1348 :
1349 615905 : if ( vad_flag_itd )
1350 : {
1351 : /* Fine-control for hangover if set HR period = 0 or if HR period expires */
1352 : /* However fine-control shouldn't be used when HR is disabled because itd_cnt < 2 - hence the extra last condition */
1353 564687 : if ( hItd->itd_hangover == 0 && hItd->prev_itd != 0 && itd == 0 && itd_cal_flag != 1 && hItd->itd_nonzero_cnt < MAX_ITD_VAD_HANGOVER && hItd->valid_itd_cnt == ITD_CNT_MAX && hItd->pre_vad == 1 )
1354 : {
1355 929 : itd = hItd->prev_itd;
1356 929 : hItd->itd_nonzero_cnt++;
1357 929 : hItd->detected_itd_flag = 1;
1358 : }
1359 564687 : hItd->pre_vad = 1;
1360 : }
1361 : else
1362 : {
1363 51218 : hItd->pre_vad = 0;
1364 : }
1365 :
1366 615905 : if ( itd == 0 && itd_cal_flag != 1 )
1367 : {
1368 79968 : hItd->itd_nonzero_cnt = 0;
1369 : }
1370 :
1371 615905 : hItd->prev_sum_nrg_L_lb = sum_nrg_L_lb;
1372 615905 : mvr2r( xcorr_lb, hItd->prev_xcorr_lb, STEREO_DFT_XCORR_LB_MAX );
1373 : }
1374 : /*save previous flag*/
1375 868212 : prev_itd_max = hItd->hybrid_itd_max;
1376 : /* enable hybrid ITD handling for very large ITDs*/
1377 868212 : hItd->hybrid_itd_max = ( abs( itd ) > STEREO_DFT_ITD_MAX && abs( itd ) < STEREO_DFT_ITD_MAX_ANA && !hCPE->hCoreCoder[0]->sp_aud_decision0 && hCPE->element_brate < IVAS_32k );
1378 : /* Update memory */
1379 868212 : hItd->prev_itd = itd;
1380 :
1381 868212 : itd = check_bounds_s( itd, -STEREO_DFT_ITD_MAX, STEREO_DFT_ITD_MAX );
1382 :
1383 : /* Invert the sign of the time difference */
1384 868212 : hItd->itd[k_offset] = -1.f * itd;
1385 :
1386 : /* collect UNCLR classifier parameters */
1387 868212 : hStereoClassif->unclr_fv[E_ITD] = hItd->itd[k_offset];
1388 :
1389 : #if defined( DEBUG_MODE_DFT ) || defined( DEBUG_MODE_TD )
1390 : dbgwrite( &hItd->itd[k_offset], sizeof( float ), 1, 1, "res/stereo_clf_ITD.x" );
1391 : #endif
1392 :
1393 : /* limit ITD range for MDCT stereo even more */
1394 868212 : if ( hCPE->element_mode == IVAS_CPE_MDCT && fabsf( hItd->itd[k_offset] ) > ITD_MAX_MDCT )
1395 : {
1396 6272 : itd = 0;
1397 : }
1398 :
1399 868212 : stereo_dft_quantize_itd( -1 * itd, hItd->itd + k_offset, input_frame * FRAMES_PER_SEC, hItd->itd_index + k_offset );
1400 :
1401 868212 : hItd->deltaItd[k_offset] = hItd->itd[k_offset] - hItd->td_itd[k_offset];
1402 :
1403 868212 : if ( hItd->hybrid_itd_max )
1404 : {
1405 : /*check if there is an ITD flip*/
1406 2076 : itd_max_flip = ( hItd->itd[k_offset] * hItd->itd[k_offset - 1] < 0 );
1407 :
1408 2076 : if ( hItd->deltaItd[k_offset - 1] != 0 && itd_max_flip == 0 )
1409 : {
1410 2022 : int16_t tmp_itd = (int16_t) floorf( ( ( hItd->prev_itd ) * ( (float) input_frame / 640 ) ) + 0.5f );
1411 2022 : hItd->deltaItd[k_offset] = -1.0f * tmp_itd - hItd->td_itd[k_offset];
1412 : }
1413 : }
1414 : /*signal change for next frame*/
1415 868212 : if ( prev_itd_max == 1 && hItd->hybrid_itd_max == 0 )
1416 : {
1417 58 : hItd->hybrid_itd_max = -1;
1418 : }
1419 :
1420 : #ifdef DEBUG_MODE_DFT
1421 : {
1422 : int16_t tmp;
1423 : static FILE *log_fid = NULL;
1424 :
1425 : tmp = (int16_t) hItd->itd[k_offset];
1426 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_combined_fc.pcm" );
1427 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_combined_fc.txt" );
1428 :
1429 : if ( log_fid == NULL )
1430 : log_fid = fopen( "./res/itd_log_td_itd.txt", "w" );
1431 : fprintf( log_fid, "frame: %d\t itd: %f\t td_itd: %d\t delta_itd: %f\n", frame, hItd->itd[1], (int16_t) hItd->td_itd[1], hItd->deltaItd[1] );
1432 : }
1433 : #endif
1434 :
1435 868212 : return;
1436 : }
|