Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <assert.h>
34 : #include <stdint.h>
35 : #include "options.h"
36 : #include <math.h>
37 : #include "cnst.h"
38 : #include "rom_enc.h"
39 : #include "rom_com.h"
40 : #include "prot.h"
41 : #include "ivas_prot.h"
42 : #include "ivas_cnst.h"
43 : #include "ivas_rom_com.h"
44 : #include "ivas_rom_enc.h"
45 : #ifdef DEBUGGING
46 : #include "debug.h"
47 : #endif
48 : #include "wmc_auto.h"
49 :
50 :
51 : /*-------------------------------------------------------------------------
52 : * Local constants
53 : *------------------------------------------------------------------------*/
54 :
#define LP_GCC_PHAT_UP 0.9f /* LP filter coefficient, going up */
#define LP_GCC_PHAT_DOWN 0.1f /* LP filter coefficient, going down */
#define ITD_CNT_MAX 2 /* Minimum number of consecutive ITD estimates for ITD hangover */
#define ITD_HO_GCC_PHAT_MAX 0.6f /* LP GCC PHAT value which gives zero hangover */
#define ITD_HO_GCC_PHAT_MIN 0.3f /* LP GCC PHAT value which gives ITD_HO_MAX ITD hangover frames */
#define ITD_HO_MAX 6 /* Maximum number of ITD hangover frames */
/* Slope and offset of the straight line INCL * x + OFFS, which evaluates to 1 at            */
/* x = ITD_HO_GCC_PHAT_MAX and to ITD_HO_MAX at x = ITD_HO_GCC_PHAT_MIN.                     */
/* NOTE(review): presumably x is the LP GCC PHAT value and the result a hangover frame count */
/* - confirm against the code that uses these macros.                                        */
#define ITD_HO_GCC_PHAT_INCL ( -( ITD_HO_MAX - 1 ) / ( ITD_HO_GCC_PHAT_MAX - ITD_HO_GCC_PHAT_MIN ) )
#define ITD_HO_GCC_PHAT_OFFS ( -ITD_HO_GCC_PHAT_INCL * ITD_HO_GCC_PHAT_MAX + 1 )
#define SFM_PROD_GRP 4 /* maximum grouping of products for calculating SFM in ITD estimation */
#define B_DENOM 0.083333333333333f /* 1/12: scaling applied to the summed per-band energy ratios in calc_mean_E_ratio() */
#define L_SAMPLES 20 /* length (in lags) of one sub-division searched for a local maximum in peak_detect() */
#define SUBDIV ( 2 * STEREO_DFT_ITD_MAX_ANA / L_SAMPLES ) /* number of sub-divisions of the analysed lag range in peak_detect() */
#define DENOM 0.05f /* 1/20: scaling of the summed sub-division maxima in peak_detect(); equals 1/SUBDIV only if SUBDIV == 20 - TODO confirm */

#define XSPEC_ALPHA ( 1.f / 32 ) /* NOTE(review): presumably a cross-spectrum smoothing factor - not used in the visible part of this file */
#define CORR_FILT 0.8f /* bound used when deriving cng_xcorr_filt in stereo_dft_enc_compute_itd() */
#define CORR_RESET_FRAMES_MAX 20 /* hStereoDft->resetFrames above this value triggers a reset of xspec_smooth during hangover */

#define ITD_VAD_NOISE_INIT_FRAMES 30 /* frame count limit below which the band noise estimate is built as a running average */
#define ITD_VAD_THRSHOLD 0.001f /* ITD VAD flag is 0 when the modified segmental SNR is below this value */
#define ITD_VAD_MS_SNR_UPDATE_THRESH 15.0f /* the band noise estimate is only updated when the modified segmental SNR is below this value */
#define HIGHT_SNR_VOICE_TH 10000.0f /* NOTE(review): not used in the visible part of this file - meaning to be confirmed */
#define MAX_ITD_VAD_HANGOVER 10 /* NOTE(review): not used in the visible part of this file - presumably an ITD VAD hangover limit */

#define XCORR_LB_NUM_BANDS 3 /* number of low-band entries; sizes par_L[] and cor_lb[] in stereo_dft_enc_compute_itd() */
#define XCORR_LB_BAND_WIDTH 8 /* NOTE(review): presumably the width in bins of one low band - confirm at the point of use */

#define ITD_MAX_MDCT 80 /* NOTE(review): presumably the ITD limit used for MDCT stereo - confirm at the point of use */
83 :
84 :
85 : /*-------------------------------------------------------------------------
86 : * set_band_limits()
87 : *
88 : * configure bands as used in DFT Stereo
89 : *------------------------------------------------------------------------*/
90 :
91 19095 : static void set_band_limits(
92 : int16_t *nbands,
93 : int16_t band_limits[STEREO_DFT_BAND_MAX + 1],
94 : int16_t NFFT )
95 : {
96 19095 : band_limits[0] = 1;
97 19095 : *nbands = 0;
98 244550 : while ( band_limits[( *nbands )++] < NFFT / 2 )
99 : {
100 225455 : band_limits[*nbands] = (int16_t) round_f( dft_band_limits_erb4[*nbands] * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) );
101 : }
102 19095 : ( *nbands )--;
103 19095 : band_limits[*nbands] = NFFT / 2; /*Nyquist Freq*/
104 :
105 19095 : return;
106 : }
107 :
108 :
109 : /*-------------------------------------------------------------------------
110 : * stereo_dft_hybrid_ITD_flag()
111 : *
112 : * Get the hybrid ITD flag
113 : *------------------------------------------------------------------------*/
114 :
115 138183 : void stereo_dft_hybrid_ITD_flag(
116 : STEREO_DFT_CONFIG_DATA_HANDLE hConfig, /* o : DFT stereo configuration */
117 : const int32_t input_Fs, /* i : CPE element sampling rate */
118 : const int16_t hybrid_itd_max /* i : flag for hybrid ITD for very large ITDs */
119 : )
120 : {
121 138183 : if ( hConfig != NULL )
122 : {
123 138183 : if ( hConfig->res_cod_mode || ( hConfig->ada_wb_res_cod_mode && input_Fs == 16000 ) || ( hybrid_itd_max == 1 ) )
124 : {
125 29555 : hConfig->hybrid_itd_flag = 1;
126 : }
127 : else
128 : {
129 108628 : hConfig->hybrid_itd_flag = 0;
130 : }
131 : }
132 : else
133 : {
134 0 : assert( 0 && "Stereo Dft Config Data Handle is uninitialized" );
135 : }
136 :
137 138183 : return;
138 : }
139 :
140 :
141 : /*-------------------------------------------------------------------------
142 : * stereo_dft_quantize_itd()
143 : *
144 : * Quantize the ITD
145 : *------------------------------------------------------------------------*/
146 :
147 153720 : static void stereo_dft_quantize_itd(
148 : const int16_t in,
149 : float *out,
150 : const int32_t input_Fs,
151 : int16_t *ind )
152 : {
153 : int16_t itd;
154 :
155 153720 : itd = (int16_t) ( sign( in ) * 0.5f + in );
156 :
157 : /*Limit ITD*/
158 153720 : if ( ( ABSVAL( itd ) > STEREO_DFT_ITD_MAX ) || ( ABSVAL( itd ) < STEREO_DFT_ITD_MIN ) )
159 : {
160 98506 : itd = 0;
161 : }
162 : else
163 : {
164 55214 : *ind = ( ( itd < 0 ) << ( STEREO_DFT_ITD_NBITS - 1 ) ) + ABSVAL( itd ) - STEREO_DFT_ITD_MIN;
165 : }
166 :
167 : /*Convert back @ fs*/
168 153720 : *out = (float) ( itd * input_Fs ) / ( (float) ( STEREO_DFT_ITD_FS ) );
169 :
170 153720 : return;
171 : }
172 :
173 :
174 : /*-------------------------------------------------------------------------
175 : * itd_vad_ms_snr_calc()
176 : *
177 : *
178 : *-------------------------------------------------------------------------*/
179 :
180 153720 : static float itd_vad_ms_snr_calc(
181 : float E_band_n[STEREO_DFT_ITD_VAD_BAND_NUM],
182 : float *Spd,
183 : float *E_band )
184 : {
185 : float snr[STEREO_DFT_ITD_VAD_BAND_NUM];
186 : float msnr[STEREO_DFT_ITD_VAD_BAND_NUM];
187 : float ms_snr;
188 : int16_t i, j;
189 :
190 3228120 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
191 : {
192 3074400 : E_band[i] = 0;
193 51496200 : for ( j = itd_vad_band_tbl[i]; j < itd_vad_band_tbl[i + 1]; j++ )
194 : {
195 48421800 : E_band[i] += Spd[j];
196 : }
197 3074400 : E_band[i] = E_band[i] / ( itd_vad_band_tbl[i + 1] - itd_vad_band_tbl[i] );
198 : }
199 :
200 153720 : ms_snr = 0;
201 3228120 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
202 : {
203 3074400 : snr[i] = E_band[i] / E_band_n[i];
204 3074400 : if ( snr[i] < 1 )
205 : {
206 572222 : snr[i] = 1;
207 : }
208 3074400 : msnr[i] = snr[i] - 1.0f;
209 3074400 : if ( msnr[i] < 6 )
210 : {
211 926569 : msnr[i] = powf( msnr[i], 2 ) / 6.0f;
212 : }
213 3074400 : ms_snr += msnr[i];
214 : }
215 :
216 153720 : return ( ms_snr );
217 : }
218 :
219 :
220 : /*-------------------------------------------------------------------------
221 : * itd_vad_background_update()
222 : *
223 : *
224 : *-------------------------------------------------------------------------*/
225 :
226 153720 : static void itd_vad_background_update(
227 : float E_band_n[STEREO_DFT_ITD_VAD_BAND_NUM],
228 : int16_t *vad_frm_cnt,
229 : const float ms_snr,
230 : float *E_band )
231 : {
232 : int16_t i;
233 : float energy;
234 :
235 153720 : energy = 0.0f;
236 3228120 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
237 : {
238 3074400 : energy += E_band[i] / (float) STEREO_DFT_ITD_VAD_BAND_NUM;
239 : }
240 :
241 153720 : if ( *vad_frm_cnt < ITD_VAD_NOISE_INIT_FRAMES )
242 : {
243 37380 : ( *vad_frm_cnt )++;
244 : }
245 :
246 153720 : if ( ( *vad_frm_cnt < ITD_VAD_NOISE_INIT_FRAMES ) && energy < 40000000 )
247 : {
248 77847 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
249 : {
250 : /* using the init values as frame (-1) values */
251 74140 : E_band_n[i] = ( E_band_n[i] * ( (float) ( *vad_frm_cnt ) ) + E_band[i] ) / ( (float) ( *vad_frm_cnt + 1 ) );
252 : }
253 : }
254 : else
255 : {
256 150013 : if ( ms_snr < ITD_VAD_MS_SNR_UPDATE_THRESH )
257 : {
258 286650 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
259 : {
260 273000 : E_band_n[i] = 0.96f * E_band_n[i] + 0.04f * E_band[i];
261 273000 : if ( E_band_n[i] < 1.0f )
262 : {
263 0 : E_band_n[i] = 1.0f;
264 : }
265 : }
266 : }
267 : }
268 :
269 153720 : return;
270 : }
271 :
272 : /*-------------------------------------------------------------------------
273 : * stereo_dft_enc_itd_vad()
274 : *
275 : *
276 : *-------------------------------------------------------------------------*/
277 :
278 153720 : static int16_t stereo_dft_enc_itd_vad(
279 : float E_band_n[STEREO_DFT_ITD_VAD_BAND_NUM],
280 : int16_t *vad_frm_cnt,
281 : float *Spd_L,
282 : float *Spd_R,
283 : float *mssnr )
284 : {
285 : int16_t i;
286 : float E_band[STEREO_DFT_ITD_VAD_BAND_NUM];
287 : int16_t vad_flag_itd;
288 :
289 : float Spd[STEREO_DFT_N_16k_ENC / 2 + 1];
290 :
291 : /* Spd is later only used starting at itd_vad_band_tbl[0], so only compute values starting from there */
292 : /* -> this avoids uninitialized values in Spd_L and Spd_R at index 0 to be used */
293 48575520 : for ( i = itd_vad_band_tbl[0]; i < STEREO_DFT_N_16k_ENC / 2; i++ )
294 : {
295 48421800 : Spd[i] = 0.5f * ( Spd_L[i] + Spd_R[i] );
296 : }
297 :
298 153720 : *mssnr = itd_vad_ms_snr_calc( E_band_n, Spd, E_band );
299 :
300 153720 : itd_vad_background_update( E_band_n, vad_frm_cnt, *mssnr, E_band );
301 :
302 153720 : if ( *mssnr < ITD_VAD_THRSHOLD )
303 : {
304 7190 : vad_flag_itd = 0;
305 : }
306 : else
307 : {
308 146530 : vad_flag_itd = 1;
309 : }
310 :
311 153720 : return ( vad_flag_itd );
312 : }
313 :
314 :
315 : /*-------------------------------------------------------------------------
316 : * calc_mean_E_ratio()
317 : *
318 : * calculates mean energy of main-to-background signal ratio
319 : *-------------------------------------------------------------------------*/
320 :
321 153720 : static float calc_mean_E_ratio(
322 : ITD_DATA_HANDLE hItd,
323 : int16_t nbands,
324 : int16_t band_limits[],
325 : const float sfm,
326 : const float nrg_L[STEREO_DFT_N_32k_ENC / 2],
327 : const float nrg_R[STEREO_DFT_N_32k_ENC / 2],
328 : float *total_mEr )
329 : {
330 : float sum_xcorr[2];
331 : float Sxcorr;
332 : float Er[STEREO_DFT_BAND_MAX], fi[STEREO_DFT_BAND_MAX], a, acorr;
333 : float sum_nrg_L, sum_nrg_R;
334 : int16_t b, i;
335 : float sum_Er;
336 : float total_fi;
337 : float grand_nrg_L, grand_nrg_R, grand_sum_xcorr_real, grand_sum_xcorr_img;
338 :
339 153720 : grand_nrg_L = 0.0f;
340 153720 : grand_nrg_R = 0.0f;
341 153720 : grand_sum_xcorr_real = 0.0f;
342 153720 : grand_sum_xcorr_img = 0.0f;
343 :
344 : /*take bands up to 32kHz bandwidth as ITD is always calculated at 32kHz sampling rate*/
345 153720 : nbands -= ( band_limits[nbands] > STEREO_DFT_N_32k_ENC / 2 );
346 :
347 153720 : sum_Er = 0;
348 1515630 : for ( b = 0; b < nbands; b++ )
349 : {
350 : /*reset buffers*/
351 1361910 : sum_xcorr[0] = 0.f;
352 1361910 : sum_xcorr[1] = 0.f;
353 1361910 : sum_nrg_L = 0.f;
354 1361910 : sum_nrg_R = 0.f;
355 :
356 92503550 : for ( i = band_limits[b]; i < min( band_limits[b + 1], STEREO_DFT_N_32k_ENC / 2 ); i++ )
357 : {
358 91141640 : sum_xcorr[0] += hItd->xcorr_smooth[2 * i];
359 91141640 : sum_xcorr[1] += hItd->xcorr_smooth[2 * i + 1];
360 91141640 : sum_nrg_L += nrg_L[i];
361 91141640 : sum_nrg_R += nrg_R[i];
362 : }
363 :
364 1361910 : Sxcorr = sum_xcorr[0] * sum_xcorr[0] + sum_xcorr[1] * sum_xcorr[1];
365 1361910 : hItd->acorr_L[b] = ( 1.f - sfm ) * hItd->acorr_L[b] + sfm * sum_nrg_L;
366 1361910 : hItd->acorr_R[b] = ( 1.f - sfm ) * hItd->acorr_R[b] + sfm * sum_nrg_R;
367 :
368 1361910 : a = hItd->acorr_L[b] - hItd->acorr_R[b];
369 1361910 : acorr = hItd->acorr_L[b] + hItd->acorr_R[b];
370 1361910 : fi[b] = sqrtf( a * a + 4 * Sxcorr );
371 1361910 : Er[b] = ( acorr + fi[b] ) / ( acorr - fi[b] + EPSILON );
372 1361910 : sum_Er += Er[b];
373 :
374 1361910 : grand_nrg_L += sum_nrg_L;
375 1361910 : grand_nrg_R += sum_nrg_R;
376 1361910 : grand_sum_xcorr_real += sum_xcorr[0];
377 1361910 : grand_sum_xcorr_img += sum_xcorr[1];
378 : }
379 :
380 153720 : Sxcorr = grand_sum_xcorr_real * grand_sum_xcorr_real + grand_sum_xcorr_img * grand_sum_xcorr_img;
381 153720 : a = grand_nrg_L - grand_nrg_R;
382 153720 : acorr = grand_nrg_L + grand_nrg_R;
383 153720 : total_fi = sqrtf( a * a + 4 * Sxcorr );
384 153720 : *total_mEr = ( acorr + total_fi ) / ( acorr - total_fi + EPSILON );
385 :
386 153720 : return ( sum_Er * B_DENOM );
387 : }
388 :
389 :
390 : /*-------------------------------------------------------------------------
391 : * resetEstimates()
392 : *
393 : * resets long term estimates to initial values.
394 : *-------------------------------------------------------------------------*/
395 :
396 0 : static void resetEstimates(
397 : ITD_DATA_HANDLE hItd )
398 : {
399 0 : set_zero( hItd->xcorr_smooth, STEREO_DFT_N_32k_ENC );
400 0 : set_zero( hItd->acorr_L, STEREO_DFT_BAND_MAX );
401 0 : set_zero( hItd->acorr_R, STEREO_DFT_BAND_MAX );
402 0 : hItd->cohSNR = 15;
403 :
404 0 : return;
405 : }
406 :
407 : /*-------------------------------------------------------------------------
408 : * td_sm_filter()
409 : *
410 : * time-domain smoothing filter for smoothing the cross-correlation vector
411 : *-------------------------------------------------------------------------*/
412 :
413 64144 : static void td_sm_filter(
414 : float *x,
415 : float *y,
416 : const int16_t L )
417 : {
418 : int16_t i;
419 : float tmp_x[STEREO_DFT_N_32k_ENC + 1];
420 : float a0, a1;
421 :
422 64144 : set_f( tmp_x, 0, STEREO_DFT_N_32k_ENC + 1 );
423 64144 : mvr2r( x, tmp_x, L );
424 :
425 64144 : a0 = 0.5f;
426 64144 : a1 = 0.25f;
427 :
428 64144 : y[0] = a0 * tmp_x[0] + a1 * x[1];
429 25721744 : for ( i = 1; i < L; i++ )
430 : {
431 25657600 : y[i] = a1 * tmp_x[i + 1] + a0 * tmp_x[i] + a1 * tmp_x[i - 1];
432 : }
433 :
434 64144 : return;
435 : }
436 :
437 : /*-------------------------------------------------------------------------
438 : * peak_detect()
439 : *
440 : * function for calculating the threshold for peak detection of the
441 : * cross-correlation vector
442 : *-------------------------------------------------------------------------*/
443 :
444 153720 : static float peak_detect(
445 : float *xcorr_itd,
446 : float *max_max,
447 : int16_t *index,
448 : int16_t *zero_itd_flag,
449 : const float snr,
450 : const int16_t vad,
451 : float *second_max,
452 : int16_t *second_max_lag,
453 : const float prev_itd,
454 : const int16_t flag_noisy_speech_snr,
455 : const int16_t detected_itd_flag,
456 : float *prev_max,
457 : int16_t *prev_index,
458 : float *prev_avg_max,
459 : float *total_max )
460 : {
461 : int16_t i;
462 : float tmp_max[SUBDIV], tmp_xcorr_itd[2 * STEREO_DFT_ITD_MAX_ANA + 1], tmp_xcorr_itd_sm[2 * STEREO_DFT_ITD_MAX_ANA + 1];
463 : int16_t index_subd[SUBDIV], ind;
464 : float avg_max, max_low, max_high, sum_max, tmp_max_max;
465 : float thres_diff;
466 : float wfac;
467 : int16_t d, i1, i2;
468 :
469 153720 : wfac = 2.5f;
470 153720 : if ( snr > 50.f )
471 : {
472 42618 : wfac = 3.f;
473 : }
474 :
475 : /*detect maxima outside the [-5, 5] ms boundaries */
476 153720 : maximum( xcorr_itd, STEREO_DFT_N_32k_ENC / 2 - STEREO_DFT_ITD_MAX_ANA, &max_low );
477 153720 : maximum( xcorr_itd + STEREO_DFT_N_32k_ENC / 2 + STEREO_DFT_ITD_MAX_ANA + 1, STEREO_DFT_N_32k_ENC / 2 - STEREO_DFT_ITD_MAX_ANA - 1, &max_high );
478 :
479 : /* create temp buffer that includes xcorr within [-5, 5] ms */
480 153720 : mvr2r( xcorr_itd + STEREO_DFT_N_32k_ENC / 2 - STEREO_DFT_ITD_MAX_ANA, tmp_xcorr_itd, 2 * STEREO_DFT_ITD_MAX_ANA + 1 );
481 :
482 153720 : *index = maximumAbs( tmp_xcorr_itd, 2 * STEREO_DFT_ITD_MAX_ANA + 1, max_max );
483 153720 : *total_max = *max_max;
484 :
485 153720 : d = max( 2, (int16_t) round_f( fabsf( prev_itd ) / 16.f ) );
486 153720 : i1 = max( 0, (int16_t) prev_itd + STEREO_DFT_ITD_MAX_ANA - d );
487 153720 : i2 = min( 2 * STEREO_DFT_ITD_MAX_ANA, (int16_t) prev_itd + STEREO_DFT_ITD_MAX_ANA + d );
488 153720 : *second_max_lag = maximumAbs( tmp_xcorr_itd + i1, i2 - i1 + 1, second_max );
489 153720 : *second_max_lag += i1;
490 :
491 :
492 : /*if maximum is out of boundaries signal zero itd OR maximum value is negative*/
493 153720 : *zero_itd_flag = ( *max_max < max_low || *max_max < max_high );
494 :
495 153720 : if ( *zero_itd_flag )
496 : {
497 4678 : return 0;
498 : }
499 : else
500 : {
501 149042 : sum_max = 0;
502 :
503 149042 : if ( snr >= 25.f )
504 : {
505 : /*apply smoothing filter*/
506 64144 : td_sm_filter( tmp_xcorr_itd, tmp_xcorr_itd_sm, 2 * STEREO_DFT_ITD_MAX_ANA + 1 );
507 :
508 : /*subdivide the area of interest and look for local maxima*/
509 1282880 : for ( i = 0; i < SUBDIV - 1; i++ )
510 : {
511 1218736 : index_subd[i] = maximumAbs( &tmp_xcorr_itd_sm[i * L_SAMPLES], L_SAMPLES, &tmp_max[i] );
512 1218736 : sum_max += tmp_max[i];
513 : }
514 :
515 64144 : index_subd[i] = maximumAbs( &tmp_xcorr_itd_sm[i * L_SAMPLES], L_SAMPLES + 1, &tmp_max[i] );
516 64144 : sum_max += tmp_max[i];
517 :
518 64144 : ind = maximumAbs( tmp_max, SUBDIV, &tmp_max_max );
519 :
520 : /*final position of maxmimum*/
521 64144 : *index = index_subd[ind] + ind * L_SAMPLES;
522 64144 : *max_max = tmp_max_max;
523 : /*calculate average of all maxima to determine the threshold*/
524 64144 : avg_max = sum_max * DENOM;
525 :
526 64144 : d = max( 2, (int16_t) round_f( fabsf( prev_itd ) / 16.f ) );
527 64144 : i1 = max( 0, (int16_t) prev_itd + STEREO_DFT_ITD_MAX_ANA - d );
528 64144 : i2 = min( 2 * STEREO_DFT_ITD_MAX_ANA, (int16_t) prev_itd + STEREO_DFT_ITD_MAX_ANA + d );
529 64144 : *second_max_lag = maximumAbs( tmp_xcorr_itd_sm + i1, i2 - i1 + 1, second_max );
530 64144 : *second_max_lag += i1;
531 : }
532 : else
533 : {
534 : /*determine weight for threshold depending on snr value*/
535 84898 : if ( snr <= 20.f && snr > 15.f )
536 : {
537 29853 : wfac = snr * 0.1f + 0.5f;
538 : }
539 : else
540 : {
541 55045 : wfac = 2.5f;
542 : }
543 :
544 1697960 : for ( i = 0; i < SUBDIV - 1; i++ )
545 : {
546 1613062 : index_subd[i] = maximumAbs( &tmp_xcorr_itd[i * L_SAMPLES], L_SAMPLES, &tmp_max[i] );
547 1613062 : sum_max += tmp_max[i];
548 : }
549 :
550 84898 : index_subd[i] = maximumAbs( &tmp_xcorr_itd[i * L_SAMPLES], L_SAMPLES + 1, &tmp_max[i] );
551 84898 : sum_max += tmp_max[i];
552 :
553 : /*calculate average of all maxima to determine the threshold*/
554 84898 : avg_max = sum_max * DENOM;
555 : }
556 :
557 : /*relax threshold if threshold is very close to max: when 7<snr<=15 and (thres-max)<0.05 or 15<snr<30 and (thres-max)<0.01*/
558 :
559 149042 : thres_diff = wfac * avg_max - *max_max;
560 :
561 149042 : if ( vad && thres_diff > 0.f && ( ( thres_diff < 0.05f && ( snr <= 15 && snr > 7.f ) ) || ( thres_diff < 0.01f && ( snr > 15.f && snr < 30.f ) ) ) )
562 : {
563 3149 : wfac = 2.0f;
564 : }
565 :
566 149042 : if ( flag_noisy_speech_snr == 1 )
567 : {
568 47872 : if ( vad == 0 )
569 : {
570 15605 : wfac = 2.5f;
571 : }
572 32267 : else if ( detected_itd_flag == 0 && *max_max > 1.5f * avg_max && *prev_max > 1.5f * *prev_avg_max && abs( *index - *prev_index ) <= 2 )
573 : {
574 138 : wfac = 1.5f;
575 : }
576 : else
577 : {
578 32129 : wfac = 2.0f;
579 : }
580 : }
581 149042 : *prev_max = *max_max;
582 149042 : *prev_avg_max = avg_max;
583 149042 : *prev_index = *index;
584 :
585 149042 : return ( wfac * avg_max );
586 : }
587 : }
588 :
589 :
590 : /*-------------------------------------------------------------------------
591 : * Compute stereo parameter: ITD
592 : * ITD: Interchannel Time Difference
593 : *------------------------------------------------------------------------*/
594 :
595 153720 : void stereo_dft_enc_compute_itd(
596 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
597 : float *DFT_L,
598 : float *DFT_R,
599 : const int16_t k_offset,
600 : const int16_t input_frame,
601 : const int16_t vad_flag_dtx[],
602 : const int16_t vad_hover_flag[],
603 : float *bin_nrgL,
604 : float *bin_nrgR )
605 : {
606 : int16_t i, j;
607 : STEREO_DFT_ENC_DATA_HANDLE hStereoDft;
608 : ITD_DATA_HANDLE hItd;
609 : float *pDFT_L, *pDFT_R;
610 : float abs_L, abs_R, prod_L, prod_R, sum_abs_L, sum_abs_R;
611 : float log_prod_L, log_prod_R;
612 : float sum_nrg_L, sum_nrg_R;
613 : float sfm_L, sfm_R;
614 : float xcorr[STEREO_DFT_N_32k_ENC];
615 : int16_t itd, itd_td;
616 : float xcorr_itd[STEREO_DFT_N_32k_ENC];
617 : float tmpf1, tmpf2, tmpf3;
618 : float thres, alpha;
619 : int16_t index;
620 : float xcorr_max, sum_nrg_L_lb, par_L[XCORR_LB_NUM_BANDS], par_L_avrg, sum_nrg_L_tmp;
621 : float xcorr_lb[STEREO_DFT_XCORR_LB_MAX];
622 : float num_cor, den_cor_cur, den_cor_prev, cor_lb_avrg;
623 : float cor_lb[XCORR_LB_NUM_BANDS];
624 : float Spd_L[STEREO_DFT_N_32k_ENC / 2 + 1];
625 : float Spd_R[STEREO_DFT_N_32k_ENC / 2 + 1];
626 : int16_t vad_flag_itd;
627 : float mssnr;
628 : int16_t itd_cal_flag;
629 : int16_t NFFT, NFFT_mid;
630 : int16_t zero_itd;
631 : float mEr;
632 : float cohSNR;
633 : float *pNrgL, *pNrgR;
634 : float second_max;
635 : int16_t second_max_lag;
636 : int16_t fc_condition_1, fc_condition_2, fc_condition_3, fc_condition_4, fc_condition_5, fc_condition_6_a, fc_condition_6_b, fc_condition_6_c;
637 : int16_t fc_condition_1234;
638 : int16_t split, shift, flag_noisy_speech_snr;
639 : float gcc_phat[2 * XTALK_PHAT_LEN + 1];
640 : float grand_dot_prod_real, grand_dot_prod_img;
641 : float xcorr_abs[STEREO_DFT_N_32k_ENC], sum_xcorr, prod_LL, prod_RR, total_mEr, total_max;
642 : STEREO_CLASSIF_HANDLE hStereoClassif;
643 : const float *dft_trigo32k;
644 : float trigo_enc[STEREO_DFT_N_32k_ENC / 2 + 1];
645 : float cng_xcorr_filt;
646 : float sum_nrg_delta;
647 : int16_t prev_itd_max;
648 : int16_t itd_max_flip;
649 :
650 153720 : if ( hCPE->element_mode == IVAS_CPE_DFT )
651 : {
652 134625 : hStereoDft = hCPE->hStereoDft;
653 134625 : hItd = hCPE->hStereoDft->hItd;
654 134625 : NFFT = min( STEREO_DFT_N_32k_ENC, hStereoDft->NFFT );
655 134625 : dft_trigo32k = hStereoDft->dft_trigo_32k;
656 : }
657 : else
658 : {
659 19095 : hStereoDft = NULL;
660 19095 : hItd = hCPE->hStereoMdct->hItd;
661 19095 : NFFT = min( STEREO_DFT_N_32k_ENC, hCPE->hStereoMdct->hDft_ana->NFFT );
662 19095 : dft_trigo32k = hCPE->hStereoMdct->hDft_ana->dft_trigo_32k;
663 : }
664 153720 : hStereoClassif = hCPE->hStereoClassif;
665 :
666 49344120 : for ( i = 0; i < STEREO_DFT_N_32k_ENC / 4; i++ )
667 : {
668 49190400 : trigo_enc[i] = dft_trigo32k[i];
669 49190400 : trigo_enc[STEREO_DFT_N_32k_ENC / 2 - i] = dft_trigo32k[i];
670 : }
671 153720 : trigo_enc[STEREO_DFT_N_32k_ENC / 4] = dft_trigo32k[STEREO_DFT_N_32k_ENC / 4];
672 :
673 153720 : flag_noisy_speech_snr = hCPE->hCoreCoder[0]->flag_noisy_speech_snr; /* flag from the previous frame */
674 :
675 : /* initializations to avoid compilation warnings */
676 153720 : sum_nrg_L = 0.0f;
677 153720 : sum_nrg_R = 0.0f;
678 153720 : sum_nrg_L_lb = 0.0f;
679 153720 : mssnr = 0.0f;
680 153720 : sfm_L = 0.0f;
681 :
682 :
683 153720 : NFFT_mid = (int16_t) ( ( min( STEREO_DFT_N_16k_ENC, NFFT ) ) * 0.5f );
684 :
685 153720 : pDFT_L = DFT_L;
686 153720 : pDFT_R = DFT_R;
687 153720 : pNrgL = bin_nrgL;
688 153720 : pNrgR = bin_nrgR;
689 153720 : xcorr[0] = 0.f;
690 153720 : xcorr[1] = 0.f;
691 153720 : log_prod_L = logf( max( FLT_MIN, ABSVAL( pDFT_L[0] ) ) );
692 153720 : log_prod_R = logf( max( FLT_MIN, ABSVAL( pDFT_R[0] ) ) );
693 153720 : prod_L = 1.0f;
694 153720 : prod_R = 1.0f;
695 153720 : sum_nrg_L = pDFT_L[0] * pDFT_L[0] + FLT_MIN;
696 153720 : sum_nrg_R = pDFT_R[0] * pDFT_R[0] + FLT_MIN;
697 153720 : sum_abs_L = ABSVAL( pDFT_L[0] ) + EPSILON;
698 153720 : sum_abs_R = ABSVAL( pDFT_R[0] ) + EPSILON;
699 153720 : xcorr_lb[0] = pDFT_L[0] * pDFT_L[0] + EPSILON;
700 153720 : sum_nrg_L_lb = xcorr_lb[0];
701 153720 : prod_LL = 1.0f;
702 153720 : prod_RR = 1.0f;
703 153720 : grand_dot_prod_real = EPSILON;
704 153720 : grand_dot_prod_img = EPSILON;
705 :
706 49190400 : for ( i = 1, j = 0; i < NFFT_mid; i++, j++ )
707 : {
708 49036680 : xcorr[2 * i] = pDFT_L[2 * i] * pDFT_R[2 * i] + pDFT_L[2 * i + 1] * pDFT_R[2 * i + 1];
709 49036680 : xcorr[2 * i + 1] = pDFT_L[2 * i + 1] * pDFT_R[2 * i] - pDFT_L[2 * i] * pDFT_R[2 * i + 1];
710 :
711 49036680 : pNrgL[i] = pDFT_L[2 * i] * pDFT_L[2 * i] + pDFT_L[2 * i + 1] * pDFT_L[2 * i + 1];
712 49036680 : pNrgR[i] = pDFT_R[2 * i] * pDFT_R[2 * i] + pDFT_R[2 * i + 1] * pDFT_R[2 * i + 1];
713 :
714 49036680 : Spd_L[i] = pNrgL[i];
715 49036680 : Spd_R[i] = pNrgR[i];
716 :
717 49036680 : abs_L = sqrtf( pNrgL[i] );
718 49036680 : abs_R = sqrtf( pNrgR[i] );
719 :
720 49036680 : sum_nrg_L += pNrgL[i];
721 49036680 : sum_nrg_R += pNrgR[i];
722 :
723 49036680 : sum_abs_L += abs_L;
724 49036680 : sum_abs_R += abs_R;
725 49036680 : prod_L *= abs_L;
726 49036680 : prod_R *= abs_R;
727 :
728 49036680 : grand_dot_prod_real += xcorr[2 * i];
729 49036680 : grand_dot_prod_img += xcorr[2 * i + 1];
730 49036680 : xcorr_abs[i] = sqrtf( xcorr[2 * i] * xcorr[2 * i] + xcorr[2 * i + 1] * xcorr[2 * i + 1] );
731 :
732 49036680 : prod_LL = prod_L;
733 49036680 : prod_RR = prod_R;
734 :
735 49036680 : if ( j == SFM_PROD_GRP || i == NFFT_mid - 1 )
736 : {
737 12297600 : prod_L = max( FLT_MIN, prod_L );
738 12297600 : prod_R = max( FLT_MIN, prod_R );
739 12297600 : log_prod_L += logf( prod_L );
740 12297600 : log_prod_R += logf( prod_R );
741 12297600 : prod_L = 1;
742 12297600 : prod_R = 1;
743 12297600 : j = 0;
744 : }
745 : }
746 :
747 : /* collect UNCLR classifier parameters */
748 : {
749 : float IPD, d_IPD, g_IPD, g_ILD, angle_rot, g_side, g_pred, abs_L_R, grand_nrg_DMX;
750 :
751 153720 : if ( hCPE->last_element_mode != IVAS_CPE_DFT )
752 : {
753 21954 : hStereoClassif->prev_g_IPD = 0.5f;
754 21954 : hStereoClassif->prev_IPD = 0.0f;
755 : }
756 :
757 :
758 153720 : abs_L_R = sqrtf( grand_dot_prod_real * grand_dot_prod_real + grand_dot_prod_img * grand_dot_prod_img );
759 153720 : grand_nrg_DMX = sum_nrg_L + sum_nrg_R + 2 * abs_L_R;
760 :
761 153720 : g_ILD = sqrtf( sum_nrg_L / ( sum_nrg_R + 1.0f ) );
762 153720 : g_ILD = fabsf( ( g_ILD - 1 ) / ( g_ILD + 1 ) );
763 153720 : hStereoClassif->unclr_fv[E_gainILD] = g_ILD;
764 153720 : hStereoClassif->xtalk_fv[E_gainILD] = g_ILD;
765 :
766 153720 : IPD = atan2f( grand_dot_prod_img, grand_dot_prod_real );
767 153720 : hStereoClassif->unclr_fv[E_IPD] = IPD;
768 153720 : hStereoClassif->xtalk_fv[E_IPD] = IPD;
769 :
770 153720 : d_IPD = fabsf( IPD - hStereoClassif->prev_IPD );
771 153720 : hStereoClassif->unclr_fv[E_d_IPD] = IPD; /* VM: need to replace IPD by d_IPD and re-train the UNCLR classifier for DFT stereo */
772 153720 : hStereoClassif->xtalk_fv[E_d_IPD] = d_IPD;
773 153720 : hStereoClassif->prev_IPD = IPD;
774 :
775 153720 : g_IPD = ( sum_nrg_L + sum_nrg_R + 2 * grand_dot_prod_real ) / grand_nrg_DMX;
776 153720 : if ( g_IPD >= 1.0f )
777 : {
778 15387 : g_IPD = hStereoClassif->prev_g_IPD;
779 : }
780 : else
781 : {
782 138333 : hStereoClassif->prev_g_IPD = g_IPD;
783 : }
784 153720 : g_IPD = logf( 1.0f - g_IPD );
785 153720 : hStereoClassif->unclr_fv[E_gainIPD] = g_IPD;
786 153720 : hStereoClassif->xtalk_fv[E_gainIPD] = g_IPD;
787 :
788 153720 : if ( sum_nrg_L >= sum_nrg_R )
789 : {
790 74963 : sum_nrg_delta = max( sum_nrg_L - sum_nrg_R, 1.0f );
791 : }
792 : else
793 : {
794 78757 : sum_nrg_delta = min( sum_nrg_L - sum_nrg_R, -1.0f );
795 : }
796 153720 : angle_rot = fabsf( atanf( 2.0f * ( grand_dot_prod_real ) / sum_nrg_delta ) );
797 153720 : hStereoClassif->unclr_fv[E_angle_rot] = angle_rot;
798 153720 : hStereoClassif->xtalk_fv[E_angle_rot] = angle_rot;
799 :
800 153720 : g_side = fabsf( sum_nrg_L - sum_nrg_R ) / ( grand_nrg_DMX );
801 153720 : g_side = max( 0.01f, min( g_side, 0.99f ) );
802 153720 : hStereoClassif->unclr_fv[E_g_side] = g_side;
803 153720 : hStereoClassif->xtalk_fv[E_g_side] = g_side;
804 :
805 153720 : g_pred = logf( max( 0, ( ( 1 - g_side ) * sum_nrg_L + ( 1 + g_side ) * sum_nrg_R - 2 * abs_L_R ) ) + 1.0f );
806 153720 : g_pred = max( 14.0f, g_pred );
807 153720 : hStereoClassif->unclr_fv[E_g_pred] = g_pred;
808 153720 : hStereoClassif->xtalk_fv[E_g_pred] = g_pred;
809 : }
810 :
811 153720 : mvr2r( &Spd_L[1], &xcorr_lb[1], STEREO_DFT_XCORR_LB_MAX - 1 );
812 153720 : sum_nrg_L_lb = sum_nrg_L_lb + sum_f( &Spd_L[1], 11 );
813 :
814 153720 : vad_flag_itd = stereo_dft_enc_itd_vad( hItd->E_band_n, &( hItd->vad_frm_cnt ), Spd_L, Spd_R, &mssnr );
815 :
816 153720 : vad_flag_itd = vad_flag_itd && vad_flag_dtx[0];
817 :
818 153720 : if ( sum_nrg_L < EPSILON )
819 : {
820 208 : sfm_L = 0;
821 : }
822 : else
823 : {
824 153512 : sfm_L = expf( log_prod_L / ( NFFT_mid ) ) / ( sum_abs_L / ( NFFT_mid ) );
825 : }
826 :
827 153720 : if ( sum_nrg_R < EPSILON )
828 : {
829 243 : sfm_R = 0;
830 : }
831 : else
832 : {
833 153477 : sfm_R = expf( log_prod_R / ( NFFT_mid ) ) / ( sum_abs_R / ( NFFT_mid ) );
834 : }
835 :
836 153720 : if ( sfm_R > sfm_L )
837 : {
838 61519 : sfm_L = sfm_R;
839 : }
840 153720 : if ( hCPE->element_mode == IVAS_CPE_DFT )
841 : {
842 134625 : hStereoDft->sfm = sfm_L;
843 : }
844 :
845 42783160 : for ( ; i < NFFT / 2; i++ )
846 : {
847 42629440 : xcorr[2 * i] = pDFT_L[2 * i] * pDFT_R[2 * i] + pDFT_L[2 * i + 1] * pDFT_R[2 * i + 1];
848 42629440 : xcorr[2 * i + 1] = pDFT_L[2 * i + 1] * pDFT_R[2 * i] - pDFT_L[2 * i] * pDFT_R[2 * i + 1];
849 :
850 42629440 : pNrgL[i] = pDFT_L[2 * i] * pDFT_L[2 * i] + pDFT_L[2 * i + 1] * pDFT_L[2 * i + 1];
851 42629440 : pNrgR[i] = pDFT_R[2 * i] * pDFT_R[2 * i] + pDFT_R[2 * i + 1] * pDFT_R[2 * i + 1];
852 : /* Calculate L and R energy power spectrum */
853 42629440 : Spd_L[i] = pNrgL[i];
854 42629440 : Spd_R[i] = pNrgR[i];
855 : }
856 :
857 6714680 : for ( ; i < STEREO_DFT_N_32k_ENC / 2; i++ )
858 : {
859 6560960 : xcorr[2 * i] = 0.f;
860 6560960 : xcorr[2 * i + 1] = 0.f;
861 : }
862 :
863 153720 : hItd->xcorr_smooth[0] = 0.f;
864 153720 : hItd->xcorr_smooth[1] = 0.f;
865 153720 : xcorr[0] = sign( hItd->xcorr_smooth[0] );
866 153720 : xcorr[1] = sign( hItd->xcorr_smooth[1] );
867 :
868 :
869 153720 : if ( hCPE->element_mode == IVAS_CPE_DFT && ( hItd->td_itd[k_offset] - hItd->td_itd[k_offset - 1] ) )
870 : {
871 : float alphaD, c, s, c1, s1, ctmp, vtmp;
872 : volatile float alphaD_tmp;
873 :
874 1062 : alphaD = -2.f * EVS_PI * ( (float) hItd->td_itd[k_offset] - hItd->td_itd[k_offset - 1] ) / hStereoDft->NFFT;
875 1062 : alphaD_tmp = alphaD;
876 1062 : c1 = cosf( alphaD_tmp );
877 1062 : s1 = sinf( alphaD_tmp );
878 1062 : c = 1.f; /* cos(0) */
879 1062 : s = 0.f; /* sin(0) */
880 :
881 628480 : for ( i = 1; i < NFFT / 2; i++ )
882 : {
883 627418 : ctmp = c;
884 627418 : c = c * c1 - s * s1;
885 627418 : s = ctmp * s1 + s * c1;
886 627418 : vtmp = hItd->xcorr_smooth[2 * i] * c - hItd->xcorr_smooth[2 * i + 1] * s;
887 627418 : hItd->xcorr_smooth[2 * i + 1] = hItd->xcorr_smooth[2 * i] * s + hItd->xcorr_smooth[2 * i + 1] * c;
888 627418 : hItd->xcorr_smooth[2 * i] = vtmp;
889 : }
890 : }
891 :
892 153720 : tmpf3 = 2.f;
893 153720 : if ( flag_noisy_speech_snr )
894 : {
895 49179 : alpha = -0.8f;
896 : }
897 : else
898 : {
899 104541 : alpha = -1.0f;
900 : }
901 :
902 153720 : if ( hCPE->hCoreCoder[0]->Opt_DTX_ON && hCPE->element_mode == IVAS_CPE_DFT )
903 : {
904 34018 : if ( hCPE->hFrontVad[0] != NULL )
905 : {
906 : /* Determine if we are in hangover */
907 34018 : if ( vad_hover_flag[0] && vad_hover_flag[1] )
908 : {
909 : /* Determine if we are in the first DTX hangover frame (also triggers for VAD hangover frame) */
910 991 : if ( hStereoDft->resetFrames > CORR_RESET_FRAMES_MAX )
911 : {
912 : /* Reset cross spectrum when there is hangover */
913 178 : set_f( hStereoDft->xspec_smooth, 0.0f, STEREO_DFT_N_32k_ENC );
914 178 : hStereoDft->resetFrames = 0;
915 178 : hStereoDft->currentNumUpdates = 0;
916 : /* Expected minimum number of updates including first SID */
917 178 : hStereoDft->expectedNumUpdates = 1 + min( hCPE->hFrontVad[0]->rem_dtx_ho, hCPE->hFrontVad[1]->rem_dtx_ho );
918 : }
919 813 : else if ( hStereoDft->currentNumUpdates >= hStereoDft->expectedNumUpdates )
920 : {
921 159 : hStereoDft->expectedNumUpdates += 1 + min( hCPE->hFrontVad[0]->rem_dtx_ho, hCPE->hFrontVad[1]->rem_dtx_ho );
922 : }
923 991 : if ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates == 0 )
924 : {
925 0 : cng_xcorr_filt = max( CORR_FILT, sfm_L );
926 : }
927 : else
928 : {
929 991 : cng_xcorr_filt = max( min( CORR_FILT, 10.0f * CORR_FILT / ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates ) ), sfm_L );
930 : }
931 991 : hStereoDft->currentNumUpdates++;
932 578880 : for ( i = 1; i < NFFT / 2; i++ )
933 : {
934 : /* Low pass filter cross L/R power spectrum */
935 577889 : hStereoDft->xspec_smooth[2 * i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->xspec_smooth[2 * i] + cng_xcorr_filt * xcorr[2 * i];
936 577889 : hStereoDft->xspec_smooth[2 * i + 1] = ( 1.f - cng_xcorr_filt ) * hStereoDft->xspec_smooth[2 * i + 1] + cng_xcorr_filt * xcorr[2 * i + 1];
937 :
938 : /* Low pass filter L/R power spectrum */
939 : /* Calculate coherence as cross spectral density divided by L*R power spectrum */
940 577889 : hStereoDft->Spd_L_smooth[i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->Spd_L_smooth[i] + cng_xcorr_filt * Spd_L[i];
941 577889 : hStereoDft->Spd_R_smooth[i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->Spd_R_smooth[i] + cng_xcorr_filt * Spd_R[i];
942 : }
943 : }
944 33027 : else if ( vad_flag_dtx[0] == 0 )
945 : {
946 11240 : hStereoDft->resetFrames = 0;
947 : }
948 : else
949 : {
950 21787 : if ( hStereoDft->resetFrames < CORR_RESET_FRAMES_MAX + 1 )
951 : {
952 8264 : hStereoDft->resetFrames++;
953 : }
954 21787 : if ( !vad_hover_flag[0] && !vad_hover_flag[1] )
955 : {
956 20038 : hStereoDft->expectedNumUpdates = hStereoDft->currentNumUpdates;
957 : }
958 : }
959 : }
960 34018 : if ( ( vad_flag_dtx[0] == 0 ) || ( hCPE->hFrontVad[0] == NULL && ( hCPE->hCoreCoder[0]->last_core_brate == SID_2k40 || hCPE->hCoreCoder[0]->last_core_brate == FRAME_NO_DATA ) ) || hCPE->hStereoCng->first_SID_after_TD )
961 : {
962 16852 : if ( vad_flag_dtx[0] == 0 )
963 : {
964 : /* expectedNumUpdates updated after call to dtx() in SID frames */
965 11240 : if ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates == 0 )
966 : {
967 2 : cng_xcorr_filt = max( CORR_FILT, sfm_L );
968 : }
969 : else
970 : {
971 11238 : cng_xcorr_filt = max( min( CORR_FILT, 10.0f * CORR_FILT / ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates ) ), sfm_L );
972 : }
973 11240 : hStereoDft->currentNumUpdates++;
974 11240 : hStereoDft->sfm = cng_xcorr_filt;
975 : }
976 : else /* use sfm for active frames */
977 : {
978 5612 : cng_xcorr_filt = sfm_L;
979 : }
980 :
981 : /* Copy state of xspec_smooth to xcorr_smooth in first CNG frame */
982 16852 : if ( hCPE->hStereoCng->cng_counter == 0 && vad_flag_dtx[0] == 0 )
983 : {
984 223 : mvr2r( hStereoDft->xspec_smooth, hItd->xcorr_smooth, NFFT );
985 : }
986 9728320 : for ( i = 1; i < NFFT / 2; i++ )
987 : {
988 : /* Low pass filter cross L/R power spectrum */
989 9711468 : hStereoDft->xspec_smooth[2 * i] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->xspec_smooth[2 * i] + XSPEC_ALPHA * xcorr[2 * i];
990 9711468 : hStereoDft->xspec_smooth[2 * i + 1] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->xspec_smooth[2 * i + 1] + XSPEC_ALPHA * xcorr[2 * i + 1];
991 9711468 : hItd->xcorr_smooth[2 * i] = ( 1.f - cng_xcorr_filt ) * hItd->xcorr_smooth[2 * i] + cng_xcorr_filt * xcorr[2 * i];
992 9711468 : hItd->xcorr_smooth[2 * i + 1] = ( 1.f - cng_xcorr_filt ) * hItd->xcorr_smooth[2 * i + 1] + cng_xcorr_filt * xcorr[2 * i + 1];
993 9711468 : tmpf1 = sqrtf( hItd->xcorr_smooth[i * 2] * hItd->xcorr_smooth[i * 2] + hItd->xcorr_smooth[i * 2 + 1] * hItd->xcorr_smooth[i * 2 + 1] );
994 9711468 : tmpf1 += EPSILON;
995 9711468 : tmpf2 = tmpf1;
996 9711468 : tmpf1 = powf( tmpf1, alpha );
997 9711468 : tmpf3 += tmpf2 * tmpf1;
998 9711468 : xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
999 9711468 : xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
1000 :
1001 : /* Low pass filter L/R power spectrum */
1002 : /* Calculate coherence as cross spectral density divided by L*R power spectrum */
1003 9711468 : hStereoDft->Spd_L_smooth[i] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->Spd_L_smooth[i] + XSPEC_ALPHA * Spd_L[i];
1004 9711468 : hStereoDft->Spd_R_smooth[i] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->Spd_R_smooth[i] + XSPEC_ALPHA * Spd_R[i];
1005 : }
1006 : }
1007 : else
1008 : {
1009 9895360 : for ( i = 1; i < NFFT / 2; i++ )
1010 : {
1011 9878194 : hItd->xcorr_smooth[2 * i] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i] + sfm_L * xcorr[2 * i];
1012 9878194 : hItd->xcorr_smooth[2 * i + 1] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i + 1] + sfm_L * xcorr[2 * i + 1];
1013 9878194 : tmpf1 = sqrtf( hItd->xcorr_smooth[i * 2] * hItd->xcorr_smooth[i * 2] + hItd->xcorr_smooth[i * 2 + 1] * hItd->xcorr_smooth[i * 2 + 1] );
1014 9878194 : tmpf1 += EPSILON;
1015 9878194 : tmpf2 = tmpf1;
1016 9878194 : tmpf1 = powf( tmpf1, alpha );
1017 9878194 : tmpf3 += tmpf2 * tmpf1;
1018 9878194 : xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
1019 9878194 : xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
1020 : }
1021 : }
1022 : }
1023 : else
1024 : {
1025 72196160 : for ( i = 1; i < NFFT / 2; i++ )
1026 : {
1027 72076458 : hItd->xcorr_smooth[2 * i] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i] + sfm_L * xcorr[2 * i];
1028 72076458 : hItd->xcorr_smooth[2 * i + 1] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i + 1] + sfm_L * xcorr[2 * i + 1];
1029 72076458 : tmpf1 = sqrtf( hItd->xcorr_smooth[i * 2] * hItd->xcorr_smooth[i * 2] + hItd->xcorr_smooth[i * 2 + 1] * hItd->xcorr_smooth[i * 2 + 1] );
1030 72076458 : tmpf1 += EPSILON;
1031 72076458 : tmpf2 = tmpf1;
1032 72076458 : tmpf1 = powf( tmpf1, alpha );
1033 72076458 : tmpf3 += tmpf2 * tmpf1;
1034 72076458 : xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
1035 72076458 : xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
1036 : }
1037 : }
1038 153720 : tmpf1 = (float) ( NFFT / 2 + 1 ) / tmpf3;
1039 183793400 : for ( i = 0; i < NFFT; i++ )
1040 : {
1041 183639680 : xcorr[i] *= tmpf1;
1042 : }
1043 : /*calculate mean E ratio of main to background signal for cohSNR*/
1044 153720 : if ( hCPE->element_mode == IVAS_CPE_DFT )
1045 : {
1046 134625 : mEr = calc_mean_E_ratio( hItd, hStereoDft->nbands, hStereoDft->band_limits, sfm_L, pNrgL, pNrgR, &total_mEr );
1047 : }
1048 : else
1049 : {
1050 : int16_t nbands;
1051 : int16_t band_limits[STEREO_DFT_BAND_MAX + 1];
1052 :
1053 19095 : set_s( band_limits, 0, STEREO_DFT_BAND_MAX + 1 );
1054 19095 : set_band_limits( &nbands, band_limits, hCPE->hStereoMdct->hDft_ana->NFFT );
1055 19095 : mEr = calc_mean_E_ratio( hItd, nbands, band_limits, sfm_L, pNrgL, pNrgR, &total_mEr );
1056 : }
1057 :
1058 : /*calculate total cohSNR for frame in dB*/
1059 153720 : if ( mEr > 1.0f )
1060 : {
1061 146539 : cohSNR = 20 * log10f( mEr );
1062 : }
1063 : else
1064 : {
1065 7181 : cohSNR = 0;
1066 : }
1067 :
1068 : /* collect UNCLR classifier parameters */
1069 : {
1070 : float es_em, d_prodL_prodR;
1071 :
1072 153720 : if ( total_mEr < 1.0f )
1073 : {
1074 33435 : hStereoClassif->unclr_fv[E_cohSNR] = 0;
1075 : }
1076 : else
1077 : {
1078 120285 : hStereoClassif->unclr_fv[E_cohSNR] = 20 * log10f( total_mEr );
1079 : }
1080 :
1081 153720 : es_em = fabsf( sum_nrg_L - sum_nrg_R ) / ( sum_nrg_L + sum_nrg_R + 1e-5f );
1082 153720 : hStereoClassif->unclr_fv[E_es_em] = es_em;
1083 153720 : hStereoClassif->xtalk_fv[E_es_em] = es_em;
1084 :
1085 153720 : d_prodL_prodR = logf( max( prod_LL, prod_RR ) / ( min( prod_LL, prod_RR ) + 1e-5f ) + 1.0f );
1086 153720 : hStereoClassif->unclr_fv[E_d_prodL_prodR] = d_prodL_prodR;
1087 153720 : hStereoClassif->xtalk_fv[E_d_prodL_prodR] = d_prodL_prodR;
1088 :
1089 153720 : sum_xcorr = 0.0f;
1090 49190400 : for ( i = 1; i < NFFT_mid; i++ )
1091 : {
1092 49036680 : xcorr_abs[i] = logf( xcorr_abs[i] / ( sum_nrg_L + sum_nrg_R + 1e-5f ) + 1e-5f );
1093 49036680 : sum_xcorr += xcorr_abs[i];
1094 : }
1095 :
1096 153720 : hStereoClassif->unclr_fv[E_sum_xcorr] = sum_xcorr;
1097 153720 : hStereoClassif->xtalk_fv[E_sum_xcorr] = sum_xcorr;
1098 :
1099 : #if defined( DEBUG_MODE_DFT ) || defined( DEBUG_MODE_TD )
1100 : dbgwrite( &hStereoClassif->unclr_fv[E_cohSNR], sizeof( float ), 1, 1, "res/stereo_clf_cohSNR.x" );
1101 : dbgwrite( &hStereoClassif->unclr_fv[E_es_em], sizeof( float ), 1, 1, "res/stereo_clf_es_em.x" );
1102 : dbgwrite( &hStereoClassif->unclr_fv[E_d_prodL_prodR], sizeof( float ), 1, 1, "res/stereo_clf_d_prodL_prodR.x" );
1103 : dbgwrite( &hStereoClassif->unclr_fv[E_sum_xcorr], sizeof( float ), 1, 1, "res/stereo_clf_sum_xcorr.x" );
1104 : #endif
1105 : }
1106 :
1107 : /* reset estimates when silence is detected*/
1108 153720 : if ( ( sum_nrg_L && sum_nrg_R ) < EPSILON )
1109 : {
1110 0 : resetEstimates( hItd );
1111 : }
1112 :
1113 : /*smooth cohSNR with time */
1114 153720 : if ( ( hItd->cohSNR - cohSNR ) < 10.0f )
1115 : {
1116 110319 : tmpf1 = max( 0.05f, min( 0.25f, sfm_L * 0.5f ) );
1117 110319 : hItd->cohSNR = ( 1.f - tmpf1 ) * hItd->cohSNR + tmpf1 * cohSNR;
1118 : }
1119 : else
1120 : {
1121 43401 : hItd->cohSNR = hItd->cohSNR - 0.05f;
1122 : }
1123 :
1124 153720 : cohSNR = hItd->cohSNR;
1125 :
1126 153720 : rfft( xcorr, trigo_enc, STEREO_DFT_N_32k_ENC, +1 );
1127 :
1128 153720 : itd_td = hItd->td_itd_32k[k_offset]; /* This ITD always operates at 32kHz*/
1129 153720 : shift = ( STEREO_DFT_N_32k_ENC / 2 - itd_td ) % STEREO_DFT_N_32k_ENC;
1130 153720 : split = STEREO_DFT_N_32k_ENC - shift;
1131 :
1132 153720 : mvr2r( &xcorr[0], &xcorr_itd[shift], split );
1133 153720 : mvr2r( &xcorr[split], &xcorr_itd[0], shift );
1134 :
1135 153720 : mvr2r( &xcorr_itd[STEREO_DFT_N_32k_ENC / 2 - XTALK_PHAT_LEN], gcc_phat, 2 * XTALK_PHAT_LEN + 1 );
1136 :
1137 : #ifdef DEBUG_MODE_DFT
1138 : dbgwrite( gcc_phat, sizeof( float ), 2 * XTALK_PHAT_LEN + 1, 1, "res/gcc_phat" );
1139 : #endif
1140 :
1141 153720 : thres = peak_detect( xcorr_itd, &tmpf1, &index, &zero_itd, cohSNR, hCPE->hCoreCoder[0]->vad_flag, &second_max, &second_max_lag, hItd->prev_itd, flag_noisy_speech_snr, hItd->detected_itd_flag, &hItd->prev_max, &hItd->prev_index, &hItd->prev_avg_max, &total_max );
1142 :
1143 153720 : hStereoClassif->ave_ener_L = sum_nrg_L / ( NFFT_mid * NFFT_mid );
1144 153720 : hStereoClassif->ave_ener_R = sum_nrg_R / ( NFFT_mid * NFFT_mid );
1145 :
1146 153720 : if ( hCPE->hCoreCoder[0]->input_Fs == 16000 )
1147 : {
1148 20503 : total_max *= 2.0f;
1149 : }
1150 153720 : hStereoClassif->unclr_fv[E_xcorr_itd_value] = total_max;
1151 153720 : hStereoClassif->xtalk_fv[E_xcorr_itd_value] = total_max;
1152 :
1153 : #if defined( DEBUG_MODE_DFT ) || defined( DEBUG_MODE_TD )
1154 : {
1155 : int16_t stmp = index - STEREO_DFT_ITD_MAX_ANA;
1156 : dbgwrite( &stmp, sizeof( int16_t ), 1, 1, "res/raw_itd.x" );
1157 : }
1158 : dbgwrite( &hStereoClassif->unclr_fv[E_xcorr_itd_value], sizeof( float ), 1, 1, "res/stereo_clf_raw_itd.x" );
1159 : #endif
1160 :
1161 : #ifdef DEBUG_MODE_DFT
1162 : {
1163 : int16_t tmp;
1164 :
1165 : tmp = (int16_t) ( tmpf1 * 100.f / thres );
1166 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_thres0.pcm" );
1167 : }
1168 : #endif
1169 : /*for tonal music items increase thresholding by a factor up to 2.*/
1170 153720 : if ( hCPE->hCoreCoder[0]->sp_aud_decision0 && ( index - STEREO_DFT_ITD_MAX_ANA ) != hItd->prev_itd && !flag_noisy_speech_snr && hCPE->hCoreCoder[0]->vad_flag && tmpf1 < 0.3 )
1171 : {
1172 8401 : thres *= 1.0f + 1.f * min( 1.f, max( 0.f, ( -1.0f * sfm_L + 0.5f ) / ( 0.5f - 0.2f ) ) );
1173 : }
1174 :
1175 : #ifdef DEBUG_MODE_DFT
1176 : {
1177 : int16_t tmp;
1178 :
1179 : tmp = (int16_t) ( sfm_L * 100.f );
1180 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_sfm.pcm" );
1181 : tmp = (int16_t) ( tmpf1 * 100.f / thres );
1182 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_thres.pcm" );
1183 : }
1184 : #endif
1185 :
1186 153720 : itd_cal_flag = 0;
1187 : /*smooth threshold value depending on sfm for music items*/
1188 153720 : if ( hCPE->hCoreCoder[0]->ini_frame == 0 || hCPE->last_element_mode != IVAS_CPE_DFT || !hCPE->hCoreCoder[0]->sp_aud_decision0 || flag_noisy_speech_snr || cohSNR < 20 )
1189 : {
1190 125453 : hItd->itd_thres = thres;
1191 : }
1192 : else
1193 : {
1194 28267 : hItd->itd_thres = ( 1.0f - sfm_L ) * hItd->itd_thres + sfm_L * thres;
1195 : }
1196 :
1197 153720 : if ( flag_noisy_speech_snr == 0 && hCPE->hCoreCoder[0]->vad_flag == 1 && hItd->detected_itd_flag == 0 && ( hItd->currFlatness < 1.5f || hCPE->hCoreCoder[0]->sp_aud_decision0 == 1 ) )
1198 : {
1199 14211 : hItd->itd_thres *= 1.5f;
1200 : }
1201 153720 : if ( hCPE->hCoreCoder[0]->vad_flag == 0 || hItd->detected_itd_flag == 0 )
1202 : {
1203 40125 : hItd->itd_tracking = 0;
1204 : }
1205 113595 : else if ( abs( hItd->prev_itd ) > 2 )
1206 : {
1207 53826 : hItd->itd_tracking = 1;
1208 : }
1209 :
1210 153720 : if ( hItd->itd_tracking == 1 && ( second_max > hItd->itd_thres || tmpf1 - second_max < min( tmpf1 * 0.5f, 0.2f ) ) )
1211 : {
1212 61665 : index = second_max_lag;
1213 : }
1214 :
1215 153720 : if ( hItd->itd_tracking == 1 && abs( hItd->prev_itd - ( index - STEREO_DFT_ITD_MAX_ANA ) ) <= max( 2, (int16_t) round_f( abs( hItd->prev_itd ) / 16.f ) ) )
1216 : {
1217 61665 : hItd->itd_thres *= 0.75f;
1218 : }
1219 :
1220 153720 : if ( tmpf1 > hItd->itd_thres && !zero_itd )
1221 : {
1222 : /* LP filter GCC PHAT peak to follow peak envelope */
1223 124059 : if ( tmpf1 > hItd->lp_phat_peak )
1224 : {
1225 38889 : alpha = LP_GCC_PHAT_UP;
1226 : }
1227 : else
1228 : {
1229 85170 : alpha = LP_GCC_PHAT_DOWN;
1230 : }
1231 124059 : hItd->lp_phat_peak = alpha * tmpf1 + ( 1 - alpha ) * hItd->lp_phat_peak;
1232 124059 : hItd->itd_cnt++;
1233 124059 : if ( hItd->itd_cnt > ITD_CNT_MAX || hItd->itd_hangover > 0 )
1234 : {
1235 : /* If max count is reached, or if an ITD candidate is found during hangover,
1236 : set itd_cnt = ITD_CNT_MAX to ensure hangover is applied */
1237 117323 : hItd->itd_cnt = ITD_CNT_MAX;
1238 : }
1239 124059 : hItd->itd_hangover = 0;
1240 :
1241 124059 : itd = index - STEREO_DFT_ITD_MAX_ANA;
1242 124059 : hItd->itd_nonzero_cnt = 0; /* (1+0+9) <= hItd->itd_nonzero_cnt <= (1+6+3) */
1243 124059 : itd_cal_flag = 1; /* Indicates P>T case */
1244 124059 : hItd->valid_itd_cnt = hItd->itd_cnt; /* Store last non-zero value (when P>T) before reset */
1245 124059 : hItd->detected_itd_flag = 1;
1246 : }
1247 : else
1248 : {
1249 : /* Set prev_itd hangover period */
1250 29661 : if ( hItd->itd_cnt == ITD_CNT_MAX )
1251 : {
1252 1212 : hItd->itd_hangover = max( 0, min( ITD_HO_MAX, (int16_t) ( hItd->lp_phat_peak * ITD_HO_GCC_PHAT_INCL + ITD_HO_GCC_PHAT_OFFS ) ) );
1253 : }
1254 :
1255 29661 : if ( hItd->itd_hangover > 0 )
1256 : {
1257 3349 : itd = hItd->prev_itd;
1258 3349 : if ( hItd->itd_nonzero_cnt < MAX_ITD_VAD_HANGOVER )
1259 : {
1260 2630 : hItd->itd_nonzero_cnt++;
1261 : }
1262 3349 : hItd->itd_hangover--;
1263 3349 : hItd->detected_itd_flag = 1;
1264 : }
1265 : else
1266 : {
1267 26312 : itd = 0;
1268 26312 : hItd->detected_itd_flag = 0;
1269 : }
1270 :
1271 : /* Reset */
1272 29661 : hItd->itd_cnt = 0;
1273 29661 : hItd->lp_phat_peak = 0.0f;
1274 : }
1275 :
1276 : {
1277 : /* stereo Xtalk classifier */
1278 153720 : xtalk_classifier_dft( hCPE, itd, gcc_phat );
1279 : }
1280 :
1281 : /*avoid enabling ITD fine control for music*/
1282 153720 : if ( !hCPE->hCoreCoder[0]->sp_aud_decision0 || flag_noisy_speech_snr || cohSNR < 20 )
1283 : {
1284 : /* ITD fine control based on vad and correlation parameters */
1285 124310 : cor_lb_avrg = 0.0f;
1286 124310 : par_L_avrg = 0.0f;
1287 497240 : for ( i = 0; i < XCORR_LB_NUM_BANDS; i++ )
1288 : {
1289 372930 : num_cor = xcorr_lb[i * XCORR_LB_BAND_WIDTH] * hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH];
1290 372930 : den_cor_cur = xcorr_lb[i * XCORR_LB_BAND_WIDTH] * xcorr_lb[i * XCORR_LB_BAND_WIDTH] + 1.0f;
1291 372930 : den_cor_prev = hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH] * hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH] + 1.0f;
1292 372930 : xcorr_max = xcorr_lb[i * XCORR_LB_BAND_WIDTH];
1293 372930 : sum_nrg_L_tmp = xcorr_lb[i * XCORR_LB_BAND_WIDTH];
1294 2983440 : for ( j = 1; j < XCORR_LB_BAND_WIDTH; j++ )
1295 : {
1296 2610510 : num_cor += xcorr_lb[i * XCORR_LB_BAND_WIDTH + j] * hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1297 2610510 : den_cor_cur += xcorr_lb[i * XCORR_LB_BAND_WIDTH + j] * xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1298 2610510 : den_cor_prev += hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH + j] * hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1299 2610510 : if ( xcorr_lb[i * XCORR_LB_BAND_WIDTH + j] > xcorr_max )
1300 : {
1301 724928 : xcorr_max = xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1302 : }
1303 2610510 : sum_nrg_L_tmp += xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1304 : }
1305 372930 : cor_lb[i] = num_cor / ( sqrtf( den_cor_cur ) * sqrtf( den_cor_prev ) );
1306 372930 : cor_lb_avrg += cor_lb[i];
1307 372930 : par_L[i] = xcorr_max / ( sum_nrg_L_tmp + FLT_MIN );
1308 372930 : par_L_avrg += par_L[i];
1309 : }
1310 124310 : cor_lb_avrg /= XCORR_LB_NUM_BANDS;
1311 124310 : par_L_avrg /= XCORR_LB_NUM_BANDS;
1312 :
1313 : /*Breakdown of fine-control conditions */
1314 124310 : fc_condition_1 = abs( hItd->prev_itd ) > 0.2f * abs( itd );
1315 124310 : fc_condition_2 = cor_lb_avrg > 0.85f;
1316 124310 : fc_condition_3 = ( cor_lb_avrg > 0.7f && ( cor_lb[0] > 0.9f || cor_lb[1] > 0.9f || cor_lb[2] > 0.9f ) && hItd->prev_sum_nrg_L_lb > 0.5f * sum_nrg_L_lb && hItd->prev_sum_nrg_L_lb < 2.0f * sum_nrg_L_lb );
1317 124310 : fc_condition_4 = par_L_avrg > 0.6f;
1318 124310 : fc_condition_5 = hItd->prev_itd != 0;
1319 124310 : fc_condition_6_a = itd * hItd->prev_itd < 0; /* ITD sign change */
1320 124310 : fc_condition_6_b = itd * hItd->prev_itd == 0; /* ITD jump to zero */
1321 124310 : fc_condition_6_c = abs( itd - hItd->prev_itd ) > 0.5f * ( ( abs( itd ) > abs( hItd->prev_itd ) ) ? abs( itd ) : abs( hItd->prev_itd ) ); /* Magnitude of the ITD jump */
1322 :
1323 : /* Combining conditions 1,2,3,4 */
1324 124310 : fc_condition_1234 = fc_condition_1 && ( fc_condition_2 || fc_condition_3 || fc_condition_4 );
1325 :
1326 124310 : if ( ( fc_condition_1234 && ( ( fc_condition_5 && fc_condition_6_b ) || fc_condition_6_c ) ) || ( fc_condition_1234 && fc_condition_6_a ) )
1327 : {
1328 1100 : itd = hItd->prev_itd;
1329 :
1330 1100 : if ( hItd->itd_nonzero_cnt < MAX_ITD_VAD_HANGOVER )
1331 : {
1332 940 : hItd->itd_nonzero_cnt++;
1333 : }
1334 1100 : hItd->detected_itd_flag = 1;
1335 : }
1336 :
1337 : /* stop the fine control when inactive or very high mssnr is detected*/
1338 124310 : if ( mssnr < 6e-7f * HIGHT_SNR_VOICE_TH || mssnr > 200 * HIGHT_SNR_VOICE_TH )
1339 : {
1340 39830 : hItd->itd_nonzero_cnt = MAX_ITD_VAD_HANGOVER;
1341 : }
1342 :
1343 124310 : if ( vad_flag_itd )
1344 : {
1345 : /* Fine-control for hangover if set HR period = 0 or if HR period expires */
1346 : /* However fine-control shouldn't be used when HR is disabled because itd_cnt < 2 - hence the extra last condition */
1347 108925 : if ( hItd->itd_hangover == 0 && hItd->prev_itd != 0 && itd == 0 && itd_cal_flag != 1 && hItd->itd_nonzero_cnt < MAX_ITD_VAD_HANGOVER && hItd->valid_itd_cnt == ITD_CNT_MAX && hItd->pre_vad == 1 )
1348 : {
1349 193 : itd = hItd->prev_itd;
1350 193 : hItd->itd_nonzero_cnt++;
1351 193 : hItd->detected_itd_flag = 1;
1352 : }
1353 108925 : hItd->pre_vad = 1;
1354 : }
1355 : else
1356 : {
1357 15385 : hItd->pre_vad = 0;
1358 : }
1359 :
1360 124310 : if ( itd == 0 && itd_cal_flag != 1 )
1361 : {
1362 20034 : hItd->itd_nonzero_cnt = 0;
1363 : }
1364 :
1365 124310 : hItd->prev_sum_nrg_L_lb = sum_nrg_L_lb;
1366 124310 : mvr2r( xcorr_lb, hItd->prev_xcorr_lb, STEREO_DFT_XCORR_LB_MAX );
1367 : }
1368 : /*save previous flag*/
1369 153720 : prev_itd_max = hItd->hybrid_itd_max;
1370 : /* enable hybrid ITD handling for very large ITDs*/
1371 153720 : hItd->hybrid_itd_max = ( abs( itd ) > STEREO_DFT_ITD_MAX && abs( itd ) < STEREO_DFT_ITD_MAX_ANA && !hCPE->hCoreCoder[0]->sp_aud_decision0 && hCPE->element_brate < IVAS_32k );
1372 : /* Update memory */
1373 153720 : hItd->prev_itd = itd;
1374 :
1375 153720 : itd = check_bounds_s( itd, -STEREO_DFT_ITD_MAX, STEREO_DFT_ITD_MAX );
1376 :
1377 : /*Inverse the time diff*/
1378 153720 : hItd->itd[k_offset] = -1.f * itd;
1379 :
1380 : /* collect UNCLR classifier parameters */
1381 153720 : hStereoClassif->unclr_fv[E_ITD] = hItd->itd[k_offset];
1382 :
1383 : #if defined( DEBUG_MODE_DFT ) || defined( DEBUG_MODE_TD )
1384 : dbgwrite( &hItd->itd[k_offset], sizeof( float ), 1, 1, "res/stereo_clf_ITD.x" );
1385 : #endif
1386 :
1387 : /* limit ITD range for MDCT stereo even more */
1388 153720 : if ( hCPE->element_mode == IVAS_CPE_MDCT && fabsf( hItd->itd[k_offset] ) > ITD_MAX_MDCT )
1389 : {
1390 3806 : itd = 0;
1391 : }
1392 :
1393 153720 : stereo_dft_quantize_itd( -1 * itd, hItd->itd + k_offset, input_frame * FRAMES_PER_SEC, hItd->itd_index + k_offset );
1394 :
1395 153720 : hItd->deltaItd[k_offset] = hItd->itd[k_offset] - hItd->td_itd[k_offset];
1396 :
1397 153720 : if ( hItd->hybrid_itd_max )
1398 : {
1399 : /*check if there is an ITD flip*/
1400 532 : itd_max_flip = ( hItd->itd[k_offset] * hItd->itd[k_offset - 1] < 0 );
1401 :
1402 532 : if ( hItd->deltaItd[k_offset - 1] != 0 && itd_max_flip == 0 )
1403 : {
1404 509 : int16_t tmp_itd = (int16_t) floorf( ( ( hItd->prev_itd ) * ( (float) input_frame / 640 ) ) + 0.5f );
1405 509 : hItd->deltaItd[k_offset] = -1.0f * tmp_itd - hItd->td_itd[k_offset];
1406 : }
1407 : }
1408 : /*signal change for next frame*/
1409 153720 : if ( prev_itd_max == 1 && hItd->hybrid_itd_max == 0 )
1410 : {
1411 21 : hItd->hybrid_itd_max = -1;
1412 : }
1413 :
1414 : #ifdef DEBUG_MODE_DFT
1415 : {
1416 : int16_t tmp;
1417 : static FILE *log_fid = NULL;
1418 :
1419 : tmp = (int16_t) hItd->itd[k_offset];
1420 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_combined_fc.pcm" );
1421 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_combined_fc.txt" );
1422 :
1423 : if ( log_fid == NULL )
1424 : log_fid = fopen( "./res/itd_log_td_itd.txt", "w" );
1425 : fprintf( log_fid, "frame: %d\t itd: %f\t td_itd: %d\t delta_itd: %f\n", frame, hItd->itd[1], (int16_t) hItd->td_itd[1], hItd->deltaItd[1] );
1426 : }
1427 : #endif
1428 :
1429 153720 : return;
1430 : }
|