Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <assert.h>
34 : #include <stdint.h>
35 : #include "options.h"
36 : #include <math.h>
37 : #include "cnst.h"
38 : #include "rom_enc.h"
39 : #include "rom_com.h"
40 : #include "prot.h"
41 : #include "ivas_prot.h"
42 : #include "ivas_cnst.h"
43 : #include "ivas_rom_com.h"
44 : #include "ivas_rom_enc.h"
45 : #ifdef DEBUGGING
46 : #include "debug.h"
47 : #endif
48 : #include "wmc_auto.h"
49 :
50 :
51 : /*-------------------------------------------------------------------------
52 : * Local constants
53 : *------------------------------------------------------------------------*/
54 :
55 : #define LP_GCC_PHAT_UP 0.9f /* LP filter coefficient, going up */
56 : #define LP_GCC_PHAT_DOWN 0.1f /* LP filter coefficient, going down */
57 : #define ITD_CNT_MAX 2 /* Minimum number of consecutive ITD estimates for ITD hangover */
58 : #define ITD_HO_GCC_PHAT_MAX 0.6f /* LP GCC PHAT value which gives zero hangover */
59 : #define ITD_HO_GCC_PHAT_MIN 0.3f /* LP GCC PHAT value which gives ITD_HO_MAX ITD hangover frames */
60 : #define ITD_HO_MAX 6 /* Maximum number of ITD hangover frames */
61 : #define ITD_HO_GCC_PHAT_INCL ( -( ITD_HO_MAX - 1 ) / ( ITD_HO_GCC_PHAT_MAX - ITD_HO_GCC_PHAT_MIN ) )
62 : #define ITD_HO_GCC_PHAT_OFFS ( -ITD_HO_GCC_PHAT_INCL * ITD_HO_GCC_PHAT_MAX + 1 )
63 : #define SFM_PROD_GRP 4 /* Maximum grouping of products for calculating SFM in ITD estimation */
64 : #define B_DENOM 0.083333333333333f /* 1/12: scaling applied to the summed per-band energy ratios in calc_mean_E_ratio() */
65 : #define L_SAMPLES 20 /* Number of lags per subdivision searched in peak_detect() */
66 : #define SUBDIV ( 2 * STEREO_DFT_ITD_MAX_ANA / L_SAMPLES ) /* Number of subdivisions of the analysed lag range in peak_detect() */
67 : #define DENOM 0.05f /* 1/20: scaling applied to the summed subdivision maxima in peak_detect() */
68 :
69 : #define XSPEC_ALPHA ( 1.f / 32 )
70 : #define CORR_FILT 0.8f
71 : #define CORR_RESET_FRAMES_MAX 20
72 :
73 : #define ITD_VAD_NOISE_INIT_FRAMES 30 /* Cap of the ITD VAD frame counter; below it the noise estimate is a running average */
74 : #define ITD_VAD_THRSHOLD 0.001f /* ms_snr below this value gives vad_flag_itd = 0 in stereo_dft_enc_itd_vad() */
75 : #define ITD_VAD_MS_SNR_UPDATE_THRESH 15.0f /* After the init phase, the noise estimate is only adapted when ms_snr is below this value */
76 : #define HIGHT_SNR_VOICE_TH 10000.0f
77 : #define MAX_ITD_VAD_HANGOVER 10
78 :
79 : #define XCORR_LB_NUM_BANDS 3
80 : #define XCORR_LB_BAND_WIDTH 8
81 :
82 : #define ITD_MAX_MDCT 80
83 :
84 :
85 : /*-------------------------------------------------------------------------
86 : * set_band_limits()
87 : *
88 : * configure bands as used in DFT Stereo
89 : *------------------------------------------------------------------------*/
90 :
91 14690 : static void set_band_limits(
92 : int16_t *nbands,
93 : int16_t band_limits[STEREO_DFT_BAND_MAX + 1],
94 : int16_t NFFT )
95 : {
96 14690 : band_limits[0] = 1;
97 14690 : *nbands = 0;
98 183900 : while ( band_limits[( *nbands )++] < NFFT / 2 )
99 : {
100 169210 : band_limits[*nbands] = (int16_t) round_f( dft_band_limits_erb4[*nbands] * ( (float) ( STEREO_DFT_N_NS_ENC ) / STEREO_DFT_N_NS ) );
101 : }
102 14690 : ( *nbands )--;
103 14690 : band_limits[*nbands] = NFFT / 2; /*Nyquist Freq*/
104 :
105 14690 : return;
106 : }
107 :
108 :
109 : /*-------------------------------------------------------------------------
110 : * stereo_dft_hybrid_ITD_flag()
111 : *
112 : * Get the hybrid ITD flag
113 : *------------------------------------------------------------------------*/
114 :
115 60557 : void stereo_dft_hybrid_ITD_flag(
116 : STEREO_DFT_CONFIG_DATA_HANDLE hConfig, /* o : DFT stereo configuration */
117 : const int32_t input_Fs, /* i : CPE element sampling rate */
118 : const int16_t hybrid_itd_max /* i : flag for hybrid ITD for very large ITDs */
119 : )
120 : {
121 60557 : if ( hConfig != NULL )
122 : {
123 60557 : if ( hConfig->res_cod_mode || ( hConfig->ada_wb_res_cod_mode && input_Fs == 16000 ) || ( hybrid_itd_max == 1 ) )
124 : {
125 20752 : hConfig->hybrid_itd_flag = 1;
126 : }
127 : else
128 : {
129 39805 : hConfig->hybrid_itd_flag = 0;
130 : }
131 : }
132 : else
133 : {
134 0 : assert( 0 && "Stereo Dft Config Data Handle is uninitialized" );
135 : }
136 :
137 60557 : return;
138 : }
139 :
140 :
141 : /*-------------------------------------------------------------------------
142 : * stereo_dft_quantize_itd()
143 : *
144 : * Quantize the ITD
145 : *------------------------------------------------------------------------*/
146 :
147 74369 : static void stereo_dft_quantize_itd(
148 : const int16_t in,
149 : float *out,
150 : const int32_t input_Fs,
151 : int16_t *ind )
152 : {
153 : int16_t itd;
154 :
155 74369 : itd = (int16_t) ( sign( in ) * 0.5f + in );
156 :
157 : /*Limit ITD*/
158 74369 : if ( ( ABSVAL( itd ) > STEREO_DFT_ITD_MAX ) || ( ABSVAL( itd ) < STEREO_DFT_ITD_MIN ) )
159 : {
160 40025 : itd = 0;
161 : }
162 : else
163 : {
164 34344 : *ind = ( ( itd < 0 ) << ( STEREO_DFT_ITD_NBITS - 1 ) ) + ABSVAL( itd ) - STEREO_DFT_ITD_MIN;
165 : }
166 :
167 : /*Convert back @ fs*/
168 74369 : *out = (float) ( itd * input_Fs ) / ( (float) ( STEREO_DFT_ITD_FS ) );
169 :
170 74369 : return;
171 : }
172 :
173 :
174 : /*-------------------------------------------------------------------------
175 : * itd_vad_ms_snr_calc()
176 : *
177 : *
178 : *-------------------------------------------------------------------------*/
179 :
180 74369 : static float itd_vad_ms_snr_calc(
181 : float E_band_n[STEREO_DFT_ITD_VAD_BAND_NUM],
182 : float *Spd,
183 : float *E_band )
184 : {
185 : float snr[STEREO_DFT_ITD_VAD_BAND_NUM];
186 : float msnr[STEREO_DFT_ITD_VAD_BAND_NUM];
187 : float ms_snr;
188 : int16_t i, j;
189 :
190 1561749 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
191 : {
192 1487380 : E_band[i] = 0;
193 24913615 : for ( j = itd_vad_band_tbl[i]; j < itd_vad_band_tbl[i + 1]; j++ )
194 : {
195 23426235 : E_band[i] += Spd[j];
196 : }
197 1487380 : E_band[i] = E_band[i] / ( itd_vad_band_tbl[i + 1] - itd_vad_band_tbl[i] );
198 : }
199 :
200 74369 : ms_snr = 0;
201 1561749 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
202 : {
203 1487380 : snr[i] = E_band[i] / E_band_n[i];
204 1487380 : if ( snr[i] < 1 )
205 : {
206 348773 : snr[i] = 1;
207 : }
208 1487380 : msnr[i] = snr[i] - 1.0f;
209 1487380 : if ( msnr[i] < 6 )
210 : {
211 607817 : msnr[i] = powf( msnr[i], 2 ) / 6.0f;
212 : }
213 1487380 : ms_snr += msnr[i];
214 : }
215 :
216 74369 : return ( ms_snr );
217 : }
218 :
219 :
220 : /*-------------------------------------------------------------------------
221 : * itd_vad_background_update()
222 : *
223 : *
224 : *-------------------------------------------------------------------------*/
225 :
226 74369 : static void itd_vad_background_update(
227 : float E_band_n[STEREO_DFT_ITD_VAD_BAND_NUM],
228 : int16_t *vad_frm_cnt,
229 : const float ms_snr,
230 : float *E_band )
231 : {
232 : int16_t i;
233 : float energy;
234 :
235 74369 : energy = 0.0f;
236 1561749 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
237 : {
238 1487380 : energy += E_band[i] / (float) STEREO_DFT_ITD_VAD_BAND_NUM;
239 : }
240 :
241 74369 : if ( *vad_frm_cnt < ITD_VAD_NOISE_INIT_FRAMES )
242 : {
243 10937 : ( *vad_frm_cnt )++;
244 : }
245 :
246 74369 : if ( ( *vad_frm_cnt < ITD_VAD_NOISE_INIT_FRAMES ) && energy < 40000000 )
247 : {
248 46662 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
249 : {
250 : /* using the init values as frame (-1) values */
251 44440 : E_band_n[i] = ( E_band_n[i] * ( (float) ( *vad_frm_cnt ) ) + E_band[i] ) / ( (float) ( *vad_frm_cnt + 1 ) );
252 : }
253 : }
254 : else
255 : {
256 72147 : if ( ms_snr < ITD_VAD_MS_SNR_UPDATE_THRESH )
257 : {
258 234171 : for ( i = 0; i < STEREO_DFT_ITD_VAD_BAND_NUM; i++ )
259 : {
260 223020 : E_band_n[i] = 0.96f * E_band_n[i] + 0.04f * E_band[i];
261 223020 : if ( E_band_n[i] < 1.0f )
262 : {
263 0 : E_band_n[i] = 1.0f;
264 : }
265 : }
266 : }
267 : }
268 :
269 74369 : return;
270 : }
271 :
272 : /*-------------------------------------------------------------------------
273 : * stereo_dft_enc_itd_vad()
274 : *
275 : *
276 : *-------------------------------------------------------------------------*/
277 :
278 74369 : static int16_t stereo_dft_enc_itd_vad(
279 : float E_band_n[STEREO_DFT_ITD_VAD_BAND_NUM],
280 : int16_t *vad_frm_cnt,
281 : float *Spd_L,
282 : float *Spd_R,
283 : float *mssnr )
284 : {
285 : int16_t i;
286 : float E_band[STEREO_DFT_ITD_VAD_BAND_NUM];
287 : int16_t vad_flag_itd;
288 :
289 : float Spd[STEREO_DFT_N_16k_ENC / 2 + 1];
290 :
291 : /* Spd is later only used starting at itd_vad_band_tbl[0], so only compute values starting from there */
292 : /* -> this avoids uninitialized values in Spd_L and Spd_R at index 0 to be used */
293 23500604 : for ( i = itd_vad_band_tbl[0]; i < STEREO_DFT_N_16k_ENC / 2; i++ )
294 : {
295 23426235 : Spd[i] = 0.5f * ( Spd_L[i] + Spd_R[i] );
296 : }
297 :
298 74369 : *mssnr = itd_vad_ms_snr_calc( E_band_n, Spd, E_band );
299 :
300 74369 : itd_vad_background_update( E_band_n, vad_frm_cnt, *mssnr, E_band );
301 :
302 74369 : if ( *mssnr < ITD_VAD_THRSHOLD )
303 : {
304 5592 : vad_flag_itd = 0;
305 : }
306 : else
307 : {
308 68777 : vad_flag_itd = 1;
309 : }
310 :
311 74369 : return ( vad_flag_itd );
312 : }
313 :
314 :
315 : /*-------------------------------------------------------------------------
316 : * calc_mean_E_ratio()
317 : *
318 : * calculates mean energy of main-to-background signal ratio
319 : *-------------------------------------------------------------------------*/
320 :
321 74369 : static float calc_mean_E_ratio(
322 : ITD_DATA_HANDLE hItd,
323 : int16_t nbands,
324 : int16_t band_limits[],
325 : const float sfm,
326 : const float nrg_L[STEREO_DFT_N_32k_ENC / 2],
327 : const float nrg_R[STEREO_DFT_N_32k_ENC / 2],
328 : float *total_mEr )
329 : {
330 : float sum_xcorr[2];
331 : float Sxcorr;
332 : float Er[STEREO_DFT_BAND_MAX], fi[STEREO_DFT_BAND_MAX], a, acorr;
333 : float sum_nrg_L, sum_nrg_R;
334 : int16_t b, i;
335 : float sum_Er;
336 : float total_fi;
337 : float grand_nrg_L, grand_nrg_R, grand_sum_xcorr_real, grand_sum_xcorr_img;
338 :
339 74369 : grand_nrg_L = 0.0f;
340 74369 : grand_nrg_R = 0.0f;
341 74369 : grand_sum_xcorr_real = 0.0f;
342 74369 : grand_sum_xcorr_img = 0.0f;
343 :
344 : /*take bands up to 32kHz bandwidth as ITD is always calculated at 32kHz sampling rate*/
345 74369 : nbands -= ( band_limits[nbands] > STEREO_DFT_N_32k_ENC / 2 );
346 :
347 74369 : sum_Er = 0;
348 776797 : for ( b = 0; b < nbands; b++ )
349 : {
350 : /*reset buffers*/
351 702428 : sum_xcorr[0] = 0.f;
352 702428 : sum_xcorr[1] = 0.f;
353 702428 : sum_nrg_L = 0.f;
354 702428 : sum_nrg_R = 0.f;
355 :
356 42344539 : for ( i = band_limits[b]; i < min( band_limits[b + 1], STEREO_DFT_N_32k_ENC / 2 ); i++ )
357 : {
358 41642111 : sum_xcorr[0] += hItd->xcorr_smooth[2 * i];
359 41642111 : sum_xcorr[1] += hItd->xcorr_smooth[2 * i + 1];
360 41642111 : sum_nrg_L += nrg_L[i];
361 41642111 : sum_nrg_R += nrg_R[i];
362 : }
363 :
364 702428 : Sxcorr = sum_xcorr[0] * sum_xcorr[0] + sum_xcorr[1] * sum_xcorr[1];
365 702428 : hItd->acorr_L[b] = ( 1.f - sfm ) * hItd->acorr_L[b] + sfm * sum_nrg_L;
366 702428 : hItd->acorr_R[b] = ( 1.f - sfm ) * hItd->acorr_R[b] + sfm * sum_nrg_R;
367 :
368 702428 : a = hItd->acorr_L[b] - hItd->acorr_R[b];
369 702428 : acorr = hItd->acorr_L[b] + hItd->acorr_R[b];
370 702428 : fi[b] = sqrtf( a * a + 4 * Sxcorr );
371 702428 : Er[b] = ( acorr + fi[b] ) / ( acorr - fi[b] + EPSILON );
372 702428 : sum_Er += Er[b];
373 :
374 702428 : grand_nrg_L += sum_nrg_L;
375 702428 : grand_nrg_R += sum_nrg_R;
376 702428 : grand_sum_xcorr_real += sum_xcorr[0];
377 702428 : grand_sum_xcorr_img += sum_xcorr[1];
378 : }
379 :
380 74369 : Sxcorr = grand_sum_xcorr_real * grand_sum_xcorr_real + grand_sum_xcorr_img * grand_sum_xcorr_img;
381 74369 : a = grand_nrg_L - grand_nrg_R;
382 74369 : acorr = grand_nrg_L + grand_nrg_R;
383 74369 : total_fi = sqrtf( a * a + 4 * Sxcorr );
384 74369 : *total_mEr = ( acorr + total_fi ) / ( acorr - total_fi + EPSILON );
385 :
386 74369 : return ( sum_Er * B_DENOM );
387 : }
388 :
389 :
390 : /*-------------------------------------------------------------------------
391 : * resetEstimates()
392 : *
393 : * resets long term estimates to initial values.
394 : *-------------------------------------------------------------------------*/
395 :
396 0 : static void resetEstimates(
397 : ITD_DATA_HANDLE hItd )
398 : {
399 0 : set_zero( hItd->xcorr_smooth, STEREO_DFT_N_32k_ENC );
400 0 : set_zero( hItd->acorr_L, STEREO_DFT_BAND_MAX );
401 0 : set_zero( hItd->acorr_R, STEREO_DFT_BAND_MAX );
402 0 : hItd->cohSNR = 15;
403 :
404 0 : return;
405 : }
406 :
407 : /*-------------------------------------------------------------------------
408 : * td_sm_filter()
409 : *
410 : * time-domain smoothing filter for smoothing the cross-correlation vector
411 : *-------------------------------------------------------------------------*/
412 :
413 13457 : static void td_sm_filter(
414 : float *x,
415 : float *y,
416 : const int16_t L )
417 : {
418 : int16_t i;
419 : float tmp_x[STEREO_DFT_N_32k_ENC + 1];
420 : float a0, a1;
421 :
422 13457 : set_f( tmp_x, 0, STEREO_DFT_N_32k_ENC + 1 );
423 13457 : mvr2r( x, tmp_x, L );
424 :
425 13457 : a0 = 0.5f;
426 13457 : a1 = 0.25f;
427 :
428 13457 : y[0] = a0 * tmp_x[0] + a1 * x[1];
429 5396257 : for ( i = 1; i < L; i++ )
430 : {
431 5382800 : y[i] = a1 * tmp_x[i + 1] + a0 * tmp_x[i] + a1 * tmp_x[i - 1];
432 : }
433 :
434 13457 : return;
435 : }
436 :
437 : /*-------------------------------------------------------------------------
438 : * peak_detect()
439 : *
440 : * function for calculating the threshold for peak detection of the
441 : * cross-correlation vector
442 : *-------------------------------------------------------------------------*/
443 :
444 74369 : static float peak_detect(
445 : float *xcorr_itd,
446 : float *max_max,
447 : int16_t *index,
448 : int16_t *zero_itd_flag,
449 : const float snr,
450 : const int16_t vad,
451 : float *second_max,
452 : int16_t *second_max_lag,
453 : const float prev_itd,
454 : const int16_t flag_noisy_speech_snr,
455 : const int16_t detected_itd_flag,
456 : float *prev_max,
457 : int16_t *prev_index,
458 : float *prev_avg_max,
459 : float *total_max )
460 : {
461 : int16_t i;
462 : float tmp_max[SUBDIV], tmp_xcorr_itd[2 * STEREO_DFT_ITD_MAX_ANA + 1], tmp_xcorr_itd_sm[2 * STEREO_DFT_ITD_MAX_ANA + 1];
463 : int16_t index_subd[SUBDIV], ind;
464 : float avg_max, max_low, max_high, sum_max, tmp_max_max;
465 : float thres_diff;
466 : float wfac;
467 : int16_t d, i1, i2;
468 :
469 74369 : wfac = 2.5f;
470 74369 : if ( snr > 50.f )
471 : {
472 8167 : wfac = 3.f;
473 : }
474 :
475 : /*detect maxima outside the [-5, 5] ms boundaries */
476 74369 : maximum( xcorr_itd, STEREO_DFT_N_32k_ENC / 2 - STEREO_DFT_ITD_MAX_ANA, &max_low );
477 74369 : maximum( xcorr_itd + STEREO_DFT_N_32k_ENC / 2 + STEREO_DFT_ITD_MAX_ANA + 1, STEREO_DFT_N_32k_ENC / 2 - STEREO_DFT_ITD_MAX_ANA - 1, &max_high );
478 :
479 : /* create temp buffer that includes xcorr within [-5, 5] ms */
480 74369 : mvr2r( xcorr_itd + STEREO_DFT_N_32k_ENC / 2 - STEREO_DFT_ITD_MAX_ANA, tmp_xcorr_itd, 2 * STEREO_DFT_ITD_MAX_ANA + 1 );
481 :
482 74369 : *index = maximumAbs( tmp_xcorr_itd, 2 * STEREO_DFT_ITD_MAX_ANA + 1, max_max );
483 74369 : *total_max = *max_max;
484 :
485 74369 : d = max( 2, (int16_t) round_f( fabsf( prev_itd ) / 16.f ) );
486 74369 : i1 = max( 0, (int16_t) prev_itd + STEREO_DFT_ITD_MAX_ANA - d );
487 74369 : i2 = min( 2 * STEREO_DFT_ITD_MAX_ANA, (int16_t) prev_itd + STEREO_DFT_ITD_MAX_ANA + d );
488 74369 : *second_max_lag = maximumAbs( tmp_xcorr_itd + i1, i2 - i1 + 1, second_max );
489 74369 : *second_max_lag += i1;
490 :
491 :
492 : /*if maximum is out of boundaries signal zero itd OR maximum value is negative*/
493 74369 : *zero_itd_flag = ( *max_max < max_low || *max_max < max_high );
494 :
495 74369 : if ( *zero_itd_flag )
496 : {
497 2199 : return 0;
498 : }
499 : else
500 : {
501 72170 : sum_max = 0;
502 :
503 72170 : if ( snr >= 25.f )
504 : {
505 : /*apply smoothing filter*/
506 13457 : td_sm_filter( tmp_xcorr_itd, tmp_xcorr_itd_sm, 2 * STEREO_DFT_ITD_MAX_ANA + 1 );
507 :
508 : /*subdivide the area of interest and look for local maxima*/
509 269140 : for ( i = 0; i < SUBDIV - 1; i++ )
510 : {
511 255683 : index_subd[i] = maximumAbs( &tmp_xcorr_itd_sm[i * L_SAMPLES], L_SAMPLES, &tmp_max[i] );
512 255683 : sum_max += tmp_max[i];
513 : }
514 :
515 13457 : index_subd[i] = maximumAbs( &tmp_xcorr_itd_sm[i * L_SAMPLES], L_SAMPLES + 1, &tmp_max[i] );
516 13457 : sum_max += tmp_max[i];
517 :
518 13457 : ind = maximumAbs( tmp_max, SUBDIV, &tmp_max_max );
519 :
520 : /*final position of maxmimum*/
521 13457 : *index = index_subd[ind] + ind * L_SAMPLES;
522 13457 : *max_max = tmp_max_max;
523 : /*calculate average of all maxima to determine the threshold*/
524 13457 : avg_max = sum_max * DENOM;
525 :
526 13457 : d = max( 2, (int16_t) round_f( fabsf( prev_itd ) / 16.f ) );
527 13457 : i1 = max( 0, (int16_t) prev_itd + STEREO_DFT_ITD_MAX_ANA - d );
528 13457 : i2 = min( 2 * STEREO_DFT_ITD_MAX_ANA, (int16_t) prev_itd + STEREO_DFT_ITD_MAX_ANA + d );
529 13457 : *second_max_lag = maximumAbs( tmp_xcorr_itd_sm + i1, i2 - i1 + 1, second_max );
530 13457 : *second_max_lag += i1;
531 : }
532 : else
533 : {
534 : /*determine weight for threshold depending on snr value*/
535 58713 : if ( snr <= 20.f && snr > 15.f )
536 : {
537 19677 : wfac = snr * 0.1f + 0.5f;
538 : }
539 : else
540 : {
541 39036 : wfac = 2.5f;
542 : }
543 :
544 1174260 : for ( i = 0; i < SUBDIV - 1; i++ )
545 : {
546 1115547 : index_subd[i] = maximumAbs( &tmp_xcorr_itd[i * L_SAMPLES], L_SAMPLES, &tmp_max[i] );
547 1115547 : sum_max += tmp_max[i];
548 : }
549 :
550 58713 : index_subd[i] = maximumAbs( &tmp_xcorr_itd[i * L_SAMPLES], L_SAMPLES + 1, &tmp_max[i] );
551 58713 : sum_max += tmp_max[i];
552 :
553 : /*calculate average of all maxima to determine the threshold*/
554 58713 : avg_max = sum_max * DENOM;
555 : }
556 :
557 : /*relax threshold if threshold is very close to max: when 7<snr<=15 and (thres-max)<0.05 or 15<snr<30 and (thres-max)<0.01*/
558 :
559 72170 : thres_diff = wfac * avg_max - *max_max;
560 :
561 72170 : if ( vad && thres_diff > 0.f && ( ( thres_diff < 0.05f && ( snr <= 15 && snr > 7.f ) ) || ( thres_diff < 0.01f && ( snr > 15.f && snr < 30.f ) ) ) )
562 : {
563 2319 : wfac = 2.0f;
564 : }
565 :
566 72170 : if ( flag_noisy_speech_snr == 1 )
567 : {
568 37577 : if ( vad == 0 )
569 : {
570 14036 : wfac = 2.5f;
571 : }
572 23541 : else if ( detected_itd_flag == 0 && *max_max > 1.5f * avg_max && *prev_max > 1.5f * *prev_avg_max && abs( *index - *prev_index ) <= 2 )
573 : {
574 106 : wfac = 1.5f;
575 : }
576 : else
577 : {
578 23435 : wfac = 2.0f;
579 : }
580 : }
581 72170 : *prev_max = *max_max;
582 72170 : *prev_avg_max = avg_max;
583 72170 : *prev_index = *index;
584 :
585 72170 : return ( wfac * avg_max );
586 : }
587 : }
588 :
589 :
590 : /*-------------------------------------------------------------------------
591 : * Compute stereo parameter: ITD
592 : * ITD: Interchannel Time Difference
593 : *------------------------------------------------------------------------*/
594 :
595 74369 : void stereo_dft_enc_compute_itd(
596 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
597 : float *DFT_L,
598 : float *DFT_R,
599 : const int16_t k_offset,
600 : const int16_t input_frame,
601 : const int16_t vad_flag_dtx[],
602 : const int16_t vad_hover_flag[],
603 : float *bin_nrgL,
604 : float *bin_nrgR )
605 : {
606 : int16_t i, j;
607 : STEREO_DFT_ENC_DATA_HANDLE hStereoDft;
608 : ITD_DATA_HANDLE hItd;
609 : float *pDFT_L, *pDFT_R;
610 : float abs_L, abs_R, prod_L, prod_R, sum_abs_L, sum_abs_R;
611 : float log_prod_L, log_prod_R;
612 : float sum_nrg_L, sum_nrg_R;
613 : float sfm_L, sfm_R;
614 : float xcorr[STEREO_DFT_N_32k_ENC];
615 : int16_t itd, itd_td;
616 : float xcorr_itd[STEREO_DFT_N_32k_ENC];
617 : float tmpf1, tmpf2, tmpf3;
618 : float thres, alpha;
619 : int16_t index;
620 : float xcorr_max, sum_nrg_L_lb, par_L[XCORR_LB_NUM_BANDS], par_L_avrg, sum_nrg_L_tmp;
621 : float xcorr_lb[STEREO_DFT_XCORR_LB_MAX];
622 : float num_cor, den_cor_cur, den_cor_prev, cor_lb_avrg;
623 : float cor_lb[XCORR_LB_NUM_BANDS];
624 : float Spd_L[STEREO_DFT_N_32k_ENC / 2 + 1];
625 : float Spd_R[STEREO_DFT_N_32k_ENC / 2 + 1];
626 : int16_t vad_flag_itd;
627 : float mssnr;
628 : int16_t itd_cal_flag;
629 : int16_t NFFT, NFFT_mid;
630 : int16_t zero_itd;
631 : float mEr;
632 : float cohSNR;
633 : float *pNrgL, *pNrgR;
634 : float second_max;
635 : int16_t second_max_lag;
636 : int16_t fc_condition_1, fc_condition_2, fc_condition_3, fc_condition_4, fc_condition_5, fc_condition_6_a, fc_condition_6_b, fc_condition_6_c;
637 : int16_t fc_condition_1234;
638 : int16_t split, shift, flag_noisy_speech_snr;
639 : float gcc_phat[2 * XTALK_PHAT_LEN + 1];
640 : float grand_dot_prod_real, grand_dot_prod_img;
641 : float xcorr_abs[STEREO_DFT_N_32k_ENC], sum_xcorr, prod_LL, prod_RR, total_mEr, total_max;
642 : STEREO_CLASSIF_HANDLE hStereoClassif;
643 : const float *dft_trigo32k;
644 : float trigo_enc[STEREO_DFT_N_32k_ENC / 2 + 1];
645 : float cng_xcorr_filt;
646 : float sum_nrg_delta;
647 : int16_t prev_itd_max;
648 : int16_t itd_max_flip;
649 :
650 74369 : if ( hCPE->element_mode == IVAS_CPE_DFT )
651 : {
652 59679 : hStereoDft = hCPE->hStereoDft;
653 59679 : hItd = hCPE->hStereoDft->hItd;
654 59679 : NFFT = min( STEREO_DFT_N_32k_ENC, hStereoDft->NFFT );
655 59679 : dft_trigo32k = hStereoDft->dft_trigo_32k;
656 : }
657 : else
658 : {
659 14690 : hStereoDft = NULL;
660 14690 : hItd = hCPE->hStereoMdct->hItd;
661 14690 : NFFT = min( STEREO_DFT_N_32k_ENC, hCPE->hStereoMdct->hDft_ana->NFFT );
662 14690 : dft_trigo32k = hCPE->hStereoMdct->hDft_ana->dft_trigo_32k;
663 : }
664 74369 : hStereoClassif = hCPE->hStereoClassif;
665 :
666 23872449 : for ( i = 0; i < STEREO_DFT_N_32k_ENC / 4; i++ )
667 : {
668 23798080 : trigo_enc[i] = dft_trigo32k[i];
669 23798080 : trigo_enc[STEREO_DFT_N_32k_ENC / 2 - i] = dft_trigo32k[i];
670 : }
671 74369 : trigo_enc[STEREO_DFT_N_32k_ENC / 4] = dft_trigo32k[STEREO_DFT_N_32k_ENC / 4];
672 :
673 74369 : flag_noisy_speech_snr = hCPE->hCoreCoder[0]->flag_noisy_speech_snr; /* flag from the previous frame */
674 :
675 : /* initializations to avoid compilation warnings */
676 74369 : sum_nrg_L = 0.0f;
677 74369 : sum_nrg_R = 0.0f;
678 74369 : sum_nrg_L_lb = 0.0f;
679 74369 : mssnr = 0.0f;
680 74369 : sfm_L = 0.0f;
681 :
682 :
683 74369 : NFFT_mid = (int16_t) ( ( min( STEREO_DFT_N_16k_ENC, NFFT ) ) * 0.5f );
684 :
685 74369 : pDFT_L = DFT_L;
686 74369 : pDFT_R = DFT_R;
687 74369 : pNrgL = bin_nrgL;
688 74369 : pNrgR = bin_nrgR;
689 74369 : xcorr[0] = 0.f;
690 74369 : xcorr[1] = 0.f;
691 74369 : log_prod_L = logf( max( FLT_MIN, ABSVAL( pDFT_L[0] ) ) );
692 74369 : log_prod_R = logf( max( FLT_MIN, ABSVAL( pDFT_R[0] ) ) );
693 74369 : prod_L = 1.0f;
694 74369 : prod_R = 1.0f;
695 74369 : sum_nrg_L = pDFT_L[0] * pDFT_L[0] + FLT_MIN;
696 74369 : sum_nrg_R = pDFT_R[0] * pDFT_R[0] + FLT_MIN;
697 74369 : sum_abs_L = ABSVAL( pDFT_L[0] ) + EPSILON;
698 74369 : sum_abs_R = ABSVAL( pDFT_R[0] ) + EPSILON;
699 74369 : xcorr_lb[0] = pDFT_L[0] * pDFT_L[0] + EPSILON;
700 74369 : sum_nrg_L_lb = xcorr_lb[0];
701 74369 : prod_LL = 1.0f;
702 74369 : prod_RR = 1.0f;
703 74369 : grand_dot_prod_real = EPSILON;
704 74369 : grand_dot_prod_img = EPSILON;
705 :
706 23798080 : for ( i = 1, j = 0; i < NFFT_mid; i++, j++ )
707 : {
708 23723711 : xcorr[2 * i] = pDFT_L[2 * i] * pDFT_R[2 * i] + pDFT_L[2 * i + 1] * pDFT_R[2 * i + 1];
709 23723711 : xcorr[2 * i + 1] = pDFT_L[2 * i + 1] * pDFT_R[2 * i] - pDFT_L[2 * i] * pDFT_R[2 * i + 1];
710 :
711 23723711 : pNrgL[i] = pDFT_L[2 * i] * pDFT_L[2 * i] + pDFT_L[2 * i + 1] * pDFT_L[2 * i + 1];
712 23723711 : pNrgR[i] = pDFT_R[2 * i] * pDFT_R[2 * i] + pDFT_R[2 * i + 1] * pDFT_R[2 * i + 1];
713 :
714 23723711 : Spd_L[i] = pNrgL[i];
715 23723711 : Spd_R[i] = pNrgR[i];
716 :
717 23723711 : abs_L = sqrtf( pNrgL[i] );
718 23723711 : abs_R = sqrtf( pNrgR[i] );
719 :
720 23723711 : sum_nrg_L += pNrgL[i];
721 23723711 : sum_nrg_R += pNrgR[i];
722 :
723 23723711 : sum_abs_L += abs_L;
724 23723711 : sum_abs_R += abs_R;
725 23723711 : prod_L *= abs_L;
726 23723711 : prod_R *= abs_R;
727 :
728 23723711 : grand_dot_prod_real += xcorr[2 * i];
729 23723711 : grand_dot_prod_img += xcorr[2 * i + 1];
730 23723711 : xcorr_abs[i] = sqrtf( xcorr[2 * i] * xcorr[2 * i] + xcorr[2 * i + 1] * xcorr[2 * i + 1] );
731 :
732 23723711 : prod_LL = prod_L;
733 23723711 : prod_RR = prod_R;
734 :
735 23723711 : if ( j == SFM_PROD_GRP || i == NFFT_mid - 1 )
736 : {
737 5949520 : prod_L = max( FLT_MIN, prod_L );
738 5949520 : prod_R = max( FLT_MIN, prod_R );
739 5949520 : log_prod_L += logf( prod_L );
740 5949520 : log_prod_R += logf( prod_R );
741 5949520 : prod_L = 1;
742 5949520 : prod_R = 1;
743 5949520 : j = 0;
744 : }
745 : }
746 :
747 : /* collect UNCLR classifier parameters */
748 : {
749 : float IPD, d_IPD, g_IPD, g_ILD, angle_rot, g_side, g_pred, abs_L_R, grand_nrg_DMX;
750 :
751 74369 : if ( hCPE->last_element_mode != IVAS_CPE_DFT )
752 : {
753 15448 : hStereoClassif->prev_g_IPD = 0.5f;
754 15448 : hStereoClassif->prev_IPD = 0.0f;
755 : }
756 :
757 :
758 74369 : abs_L_R = sqrtf( grand_dot_prod_real * grand_dot_prod_real + grand_dot_prod_img * grand_dot_prod_img );
759 74369 : grand_nrg_DMX = sum_nrg_L + sum_nrg_R + 2 * abs_L_R;
760 :
761 74369 : g_ILD = sqrtf( sum_nrg_L / ( sum_nrg_R + 1.0f ) );
762 74369 : g_ILD = fabsf( ( g_ILD - 1 ) / ( g_ILD + 1 ) );
763 74369 : hStereoClassif->unclr_fv[E_gainILD] = g_ILD;
764 74369 : hStereoClassif->xtalk_fv[E_gainILD] = g_ILD;
765 :
766 74369 : IPD = atan2f( grand_dot_prod_img, grand_dot_prod_real );
767 74369 : hStereoClassif->unclr_fv[E_IPD] = IPD;
768 74369 : hStereoClassif->xtalk_fv[E_IPD] = IPD;
769 :
770 74369 : d_IPD = fabsf( IPD - hStereoClassif->prev_IPD );
771 74369 : hStereoClassif->unclr_fv[E_d_IPD] = IPD; /* VM: need to replace IPD by d_IPD and re-train the UNCLR classifier for DFT stereo */
772 74369 : hStereoClassif->xtalk_fv[E_d_IPD] = d_IPD;
773 74369 : hStereoClassif->prev_IPD = IPD;
774 :
775 74369 : g_IPD = ( sum_nrg_L + sum_nrg_R + 2 * grand_dot_prod_real ) / grand_nrg_DMX;
776 74369 : if ( g_IPD >= 1.0f )
777 : {
778 855 : g_IPD = hStereoClassif->prev_g_IPD;
779 : }
780 : else
781 : {
782 73514 : hStereoClassif->prev_g_IPD = g_IPD;
783 : }
784 74369 : g_IPD = logf( 1.0f - g_IPD );
785 74369 : hStereoClassif->unclr_fv[E_gainIPD] = g_IPD;
786 74369 : hStereoClassif->xtalk_fv[E_gainIPD] = g_IPD;
787 :
788 74369 : if ( sum_nrg_L >= sum_nrg_R )
789 : {
790 23246 : sum_nrg_delta = max( sum_nrg_L - sum_nrg_R, 1.0f );
791 : }
792 : else
793 : {
794 51123 : sum_nrg_delta = min( sum_nrg_L - sum_nrg_R, -1.0f );
795 : }
796 74369 : angle_rot = fabsf( atanf( 2.0f * ( grand_dot_prod_real ) / sum_nrg_delta ) );
797 74369 : hStereoClassif->unclr_fv[E_angle_rot] = angle_rot;
798 74369 : hStereoClassif->xtalk_fv[E_angle_rot] = angle_rot;
799 :
800 74369 : g_side = fabsf( sum_nrg_L - sum_nrg_R ) / ( grand_nrg_DMX );
801 74369 : g_side = max( 0.01f, min( g_side, 0.99f ) );
802 74369 : hStereoClassif->unclr_fv[E_g_side] = g_side;
803 74369 : hStereoClassif->xtalk_fv[E_g_side] = g_side;
804 :
805 74369 : g_pred = logf( max( 0, ( ( 1 - g_side ) * sum_nrg_L + ( 1 + g_side ) * sum_nrg_R - 2 * abs_L_R ) ) + 1.0f );
806 74369 : g_pred = max( 14.0f, g_pred );
807 74369 : hStereoClassif->unclr_fv[E_g_pred] = g_pred;
808 74369 : hStereoClassif->xtalk_fv[E_g_pred] = g_pred;
809 : }
810 :
811 74369 : mvr2r( &Spd_L[1], &xcorr_lb[1], STEREO_DFT_XCORR_LB_MAX - 1 );
812 74369 : sum_nrg_L_lb = sum_nrg_L_lb + sum_f( &Spd_L[1], 11 );
813 :
814 74369 : vad_flag_itd = stereo_dft_enc_itd_vad( hItd->E_band_n, &( hItd->vad_frm_cnt ), Spd_L, Spd_R, &mssnr );
815 :
816 74369 : vad_flag_itd = vad_flag_itd && vad_flag_dtx[0];
817 :
818 74369 : if ( sum_nrg_L < EPSILON )
819 : {
820 0 : sfm_L = 0;
821 : }
822 : else
823 : {
824 74369 : sfm_L = expf( log_prod_L / ( NFFT_mid ) ) / ( sum_abs_L / ( NFFT_mid ) );
825 : }
826 :
827 74369 : if ( sum_nrg_R < EPSILON )
828 : {
829 0 : sfm_R = 0;
830 : }
831 : else
832 : {
833 74369 : sfm_R = expf( log_prod_R / ( NFFT_mid ) ) / ( sum_abs_R / ( NFFT_mid ) );
834 : }
835 :
836 74369 : if ( sfm_R > sfm_L )
837 : {
838 28686 : sfm_L = sfm_R;
839 : }
840 74369 : if ( hCPE->element_mode == IVAS_CPE_DFT )
841 : {
842 59679 : hStereoDft->sfm = sfm_L;
843 : }
844 :
845 18446849 : for ( ; i < NFFT / 2; i++ )
846 : {
847 18372480 : xcorr[2 * i] = pDFT_L[2 * i] * pDFT_R[2 * i] + pDFT_L[2 * i + 1] * pDFT_R[2 * i + 1];
848 18372480 : xcorr[2 * i + 1] = pDFT_L[2 * i + 1] * pDFT_R[2 * i] - pDFT_L[2 * i] * pDFT_R[2 * i + 1];
849 :
850 18372480 : pNrgL[i] = pDFT_L[2 * i] * pDFT_L[2 * i] + pDFT_L[2 * i + 1] * pDFT_L[2 * i + 1];
851 18372480 : pNrgR[i] = pDFT_R[2 * i] * pDFT_R[2 * i] + pDFT_R[2 * i + 1] * pDFT_R[2 * i + 1];
852 : /* Calculate L and R energy power spectrum */
853 18372480 : Spd_L[i] = pNrgL[i];
854 18372480 : Spd_R[i] = pNrgR[i];
855 : }
856 :
857 5499969 : for ( ; i < STEREO_DFT_N_32k_ENC / 2; i++ )
858 : {
859 5425600 : xcorr[2 * i] = 0.f;
860 5425600 : xcorr[2 * i + 1] = 0.f;
861 : }
862 :
863 74369 : hItd->xcorr_smooth[0] = 0.f;
864 74369 : hItd->xcorr_smooth[1] = 0.f;
865 74369 : xcorr[0] = sign( hItd->xcorr_smooth[0] );
866 74369 : xcorr[1] = sign( hItd->xcorr_smooth[1] );
867 :
868 :
869 74369 : if ( hCPE->element_mode == IVAS_CPE_DFT && ( hItd->td_itd[k_offset] - hItd->td_itd[k_offset - 1] ) )
870 : {
871 : float alphaD, c, s, c1, s1, ctmp, vtmp;
872 898 : alphaD = -2.f * EVS_PI * ( (float) hItd->td_itd[k_offset] - hItd->td_itd[k_offset - 1] ) / hStereoDft->NFFT;
873 898 : c1 = cosf( alphaD );
874 898 : s1 = sinf( alphaD );
875 898 : c = 1.f; /* cos(0) */
876 898 : s = 0.f; /* sin(0) */
877 :
878 535040 : for ( i = 1; i < NFFT / 2; i++ )
879 : {
880 534142 : ctmp = c;
881 534142 : c = c * c1 - s * s1;
882 534142 : s = ctmp * s1 + s * c1;
883 534142 : vtmp = hItd->xcorr_smooth[2 * i] * c - hItd->xcorr_smooth[2 * i + 1] * s;
884 534142 : hItd->xcorr_smooth[2 * i + 1] = hItd->xcorr_smooth[2 * i] * s + hItd->xcorr_smooth[2 * i + 1] * c;
885 534142 : hItd->xcorr_smooth[2 * i] = vtmp;
886 : }
887 : }
888 :
889 74369 : tmpf3 = 2.f;
890 74369 : if ( flag_noisy_speech_snr )
891 : {
892 37976 : alpha = -0.8f;
893 : }
894 : else
895 : {
896 36393 : alpha = -1.0f;
897 : }
898 :
899 74369 : if ( hCPE->hCoreCoder[0]->Opt_DTX_ON && hCPE->element_mode == IVAS_CPE_DFT )
900 : {
901 30303 : if ( hCPE->hFrontVad[0] != NULL )
902 : {
903 : /* Determine if we are in hangover */
904 30303 : if ( vad_hover_flag[0] && vad_hover_flag[1] )
905 : {
906 : /* Determine if we are in the first DTX hangover frame (also triggers for VAD hangover frame) */
907 930 : if ( hStereoDft->resetFrames > CORR_RESET_FRAMES_MAX )
908 : {
909 : /* Reset cross spectrum when there is hangover */
910 166 : set_f( hStereoDft->xspec_smooth, 0.0f, STEREO_DFT_N_32k_ENC );
911 166 : hStereoDft->resetFrames = 0;
912 166 : hStereoDft->currentNumUpdates = 0;
913 : /* Expected minimum number of updates including first SID */
914 166 : hStereoDft->expectedNumUpdates = 1 + min( hCPE->hFrontVad[0]->rem_dtx_ho, hCPE->hFrontVad[1]->rem_dtx_ho );
915 : }
916 764 : else if ( hStereoDft->currentNumUpdates >= hStereoDft->expectedNumUpdates )
917 : {
918 149 : hStereoDft->expectedNumUpdates += 1 + min( hCPE->hFrontVad[0]->rem_dtx_ho, hCPE->hFrontVad[1]->rem_dtx_ho );
919 : }
920 930 : if ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates == 0 )
921 : {
922 0 : cng_xcorr_filt = max( CORR_FILT, sfm_L );
923 : }
924 : else
925 : {
926 930 : cng_xcorr_filt = max( min( CORR_FILT, 10.0f * CORR_FILT / ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates ) ), sfm_L );
927 : }
928 930 : hStereoDft->currentNumUpdates++;
929 539840 : for ( i = 1; i < NFFT / 2; i++ )
930 : {
931 : /* Low pass filter cross L/R power spectrum */
932 538910 : hStereoDft->xspec_smooth[2 * i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->xspec_smooth[2 * i] + cng_xcorr_filt * xcorr[2 * i];
933 538910 : hStereoDft->xspec_smooth[2 * i + 1] = ( 1.f - cng_xcorr_filt ) * hStereoDft->xspec_smooth[2 * i + 1] + cng_xcorr_filt * xcorr[2 * i + 1];
934 :
935 : /* Low pass filter L/R power spectrum */
936 : /* Calculate coherence as cross spectral density divided by L*R power spectrum */
937 538910 : hStereoDft->Spd_L_smooth[i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->Spd_L_smooth[i] + cng_xcorr_filt * Spd_L[i];
938 538910 : hStereoDft->Spd_R_smooth[i] = ( 1.f - cng_xcorr_filt ) * hStereoDft->Spd_R_smooth[i] + cng_xcorr_filt * Spd_R[i];
939 : }
940 : }
941 29373 : else if ( vad_flag_dtx[0] == 0 )
942 : {
943 10467 : hStereoDft->resetFrames = 0;
944 : }
945 : else
946 : {
947 18906 : if ( hStereoDft->resetFrames < CORR_RESET_FRAMES_MAX + 1 )
948 : {
949 6915 : hStereoDft->resetFrames++;
950 : }
951 18906 : if ( !vad_hover_flag[0] && !vad_hover_flag[1] )
952 : {
953 17272 : hStereoDft->expectedNumUpdates = hStereoDft->currentNumUpdates;
954 : }
955 : }
956 : }
957 30303 : if ( ( vad_flag_dtx[0] == 0 ) || ( hCPE->hFrontVad[0] == NULL && ( hCPE->hCoreCoder[0]->last_core_brate == SID_2k40 || hCPE->hCoreCoder[0]->last_core_brate == FRAME_NO_DATA ) ) || hCPE->hStereoCng->first_SID_after_TD )
958 : {
959 14311 : if ( vad_flag_dtx[0] == 0 )
960 : {
961 : /* expectedNumUpdates updated after call to dtx() in SID frames */
962 10467 : if ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates == 0 )
963 : {
964 2 : cng_xcorr_filt = max( CORR_FILT, sfm_L );
965 : }
966 : else
967 : {
968 10465 : cng_xcorr_filt = max( min( CORR_FILT, 10.0f * CORR_FILT / ( hStereoDft->expectedNumUpdates + hStereoDft->currentNumUpdates ) ), sfm_L );
969 : }
970 10467 : hStereoDft->currentNumUpdates++;
971 10467 : hStereoDft->sfm = cng_xcorr_filt;
972 : }
973 : else /* use sfm for active frames */
974 : {
975 3844 : cng_xcorr_filt = sfm_L;
976 : }
977 :
978 : /* Copy state of xspec_smooth to xcorr_smooth in first CNG frame */
979 14311 : if ( hCPE->hStereoCng->cng_counter == 0 && vad_flag_dtx[0] == 0 )
980 : {
981 209 : mvr2r( hStereoDft->xspec_smooth, hItd->xcorr_smooth, NFFT );
982 : }
983 8239680 : for ( i = 1; i < NFFT / 2; i++ )
984 : {
985 : /* Low pass filter cross L/R power spectrum */
986 8225369 : hStereoDft->xspec_smooth[2 * i] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->xspec_smooth[2 * i] + XSPEC_ALPHA * xcorr[2 * i];
987 8225369 : hStereoDft->xspec_smooth[2 * i + 1] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->xspec_smooth[2 * i + 1] + XSPEC_ALPHA * xcorr[2 * i + 1];
988 8225369 : hItd->xcorr_smooth[2 * i] = ( 1.f - cng_xcorr_filt ) * hItd->xcorr_smooth[2 * i] + cng_xcorr_filt * xcorr[2 * i];
989 8225369 : hItd->xcorr_smooth[2 * i + 1] = ( 1.f - cng_xcorr_filt ) * hItd->xcorr_smooth[2 * i + 1] + cng_xcorr_filt * xcorr[2 * i + 1];
990 8225369 : tmpf1 = sqrtf( hItd->xcorr_smooth[i * 2] * hItd->xcorr_smooth[i * 2] + hItd->xcorr_smooth[i * 2 + 1] * hItd->xcorr_smooth[i * 2 + 1] );
991 8225369 : tmpf1 += EPSILON;
992 8225369 : tmpf2 = tmpf1;
993 8225369 : tmpf1 = powf( tmpf1, alpha );
994 8225369 : tmpf3 += tmpf2 * tmpf1;
995 8225369 : xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
996 8225369 : xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
997 :
998 : /* Low pass filter L/R power spectrum */
999 : /* Calculate coherence as cross spectral density divided by L*R power spectrum */
1000 8225369 : hStereoDft->Spd_L_smooth[i] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->Spd_L_smooth[i] + XSPEC_ALPHA * Spd_L[i];
1001 8225369 : hStereoDft->Spd_R_smooth[i] = ( 1.f - XSPEC_ALPHA ) * hStereoDft->Spd_R_smooth[i] + XSPEC_ALPHA * Spd_R[i];
1002 : }
1003 : }
1004 : else
1005 : {
1006 9144000 : for ( i = 1; i < NFFT / 2; i++ )
1007 : {
1008 9128008 : hItd->xcorr_smooth[2 * i] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i] + sfm_L * xcorr[2 * i];
1009 9128008 : hItd->xcorr_smooth[2 * i + 1] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i + 1] + sfm_L * xcorr[2 * i + 1];
1010 9128008 : tmpf1 = sqrtf( hItd->xcorr_smooth[i * 2] * hItd->xcorr_smooth[i * 2] + hItd->xcorr_smooth[i * 2 + 1] * hItd->xcorr_smooth[i * 2 + 1] );
1011 9128008 : tmpf1 += EPSILON;
1012 9128008 : tmpf2 = tmpf1;
1013 9128008 : tmpf1 = powf( tmpf1, alpha );
1014 9128008 : tmpf3 += tmpf2 * tmpf1;
1015 9128008 : xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
1016 9128008 : xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
1017 : }
1018 : }
1019 : }
1020 : else
1021 : {
1022 24786880 : for ( i = 1; i < NFFT / 2; i++ )
1023 : {
1024 24742814 : hItd->xcorr_smooth[2 * i] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i] + sfm_L * xcorr[2 * i];
1025 24742814 : hItd->xcorr_smooth[2 * i + 1] = ( 1.f - sfm_L ) * hItd->xcorr_smooth[2 * i + 1] + sfm_L * xcorr[2 * i + 1];
1026 24742814 : tmpf1 = sqrtf( hItd->xcorr_smooth[i * 2] * hItd->xcorr_smooth[i * 2] + hItd->xcorr_smooth[i * 2 + 1] * hItd->xcorr_smooth[i * 2 + 1] );
1027 24742814 : tmpf1 += EPSILON;
1028 24742814 : tmpf2 = tmpf1;
1029 24742814 : tmpf1 = powf( tmpf1, alpha );
1030 24742814 : tmpf3 += tmpf2 * tmpf1;
1031 24742814 : xcorr[2 * i] = hItd->xcorr_smooth[2 * i] * tmpf1;
1032 24742814 : xcorr[2 * i + 1] = hItd->xcorr_smooth[2 * i + 1] * tmpf1;
1033 : }
1034 : }
1035 74369 : tmpf1 = (float) ( NFFT / 2 + 1 ) / tmpf3;
1036 84415489 : for ( i = 0; i < NFFT; i++ )
1037 : {
1038 84341120 : xcorr[i] *= tmpf1;
1039 : }
1040 : /*calculate mean E ratio of main to background signal for cohSNR*/
1041 74369 : if ( hCPE->element_mode == IVAS_CPE_DFT )
1042 : {
1043 59679 : mEr = calc_mean_E_ratio( hItd, hStereoDft->nbands, hStereoDft->band_limits, sfm_L, pNrgL, pNrgR, &total_mEr );
1044 : }
1045 : else
1046 : {
1047 : int16_t nbands;
1048 : int16_t band_limits[STEREO_DFT_BAND_MAX + 1];
1049 :
1050 14690 : set_s( band_limits, 0, STEREO_DFT_BAND_MAX + 1 );
1051 14690 : set_band_limits( &nbands, band_limits, hCPE->hStereoMdct->hDft_ana->NFFT );
1052 14690 : mEr = calc_mean_E_ratio( hItd, nbands, band_limits, sfm_L, pNrgL, pNrgR, &total_mEr );
1053 : }
1054 :
1055 : /*calculate total cohSNR for frame in dB*/
1056 74369 : if ( mEr > 1.0f )
1057 : {
1058 71148 : cohSNR = 20 * log10f( mEr );
1059 : }
1060 : else
1061 : {
1062 3221 : cohSNR = 0;
1063 : }
1064 :
1065 : /* collect UNCLR classifier parameters */
1066 : {
1067 : float es_em, d_prodL_prodR;
1068 :
1069 74369 : if ( total_mEr < 1.0f )
1070 : {
1071 12616 : hStereoClassif->unclr_fv[E_cohSNR] = 0;
1072 : }
1073 : else
1074 : {
1075 61753 : hStereoClassif->unclr_fv[E_cohSNR] = 20 * log10f( total_mEr );
1076 : }
1077 :
1078 74369 : es_em = fabsf( sum_nrg_L - sum_nrg_R ) / ( sum_nrg_L + sum_nrg_R + 1e-5f );
1079 74369 : hStereoClassif->unclr_fv[E_es_em] = es_em;
1080 74369 : hStereoClassif->xtalk_fv[E_es_em] = es_em;
1081 :
1082 74369 : d_prodL_prodR = logf( max( prod_LL, prod_RR ) / ( min( prod_LL, prod_RR ) + 1e-5f ) + 1.0f );
1083 74369 : hStereoClassif->unclr_fv[E_d_prodL_prodR] = d_prodL_prodR;
1084 74369 : hStereoClassif->xtalk_fv[E_d_prodL_prodR] = d_prodL_prodR;
1085 :
1086 74369 : sum_xcorr = 0.0f;
1087 23798080 : for ( i = 1; i < NFFT_mid; i++ )
1088 : {
1089 23723711 : xcorr_abs[i] = logf( xcorr_abs[i] / ( sum_nrg_L + sum_nrg_R + 1e-5f ) + 1e-5f );
1090 23723711 : sum_xcorr += xcorr_abs[i];
1091 : }
1092 :
1093 74369 : hStereoClassif->unclr_fv[E_sum_xcorr] = sum_xcorr;
1094 74369 : hStereoClassif->xtalk_fv[E_sum_xcorr] = sum_xcorr;
1095 :
1096 : #if defined( DEBUG_MODE_DFT ) || defined( DEBUG_MODE_TD )
1097 : dbgwrite( &hStereoClassif->unclr_fv[E_cohSNR], sizeof( float ), 1, 1, "res/stereo_clf_cohSNR.x" );
1098 : dbgwrite( &hStereoClassif->unclr_fv[E_es_em], sizeof( float ), 1, 1, "res/stereo_clf_es_em.x" );
1099 : dbgwrite( &hStereoClassif->unclr_fv[E_d_prodL_prodR], sizeof( float ), 1, 1, "res/stereo_clf_d_prodL_prodR.x" );
1100 : dbgwrite( &hStereoClassif->unclr_fv[E_sum_xcorr], sizeof( float ), 1, 1, "res/stereo_clf_sum_xcorr.x" );
1101 : #endif
1102 : }
1103 :
1104 : /* reset estimates when silence is detected*/
1105 74369 : if ( ( sum_nrg_L && sum_nrg_R ) < EPSILON )
1106 : {
1107 0 : resetEstimates( hItd );
1108 : }
1109 :
1110 : /*smooth cohSNR with time */
1111 74369 : if ( ( hItd->cohSNR - cohSNR ) < 10.0f )
1112 : {
1113 66649 : tmpf1 = max( 0.05f, min( 0.25f, sfm_L * 0.5f ) );
1114 66649 : hItd->cohSNR = ( 1.f - tmpf1 ) * hItd->cohSNR + tmpf1 * cohSNR;
1115 : }
1116 : else
1117 : {
1118 7720 : hItd->cohSNR = hItd->cohSNR - 0.05f;
1119 : }
1120 :
1121 74369 : cohSNR = hItd->cohSNR;
1122 :
1123 74369 : rfft( xcorr, trigo_enc, STEREO_DFT_N_32k_ENC, +1 );
1124 :
1125 74369 : itd_td = hItd->td_itd_32k[k_offset]; /* This ITD always operates at 32kHz*/
1126 74369 : shift = ( STEREO_DFT_N_32k_ENC / 2 - itd_td ) % STEREO_DFT_N_32k_ENC;
1127 74369 : split = STEREO_DFT_N_32k_ENC - shift;
1128 :
1129 74369 : mvr2r( &xcorr[0], &xcorr_itd[shift], split );
1130 74369 : mvr2r( &xcorr[split], &xcorr_itd[0], shift );
1131 :
1132 74369 : mvr2r( &xcorr_itd[STEREO_DFT_N_32k_ENC / 2 - XTALK_PHAT_LEN], gcc_phat, 2 * XTALK_PHAT_LEN + 1 );
1133 :
1134 : #ifdef DEBUG_MODE_DFT
1135 : dbgwrite( gcc_phat, sizeof( float ), 2 * XTALK_PHAT_LEN + 1, 1, "res/gcc_phat" );
1136 : #endif
1137 :
1138 74369 : thres = peak_detect( xcorr_itd, &tmpf1, &index, &zero_itd, cohSNR, hCPE->hCoreCoder[0]->vad_flag, &second_max, &second_max_lag, hItd->prev_itd, flag_noisy_speech_snr, hItd->detected_itd_flag, &hItd->prev_max, &hItd->prev_index, &hItd->prev_avg_max, &total_max );
1139 :
1140 74369 : hStereoClassif->ave_ener_L = sum_nrg_L / ( NFFT_mid * NFFT_mid );
1141 74369 : hStereoClassif->ave_ener_R = sum_nrg_R / ( NFFT_mid * NFFT_mid );
1142 :
1143 74369 : if ( hCPE->hCoreCoder[0]->input_Fs == 16000 )
1144 : {
1145 16955 : total_max *= 2.0f;
1146 : }
1147 74369 : hStereoClassif->unclr_fv[E_xcorr_itd_value] = total_max;
1148 74369 : hStereoClassif->xtalk_fv[E_xcorr_itd_value] = total_max;
1149 :
1150 : #if defined( DEBUG_MODE_DFT ) || defined( DEBUG_MODE_TD )
1151 : {
1152 : int16_t stmp = index - STEREO_DFT_ITD_MAX_ANA;
1153 : dbgwrite( &stmp, sizeof( int16_t ), 1, 1, "res/raw_itd.x" );
1154 : }
1155 : dbgwrite( &hStereoClassif->unclr_fv[E_xcorr_itd_value], sizeof( float ), 1, 1, "res/stereo_clf_raw_itd.x" );
1156 : #endif
1157 :
1158 : #ifdef DEBUG_MODE_DFT
1159 : {
1160 : int16_t tmp;
1161 :
1162 : tmp = (int16_t) ( tmpf1 * 100.f / thres );
1163 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_thres0.pcm" );
1164 : }
1165 : #endif
1166 : /* for tonal music items, increase the thresholding by a factor of up to 2 */
1167 74369 : if ( hCPE->hCoreCoder[0]->sp_aud_decision0 && ( index - STEREO_DFT_ITD_MAX_ANA ) != hItd->prev_itd && !flag_noisy_speech_snr && hCPE->hCoreCoder[0]->vad_flag && tmpf1 < 0.3 )
1168 : {
1169 1406 : thres *= 1.0f + 1.f * min( 1.f, max( 0.f, ( -1.0f * sfm_L + 0.5f ) / ( 0.5f - 0.2f ) ) );
1170 : }
1171 :
1172 : #ifdef DEBUG_MODE_DFT
1173 : {
1174 : int16_t tmp;
1175 :
1176 : tmp = (int16_t) ( sfm_L * 100.f );
1177 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_sfm.pcm" );
1178 : tmp = (int16_t) ( tmpf1 * 100.f / thres );
1179 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_thres.pcm" );
1180 : }
1181 : #endif
1182 :
1183 74369 : itd_cal_flag = 0;
1184 : /*smooth threshold value depending on sfm for music items*/
1185 74369 : if ( hCPE->hCoreCoder[0]->ini_frame == 0 || hCPE->last_element_mode != IVAS_CPE_DFT || !hCPE->hCoreCoder[0]->sp_aud_decision0 || flag_noisy_speech_snr || cohSNR < 20 )
1186 : {
1187 70352 : hItd->itd_thres = thres;
1188 : }
1189 : else
1190 : {
1191 4017 : hItd->itd_thres = ( 1.0f - sfm_L ) * hItd->itd_thres + sfm_L * thres;
1192 : }
1193 :
1194 74369 : if ( flag_noisy_speech_snr == 0 && hCPE->hCoreCoder[0]->vad_flag == 1 && hItd->detected_itd_flag == 0 && ( hItd->currFlatness < 1.5f || hCPE->hCoreCoder[0]->sp_aud_decision0 == 1 ) )
1195 : {
1196 4768 : hItd->itd_thres *= 1.5f;
1197 : }
1198 74369 : if ( hCPE->hCoreCoder[0]->vad_flag == 0 || hItd->detected_itd_flag == 0 )
1199 : {
1200 24942 : hItd->itd_tracking = 0;
1201 : }
1202 49427 : else if ( abs( hItd->prev_itd ) > 2 )
1203 : {
1204 34613 : hItd->itd_tracking = 1;
1205 : }
1206 :
1207 74369 : if ( hItd->itd_tracking == 1 && ( second_max > hItd->itd_thres || tmpf1 - second_max < min( tmpf1 * 0.5f, 0.2f ) ) )
1208 : {
1209 35728 : index = second_max_lag;
1210 : }
1211 :
1212 74369 : if ( hItd->itd_tracking == 1 && abs( hItd->prev_itd - ( index - STEREO_DFT_ITD_MAX_ANA ) ) <= max( 2, (int16_t) round_f( abs( hItd->prev_itd ) / 16.f ) ) )
1213 : {
1214 35728 : hItd->itd_thres *= 0.75f;
1215 : }
1216 :
1217 74369 : if ( tmpf1 > hItd->itd_thres && !zero_itd )
1218 : {
1219 : /* LP filter GCC PHAT peak to follow peak envelope */
1220 57902 : if ( tmpf1 > hItd->lp_phat_peak )
1221 : {
1222 17636 : alpha = LP_GCC_PHAT_UP;
1223 : }
1224 : else
1225 : {
1226 40266 : alpha = LP_GCC_PHAT_DOWN;
1227 : }
1228 57902 : hItd->lp_phat_peak = alpha * tmpf1 + ( 1 - alpha ) * hItd->lp_phat_peak;
1229 57902 : hItd->itd_cnt++;
1230 57902 : if ( hItd->itd_cnt > ITD_CNT_MAX || hItd->itd_hangover > 0 )
1231 : {
1232 : /* If max count is reached, or if an ITD candidate is found during hangover,
1233 : set itd_cnt = ITD_CNT_MAX to ensure hangover is applied */
1234 55699 : hItd->itd_cnt = ITD_CNT_MAX;
1235 : }
1236 57902 : hItd->itd_hangover = 0;
1237 :
1238 57902 : itd = index - STEREO_DFT_ITD_MAX_ANA;
1239 57902 : hItd->itd_nonzero_cnt = 0; /* (1+0+9) <= hItd->itd_nonzero_cnt <= (1+6+3) */
1240 57902 : itd_cal_flag = 1; /* Indicates P>T case */
1241 57902 : hItd->valid_itd_cnt = hItd->itd_cnt; /* Store last non-zero value (when P>T) before reset */
1242 57902 : hItd->detected_itd_flag = 1;
1243 : }
1244 : else
1245 : {
1246 : /* Set prev_itd hangover period */
1247 16467 : if ( hItd->itd_cnt == ITD_CNT_MAX )
1248 : {
1249 723 : hItd->itd_hangover = max( 0, min( ITD_HO_MAX, (int16_t) ( hItd->lp_phat_peak * ITD_HO_GCC_PHAT_INCL + ITD_HO_GCC_PHAT_OFFS ) ) );
1250 : }
1251 :
1252 16467 : if ( hItd->itd_hangover > 0 )
1253 : {
1254 2151 : itd = hItd->prev_itd;
1255 2151 : if ( hItd->itd_nonzero_cnt < MAX_ITD_VAD_HANGOVER )
1256 : {
1257 1761 : hItd->itd_nonzero_cnt++;
1258 : }
1259 2151 : hItd->itd_hangover--;
1260 2151 : hItd->detected_itd_flag = 1;
1261 : }
1262 : else
1263 : {
1264 14316 : itd = 0;
1265 14316 : hItd->detected_itd_flag = 0;
1266 : }
1267 :
1268 : /* Reset */
1269 16467 : hItd->itd_cnt = 0;
1270 16467 : hItd->lp_phat_peak = 0.0f;
1271 : }
1272 :
1273 : {
1274 : /* stereo Xtalk classifier */
1275 74369 : xtalk_classifier_dft( hCPE, itd, gcc_phat );
1276 : }
1277 :
1278 : /*avoid enabling ITD fine control for music*/
1279 74369 : if ( !hCPE->hCoreCoder[0]->sp_aud_decision0 || flag_noisy_speech_snr || cohSNR < 20 )
1280 : {
1281 : /* ITD fine control based on VAD and correlation parameters */
1282 69563 : cor_lb_avrg = 0.0f;
1283 69563 : par_L_avrg = 0.0f;
1284 278252 : for ( i = 0; i < XCORR_LB_NUM_BANDS; i++ )
1285 : {
1286 208689 : num_cor = xcorr_lb[i * XCORR_LB_BAND_WIDTH] * hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH];
1287 208689 : den_cor_cur = xcorr_lb[i * XCORR_LB_BAND_WIDTH] * xcorr_lb[i * XCORR_LB_BAND_WIDTH] + 1.0f;
1288 208689 : den_cor_prev = hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH] * hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH] + 1.0f;
1289 208689 : xcorr_max = xcorr_lb[i * XCORR_LB_BAND_WIDTH];
1290 208689 : sum_nrg_L_tmp = xcorr_lb[i * XCORR_LB_BAND_WIDTH];
1291 1669512 : for ( j = 1; j < XCORR_LB_BAND_WIDTH; j++ )
1292 : {
1293 1460823 : num_cor += xcorr_lb[i * XCORR_LB_BAND_WIDTH + j] * hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1294 1460823 : den_cor_cur += xcorr_lb[i * XCORR_LB_BAND_WIDTH + j] * xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1295 1460823 : den_cor_prev += hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH + j] * hItd->prev_xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1296 1460823 : if ( xcorr_lb[i * XCORR_LB_BAND_WIDTH + j] > xcorr_max )
1297 : {
1298 394034 : xcorr_max = xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1299 : }
1300 1460823 : sum_nrg_L_tmp += xcorr_lb[i * XCORR_LB_BAND_WIDTH + j];
1301 : }
1302 208689 : cor_lb[i] = num_cor / ( sqrtf( den_cor_cur ) * sqrtf( den_cor_prev ) );
1303 208689 : cor_lb_avrg += cor_lb[i];
1304 208689 : par_L[i] = xcorr_max / ( sum_nrg_L_tmp + FLT_MIN );
1305 208689 : par_L_avrg += par_L[i];
1306 : }
1307 69563 : cor_lb_avrg /= XCORR_LB_NUM_BANDS;
1308 69563 : par_L_avrg /= XCORR_LB_NUM_BANDS;
1309 :
1310 : /*Breakdown of fine-control conditions */
1311 69563 : fc_condition_1 = abs( hItd->prev_itd ) > 0.2f * abs( itd );
1312 69563 : fc_condition_2 = cor_lb_avrg > 0.85f;
1313 69563 : fc_condition_3 = ( cor_lb_avrg > 0.7f && ( cor_lb[0] > 0.9f || cor_lb[1] > 0.9f || cor_lb[2] > 0.9f ) && hItd->prev_sum_nrg_L_lb > 0.5f * sum_nrg_L_lb && hItd->prev_sum_nrg_L_lb < 2.0f * sum_nrg_L_lb );
1314 69563 : fc_condition_4 = par_L_avrg > 0.6f;
1315 69563 : fc_condition_5 = hItd->prev_itd != 0;
1316 69563 : fc_condition_6_a = itd * hItd->prev_itd < 0; /* ITD sign change */
1317 69563 : fc_condition_6_b = itd * hItd->prev_itd == 0; /* ITD jump to zero */
1318 69563 : fc_condition_6_c = abs( itd - hItd->prev_itd ) > 0.5f * ( ( abs( itd ) > abs( hItd->prev_itd ) ) ? abs( itd ) : abs( hItd->prev_itd ) ); /* Magnitude of the ITD jump */
1319 :
1320 : /* Combining conditions 1,2,3,4 */
1321 69563 : fc_condition_1234 = fc_condition_1 && ( fc_condition_2 || fc_condition_3 || fc_condition_4 );
1322 :
1323 69563 : if ( ( fc_condition_1234 && ( ( fc_condition_5 && fc_condition_6_b ) || fc_condition_6_c ) ) || ( fc_condition_1234 && fc_condition_6_a ) )
1324 : {
1325 521 : itd = hItd->prev_itd;
1326 :
1327 521 : if ( hItd->itd_nonzero_cnt < MAX_ITD_VAD_HANGOVER )
1328 : {
1329 407 : hItd->itd_nonzero_cnt++;
1330 : }
1331 521 : hItd->detected_itd_flag = 1;
1332 : }
1333 :
1334 : /* stop the fine control when inactive or very high mssnr is detected*/
1335 69563 : if ( mssnr < 6e-7f * HIGHT_SNR_VOICE_TH || mssnr > 200 * HIGHT_SNR_VOICE_TH )
1336 : {
1337 13758 : hItd->itd_nonzero_cnt = MAX_ITD_VAD_HANGOVER;
1338 : }
1339 :
1340 69563 : if ( vad_flag_itd )
1341 : {
1342 : /* Fine-control for hangover if set HR period = 0 or if HR period expires */
1343 : /* However fine-control shouldn't be used when HR is disabled because itd_cnt < 2 - hence the extra last condition */
1344 56342 : if ( hItd->itd_hangover == 0 && hItd->prev_itd != 0 && itd == 0 && itd_cal_flag != 1 && hItd->itd_nonzero_cnt < MAX_ITD_VAD_HANGOVER && hItd->valid_itd_cnt == ITD_CNT_MAX && hItd->pre_vad == 1 )
1345 : {
1346 137 : itd = hItd->prev_itd;
1347 137 : hItd->itd_nonzero_cnt++;
1348 137 : hItd->detected_itd_flag = 1;
1349 : }
1350 56342 : hItd->pre_vad = 1;
1351 : }
1352 : else
1353 : {
1354 13221 : hItd->pre_vad = 0;
1355 : }
1356 :
1357 69563 : if ( itd == 0 && itd_cal_flag != 1 )
1358 : {
1359 13711 : hItd->itd_nonzero_cnt = 0;
1360 : }
1361 :
1362 69563 : hItd->prev_sum_nrg_L_lb = sum_nrg_L_lb;
1363 69563 : mvr2r( xcorr_lb, hItd->prev_xcorr_lb, STEREO_DFT_XCORR_LB_MAX );
1364 : }
1365 : /*save previous flag*/
1366 74369 : prev_itd_max = hItd->hybrid_itd_max;
1367 : /* enable hybrid ITD handling for very large ITDs*/
1368 74369 : hItd->hybrid_itd_max = ( abs( itd ) > STEREO_DFT_ITD_MAX && abs( itd ) < STEREO_DFT_ITD_MAX_ANA && !hCPE->hCoreCoder[0]->sp_aud_decision0 && hCPE->element_brate < IVAS_32k );
1369 : /* Update memory */
1370 74369 : hItd->prev_itd = itd;
1371 :
1372 74369 : itd = check_bounds_s( itd, -STEREO_DFT_ITD_MAX, STEREO_DFT_ITD_MAX );
1373 :
1374 : /*Inverse the time diff*/
1375 74369 : hItd->itd[k_offset] = -1.f * itd;
1376 :
1377 : /* collect UNCLR classifier parameters */
1378 74369 : hStereoClassif->unclr_fv[E_ITD] = hItd->itd[k_offset];
1379 :
1380 : #if defined( DEBUG_MODE_DFT ) || defined( DEBUG_MODE_TD )
1381 : dbgwrite( &hItd->itd[k_offset], sizeof( float ), 1, 1, "res/stereo_clf_ITD.x" );
1382 : #endif
1383 :
1384 : /* limit ITD range for MDCT stereo even more */
1385 74369 : if ( hCPE->element_mode == IVAS_CPE_MDCT && fabsf( hItd->itd[k_offset] ) > ITD_MAX_MDCT )
1386 : {
1387 3103 : itd = 0;
1388 : }
1389 :
1390 74369 : stereo_dft_quantize_itd( -1 * itd, hItd->itd + k_offset, input_frame * FRAMES_PER_SEC, hItd->itd_index + k_offset );
1391 :
1392 74369 : hItd->deltaItd[k_offset] = hItd->itd[k_offset] - hItd->td_itd[k_offset];
1393 :
1394 74369 : if ( hItd->hybrid_itd_max )
1395 : {
1396 : /*check if there is an ITD flip*/
1397 512 : itd_max_flip = ( hItd->itd[k_offset] * hItd->itd[k_offset - 1] < 0 );
1398 :
1399 512 : if ( hItd->deltaItd[k_offset - 1] != 0 && itd_max_flip == 0 )
1400 : {
1401 490 : int16_t tmp_itd = (int16_t) floorf( ( ( hItd->prev_itd ) * ( (float) input_frame / 640 ) ) + 0.5f );
1402 490 : hItd->deltaItd[k_offset] = -1.0f * tmp_itd - hItd->td_itd[k_offset];
1403 : }
1404 : }
1405 : /*signal change for next frame*/
1406 74369 : if ( prev_itd_max == 1 && hItd->hybrid_itd_max == 0 )
1407 : {
1408 21 : hItd->hybrid_itd_max = -1;
1409 : }
1410 :
1411 : #ifdef DEBUG_MODE_DFT
1412 : {
1413 : int16_t tmp;
1414 : static FILE *log_fid = NULL;
1415 :
1416 : tmp = (int16_t) hItd->itd[k_offset];
1417 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_combined_fc.pcm" );
1418 : dbgwrite( &tmp, sizeof( int16_t ), 1, input_frame, "./res/stereo_dft_itd_combined_fc.txt" );
1419 :
1420 : if ( log_fid == NULL )
1421 : log_fid = fopen( "./res/itd_log_td_itd.txt", "w" );
1422 : fprintf( log_fid, "frame: %d\t itd: %f\t td_itd: %d\t delta_itd: %f\n", frame, hItd->itd[1], (int16_t) hItd->td_itd[1], hItd->deltaItd[1] );
1423 : }
1424 : #endif
1425 :
1426 74369 : return;
1427 : }
|