Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include <math.h>
35 : #include "options.h"
36 : #include "cnst.h"
37 : #include "rom_com.h"
38 : #include "prot.h"
39 : #include "ivas_prot.h"
40 : #include "ivas_rom_com.h"
41 : #include "ivas_rom_enc.h"
42 : #include "ivas_cnst.h"
43 : #ifdef DEBUGGING
44 : #include "debug.h"
45 : #endif
46 : #include "wmc_auto.h"
47 :
48 :
49 : /*-------------------------------------------------------------------*
50 : * Local constants
51 : *-------------------------------------------------------------------*/
52 :
53 : #define RC_FACT_UP 0.3f /* tau passed to rc_filter() when relE_0_1 is above the mean of relE_buf */
54 : #define RC_FACT_DOWN 0.7f /* tau passed to rc_filter() otherwise */
55 : #define UNCLR_SCORE_THR 4.0f /* raw UNCLR score is clipped to +/- this value, then divided by twice this value */
56 : #define XTALK_SCORE_THR_DFT 4.0f /* presumably the clipping bound of the raw DFT xtalk score - not referenced in the TD/UNCLR classifiers; TODO confirm */
57 : #define XTALK_SCORE_THR_TD_UP 3.0f /* raw TD xtalk score above this maps to +1; positive scores are divided by it */
58 : #define XTALK_SCORE_THR_TD_DN 4.0f /* raw TD xtalk score below minus this maps to -1; non-positive scores are divided by it */
59 :
60 : #define UNCLR_INTERCEPT_TD 0.780313f /* start value of the raw score sum in unclr_classifier_td() */
61 : #define UNCLR_INTERCEPT_DFT 1.226513f /* start value of the raw score sum in unclr_classifier_dft() */
62 : #define XTALK_INTERCEPT_TD -1.770983f /* start value of the raw score sum in xtalk_classifier_td() */
63 : #define XTALK_INTERCEPT_DFT -0.758556f /* presumably the start value of the raw score sum of the DFT xtalk classifier - TODO confirm */
64 :
65 : #define EDGE_MAX_LEN 30 /* maximum length of buffer for edge detection */
66 : #define REDGE_MAX_LEN 30 /* maximum length of buffer for rising edge detection */
67 :
68 : #define CLASSIFIER_ITD_THRES 8 /* ITD threshold in samples that enables classifier to switch */
69 :
70 :
71 : /*-------------------------------------------------------------------*
72 : * Local function prototypes
73 : *-------------------------------------------------------------------*/
74 :
75 : static void rc_filter( const float x, float *y, const int16_t order, const float tau );
76 :
77 : static void edge_detect( const float *inp, const int16_t len, const float inp_min, const float inp_max, float *edge_str, int16_t *edge_type );
78 :
79 : static float redge_detect( const float *inp, const int16_t len, const float inp_min, const float inp_max );
80 :
81 :
82 : /*-------------------------------------------------------------------*
83 : * Function select_stereo_mode()
84 : *
85 : * Select stereo technology based on output of stereo classifiers
86 : *-------------------------------------------------------------------*/
87 :
88 : /*! r: element mode */
89 150083 : int16_t select_stereo_mode(
90 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
91 : const IVAS_FORMAT ivas_format /* i : IVAS format */
92 : )
93 : {
94 : int16_t element_mode;
95 : STEREO_CLASSIF_HANDLE hStereoClassif;
96 : int16_t is_speech;
97 : int16_t stereo_switching_flag;
98 :
99 : /* initialization */
100 150083 : element_mode = hCPE->element_mode;
101 150083 : hStereoClassif = hCPE->hStereoClassif;
102 :
103 : /* set binary flag to prevent LRTD mode on music */
104 150083 : hStereoClassif->is_speech = 0.97f * hStereoClassif->is_speech + 0.03f * hCPE->hCoreCoder[0]->hSpMusClas->past_dlp[0];
105 150083 : is_speech = ( hStereoClassif->is_speech < 1.0f && hCPE->hCoreCoder[0]->hSpMusClas->wdlp_xtalk < 0.0f );
106 :
107 : /* set binary flag indicating LRTD mode based on unclr/xtalk classifiers' decisions */
108 150083 : hStereoClassif->prev_lrtd_mode = hStereoClassif->lrtd_mode;
109 150083 : hStereoClassif->unclr_decision = ( hStereoClassif->unclr_decision && hCPE->hCoreCoder[0]->flag_noisy_speech_snr == 0 && hCPE->element_brate > IVAS_16k4 );
110 150083 : hStereoClassif->lrtd_mode = ( ( hStereoClassif->unclr_decision | hStereoClassif->xtalk_decision ) && is_speech );
111 :
112 150083 : stereo_switching_flag = 1;
113 :
114 150083 : if ( hCPE->element_brate >= MIN_BRATE_MDCT_STEREO || ( ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) && hCPE->element_brate < MASA_STEREO_MIN_BITRATE )
115 : #ifdef DEBUGGING
116 : || ( hCPE->stereo_mode_cmdl == IVAS_CPE_DFT || hCPE->stereo_mode_cmdl == IVAS_CPE_TD )
117 : #endif
118 : )
119 : {
120 92588 : stereo_switching_flag = 0;
121 : }
122 :
123 150083 : if ( hCPE->element_brate >= MIN_BRATE_MDCT_STEREO )
124 : {
125 86712 : hStereoClassif->prev_lrtd_mode = 0;
126 86712 : hStereoClassif->lrtd_mode = 0;
127 86712 : element_mode = IVAS_CPE_MDCT;
128 : }
129 63371 : else if ( hCPE->element_brate < MIN_BRATE_MDCT_STEREO && hCPE->last_element_mode == IVAS_CPE_MDCT )
130 : {
131 732 : hStereoClassif->lrtd_mode = 0;
132 732 : element_mode = IVAS_CPE_DFT;
133 :
134 732 : if ( stereo_switching_flag == 1 && hCPE->element_brate > IVAS_13k2 && hCPE->hCoreCoder[0]->hSpMusClas->past_dlp[0] < 1.0f && hCPE->hCoreCoder[0]->hSpMusClas->wdlp_xtalk < -0.01f && hCPE->hCoreCoder[0]->vad_flag == 1 && ( hCPE->hStereoMdct->sw_uncorr || hStereoClassif->xtalk_decision ) )
135 : {
136 8 : hStereoClassif->lrtd_mode = 1;
137 8 : element_mode = IVAS_CPE_TD;
138 : }
139 : }
140 :
141 : /* set the element mode */
142 150083 : if ( hStereoClassif->lrtd_mode == 1 && stereo_switching_flag == 1 )
143 : {
144 4001 : element_mode = IVAS_CPE_TD;
145 : }
146 146082 : else if ( element_mode < IVAS_CPE_MDCT )
147 : {
148 59370 : if ( stereo_switching_flag == 0 )
149 : {
150 5876 : if ( ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) && hCPE->element_brate < MASA_STEREO_MIN_BITRATE )
151 : {
152 5876 : element_mode = IVAS_CPE_DFT;
153 : }
154 : #ifdef DEBUGGING
155 : if ( hCPE->stereo_mode_cmdl > 1 )
156 : {
157 : element_mode = hCPE->stereo_mode_cmdl;
158 : }
159 : #endif
160 : }
161 53494 : else if ( element_mode == IVAS_CPE_TD )
162 : {
163 20 : if ( hCPE->hStereoTD->prev_fr_LRTD_TD_dec > 0 && is_speech )
164 : {
165 : /* if unclr_decision goes from 1->0 on active content, continue in LRTD mode */
166 19 : hStereoClassif->lrtd_mode = 1;
167 : }
168 1 : else if ( stereo_switching_flag == 1 )
169 : {
170 1 : element_mode = IVAS_CPE_DFT;
171 : }
172 : }
173 53474 : else if ( stereo_switching_flag == 1 )
174 : {
175 53474 : element_mode = IVAS_CPE_DFT;
176 : }
177 : }
178 :
179 : /* switch from LRTD to DFT when xtalk_decision goes from 0->1 (note: this special case is not handled in the xtalk classifier) */
180 150083 : if ( hCPE->last_element_mode == IVAS_CPE_TD && element_mode == IVAS_CPE_TD && hStereoClassif->xtalk_decision == 1 )
181 : {
182 2062 : if ( hCPE->hStereoTD->prev_fr_LRTD_TD_dec == 0 && hCPE->hStereoTD->tdm_FD2LRTD_SW_cnt > 15 && hCPE->hStereoTD->tdm_last_LRTD_frame_cnt > 3 && hCPE->hCoreCoder[0]->clas < VOICED_CLAS && ( hCPE->element_brate >= IVAS_16k4 || hStereoClassif->xtalk_wscore < 0.01f ) )
183 : {
184 17 : if ( stereo_switching_flag == 1 )
185 : {
186 17 : element_mode = IVAS_CPE_DFT;
187 : }
188 17 : hStereoClassif->xtalk_decision = 0;
189 17 : hStereoClassif->lrtd_mode = 0;
190 : }
191 : }
192 :
193 150083 : if ( hCPE->last_element_mode != element_mode )
194 : {
195 1737 : if ( hCPE->last_element_mode != IVAS_CPE_DFT && hCPE->last_element_mode != IVAS_CPE_TD )
196 732 : {
197 732 : int16_t lrtd_mode = hStereoClassif->lrtd_mode;
198 :
199 : /* reset stereo classifier when switching from MDCT stereo to Unified stereo */
200 732 : stereo_classifier_init( hCPE->hStereoClassif );
201 :
202 732 : hStereoClassif->lrtd_mode = lrtd_mode;
203 : }
204 : else
205 : {
206 : /* reset UNCLR classifier parameters */
207 1005 : set_f( hStereoClassif->unclr_fv, -1.0f, SSC_MAX_NFEA );
208 1005 : hStereoClassif->unclr_corrLagMax_prev = 0;
209 :
210 : /* reset xtalk classifier parameters */
211 1005 : set_f( hStereoClassif->xtalk_fv, -1.0f, SSC_MAX_NFEA );
212 : }
213 : }
214 : #ifdef DEBUG_MODE_TD
215 : dbgwrite( &hStereoClassif->unclr_decision, sizeof( int16_t ), 1, L_FRAME16k, "res/unclr_decision.enc" );
216 : dbgwrite( &hStereoClassif->xtalk_decision, sizeof( int16_t ), 1, L_FRAME16k, "res/xtalk_decision.enc" );
217 : dbgwrite( &hCPE->hCoreCoder[0]->sp_aud_decision0, sizeof( int16_t ), 1, L_FRAME16k, "res/sp_aud_class.enc" );
218 : dbgwrite( &hCPE->hCoreCoder[0]->last_core, sizeof( int16_t ), 1, L_FRAME16k, "res/last_core.enc" );
219 : #endif
220 :
221 150083 : if ( element_mode == IVAS_CPE_TD && hCPE->hCoreCoder[0]->Opt_DTX_ON )
222 : {
223 1877 : hCPE->hStereoCng->td_active = 1;
224 1877 : hCPE->hStereoCng->first_SID_after_TD = 1;
225 : }
226 :
227 150083 : return ( element_mode );
228 : }
229 :
230 :
231 : /*-------------------------------------------------------------------*
232 : * Function stereo_classifier_init()
233 : *
234 : * Initialize stereo classifier handle
235 : *-------------------------------------------------------------------*/
236 :
237 3612 : void stereo_classifier_init(
238 : STEREO_CLASSIF_HANDLE hStereoClassif /* i/o: stereo classifier structure */
239 : )
240 : {
241 : /* initialization of features for xtalk classifier and UNCLR classifier */
242 3612 : hStereoClassif->clas_ch1 = 0;
243 3612 : set_s( hStereoClassif->pitch_ch1, 0, 3 );
244 3612 : set_f( hStereoClassif->voicing_ch1, 0.0f, 3 );
245 3612 : hStereoClassif->cor_map_sum_ch1 = 0.0f;
246 3612 : set_f( hStereoClassif->lsf_ch1, 0.0f, M );
247 3612 : hStereoClassif->lepsP_ch1 = 0.0f;
248 3612 : hStereoClassif->dE1_ch1 = 0.0f;
249 3612 : hStereoClassif->dE1_ch2 = 0.0f;
250 3612 : hStereoClassif->nchar_ch1 = 0.0f;
251 3612 : hStereoClassif->nchar_ch2 = 0.0f;
252 3612 : hStereoClassif->non_sta_ch1 = 0.0f;
253 3612 : hStereoClassif->sp_div_ch1 = 0.0f;
254 3612 : hStereoClassif->ps_diff_ch1 = 0.0f;
255 3612 : hStereoClassif->ps_diff_ch2 = 0.0f;
256 3612 : hStereoClassif->ps_sta_ch1 = 0.0f;
257 3612 : hStereoClassif->ps_sta_ch2 = 0.0f;
258 3612 : hStereoClassif->prev_g_IPD = 0.5f;
259 3612 : hStereoClassif->prev_IPD = 0.0f;
260 3612 : hStereoClassif->prev_ratio_m1_m2 = 0.0f;
261 3612 : set_f( hStereoClassif->xtalk_score_buf, 0.0f, XTALK_SCORE_BUF_LEN );
262 3612 : hStereoClassif->ratio_L = 0.5f;
263 3612 : hStereoClassif->vad_flag_glob = 0;
264 3612 : hStereoClassif->vad_relE = 0;
265 3612 : hStereoClassif->is_speech = 0.0f;
266 :
267 3612 : set_s( hStereoClassif->aEn_raw, 0, CPE_CHANNELS );
268 :
269 3612 : hStereoClassif->Etot_dn = 0.0f;
270 3612 : hStereoClassif->Etot_up = 0.0f;
271 :
272 3612 : set_f( hStereoClassif->relE_buf, 0.0f, UNCLR_L_RELE );
273 3612 : set_f( hStereoClassif->Etot_buf, 0.0f, UNCLR_L_ETOT );
274 3612 : set_f( hStereoClassif->unclr_relE_0_1_LT, 0.0f, UNCLR_RC_ORDER );
275 :
276 3612 : hStereoClassif->unclr_sw_enable_cnt[0] = 0;
277 3612 : hStereoClassif->unclr_sw_enable_cnt[1] = 0;
278 :
279 3612 : hStereoClassif->unclr_decision = 0;
280 3612 : hStereoClassif->unclr_wscore = 0.0f;
281 :
282 3612 : set_f( hStereoClassif->unclr_fv, -1.0f, SSC_MAX_NFEA );
283 3612 : hStereoClassif->unclr_corrLagMax_prev = 0;
284 3612 : hStereoClassif->ave_ener_L = 0;
285 3612 : hStereoClassif->ave_ener_R = 0;
286 3612 : hStereoClassif->relE_0_1 = 0.01f;
287 3612 : hStereoClassif->relE_0_1_LT = 0.01f;
288 :
289 3612 : set_f( hStereoClassif->xtalk_fv, -1.0f, SSC_MAX_NFEA );
290 3612 : hStereoClassif->xtalk_wscore = 0.0f;
291 3612 : hStereoClassif->xtalk_decision = 0;
292 3612 : hStereoClassif->xtalk_score_wrelE = 0.0f;
293 :
294 3612 : hStereoClassif->lrtd_mode = 0;
295 3612 : hStereoClassif->prev_lrtd_mode = 0;
296 :
297 3612 : hStereoClassif->silence_flag = 0;
298 :
299 3612 : return;
300 : }
301 :
302 :
303 : /*-----------------------------------------------------------------*
304 : * stereo_classifier_features()
305 : *
306 : * Collect features for stereo classifiers
307 : *-----------------------------------------------------------------*/
308 :
309 782031 : void stereo_classifier_features(
310 : STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure */
311 : const int16_t idchan, /* i : channel ID */
312 : const int16_t element_mode, /* i : element mode */
313 : const int16_t vad_flag, /* i : VAD flag */
314 : const float lsf_new[], /* i : LSFs at the end of the frame */
315 : const float epsP[], /* i : LP analysis residual energies for each iteration*/
316 : const int16_t pitch[], /* i : open-loop pitch values for quantiz. */
317 : const float voicing[], /* i : OL maximum normalized correlation */
318 : const float cor_map_sum, /* i : speech/music clasif. parameter */
319 : const float non_staX, /* i : unbound non-stationarity for sp/mu clas. */
320 : const float sp_div, /* i : spectral diversity feature */
321 : const int16_t clas /* i : signal class */
322 : )
323 : {
324 : int16_t i, clas_ch2;
325 : float lepsP_ch2, ener_l, ener_r;
326 :
327 : /* combine VAD flags from both channels */
328 782031 : if ( idchan == 0 )
329 : {
330 420855 : hStereoClassif->vad_flag_glob = vad_flag;
331 : }
332 : else
333 : {
334 361176 : hStereoClassif->vad_flag_glob |= vad_flag;
335 : }
336 :
337 782031 : if ( ( element_mode == IVAS_CPE_DFT && idchan == 0 ) || ( element_mode == IVAS_CPE_TD && idchan == 1 ) )
338 : {
339 : /* update Etot_up and Etot_dn based on aEn */
340 63470 : ener_l = 10.0f * log10f( hStereoClassif->ave_ener_L + 1.0f );
341 63470 : ener_r = 10.0f * log10f( hStereoClassif->ave_ener_R + 1.0f );
342 63470 : mvr2r( &hStereoClassif->Etot_buf[0], &hStereoClassif->Etot_buf[1], UNCLR_L_ETOT - 1 );
343 63470 : hStereoClassif->Etot_buf[0] = max( 0, max( ener_l, ener_r ) );
344 :
345 63470 : if ( hStereoClassif->aEn_raw[0] == 6 || ( element_mode == IVAS_CPE_TD && hStereoClassif->aEn_raw[1] == 6 ) )
346 : {
347 : /* active signal, update upper bound */
348 39997 : if ( hStereoClassif->Etot_buf[0] < hStereoClassif->Etot_up )
349 : {
350 : /* energy decreases -> slower update */
351 27524 : hStereoClassif->Etot_up = 0.99f * hStereoClassif->Etot_up + 0.01f * hStereoClassif->Etot_buf[0];
352 : }
353 : else
354 : {
355 : /* energy increases -> faster update */
356 12473 : hStereoClassif->Etot_up = 0.95f * hStereoClassif->Etot_up + 0.05f * hStereoClassif->Etot_buf[0];
357 : }
358 : }
359 23473 : else if ( ( element_mode == IVAS_CPE_DFT && hStereoClassif->aEn_raw[0] == 0 ) ||
360 198 : ( element_mode == IVAS_CPE_TD && hStereoClassif->aEn_raw[0] == 0 && hStereoClassif->aEn_raw[1] == 0 ) )
361 : {
362 : /* inactive signal, update lower bound */
363 16253 : if ( hStereoClassif->Etot_buf[0] < hStereoClassif->Etot_dn )
364 : {
365 : /* energy decreases -> faster update */
366 6267 : hStereoClassif->Etot_dn = 0.9f * hStereoClassif->Etot_dn + 0.1f * hStereoClassif->Etot_buf[0];
367 : }
368 : else
369 : {
370 : /* energy increases -> slower update */
371 9986 : hStereoClassif->Etot_dn = 0.95f * hStereoClassif->Etot_dn + 0.05f * hStereoClassif->Etot_buf[0];
372 : }
373 :
374 16253 : if ( hStereoClassif->Etot_dn < 30.0f )
375 : {
376 : /* do not decrease below lower bound threshold */
377 2675 : hStereoClassif->Etot_dn = 30.0f;
378 : }
379 : }
380 :
381 : /* upper bound is too low and close to lower bound -> update it */
382 63470 : if ( hStereoClassif->Etot_up < hStereoClassif->Etot_dn + 20.0f )
383 : {
384 14490 : hStereoClassif->Etot_up = hStereoClassif->Etot_dn + 20.0f;
385 : }
386 :
387 : /* normalize Etot to (0,1) */
388 63470 : hStereoClassif->relE_0_1 = lin_interp( hStereoClassif->Etot_buf[0], hStereoClassif->Etot_dn, 0.0f, hStereoClassif->Etot_up, 0.9f, 1 );
389 :
390 : /* update relE_buf */
391 63470 : mvr2r( &hStereoClassif->relE_buf[0], &hStereoClassif->relE_buf[1], UNCLR_L_RELE - 1 );
392 63470 : hStereoClassif->relE_buf[0] = hStereoClassif->relE_0_1;
393 :
394 63470 : if ( hStereoClassif->relE_0_1 >= hStereoClassif->relE_buf[1] )
395 : {
396 39483 : hStereoClassif->relE_0_1_LT = 0.9f * hStereoClassif->relE_0_1_LT + 0.1f * hStereoClassif->relE_0_1;
397 : }
398 : else
399 : {
400 23987 : hStereoClassif->relE_0_1_LT = 0.95f * hStereoClassif->relE_0_1_LT + 0.05f * hStereoClassif->relE_0_1;
401 : }
402 :
403 : /* estimate VAD flag based on relative energy */
404 63470 : if ( hStereoClassif->relE_0_1_LT < 0.1f )
405 : {
406 14850 : hStereoClassif->vad_relE = 0;
407 : }
408 : else
409 : {
410 48620 : hStereoClassif->vad_relE = 1;
411 : }
412 :
413 : /* combine classical VAD flag with VAD flag based on relative energy */
414 63470 : hStereoClassif->vad_flag_glob &= hStereoClassif->vad_relE;
415 : }
416 :
417 782031 : if ( idchan == 0 )
418 : {
419 420855 : mvr2r( lsf_new, hStereoClassif->lsf_ch1, M );
420 420855 : hStereoClassif->lepsP_ch1 = logf( epsP[13] + 1e-5f ) - logf( epsP[0] + 1e-5f );
421 :
422 420855 : hStereoClassif->xtalk_fv[E_lsf_1] = lsf_new[0];
423 420855 : hStereoClassif->xtalk_fv[E_lsf_4] = lsf_new[3];
424 420855 : hStereoClassif->xtalk_fv[E_lsf_9] = lsf_new[8];
425 420855 : hStereoClassif->xtalk_fv[E_lsf_14] = lsf_new[13];
426 420855 : hStereoClassif->xtalk_fv[E_lepsP_13] = hStereoClassif->lepsP_ch1;
427 : }
428 : else
429 : {
430 361176 : hStereoClassif->xtalk_fv[E_sum_d_LSF] = 0;
431 6139992 : for ( i = 0; i < M; i++ )
432 : {
433 5778816 : hStereoClassif->xtalk_fv[E_sum_d_LSF] += fabsf( lsf_new[i] - hStereoClassif->lsf_ch1[i] );
434 : }
435 :
436 361176 : if ( hStereoClassif->vad_flag_glob && ( hStereoClassif->ratio_L > 0.0f && hStereoClassif->ratio_L < 1.0f ) )
437 : {
438 343167 : hStereoClassif->xtalk_fv[E_sum_d_LSF] = 0.0f;
439 : }
440 361176 : lepsP_ch2 = logf( epsP[13] + 1e-5f ) - logf( epsP[0] + 1e-5f );
441 361176 : hStereoClassif->xtalk_fv[E_d_lepsP_13] = fabsf( hStereoClassif->lepsP_ch1 - lepsP_ch2 );
442 : }
443 :
444 782031 : if ( idchan == 0 )
445 : {
446 420855 : mvs2s( pitch, hStereoClassif->pitch_ch1, 3 );
447 420855 : mvr2r( voicing, hStereoClassif->voicing_ch1, 3 );
448 :
449 420855 : hStereoClassif->xtalk_fv[E_pitch] = 1 / 3.0f * ( pitch[0] + pitch[1] + pitch[2] );
450 420855 : hStereoClassif->xtalk_fv[E_voicing] = 1 / 3.0f * ( voicing[0] + voicing[1] + voicing[2] );
451 : }
452 : else
453 : {
454 361176 : hStereoClassif->xtalk_fv[E_d_pitch] = 0;
455 361176 : hStereoClassif->xtalk_fv[E_d_voicing] = 0;
456 1444704 : for ( i = 0; i < 3; i++ )
457 : {
458 1083528 : hStereoClassif->xtalk_fv[E_d_pitch] += (float) abs( pitch[i] - hStereoClassif->pitch_ch1[i] );
459 1083528 : hStereoClassif->xtalk_fv[E_d_voicing] += fabsf( voicing[i] - hStereoClassif->voicing_ch1[i] );
460 : }
461 361176 : hStereoClassif->xtalk_fv[E_d_pitch] /= 3.0f;
462 361176 : hStereoClassif->xtalk_fv[E_d_voicing] /= 3.0f;
463 : }
464 :
465 782031 : if ( idchan == 0 )
466 : {
467 420855 : hStereoClassif->cor_map_sum_ch1 = cor_map_sum;
468 420855 : hStereoClassif->non_sta_ch1 = non_staX;
469 420855 : hStereoClassif->sp_div_ch1 = sp_div;
470 :
471 420855 : hStereoClassif->xtalk_fv[E_cor_map_sum] = cor_map_sum;
472 420855 : hStereoClassif->xtalk_fv[E_nchar] = logf( hStereoClassif->nchar_ch1 + 1.0f );
473 420855 : hStereoClassif->xtalk_fv[E_non_sta] = non_staX;
474 420855 : hStereoClassif->xtalk_fv[E_sp_div] = logf( sp_div + 1.0f );
475 : }
476 : else
477 : {
478 361176 : hStereoClassif->xtalk_fv[E_d_cor_map_sum] = fabsf( hStereoClassif->cor_map_sum_ch1 - cor_map_sum );
479 361176 : hStereoClassif->xtalk_fv[E_d_nchar] = fabsf( logf( hStereoClassif->nchar_ch1 + 1.0f ) - logf( hStereoClassif->nchar_ch2 + 1.0f ) );
480 361176 : hStereoClassif->xtalk_fv[E_d_non_sta] = fabsf( hStereoClassif->non_sta_ch1 - non_staX );
481 361176 : hStereoClassif->xtalk_fv[E_d_sp_div] = fabsf( logf( hStereoClassif->sp_div_ch1 + 1.0f ) - logf( sp_div + 1.0f ) );
482 : }
483 :
484 782031 : if ( idchan == 0 )
485 : {
486 420855 : hStereoClassif->xtalk_fv[E_dE1] = logf( hStereoClassif->dE1_ch1 + 1.0f );
487 : }
488 : else
489 : {
490 361176 : hStereoClassif->xtalk_fv[E_d_dE1] = fabsf( logf( hStereoClassif->dE1_ch1 + 1.0f ) - logf( hStereoClassif->dE1_ch2 + 1.0f ) );
491 : }
492 :
493 782031 : if ( idchan == 0 )
494 : {
495 420855 : if ( clas > VOICED_CLAS )
496 : {
497 22707 : hStereoClassif->clas_ch1 = VOICED_CLAS;
498 : }
499 398148 : else if ( clas < VOICED_CLAS )
500 : {
501 247324 : hStereoClassif->clas_ch1 = UNVOICED_CLAS;
502 : }
503 : else
504 : {
505 150824 : hStereoClassif->clas_ch1 = clas;
506 : }
507 :
508 420855 : hStereoClassif->xtalk_fv[E_clas] = hStereoClassif->clas_ch1;
509 : }
510 : else
511 : {
512 361176 : if ( clas > VOICED_CLAS )
513 : {
514 15404 : clas_ch2 = VOICED_CLAS;
515 : }
516 345772 : else if ( clas < VOICED_CLAS )
517 : {
518 240756 : clas_ch2 = UNVOICED_CLAS;
519 : }
520 : else
521 : {
522 105016 : clas_ch2 = clas;
523 : }
524 :
525 361176 : hStereoClassif->xtalk_fv[E_d_clas] = (float) abs( hStereoClassif->clas_ch1 - clas_ch2 );
526 : }
527 :
528 782031 : if ( idchan == 0 )
529 : {
530 420855 : hStereoClassif->xtalk_fv[E_ps_diff] = hStereoClassif->ps_diff_ch1;
531 420855 : hStereoClassif->xtalk_fv[E_ps_sta] = hStereoClassif->ps_sta_ch1;
532 : }
533 : else
534 : {
535 361176 : hStereoClassif->xtalk_fv[E_d_ps_diff] = fabsf( hStereoClassif->ps_diff_ch1 - hStereoClassif->ps_diff_ch2 );
536 361176 : hStereoClassif->xtalk_fv[E_d_ps_sta] = fabsf( hStereoClassif->ps_sta_ch1 - hStereoClassif->ps_sta_ch2 );
537 : }
538 :
539 782031 : return;
540 : }
541 :
542 :
543 : /*-------------------------------------------------------------------*
544 : * Function unclr_classifier_td()
545 : *
546 : * Classify current TD frame as uncorrelated L/R (1) or normal (0)
547 : *-------------------------------------------------------------------*/
548 :
549 3791 : void unclr_classifier_td(
550 : CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */
551 : )
552 : {
553 : int16_t i, ind;
554 : float relE_ST, edge, edge_0_1;
555 : float score, fvn[SSC_MAX_NFEA];
556 : #ifdef DEBUG_MODE_TD
557 : int16_t dec;
558 : #endif
559 3791 : STEREO_CLASSIF_HANDLE hStereoClassif = hCPE->hStereoClassif;
560 :
561 3791 : set_f( fvn, -1.0f, SSC_MAX_NFEA );
562 :
563 : /* calcualte raw score based on LR */
564 3791 : score = UNCLR_INTERCEPT_TD;
565 41701 : for ( i = 0; i < SIZE_UNCLR_ISEL_TD; i++ )
566 : {
567 37910 : ind = unclr_isel_td[i];
568 :
569 : /* mean & std removal */
570 37910 : fvn[i] = ( hStereoClassif->unclr_fv[ind] - unclr_mean_td[i] ) / unclr_scale_td[i];
571 :
572 : /* LR */
573 37910 : score += fvn[i] * unclr_coef_td[i];
574 : }
575 :
576 : #ifdef DEBUG_MODE_TD
577 : /* raw decision */
578 : dec = score > 0;
579 : #endif
580 :
581 : /* normalize score to -1:+1 */
582 3791 : if ( score > UNCLR_SCORE_THR )
583 : {
584 364 : score = UNCLR_SCORE_THR;
585 : }
586 3427 : else if ( score < -UNCLR_SCORE_THR )
587 : {
588 662 : score = -UNCLR_SCORE_THR;
589 : }
590 3791 : score /= 2 * UNCLR_SCORE_THR;
591 :
592 : /* weight raw score with relative energy */
593 3791 : score *= hStereoClassif->relE_0_1;
594 :
595 : /* rising edge detection on relE */
596 3791 : relE_ST = mean( hStereoClassif->relE_buf, UNCLR_L_RELE );
597 3791 : if ( hStereoClassif->relE_0_1 > relE_ST )
598 : {
599 1747 : rc_filter( hStereoClassif->relE_0_1, hStereoClassif->unclr_relE_0_1_LT, UNCLR_RC_ORDER, RC_FACT_UP );
600 : }
601 : else
602 : {
603 2044 : rc_filter( hStereoClassif->relE_0_1, hStereoClassif->unclr_relE_0_1_LT, UNCLR_RC_ORDER, RC_FACT_DOWN );
604 : }
605 :
606 3791 : edge = hStereoClassif->relE_0_1 - hStereoClassif->unclr_relE_0_1_LT[UNCLR_RC_ORDER - 1];
607 3791 : edge_0_1 = lin_interp( edge, 0.0f, 0.95f, 1.0f, 0.9f, 1 );
608 :
609 : /* LT average */
610 3791 : hStereoClassif->unclr_wscore = edge_0_1 * hStereoClassif->unclr_wscore + ( 1 - edge_0_1 ) * score;
611 :
612 : /* binary decision w. hysteresis (switch the decision only when coder_type is GC, UC or IC) */
613 3791 : if ( ( ( hStereoClassif->unclr_decision == 0 && hStereoClassif->unclr_wscore > 0.1f ) || ( hStereoClassif->unclr_decision == 1 && hStereoClassif->unclr_wscore < -0.07f ) ) && ( hStereoClassif->unclr_sw_enable_cnt[0] > 0 || hStereoClassif->unclr_sw_enable_cnt[1] > 0 ) )
614 : {
615 : /* let's switch the binary decision */
616 0 : hStereoClassif->unclr_decision = !hStereoClassif->unclr_decision;
617 : }
618 :
619 : #ifdef DEBUG_MODE_TD
620 : dbgwrite( &dec, sizeof( int16_t ), 1, 1, "res/unclr_dec.x" );
621 : dbgwrite( &hStereoClassif->unclr_wscore, sizeof( float ), 1, 1, "res/unclr_wscore.x" );
622 : dbgwrite( &hStereoClassif->unclr_decision, sizeof( int16_t ), 1, 1, "res/unclr_dec_hyst.x" );
623 : #endif
624 :
625 3791 : return;
626 : }
627 :
628 :
629 : /*-------------------------------------------------------------------*
630 : * Function unclr_classifier_dft()
631 : *
632 : * Classifies current DFT frame as uncorrelated L/R (1) or normal stereo (0)
633 : *-------------------------------------------------------------------*/
634 :
635 59679 : void unclr_classifier_dft(
636 : CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */
637 : )
638 : {
639 : int16_t i, ind;
640 : float edge, relE_ST, edge_0_1;
641 : float score, fvn[SSC_MAX_NFEA];
642 : #ifdef DEBUG_MODE_TD
643 : int16_t dec;
644 : #endif
645 :
646 59679 : STEREO_CLASSIF_HANDLE hStereoClassif = hCPE->hStereoClassif;
647 :
648 : /* calculate raw score based on LR */
649 59679 : score = UNCLR_INTERCEPT_DFT;
650 537111 : for ( i = 0; i < SIZE_UNCLR_ISEL_DFT; i++ )
651 : {
652 477432 : ind = unclr_isel_dft[i];
653 :
654 : /* mean & std removal */
655 477432 : fvn[i] = ( hStereoClassif->unclr_fv[ind] - unclr_mean_dft[i] ) / unclr_scale_dft[i];
656 :
657 : /* LR */
658 477432 : score += fvn[i] * unclr_coef_dft[i];
659 : }
660 :
661 : #ifdef DEBUG_MODE_TD
662 : /* raw decision */
663 : dec = score > 0;
664 : #endif
665 :
666 : /* normalize score to -1:+1 */
667 59679 : if ( score > UNCLR_SCORE_THR )
668 : {
669 929 : score = UNCLR_SCORE_THR;
670 : }
671 58750 : else if ( score < -UNCLR_SCORE_THR )
672 : {
673 35235 : score = -UNCLR_SCORE_THR;
674 : }
675 59679 : score /= 2 * UNCLR_SCORE_THR;
676 :
677 : /* weight raw score with relative energy */
678 59679 : score *= hStereoClassif->relE_0_1;
679 :
680 59679 : if ( !hStereoClassif->vad_flag_glob )
681 : {
682 20870 : score = 0;
683 : }
684 :
685 : /* rising edge detector on relE */
686 59679 : relE_ST = mean( hStereoClassif->relE_buf, UNCLR_L_RELE );
687 59679 : if ( hStereoClassif->relE_0_1 > relE_ST )
688 : {
689 27305 : rc_filter( hStereoClassif->relE_0_1, hStereoClassif->unclr_relE_0_1_LT, UNCLR_RC_ORDER, RC_FACT_UP );
690 : }
691 : else
692 : {
693 32374 : rc_filter( hStereoClassif->relE_0_1, hStereoClassif->unclr_relE_0_1_LT, UNCLR_RC_ORDER, RC_FACT_DOWN );
694 : }
695 :
696 59679 : edge = hStereoClassif->relE_0_1 - hStereoClassif->unclr_relE_0_1_LT[UNCLR_RC_ORDER - 1];
697 59679 : edge_0_1 = lin_interp( edge, 0.0f, 0.95f, 1.0f, 0.9f, 1 );
698 :
699 : /* LT average */
700 59679 : hStereoClassif->unclr_wscore = edge_0_1 * hStereoClassif->unclr_wscore + ( 1 - edge_0_1 ) * score;
701 :
702 : /* binary decision w. hysteresis (switch the decision only when coder_type is GC, UC or IC) */
703 59679 : if ( ( ( hStereoClassif->unclr_decision == 0 && hStereoClassif->unclr_wscore > 0.1f ) || ( hStereoClassif->unclr_decision == 1 && hStereoClassif->unclr_wscore < -0.07f ) ) && ( hStereoClassif->unclr_sw_enable_cnt[0] > 0 ) )
704 : {
705 : /* let's switch the binary decision */
706 508 : hStereoClassif->unclr_decision = !hStereoClassif->unclr_decision;
707 : }
708 :
709 : #ifdef DEBUG_MODE_TD
710 : dbgwrite( &dec, sizeof( int16_t ), 1, 1, "res/unclr_dec.x" );
711 : dbgwrite( &hStereoClassif->unclr_wscore, sizeof( float ), 1, 1, "res/unclr_wscore.x" );
712 : dbgwrite( &hStereoClassif->unclr_decision, sizeof( int16_t ), 1, 1, "res/unclr_dec_hyst.x" );
713 : #endif
714 :
715 59679 : return;
716 : }
717 :
718 :
719 : /*-------------------------------------------------------------------*
720 : * Function xtalk_classifier_td()
721 : *
722 : * Classify current TD frame as cross-talk frame (1) or normal stereo frame (0)
723 : *-------------------------------------------------------------------*/
724 :
725 3791 : void xtalk_classifier_td(
726 : CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */
727 : )
728 : {
729 : int16_t i, ind, edge_type;
730 : float score, fvn[SSC_MAX_NFEA];
731 : float edge, edge_0_1, wedge, scr_min, scr_max, wrelE;
732 :
733 : #ifdef DEBUG_MODE_TD
734 : int16_t dec;
735 : #endif
736 :
737 3791 : STEREO_CLASSIF_HANDLE hStereoClassif = hCPE->hStereoClassif;
738 :
739 3791 : set_f( fvn, -1.0f, SSC_MAX_NFEA );
740 :
741 : /* calculate raw score based on LR */
742 3791 : score = XTALK_INTERCEPT_TD;
743 68238 : for ( i = 0; i < SIZE_XTALK_ISEL_TD; i++ )
744 : {
745 64447 : ind = xtalk_isel_td[i];
746 :
747 : /* mean & std removal */
748 64447 : fvn[i] = ( hStereoClassif->xtalk_fv[ind] - xtalk_mean_td[i] ) / xtalk_scale_td[i];
749 :
750 : /* LR */
751 64447 : score += fvn[i] * xtalk_coef_td[i];
752 : }
753 :
754 : /* normalize raw score to -1:+1 */
755 3791 : if ( score > XTALK_SCORE_THR_TD_UP )
756 : {
757 1101 : score = 1.0f;
758 : }
759 2690 : else if ( score < -XTALK_SCORE_THR_TD_DN )
760 : {
761 119 : score = -1.0f;
762 : }
763 2571 : else if ( score > 0.0f )
764 : {
765 1791 : score /= XTALK_SCORE_THR_TD_UP;
766 : }
767 : else
768 : {
769 780 : score /= XTALK_SCORE_THR_TD_DN;
770 : }
771 :
772 3791 : if ( hCPE->last_element_mode == IVAS_CPE_DFT )
773 : {
774 : /* overwrite score if we have just switched from DFT stereo */
775 51 : score = hStereoClassif->xtalk_score;
776 : }
777 : else
778 : {
779 3740 : hStereoClassif->xtalk_score = score;
780 : }
781 :
782 3791 : if ( !hStereoClassif->vad_flag_glob )
783 : {
784 : /* reset score to 0 in inactive segments */
785 187 : score = 0;
786 : }
787 :
788 : #ifdef DEBUG_MODE_TD
789 : /* raw decision */
790 : dec = score > 0;
791 : #endif
792 :
793 : /* weight raw score with relative energy */
794 3791 : wrelE = lin_interp( hStereoClassif->relE_0_1, 0.5f, 0.95f, 0.9f, 0.0f, 1 );
795 3791 : hStereoClassif->xtalk_score_wrelE = wrelE * hStereoClassif->xtalk_score_wrelE + ( 1 - wrelE ) * score;
796 3791 : score = hStereoClassif->xtalk_score_wrelE;
797 :
798 : /* rising edge detector on raw score -> yields 1 if strong rising edge is detected in the raw score buffer */
799 3791 : mvr2r( &hStereoClassif->xtalk_score_buf[0], &hStereoClassif->xtalk_score_buf[1], XTALK_SCORE_BUF_LEN - 1 );
800 3791 : hStereoClassif->xtalk_score_buf[0] = score;
801 :
802 3791 : minimum( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, &scr_min );
803 3791 : maximum( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, &scr_max );
804 :
805 : #ifdef DEBUG_MODE_TD
806 : edge_type = 0;
807 : edge_detect( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, -0.2f, 1.0f, &edge, &edge_type );
808 : edge_0_1 = lin_interp( 1 - edge, 1.0f, 1.0f, 0.6f, 0.0f, 1 );
809 : dbgwrite( &score, sizeof( float ), 1, 1, "res/score.x" );
810 : dbgwrite( &edge_0_1, sizeof( float ), 1, 1, "res/edge_0_1.x" );
811 : #endif
812 :
813 3791 : if ( ( ( scr_min < 0.0f && scr_max > 0.2f ) || ( scr_max - scr_min > 0.5f ) ) )
814 : {
815 : /* test rising edge (use 0 as edge_type because of newer->older buffer samples ordering) */
816 857 : edge_type = 0;
817 857 : edge_detect( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, -0.2f, 1.0f, &edge, &edge_type );
818 :
819 857 : if ( edge_type == 0 && edge < 0.3f )
820 : {
821 : /* normalize edge to 0-1 interval */
822 559 : edge_0_1 = lin_interp( 1 - edge, 1.0f, 1.0f, 0.6f, 0.0f, 1 );
823 : }
824 : else
825 : {
826 298 : edge_0_1 = 0;
827 : }
828 : }
829 : else
830 : {
831 2934 : edge_0_1 = 0.0f;
832 : }
833 :
834 :
835 : /* weight raw score based on rising edge detector */
836 3791 : wedge = lin_interp( edge_0_1, 0.0f, 0.9f, 1.0f, 0.5f, 1 );
837 :
838 3791 : hStereoClassif->xtalk_wscore = wedge * hStereoClassif->xtalk_wscore + ( 1 - wedge ) * score;
839 :
840 3791 : if ( !hStereoClassif->vad_flag_glob )
841 : {
842 187 : hStereoClassif->xtalk_wscore = 0;
843 : }
844 :
845 : /* binary decision w. hysteresis (switch the decision only when coder_type is GC, UC or IC) */
846 3791 : if ( ( hStereoClassif->unclr_decision == 0 && hStereoClassif->xtalk_decision == 0 && hStereoClassif->xtalk_wscore > 0.03f ) /*|| (hStereoClassif->xtalk_decision == 1 && hStereoClassif->xtalk_wscore < 0.00f)*/ && ( hStereoClassif->unclr_sw_enable_cnt[0] > 0 || hStereoClassif->unclr_sw_enable_cnt[1] > 0 ) )
847 : {
848 : /* let's switch the binary decision */
849 8 : hStereoClassif->xtalk_decision = !hStereoClassif->xtalk_decision;
850 : }
851 :
852 : #ifdef DEBUG_MODE_TD
853 : dbgwrite( &hStereoClassif->xtalk_wscore, sizeof( float ), 1, 1, "res/xtalk_wscore.x" );
854 : dbgwrite( &dec, sizeof( int16_t ), 1, 1, "res/xtalk_dec.x" );
855 : dbgwrite( &hStereoClassif->xtalk_decision, sizeof( int16_t ), 1, 1, "res/xtalk_dec_hyst.x" );
856 : #endif
857 :
858 3791 : return;
859 : }
860 :
861 :
862 : /*-------------------------------------------------------------------*
863 : * Function xtalk_classifier_dft()
864 : *
865 : * Classify current DFT frame as cross-talk frame (1) or normal stereo frame (0)
 *
 * Processing order:
 *  1. take the magnitude m1 of gcc_phat[] at the supplied ITD and, when |itd| > thr,
 *     search the lags of opposite sign for the largest magnitude m2 (found at lag itd2);
 *     otherwise m1 = m2 = 0 and itd2 = 0
 *  2. derive four features from m1, m2, itd, itd2 and the values memorized by the
 *     previous call, and store them in hStereoClassif->xtalk_fv[]
 *  3. compute a linear score from the normalized features, limit it to -1..+1 and keep
 *     it in hStereoClassif->xtalk_score
 *  4. push the (VAD-gated) score into xtalk_score_buf[], run the rising edge detector
 *     and smooth the score recursively into hStereoClassif->xtalk_wscore
 *  5. update hStereoClassif->xtalk_decision and the prev_* memories in hItd
866 : *-------------------------------------------------------------------*/
867 :
868 74369 : void xtalk_classifier_dft(
869 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
870 : const int16_t itd, /* i : ITD from DFT stereo - used as a feature */
871 : const float gcc_phat[] /* i : GPHAT cross-channel correlation function; index k holds lag k - XTALK_PHAT_LEN */
872 : )
873 : {
874 : int16_t i, ind, itd2, thr;
875 : float score, m1, m2;
876 : STEREO_CLASSIF_HANDLE hStereoClassif;
877 : ITD_DATA_HANDLE hItd;
878 : float fvn[SSC_MAX_NFEA], edge, edge_0_1, wedge;
879 : float ratio_m1_m2, m2_m2, d_itd2, itd1_flip;
880 : float scr_min, scr_max;
881 : #ifdef DEBUG_MODE_TD
882 : int16_t dec;
883 : #endif
884 :
885 74369 : hStereoClassif = hCPE->hStereoClassif;
/* ITD data are taken from the DFT stereo handle when it exists, otherwise from the MDCT stereo handle */
/* NOTE(review): hStereoMdct is dereferenced without a NULL check - presumably guaranteed by the caller */
886 74369 : hItd = ( hCPE->hStereoDft != NULL ) ? hCPE->hStereoDft->hItd : hCPE->hStereoMdct->hItd;
887 :
888 74369 : m1 = 0.0f;
889 74369 : m2 = 0.0f;
890 :
891 74369 : itd2 = 0;
/* the ITD threshold is doubled for element bitrates of IVAS_32k and above */
892 74369 : thr = ( hCPE->element_brate >= IVAS_32k ? 2 : 1 ) * CLASSIFIER_ITD_THRES;
893 :
894 74369 : if ( itd > thr )
895 : {
/* positive ITD: m1 is the magnitude at the ITD lag, the secondary peak m2 is searched */
/* over indices 0 .. XTALK_PHAT_LEN - thr - 1, i.e. lags -XTALK_PHAT_LEN .. -( thr + 1 ) */
896 15961 : m1 = fabsf( gcc_phat[itd + XTALK_PHAT_LEN] );
897 15961 : m2 = fabsf( gcc_phat[0] );
898 15961 : itd2 = -XTALK_PHAT_LEN;
899 3011016 : for ( i = 1; i < XTALK_PHAT_LEN - thr; i++ )
900 : {
901 2995055 : if ( fabsf( gcc_phat[i] ) > m2 )
902 : {
903 130721 : itd2 = -XTALK_PHAT_LEN + i;
904 130721 : m2 = fabsf( gcc_phat[i] );
905 : }
906 : }
907 : }
908 58408 : else if ( itd < -thr )
909 : {
/* negative ITD: the secondary peak m2 is searched over indices XTALK_PHAT_LEN + thr + 1 .. 2 * XTALK_PHAT_LEN, */
/* i.e. lags thr + 1 .. XTALK_PHAT_LEN */
910 11141 : m1 = fabsf( gcc_phat[itd + XTALK_PHAT_LEN] );
911 11141 : m2 = fabsf( gcc_phat[XTALK_PHAT_LEN + thr + 1] );
912 11141 : itd2 = thr + 1;
913 2094840 : for ( i = XTALK_PHAT_LEN + thr + 2; i < 2 * XTALK_PHAT_LEN + 1; i++ )
914 : {
915 2083699 : if ( fabsf( gcc_phat[i] ) > m2 )
916 : {
917 56747 : itd2 = -XTALK_PHAT_LEN + i;
918 56747 : m2 = fabsf( gcc_phat[i] );
919 : }
920 : }
921 : }
922 :
/* features: peak product over ( peak sum + 1 ), product of current and previous m2, */
/* absolute change of the secondary lag, and an ITD sign-flip measure that is positive */
/* only when the current and the previous ITD have strictly opposite signs */
923 74369 : ratio_m1_m2 = fabsf( m1 * m2 ) / fabsf( m1 + m2 + 1.0f );
924 74369 : m2_m2 = hItd->prev_m2 * m2;
925 74369 : d_itd2 = (float) abs( itd2 - hItd->prev_itd2 );
926 74369 : itd1_flip = (float) ( max( itd, hItd->prev_itd1 ) * ( -min( itd, hItd->prev_itd1 ) ) );
927 :
928 :
929 74369 : hStereoClassif->xtalk_fv[E_gphat_d_itd2] = d_itd2;
930 74369 : hStereoClassif->xtalk_fv[E_gphat_itd1_flip] = itd1_flip;
/* the stored ratio feature is the product of the current and the previous ratio */
931 74369 : hStereoClassif->xtalk_fv[E_gphat_ratio_m1_m2] = ratio_m1_m2 * hStereoClassif->prev_ratio_m1_m2;
932 74369 : hStereoClassif->xtalk_fv[E_gphat_m2_m2] = m2_m2;
933 :
934 74369 : hStereoClassif->prev_ratio_m1_m2 = ratio_m1_m2;
935 :
/* entries of fvn[] beyond SIZE_XTALK_ISEL_DFT keep the value -1 and are not used later in this function */
936 74369 : set_f( fvn, -1.0f, SSC_MAX_NFEA );
937 :
938 : /* calculate raw score based on LR */
939 74369 : score = XTALK_INTERCEPT_DFT;
940 892428 : for ( i = 0; i < SIZE_XTALK_ISEL_DFT; i++ )
941 : {
942 818059 : ind = xtalk_isel_dft[i];
943 :
944 : /* mean & std removal */
945 818059 : fvn[i] = ( hStereoClassif->xtalk_fv[ind] - xtalk_mean_dft[i] ) / xtalk_scale_dft[i];
946 :
947 : /* LR */
948 818059 : score += fvn[i] * xtalk_coef_dft[i];
949 : }
950 :
951 : /* normalize score to -1:+1 */
952 74369 : if ( score > XTALK_SCORE_THR_DFT )
953 : {
954 2490 : score = 1.0f;
955 : }
956 71879 : else if ( score < -XTALK_SCORE_THR_DFT )
957 : {
958 4538 : score = -1.0f;
959 : }
960 : else
961 : {
962 67341 : score /= XTALK_SCORE_THR_DFT;
963 : }
964 :
965 : /* raw score (stored before the VAD-based reset below, so the stored value is not gated) */
966 74369 : hStereoClassif->xtalk_score = score;
967 :
968 74369 : if ( !hStereoClassif->vad_flag_glob )
969 : {
/* only the local copy used for buffering and smoothing is reset to 0 */
970 24663 : score = 0;
971 : }
972 :
973 : #ifdef DEBUG_MODE_TD
974 : /* raw decision */
975 : dec = score > 0;
976 : #endif
977 :
978 :
979 : /* rising edge detector on raw score -> yields 1 if strong rising edge is detected in the given buffer */
/* the history is moved by one position so that index 0 receives the newest score */
/* (assumes mvr2r() copes with the overlapping source/destination - TODO confirm) */
980 74369 : mvr2r( &hStereoClassif->xtalk_score_buf[0], &hStereoClassif->xtalk_score_buf[1], XTALK_SCORE_BUF_LEN - 1 );
981 74369 : hStereoClassif->xtalk_score_buf[0] = score;
982 :
983 74369 : minimum( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, &scr_min );
984 74369 : maximum( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, &scr_max );
985 :
986 : #ifdef DEBUG_MODE_TD
987 : edge = redge_detect( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, -0.2f, 1.0f );
988 : edge_0_1 = lin_interp( 1 - edge, 1.0f, 1.0f, 0.83f, 0.0f, 1 );
989 : dbgwrite( &score, sizeof( float ), 1, 1, "res/score.x" );
990 : dbgwrite( &edge_0_1, sizeof( float ), 1, 1, "res/edge_0_1.x" );
991 : #endif
992 :
/* the edge detector is only run when the buffer contains a value below 0.2 and a value above 0.0 */
993 74369 : if ( scr_min < 0.2f && scr_max > 0.0f )
994 : {
/* redge_detect() returns a fitting error (small = strong edge), hence the 1 - edge mapping */
995 15879 : edge = redge_detect( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, -0.2f, 1.0f );
996 15879 : edge_0_1 = lin_interp( 1 - edge, 1.0f, 1.0f, 0.83f, 0.0f, 1 );
997 : }
998 : else
999 : {
1000 58490 : edge_0_1 = 0.0f;
1001 : }
1002 :
1003 :
1004 : /* weight raw score based on rising edge detector */
/* wedge weights the previous xtalk_wscore, ( 1 - wedge ) weights the new score */
1005 74369 : wedge = lin_interp( edge_0_1, 0.0f, 0.95f, 1.0f, 0.3f, 1 );
1006 74369 : hStereoClassif->xtalk_wscore = wedge * hStereoClassif->xtalk_wscore + ( 1 - wedge ) * score;
1007 :
/* binary decision: it is cleared only in the first branch (zero ITD or VAD flag of channel 0 equal to 0) */
/* and can be raised from 0 to 1 only in the two following branches */
1008 74369 : if ( ( itd == 0 ) || ( hCPE->hCoreCoder[0]->vad_flag == 0 ) )
1009 : {
1010 39092 : hStereoClassif->xtalk_decision = 0;
1011 : }
/* NOTE(review): 0.8 and 0.15 below are double literals, so these comparisons are evaluated in double precision */
1012 35277 : else if ( hCPE->element_brate >= IVAS_24k4 &&
1013 22545 : hStereoClassif->xtalk_decision == 0 && ( ( m1 * 0.8 < m2 && hItd->prev_m1 * 0.8 < hItd->prev_m2 && abs( itd2 - hItd->prev_itd2 ) < 4 && m1 > 0.15 && hItd->prev_m1 > 0.15 ) || ( hStereoClassif->xtalk_wscore > 0.8 ) || ( itd > thr && hItd->prev_itd1 < -thr && hStereoClassif->silence_flag == 0 ) || ( hItd->prev_itd1 > thr && itd < -thr && hStereoClassif->silence_flag == 0 ) ) &&
1014 2135 : hCPE->hCoreCoder[0]->vad_flag == 1 && hCPE->hCoreCoder[0]->flag_noisy_speech_snr == 0 && hCPE->hCoreCoder[0]->hNoiseEst->aEn_inac_cnt > 15 )
1015 : {
1016 0 : hStereoClassif->xtalk_decision = 1;
1017 : #ifdef DEBUG_MODE_TD
/* NOTE(review): 'frame' is not declared in this function - presumably a global frame counter available in debug builds */
1018 : printf( "\nSwitch DFT-stereo -> TD-LR on frame %d\n", frame );
1019 : #endif
1020 : }
/* second way to raise the decision: ITD magnitude above STEREO_DFT_ITD_MAX together with lp_speech - lp_noise above 25 */
1021 35277 : else if ( hCPE->element_brate >= IVAS_16k4 && hStereoClassif->xtalk_decision == 0 && abs( itd ) > STEREO_DFT_ITD_MAX && ( hCPE->hCoreCoder[0]->lp_speech - hCPE->hCoreCoder[0]->lp_noise ) > 25.0f )
1022 : {
1023 66 : hStereoClassif->xtalk_decision = 1;
1024 : }
1025 :
1026 : #ifdef DEBUG_MODE_TD
1027 : dbgwrite( &hStereoClassif->xtalk_wscore, sizeof( float ), 1, 1, "res/xtalk_wscore.x" );
1028 : dbgwrite( &dec, sizeof( int16_t ), 1, 1, "res/xtalk_dec.x" );
1029 : dbgwrite( &hStereoClassif->xtalk_decision, sizeof( int16_t ), 1, 1, "res/xtalk_dec_hyst.x" );
1030 : #endif
1031 :
1032 : /* updates (memories read as prev_* by the next call) */
1033 74369 : hItd->prev_m1 = m1;
1034 74369 : hItd->prev_m2 = m2;
1035 74369 : hItd->prev_itd1 = itd;
1036 74369 : hItd->prev_itd2 = itd2;
1037 :
1038 74369 : return;
1039 : }
1040 :
1041 :
1042 : /*-------------------------------------------------------------------*
1043 : * Function rc_filter()
1044 : *
1045 : * Cascade of 'order' first-order recursive smoothing stages: stage 0 is fed with x,
 * every following stage is fed with the freshly updated output of the stage before it.
 * y[] holds one state per stage and is updated in place.
1046 : *-------------------------------------------------------------------*/
1047 :
1048 63470 : static void rc_filter(
1049 : const float x, /* i : new input sample */
1050 : float *y, /* i/o: filter states, one per stage; y[0] is always accessed, y[1..order-1] when order > 1 */
1051 : const int16_t order, /* i : number of cascaded stages */
1052 : const float tau ) /* i : weight of the previous state; ( 1 - tau ) weights the stage input */
1053 : {
1054 : int16_t i;
1055 :
/* first stage smooths the input sample */
1056 63470 : y[0] = tau * y[0] + ( 1 - tau ) * x;
1057 1269400 : for ( i = 1; i < order; i++ )
1058 : {
/* y[i - 1] has already been updated in this call, so the stages form a cascade */
1059 1205930 : y[i] = tau * y[i] + ( 1 - tau ) * y[i - 1];
1060 : }
1061 :
1062 63470 : return;
1063 : }
1064 :
1065 :
1066 : /*-------------------------------------------------------------------*
1067 : * Function edge_detect()
1068 : *
1069 : * Rising/falling edge detection algorithm
1070 : * Analyzes the input buffer and outputs strength and type of the detected edge (rising or falling)
1071 : * Set edge_type to 0/1/2 when calling this function to specify the edge type you want to detect. The returned value will be modified
1072 : * according to the edge type detected (-1 indicates that no edge has been detected)
 *
 * For every tested interval length the input is compared against a straight ramp between
 * inp_max and inp_min; the reported strength is the smallest mean squared deviation found,
 * i.e. a small value means a close match. When no candidate is evaluated or none is below
 * the initial value, *edge_str is 1e7f and *edge_type is -1.
 * NOTE(review): len is not validated against EDGE_MAX_LEN, the size of the local edge[] array.
1073 : *-------------------------------------------------------------------*/
1074 :
1075 857 : static void edge_detect(
1076 : const float *inp, /* i : input buffer */
1077 : const int16_t len, /* i : length of the input buffer */
1078 : const float inp_min, /* i : minimum value for edge detection */
1079 : const float inp_max, /* i : maximum value for edge detection */
1080 : float *edge_str, /* o : edge strength = minimum mean squared fitting error (0 = exact match, 1e7f = nothing found) */
1081 : int16_t *edge_type /* i/o: edge type (to be) detected: 0 = falling, 1 = rising, 2 = both */
1082 : )
1083 : {
1084 : int16_t i, j, et;
1085 : float y, err, edge_slope, edge[EDGE_MAX_LEN];
1086 : float edge_min, err0;
1087 :
1088 857 : et = -1;
1089 857 : edge_min = 1e7f;
1090 :
1091 857 : if ( *edge_type == 0 || *edge_type == 2 )
1092 : {
1093 : /* falling edge detection */
/* reference ramp: inp_max at index 0 decreasing linearly to inp_min at index i */
1094 857 : set_f( edge, 0.0f, EDGE_MAX_LEN );
1095 :
1096 : /* set error at 0th index */
1097 857 : if ( inp[0] > inp_max )
1098 : {
1099 0 : err0 = 0.0f;
1100 : }
1101 : else
1102 : {
1103 : /* inhibits edge smearing effect */
1104 857 : err0 = powf( inp[0] - inp_max, 2 );
1105 : }
1106 :
1107 : /* test edges on intervals from 2 to len */
1108 4285 : for ( i = 1; i < len; i++ )
1109 : {
1110 3428 : edge_slope = ( inp_max - inp_min ) / i;
1111 3428 : edge[i] = err0;
1112 11998 : for ( j = 1; j <= i; j++ )
1113 : {
1114 8570 : y = inp_max - edge_slope * j;
/* check_bounds() presumably limits inp[j] to the inp_min..inp_max range - helper not visible here */
1115 8570 : err = y - check_bounds( inp[j], inp_min, inp_max );
1116 8570 : edge[i] += err * err;
1117 : }
1118 :
/* mean over the i + 1 samples of the interval (indices 0..i) */
1119 3428 : edge[i] /= i + 1;
1120 :
1121 3428 : if ( edge[i] < edge_min )
1122 : {
1123 2595 : edge_min = edge[i];
1124 2595 : et = 0;
1125 : }
1126 : }
1127 : }
1128 :
1129 857 : if ( *edge_type == 1 || *edge_type == 2 )
1130 : {
1131 : /* rising edge detection */
/* reference ramp: inp_min at index 0 increasing linearly towards inp_max at index i; */
/* edge_min is shared with the falling test above, so with edge_type 2 the better of the two wins */
1132 0 : set_f( edge, 0.0f, EDGE_MAX_LEN );
1133 :
1134 : /* set error at 0th index */
1135 0 : if ( inp[0] < inp_min )
1136 : {
1137 0 : err0 = 0.0f;
1138 : }
1139 : else
1140 : {
1141 : /* inhibits edge smearing effect */
1142 0 : err0 = powf( inp[0] - inp_min, 2 );
1143 : }
1144 :
1145 : /* test edges on intervals from 2 to len */
/* NOTE(review): the loop bounds differ from the falling branch: i runs up to len inclusive, so edge[len] */
/* is written (requires len < EDGE_MAX_LEN), and j stops at i - 1, so the ramp end point is never compared */
/* although the sum is still divided by i + 1 - confirm whether this asymmetry is intended */
1146 0 : for ( i = 1; i <= len; i++ )
1147 : {
1148 0 : edge_slope = ( inp_max - inp_min ) / i;
1149 0 : edge[i] = err0;
1150 0 : for ( j = 1; j < i; j++ )
1151 : {
1152 0 : y = inp_min + edge_slope * j;
1153 0 : err = y - check_bounds( inp[j], inp_min, inp_max );
1154 0 : edge[i] += err * err;
1155 : }
1156 :
1157 0 : edge[i] /= i + 1;
1158 :
1159 0 : if ( edge[i] < edge_min )
1160 : {
1161 0 : edge_min = edge[i];
1162 0 : et = 1;
1163 : }
1164 : }
1165 : }
1166 :
/* report the best (smallest) error and the edge type it belongs to; -1 when nothing was found */
1167 857 : *edge_str = edge_min;
1168 857 : *edge_type = et;
1169 :
1170 857 : return;
1171 : }
1172 :
1173 :
1174 : /*-------------------------------------------------------------------*
1175 : * Function redge_detect()
1176 : *
1177 : * Rising edge detection algorithm
1178 : * Analyzes the input buffer and returns the smallest mean squared deviation from a straight ramp
 * that starts at inp_max at index 0 (newest value) and reaches inp_min at index i (older value).
 * A value close to 0 therefore indicates a strong rising edge; the caller in this file maps the
 * result through 1 - value before using it.
 * NOTE(review): len is not validated against REDGE_MAX_LEN, the size of the local edge[] array.
1179 : *-------------------------------------------------------------------*/
1180 :
1181 : /*! r: minimum mean squared ramp-fitting error (small = strong rising edge); not explicitly limited to 0-1 here */
1182 15879 : static float redge_detect(
1183 : const float *inp, /* i : input buffer (ordered from newest to oldest values)*/
1184 : const int16_t len, /* i : length of the input buffer */
1185 : const float inp_min, /* i : minimum value for edge detection */
1186 : const float inp_max /* i : maximum value for edge detection */
1187 : )
1188 : {
1189 : int16_t i, j;
1190 : float y, err, edge_slope, edge[REDGE_MAX_LEN];
1191 : float edge_min, err0;
1192 :
1193 15879 : set_f( edge, 0.0f, REDGE_MAX_LEN );
1194 15879 : edge_min = 1e7f;
1195 :
1196 : /* test rising edges on intervals from 2 to len */
/* error of the newest sample: no penalty when it exceeds inp_max */
1197 15879 : if ( inp[0] > inp_max )
1198 : {
1199 0 : err0 = 0.0f;
1200 : }
1201 : else
1202 : {
1203 15879 : err0 = powf( inp[0] - inp_max, 2 );
1204 : }
1205 79395 : for ( i = 1; i < len; i++ )
1206 : {
1207 63516 : edge_slope = ( inp_max - inp_min ) / i;
1208 63516 : edge[i] = err0;
/* only the interior samples 1..i-1 are compared with the ramp; for i == 1 the loop body is skipped */
1209 158790 : for ( j = 1; j < i; j++ )
1210 : {
1211 95274 : y = inp_max - edge_slope * j;
1212 95274 : if ( inp[j] == inp[j - 1] && inp[j] == inp_max )
1213 : {
1214 : /* we are saturated at inp_max */
1215 755 : err = 0.0f;
1216 : }
1217 94519 : else if ( inp[j] < inp_min )
1218 : {
1219 : /* we are below inp_min */
1220 7204 : err = y - inp_min;
1221 : }
1222 : else
1223 : {
/* values above inp_max are not limited here */
1224 87315 : err = y - inp[j];
1225 : }
1226 95274 : err = err * err;
1227 95274 : edge[i] += err;
1228 : }
1229 :
/* the accumulated squared error is divided by the interval length i + 1 */
1230 63516 : edge[i] /= i + 1;
1231 :
1232 63516 : if ( edge[i] < edge_min )
1233 : {
1234 56805 : edge_min = edge[i];
1235 : }
1236 : }
1237 :
/* stays at the initial 1e7f when the loop is not entered (len below 2) */
1238 15879 : return edge_min;
1239 : }
|