Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include <math.h>
35 : #include "options.h"
36 : #include "cnst.h"
37 : #include "rom_com.h"
38 : #include "prot.h"
39 : #include "ivas_prot.h"
40 : #include "ivas_rom_com.h"
41 : #include "ivas_rom_enc.h"
42 : #include "ivas_cnst.h"
43 : #ifdef DEBUGGING
44 : #include "debug.h"
45 : #endif
46 : #include "wmc_auto.h"
47 :
48 :
49 : /*-------------------------------------------------------------------*
50 : * Local constants
51 : *-------------------------------------------------------------------*/
52 :
53 : #define RC_FACT_UP 0.3f /* factor passed to rc_filter() when relE_0_1 is above its short-term mean */
54 : #define RC_FACT_DOWN 0.7f /* factor passed to rc_filter() otherwise */
55 : #define UNCLR_SCORE_THR 4.0f /* +/- saturation limit of the raw UNCLR score; the limited score is divided by twice this value */
56 : #define XTALK_SCORE_THR_DFT 4.0f /* NOTE(review): presumably the raw xtalk score limit in DFT mode - its use is not visible in this part of the file */
57 : #define XTALK_SCORE_THR_TD_UP 3.0f /* raw xtalk TD score above this limit is set to +1; positive scores below it are divided by it */
58 : #define XTALK_SCORE_THR_TD_DN 4.0f /* raw xtalk TD score below minus this limit is set to -1; remaining non-positive scores are divided by it */
59 :
60 : #define UNCLR_INTERCEPT_TD 0.780313f /* start value of the raw UNCLR score in unclr_classifier_td() */
61 : #define UNCLR_INTERCEPT_DFT 1.226513f /* start value of the raw UNCLR score in unclr_classifier_dft() */
62 : #define XTALK_INTERCEPT_TD -1.770983f /* start value of the raw xtalk score in xtalk_classifier_td() */
63 : #define XTALK_INTERCEPT_DFT -0.758556f /* NOTE(review): presumably the DFT counterpart of XTALK_INTERCEPT_TD - its use is not visible in this part of the file */
64 :
65 : #define EDGE_MAX_LEN 30 /* maximum length of buffer for edge detection */
66 : #define REDGE_MAX_LEN 30 /* maximum length of buffer for rising edge detection */
67 :
68 : #define CLASSIFIER_ITD_THRES 8 /* ITD threshold in samples that enables classifier to switch */
69 :
70 :
71 : /*-------------------------------------------------------------------*
72 : * Local function prototypes
73 : *-------------------------------------------------------------------*/
74 :
75 : static void rc_filter( const float x, float *y, const int16_t order, const float tau );
76 :
77 : static void edge_detect( const float *inp, const int16_t len, const float inp_min, const float inp_max, float *edge_str, int16_t *edge_type );
78 :
79 : static float redge_detect( const float *inp, const int16_t len, const float inp_min, const float inp_max );
80 :
81 :
82 : /*-------------------------------------------------------------------*
83 : * Function select_stereo_mode()
84 : *
85 : * Select stereo technology based on output of stereo classifiers
86 : *-------------------------------------------------------------------*/
87 :
88 : /*! r: element mode */
89 304035 : int16_t select_stereo_mode(
90 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
91 : const IVAS_FORMAT ivas_format /* i : IVAS format */
92 : )
93 : {
94 : int16_t element_mode;
95 : STEREO_CLASSIF_HANDLE hStereoClassif;
96 : int16_t is_speech;
97 : int16_t stereo_switching_flag;
98 :
99 : /* initialization */
100 304035 : element_mode = hCPE->element_mode;
101 304035 : hStereoClassif = hCPE->hStereoClassif;
102 :
103 : /* set binary flag to prevent LRTD mode on music */
104 304035 : hStereoClassif->is_speech = 0.97f * hStereoClassif->is_speech + 0.03f * hCPE->hCoreCoder[0]->hSpMusClas->past_dlp[0];
105 304035 : is_speech = ( hStereoClassif->is_speech < 1.0f && hCPE->hCoreCoder[0]->hSpMusClas->wdlp_xtalk < 0.0f );
106 :
107 : /* set binary flag indicating LRTD mode based on unclr/xtalk classifiers' decisions */
108 304035 : hStereoClassif->prev_lrtd_mode = hStereoClassif->lrtd_mode;
109 304035 : hStereoClassif->unclr_decision = ( hStereoClassif->unclr_decision && hCPE->hCoreCoder[0]->flag_noisy_speech_snr == 0 && hCPE->element_brate > IVAS_16k4 );
110 304035 : hStereoClassif->lrtd_mode = ( ( hStereoClassif->unclr_decision | hStereoClassif->xtalk_decision ) && is_speech );
111 :
112 304035 : stereo_switching_flag = 1;
113 :
114 304035 : if ( hCPE->element_brate >= MIN_BRATE_MDCT_STEREO || ( ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) && hCPE->element_brate < MASA_STEREO_MIN_BITRATE )
115 : #ifdef DEBUGGING
116 : || ( hCPE->stereo_mode_cmdl == IVAS_CPE_DFT || hCPE->stereo_mode_cmdl == IVAS_CPE_TD )
117 : #endif
118 : )
119 : {
120 229501 : stereo_switching_flag = 0;
121 : }
122 :
123 304035 : if ( hCPE->element_brate >= MIN_BRATE_MDCT_STEREO )
124 : {
125 190378 : hStereoClassif->prev_lrtd_mode = 0;
126 190378 : hStereoClassif->lrtd_mode = 0;
127 190378 : element_mode = IVAS_CPE_MDCT;
128 : }
129 113657 : else if ( hCPE->element_brate < MIN_BRATE_MDCT_STEREO && hCPE->last_element_mode == IVAS_CPE_MDCT )
130 : {
131 817 : hStereoClassif->lrtd_mode = 0;
132 817 : element_mode = IVAS_CPE_DFT;
133 :
134 817 : if ( stereo_switching_flag == 1 && hCPE->element_brate > IVAS_13k2 && hCPE->hCoreCoder[0]->hSpMusClas->past_dlp[0] < 1.0f && hCPE->hCoreCoder[0]->hSpMusClas->wdlp_xtalk < -0.01f && hCPE->hCoreCoder[0]->vad_flag == 1 && ( hCPE->hStereoMdct->sw_uncorr || hStereoClassif->xtalk_decision ) )
135 : {
136 9 : hStereoClassif->lrtd_mode = 1;
137 9 : element_mode = IVAS_CPE_TD;
138 : }
139 : }
140 :
141 : /* set the element mode */
142 304035 : if ( hStereoClassif->lrtd_mode == 1 && stereo_switching_flag == 1 )
143 : {
144 4365 : element_mode = IVAS_CPE_TD;
145 : }
146 299670 : else if ( element_mode < IVAS_CPE_MDCT )
147 : {
148 109292 : if ( stereo_switching_flag == 0 )
149 : {
150 39123 : if ( ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) && hCPE->element_brate < MASA_STEREO_MIN_BITRATE )
151 : {
152 39123 : element_mode = IVAS_CPE_DFT;
153 : }
154 : #ifdef DEBUGGING
155 : if ( hCPE->stereo_mode_cmdl > 1 )
156 : {
157 : element_mode = hCPE->stereo_mode_cmdl;
158 : }
159 : #endif
160 : }
161 70169 : else if ( element_mode == IVAS_CPE_TD )
162 : {
163 22 : if ( hCPE->hStereoTD->prev_fr_LRTD_TD_dec > 0 && is_speech )
164 : {
165 : /* if unclr_decision goes from 1->0 on active content, continue in LRTD mode */
166 21 : hStereoClassif->lrtd_mode = 1;
167 : }
168 1 : else if ( stereo_switching_flag == 1 )
169 : {
170 1 : element_mode = IVAS_CPE_DFT;
171 : }
172 : }
173 70147 : else if ( stereo_switching_flag == 1 )
174 : {
175 70147 : element_mode = IVAS_CPE_DFT;
176 : }
177 : }
178 :
179 : /* switch from LRTD to DFT when xtalk_decision goes from 0->1 (note: this special case is not handled in the xtalk classifier) */
180 304035 : if ( hCPE->last_element_mode == IVAS_CPE_TD && element_mode == IVAS_CPE_TD && hStereoClassif->xtalk_decision == 1 )
181 : {
182 2216 : if ( hCPE->hStereoTD->prev_fr_LRTD_TD_dec == 0 && hCPE->hStereoTD->tdm_FD2LRTD_SW_cnt > 15 && hCPE->hStereoTD->tdm_last_LRTD_frame_cnt > 3 && hCPE->hCoreCoder[0]->clas < VOICED_CLAS && ( hCPE->element_brate >= IVAS_16k4 || hStereoClassif->xtalk_wscore < 0.01f ) )
183 : {
184 23 : if ( stereo_switching_flag == 1 )
185 : {
186 23 : element_mode = IVAS_CPE_DFT;
187 : }
188 23 : hStereoClassif->xtalk_decision = 0;
189 23 : hStereoClassif->lrtd_mode = 0;
190 : }
191 : }
192 :
193 304035 : if ( hCPE->last_element_mode != element_mode )
194 : {
195 1935 : if ( hCPE->last_element_mode != IVAS_CPE_DFT && hCPE->last_element_mode != IVAS_CPE_TD )
196 817 : {
197 817 : int16_t lrtd_mode = hStereoClassif->lrtd_mode;
198 :
199 : /* reset stereo classifier when switching from MDCT stereo to Unified stereo */
200 817 : stereo_classifier_init( hCPE->hStereoClassif );
201 :
202 817 : hStereoClassif->lrtd_mode = lrtd_mode;
203 : }
204 : else
205 : {
206 : /* reset UNCLR classifier parameters */
207 1118 : set_f( hStereoClassif->unclr_fv, -1.0f, SSC_MAX_NFEA );
208 1118 : hStereoClassif->unclr_corrLagMax_prev = 0;
209 :
210 : /* reset xtalk classifier parameters */
211 1118 : set_f( hStereoClassif->xtalk_fv, -1.0f, SSC_MAX_NFEA );
212 : }
213 : }
214 : #ifdef DEBUG_MODE_TD
215 : dbgwrite( &hStereoClassif->unclr_decision, sizeof( int16_t ), 1, L_FRAME16k, "res/unclr_decision.enc" );
216 : dbgwrite( &hStereoClassif->xtalk_decision, sizeof( int16_t ), 1, L_FRAME16k, "res/xtalk_decision.enc" );
217 : dbgwrite( &hCPE->hCoreCoder[0]->sp_aud_decision0, sizeof( int16_t ), 1, L_FRAME16k, "res/sp_aud_class.enc" );
218 : dbgwrite( &hCPE->hCoreCoder[0]->last_core, sizeof( int16_t ), 1, L_FRAME16k, "res/last_core.enc" );
219 : #endif
220 :
221 304035 : if ( element_mode == IVAS_CPE_TD && hCPE->hCoreCoder[0]->Opt_DTX_ON )
222 : {
223 2025 : hCPE->hStereoCng->td_active = 1;
224 2025 : hCPE->hStereoCng->first_SID_after_TD = 1;
225 : }
226 :
227 304035 : return ( element_mode );
228 : }
229 :
230 :
231 : /*-------------------------------------------------------------------*
232 : * Function stereo_classifier_init()
233 : *
234 : * Initialize stereo classifier handle
235 : *-------------------------------------------------------------------*/
236 :
237 11522 : void stereo_classifier_init(
238 : STEREO_CLASSIF_HANDLE hStereoClassif /* i/o: stereo classifier structure */
239 : )
240 : {
241 : /* initialization of features for xtalk classifier and UNCLR classifier */
242 11522 : hStereoClassif->clas_ch1 = 0;
243 11522 : set_s( hStereoClassif->pitch_ch1, 0, 3 );
244 11522 : set_f( hStereoClassif->voicing_ch1, 0.0f, 3 );
245 11522 : hStereoClassif->cor_map_sum_ch1 = 0.0f;
246 11522 : set_f( hStereoClassif->lsf_ch1, 0.0f, M );
247 11522 : hStereoClassif->lepsP_ch1 = 0.0f;
248 11522 : hStereoClassif->dE1_ch1 = 0.0f;
249 11522 : hStereoClassif->dE1_ch2 = 0.0f;
250 11522 : hStereoClassif->nchar_ch1 = 0.0f;
251 11522 : hStereoClassif->nchar_ch2 = 0.0f;
252 11522 : hStereoClassif->non_sta_ch1 = 0.0f;
253 11522 : hStereoClassif->sp_div_ch1 = 0.0f;
254 11522 : hStereoClassif->ps_diff_ch1 = 0.0f;
255 11522 : hStereoClassif->ps_diff_ch2 = 0.0f;
256 11522 : hStereoClassif->ps_sta_ch1 = 0.0f;
257 11522 : hStereoClassif->ps_sta_ch2 = 0.0f;
258 11522 : hStereoClassif->prev_g_IPD = 0.5f;
259 11522 : hStereoClassif->prev_IPD = 0.0f;
260 11522 : hStereoClassif->prev_ratio_m1_m2 = 0.0f;
261 11522 : set_f( hStereoClassif->xtalk_score_buf, 0.0f, XTALK_SCORE_BUF_LEN );
262 11522 : hStereoClassif->ratio_L = 0.5f;
263 11522 : hStereoClassif->vad_flag_glob = 0;
264 11522 : hStereoClassif->vad_relE = 0;
265 11522 : hStereoClassif->is_speech = 0.0f;
266 :
267 11522 : set_s( hStereoClassif->aEn_raw, 0, CPE_CHANNELS );
268 :
269 11522 : hStereoClassif->Etot_dn = 0.0f;
270 11522 : hStereoClassif->Etot_up = 0.0f;
271 :
272 11522 : set_f( hStereoClassif->relE_buf, 0.0f, UNCLR_L_RELE );
273 11522 : set_f( hStereoClassif->Etot_buf, 0.0f, UNCLR_L_ETOT );
274 11522 : set_f( hStereoClassif->unclr_relE_0_1_LT, 0.0f, UNCLR_RC_ORDER );
275 :
276 11522 : hStereoClassif->unclr_sw_enable_cnt[0] = 0;
277 11522 : hStereoClassif->unclr_sw_enable_cnt[1] = 0;
278 :
279 11522 : hStereoClassif->unclr_decision = 0;
280 11522 : hStereoClassif->unclr_wscore = 0.0f;
281 :
282 11522 : set_f( hStereoClassif->unclr_fv, -1.0f, SSC_MAX_NFEA );
283 11522 : hStereoClassif->unclr_corrLagMax_prev = 0;
284 11522 : hStereoClassif->ave_ener_L = 0;
285 11522 : hStereoClassif->ave_ener_R = 0;
286 11522 : hStereoClassif->relE_0_1 = 0.01f;
287 11522 : hStereoClassif->relE_0_1_LT = 0.01f;
288 :
289 11522 : set_f( hStereoClassif->xtalk_fv, -1.0f, SSC_MAX_NFEA );
290 11522 : hStereoClassif->xtalk_wscore = 0.0f;
291 11522 : hStereoClassif->xtalk_decision = 0;
292 11522 : hStereoClassif->xtalk_score_wrelE = 0.0f;
293 :
294 11522 : hStereoClassif->lrtd_mode = 0;
295 11522 : hStereoClassif->prev_lrtd_mode = 0;
296 :
297 11522 : hStereoClassif->silence_flag = 0;
298 :
299 11522 : return;
300 : }
301 :
302 :
303 : /*-----------------------------------------------------------------*
304 : * stereo_classifier_features()
305 : *
306 : * Collect features for stereo classifiers
307 : *-----------------------------------------------------------------*/
308 :
309 2279128 : void stereo_classifier_features(
310 : STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure */
311 : const int16_t idchan, /* i : channel ID */
312 : const int16_t element_mode, /* i : element mode */
313 : const int16_t vad_flag, /* i : VAD flag */
314 : const float lsf_new[], /* i : LSFs at the end of the frame */
315 : const float epsP[], /* i : LP analysis residual energies for each iteration*/
316 : const int16_t pitch[], /* i : open-loop pitch values for quantiz. */
317 : const float voicing[], /* i : OL maximum normalized correlation */
318 : const float cor_map_sum, /* i : speech/music clasif. parameter */
319 : const float non_staX, /* i : unbound non-stationarity for sp/mu clas. */
320 : const float sp_div, /* i : spectral diversity feature */
321 : const int16_t clas /* i : signal class */
322 : )
323 : {
324 : int16_t i, clas_ch2;
325 : float lepsP_ch2, ener_l, ener_r;
326 :
327 : /* combine VAD flags from both channels */
328 2279128 : if ( idchan == 0 )
329 : {
330 1194655 : hStereoClassif->vad_flag_glob = vad_flag;
331 : }
332 : else
333 : {
334 1084473 : hStereoClassif->vad_flag_glob |= vad_flag;
335 : }
336 :
337 2279128 : if ( ( element_mode == IVAS_CPE_DFT && idchan == 0 ) || ( element_mode == IVAS_CPE_TD && idchan == 1 ) )
338 : {
339 : /* update Etot_up and Etot_dn based on aEn */
340 114320 : ener_l = 10.0f * log10f( hStereoClassif->ave_ener_L + 1.0f );
341 114320 : ener_r = 10.0f * log10f( hStereoClassif->ave_ener_R + 1.0f );
342 114320 : mvr2r( &hStereoClassif->Etot_buf[0], &hStereoClassif->Etot_buf[1], UNCLR_L_ETOT - 1 );
343 114320 : hStereoClassif->Etot_buf[0] = max( 0, max( ener_l, ener_r ) );
344 :
345 114320 : if ( hStereoClassif->aEn_raw[0] == 6 || ( element_mode == IVAS_CPE_TD && hStereoClassif->aEn_raw[1] == 6 ) )
346 : {
347 : /* active signal, update upper bound */
348 81659 : if ( hStereoClassif->Etot_buf[0] < hStereoClassif->Etot_up )
349 : {
350 : /* energy decreases -> slower update */
351 37841 : hStereoClassif->Etot_up = 0.99f * hStereoClassif->Etot_up + 0.01f * hStereoClassif->Etot_buf[0];
352 : }
353 : else
354 : {
355 : /* energy increases -> faster update */
356 43818 : hStereoClassif->Etot_up = 0.95f * hStereoClassif->Etot_up + 0.05f * hStereoClassif->Etot_buf[0];
357 : }
358 : }
359 32661 : else if ( ( element_mode == IVAS_CPE_DFT && hStereoClassif->aEn_raw[0] == 0 ) ||
360 232 : ( element_mode == IVAS_CPE_TD && hStereoClassif->aEn_raw[0] == 0 && hStereoClassif->aEn_raw[1] == 0 ) )
361 : {
362 : /* inactive signal, update lower bound */
363 19552 : if ( hStereoClassif->Etot_buf[0] < hStereoClassif->Etot_dn )
364 : {
365 : /* energy decreases -> faster update */
366 7178 : hStereoClassif->Etot_dn = 0.9f * hStereoClassif->Etot_dn + 0.1f * hStereoClassif->Etot_buf[0];
367 : }
368 : else
369 : {
370 : /* energy increases -> slower update */
371 12374 : hStereoClassif->Etot_dn = 0.95f * hStereoClassif->Etot_dn + 0.05f * hStereoClassif->Etot_buf[0];
372 : }
373 :
374 19552 : if ( hStereoClassif->Etot_dn < 30.0f )
375 : {
376 : /* do not decrease below lower bound threshold */
377 3231 : hStereoClassif->Etot_dn = 30.0f;
378 : }
379 : }
380 :
381 : /* upper bound is too low and close to lower bound -> update it */
382 114320 : if ( hStereoClassif->Etot_up < hStereoClassif->Etot_dn + 20.0f )
383 : {
384 17654 : hStereoClassif->Etot_up = hStereoClassif->Etot_dn + 20.0f;
385 : }
386 :
387 : /* normalize Etot to (0,1) */
388 114320 : hStereoClassif->relE_0_1 = lin_interp( hStereoClassif->Etot_buf[0], hStereoClassif->Etot_dn, 0.0f, hStereoClassif->Etot_up, 0.9f, 1 );
389 :
390 : /* update relE_buf */
391 114320 : mvr2r( &hStereoClassif->relE_buf[0], &hStereoClassif->relE_buf[1], UNCLR_L_RELE - 1 );
392 114320 : hStereoClassif->relE_buf[0] = hStereoClassif->relE_0_1;
393 :
394 114320 : if ( hStereoClassif->relE_0_1 >= hStereoClassif->relE_buf[1] )
395 : {
396 81189 : hStereoClassif->relE_0_1_LT = 0.9f * hStereoClassif->relE_0_1_LT + 0.1f * hStereoClassif->relE_0_1;
397 : }
398 : else
399 : {
400 33131 : hStereoClassif->relE_0_1_LT = 0.95f * hStereoClassif->relE_0_1_LT + 0.05f * hStereoClassif->relE_0_1;
401 : }
402 :
403 : /* estimate VAD flag based on relative energy */
404 114320 : if ( hStereoClassif->relE_0_1_LT < 0.1f )
405 : {
406 17550 : hStereoClassif->vad_relE = 0;
407 : }
408 : else
409 : {
410 96770 : hStereoClassif->vad_relE = 1;
411 : }
412 :
413 : /* combine classical VAD flag with VAD flag based on relative energy */
414 114320 : hStereoClassif->vad_flag_glob &= hStereoClassif->vad_relE;
415 : }
416 :
417 2279128 : if ( idchan == 0 )
418 : {
419 1194655 : mvr2r( lsf_new, hStereoClassif->lsf_ch1, M );
420 1194655 : hStereoClassif->lepsP_ch1 = logf( epsP[13] + 1e-5f ) - logf( epsP[0] + 1e-5f );
421 :
422 1194655 : hStereoClassif->xtalk_fv[E_lsf_1] = lsf_new[0];
423 1194655 : hStereoClassif->xtalk_fv[E_lsf_4] = lsf_new[3];
424 1194655 : hStereoClassif->xtalk_fv[E_lsf_9] = lsf_new[8];
425 1194655 : hStereoClassif->xtalk_fv[E_lsf_14] = lsf_new[13];
426 1194655 : hStereoClassif->xtalk_fv[E_lepsP_13] = hStereoClassif->lepsP_ch1;
427 : }
428 : else
429 : {
430 1084473 : hStereoClassif->xtalk_fv[E_sum_d_LSF] = 0;
431 18436041 : for ( i = 0; i < M; i++ )
432 : {
433 17351568 : hStereoClassif->xtalk_fv[E_sum_d_LSF] += fabsf( lsf_new[i] - hStereoClassif->lsf_ch1[i] );
434 : }
435 :
436 1084473 : if ( hStereoClassif->vad_flag_glob && ( hStereoClassif->ratio_L > 0.0f && hStereoClassif->ratio_L < 1.0f ) )
437 : {
438 1006866 : hStereoClassif->xtalk_fv[E_sum_d_LSF] = 0.0f;
439 : }
440 1084473 : lepsP_ch2 = logf( epsP[13] + 1e-5f ) - logf( epsP[0] + 1e-5f );
441 1084473 : hStereoClassif->xtalk_fv[E_d_lepsP_13] = fabsf( hStereoClassif->lepsP_ch1 - lepsP_ch2 );
442 : }
443 :
444 2279128 : if ( idchan == 0 )
445 : {
446 1194655 : mvs2s( pitch, hStereoClassif->pitch_ch1, 3 );
447 1194655 : mvr2r( voicing, hStereoClassif->voicing_ch1, 3 );
448 :
449 1194655 : hStereoClassif->xtalk_fv[E_pitch] = 1 / 3.0f * ( pitch[0] + pitch[1] + pitch[2] );
450 1194655 : hStereoClassif->xtalk_fv[E_voicing] = 1 / 3.0f * ( voicing[0] + voicing[1] + voicing[2] );
451 : }
452 : else
453 : {
454 1084473 : hStereoClassif->xtalk_fv[E_d_pitch] = 0;
455 1084473 : hStereoClassif->xtalk_fv[E_d_voicing] = 0;
456 4337892 : for ( i = 0; i < 3; i++ )
457 : {
458 3253419 : hStereoClassif->xtalk_fv[E_d_pitch] += (float) abs( pitch[i] - hStereoClassif->pitch_ch1[i] );
459 3253419 : hStereoClassif->xtalk_fv[E_d_voicing] += fabsf( voicing[i] - hStereoClassif->voicing_ch1[i] );
460 : }
461 1084473 : hStereoClassif->xtalk_fv[E_d_pitch] /= 3.0f;
462 1084473 : hStereoClassif->xtalk_fv[E_d_voicing] /= 3.0f;
463 : }
464 :
465 2279128 : if ( idchan == 0 )
466 : {
467 1194655 : hStereoClassif->cor_map_sum_ch1 = cor_map_sum;
468 1194655 : hStereoClassif->non_sta_ch1 = non_staX;
469 1194655 : hStereoClassif->sp_div_ch1 = sp_div;
470 :
471 1194655 : hStereoClassif->xtalk_fv[E_cor_map_sum] = cor_map_sum;
472 1194655 : hStereoClassif->xtalk_fv[E_nchar] = logf( hStereoClassif->nchar_ch1 + 1.0f );
473 1194655 : hStereoClassif->xtalk_fv[E_non_sta] = non_staX;
474 1194655 : hStereoClassif->xtalk_fv[E_sp_div] = logf( sp_div + 1.0f );
475 : }
476 : else
477 : {
478 1084473 : hStereoClassif->xtalk_fv[E_d_cor_map_sum] = fabsf( hStereoClassif->cor_map_sum_ch1 - cor_map_sum );
479 1084473 : hStereoClassif->xtalk_fv[E_d_nchar] = fabsf( logf( hStereoClassif->nchar_ch1 + 1.0f ) - logf( hStereoClassif->nchar_ch2 + 1.0f ) );
480 1084473 : hStereoClassif->xtalk_fv[E_d_non_sta] = fabsf( hStereoClassif->non_sta_ch1 - non_staX );
481 1084473 : hStereoClassif->xtalk_fv[E_d_sp_div] = fabsf( logf( hStereoClassif->sp_div_ch1 + 1.0f ) - logf( sp_div + 1.0f ) );
482 : }
483 :
484 2279128 : if ( idchan == 0 )
485 : {
486 1194655 : hStereoClassif->xtalk_fv[E_dE1] = logf( hStereoClassif->dE1_ch1 + 1.0f );
487 : }
488 : else
489 : {
490 1084473 : hStereoClassif->xtalk_fv[E_d_dE1] = fabsf( logf( hStereoClassif->dE1_ch1 + 1.0f ) - logf( hStereoClassif->dE1_ch2 + 1.0f ) );
491 : }
492 :
493 2279128 : if ( idchan == 0 )
494 : {
495 1194655 : if ( clas > VOICED_CLAS )
496 : {
497 71032 : hStereoClassif->clas_ch1 = VOICED_CLAS;
498 : }
499 1123623 : else if ( clas < VOICED_CLAS )
500 : {
501 653814 : hStereoClassif->clas_ch1 = UNVOICED_CLAS;
502 : }
503 : else
504 : {
505 469809 : hStereoClassif->clas_ch1 = clas;
506 : }
507 :
508 1194655 : hStereoClassif->xtalk_fv[E_clas] = hStereoClassif->clas_ch1;
509 : }
510 : else
511 : {
512 1084473 : if ( clas > VOICED_CLAS )
513 : {
514 51816 : clas_ch2 = VOICED_CLAS;
515 : }
516 1032657 : else if ( clas < VOICED_CLAS )
517 : {
518 693568 : clas_ch2 = UNVOICED_CLAS;
519 : }
520 : else
521 : {
522 339089 : clas_ch2 = clas;
523 : }
524 :
525 1084473 : hStereoClassif->xtalk_fv[E_d_clas] = (float) abs( hStereoClassif->clas_ch1 - clas_ch2 );
526 : }
527 :
528 2279128 : if ( idchan == 0 )
529 : {
530 1194655 : hStereoClassif->xtalk_fv[E_ps_diff] = hStereoClassif->ps_diff_ch1;
531 1194655 : hStereoClassif->xtalk_fv[E_ps_sta] = hStereoClassif->ps_sta_ch1;
532 : }
533 : else
534 : {
535 1084473 : hStereoClassif->xtalk_fv[E_d_ps_diff] = fabsf( hStereoClassif->ps_diff_ch1 - hStereoClassif->ps_diff_ch2 );
536 1084473 : hStereoClassif->xtalk_fv[E_d_ps_sta] = fabsf( hStereoClassif->ps_sta_ch1 - hStereoClassif->ps_sta_ch2 );
537 : }
538 :
539 2279128 : return;
540 : }
541 :
542 :
543 : /*-------------------------------------------------------------------*
544 : * Function unclr_classifier_td()
545 : *
546 : * Classify current TD frame as uncorrelated L/R (1) or normal (0)
547 : *-------------------------------------------------------------------*/
548 :
549 4138 : void unclr_classifier_td(
550 : CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */
551 : )
552 : {
553 : int16_t i, ind;
554 : float relE_ST, edge, edge_0_1;
555 : float score, fvn[SSC_MAX_NFEA];
556 : #ifdef DEBUG_MODE_TD
557 : int16_t dec;
558 : #endif
559 4138 : STEREO_CLASSIF_HANDLE hStereoClassif = hCPE->hStereoClassif;
560 :
561 4138 : set_f( fvn, -1.0f, SSC_MAX_NFEA );
562 :
563 : /* calcualte raw score based on LR */
564 4138 : score = UNCLR_INTERCEPT_TD;
565 45518 : for ( i = 0; i < SIZE_UNCLR_ISEL_TD; i++ )
566 : {
567 41380 : ind = unclr_isel_td[i];
568 :
569 : /* mean & std removal */
570 41380 : fvn[i] = ( hStereoClassif->unclr_fv[ind] - unclr_mean_td[i] ) / unclr_scale_td[i];
571 :
572 : /* LR */
573 41380 : score += fvn[i] * unclr_coef_td[i];
574 : }
575 :
576 : #ifdef DEBUG_MODE_TD
577 : /* raw decision */
578 : dec = score > 0;
579 : #endif
580 :
581 : /* normalize score to -1:+1 */
582 4138 : if ( score > UNCLR_SCORE_THR )
583 : {
584 399 : score = UNCLR_SCORE_THR;
585 : }
586 3739 : else if ( score < -UNCLR_SCORE_THR )
587 : {
588 750 : score = -UNCLR_SCORE_THR;
589 : }
590 4138 : score /= 2 * UNCLR_SCORE_THR;
591 :
592 : /* weight raw score with relative energy */
593 4138 : score *= hStereoClassif->relE_0_1;
594 :
595 : /* rising edge detection on relE */
596 4138 : relE_ST = mean( hStereoClassif->relE_buf, UNCLR_L_RELE );
597 4138 : if ( hStereoClassif->relE_0_1 > relE_ST )
598 : {
599 1915 : rc_filter( hStereoClassif->relE_0_1, hStereoClassif->unclr_relE_0_1_LT, UNCLR_RC_ORDER, RC_FACT_UP );
600 : }
601 : else
602 : {
603 2223 : rc_filter( hStereoClassif->relE_0_1, hStereoClassif->unclr_relE_0_1_LT, UNCLR_RC_ORDER, RC_FACT_DOWN );
604 : }
605 :
606 4138 : edge = hStereoClassif->relE_0_1 - hStereoClassif->unclr_relE_0_1_LT[UNCLR_RC_ORDER - 1];
607 4138 : edge_0_1 = lin_interp( edge, 0.0f, 0.95f, 1.0f, 0.9f, 1 );
608 :
609 : /* LT average */
610 4138 : hStereoClassif->unclr_wscore = edge_0_1 * hStereoClassif->unclr_wscore + ( 1 - edge_0_1 ) * score;
611 :
612 : /* binary decision w. hysteresis (switch the decision only when coder_type is GC, UC or IC) */
613 4138 : if ( ( ( hStereoClassif->unclr_decision == 0 && hStereoClassif->unclr_wscore > 0.1f ) || ( hStereoClassif->unclr_decision == 1 && hStereoClassif->unclr_wscore < -0.07f ) ) && ( hStereoClassif->unclr_sw_enable_cnt[0] > 0 || hStereoClassif->unclr_sw_enable_cnt[1] > 0 ) )
614 : {
615 : /* let's switch the binary decision */
616 7 : hStereoClassif->unclr_decision = !hStereoClassif->unclr_decision;
617 : }
618 :
619 : #ifdef DEBUG_MODE_TD
620 : dbgwrite( &dec, sizeof( int16_t ), 1, 1, "res/unclr_dec.x" );
621 : dbgwrite( &hStereoClassif->unclr_wscore, sizeof( float ), 1, 1, "res/unclr_wscore.x" );
622 : dbgwrite( &hStereoClassif->unclr_decision, sizeof( int16_t ), 1, 1, "res/unclr_dec_hyst.x" );
623 : #endif
624 :
625 4138 : return;
626 : }
627 :
628 :
629 : /*-------------------------------------------------------------------*
630 : * Function unclr_classifier_dft()
631 : *
632 : * Classifies current DFT frame as uncorrelated L/R (1) or normal stereo (0)
633 : *-------------------------------------------------------------------*/
634 :
635 110182 : void unclr_classifier_dft(
636 : CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */
637 : )
638 : {
639 : int16_t i, ind;
640 : float edge, relE_ST, edge_0_1;
641 : float score, fvn[SSC_MAX_NFEA];
642 : #ifdef DEBUG_MODE_TD
643 : int16_t dec;
644 : #endif
645 :
646 110182 : STEREO_CLASSIF_HANDLE hStereoClassif = hCPE->hStereoClassif;
647 :
648 : /* calculate raw score based on LR */
649 110182 : score = UNCLR_INTERCEPT_DFT;
650 991638 : for ( i = 0; i < SIZE_UNCLR_ISEL_DFT; i++ )
651 : {
652 881456 : ind = unclr_isel_dft[i];
653 :
654 : /* mean & std removal */
655 881456 : fvn[i] = ( hStereoClassif->unclr_fv[ind] - unclr_mean_dft[i] ) / unclr_scale_dft[i];
656 :
657 : /* LR */
658 881456 : score += fvn[i] * unclr_coef_dft[i];
659 : }
660 :
661 : #ifdef DEBUG_MODE_TD
662 : /* raw decision */
663 : dec = score > 0;
664 : #endif
665 :
666 : /* normalize score to -1:+1 */
667 110182 : if ( score > UNCLR_SCORE_THR )
668 : {
669 3651 : score = UNCLR_SCORE_THR;
670 : }
671 106531 : else if ( score < -UNCLR_SCORE_THR )
672 : {
673 69881 : score = -UNCLR_SCORE_THR;
674 : }
675 110182 : score /= 2 * UNCLR_SCORE_THR;
676 :
677 : /* weight raw score with relative energy */
678 110182 : score *= hStereoClassif->relE_0_1;
679 :
680 110182 : if ( !hStereoClassif->vad_flag_glob )
681 : {
682 24693 : score = 0;
683 : }
684 :
685 : /* rising edge detector on relE */
686 110182 : relE_ST = mean( hStereoClassif->relE_buf, UNCLR_L_RELE );
687 110182 : if ( hStereoClassif->relE_0_1 > relE_ST )
688 : {
689 47076 : rc_filter( hStereoClassif->relE_0_1, hStereoClassif->unclr_relE_0_1_LT, UNCLR_RC_ORDER, RC_FACT_UP );
690 : }
691 : else
692 : {
693 63106 : rc_filter( hStereoClassif->relE_0_1, hStereoClassif->unclr_relE_0_1_LT, UNCLR_RC_ORDER, RC_FACT_DOWN );
694 : }
695 :
696 110182 : edge = hStereoClassif->relE_0_1 - hStereoClassif->unclr_relE_0_1_LT[UNCLR_RC_ORDER - 1];
697 110182 : edge_0_1 = lin_interp( edge, 0.0f, 0.95f, 1.0f, 0.9f, 1 );
698 :
699 : /* LT average */
700 110182 : hStereoClassif->unclr_wscore = edge_0_1 * hStereoClassif->unclr_wscore + ( 1 - edge_0_1 ) * score;
701 :
702 : /* binary decision w. hysteresis (switch the decision only when coder_type is GC, UC or IC) */
703 110182 : if ( ( ( hStereoClassif->unclr_decision == 0 && hStereoClassif->unclr_wscore > 0.1f ) || ( hStereoClassif->unclr_decision == 1 && hStereoClassif->unclr_wscore < -0.07f ) ) && ( hStereoClassif->unclr_sw_enable_cnt[0] > 0 ) )
704 : {
705 : /* let's switch the binary decision */
706 1639 : hStereoClassif->unclr_decision = !hStereoClassif->unclr_decision;
707 : }
708 :
709 : #ifdef DEBUG_MODE_TD
710 : dbgwrite( &dec, sizeof( int16_t ), 1, 1, "res/unclr_dec.x" );
711 : dbgwrite( &hStereoClassif->unclr_wscore, sizeof( float ), 1, 1, "res/unclr_wscore.x" );
712 : dbgwrite( &hStereoClassif->unclr_decision, sizeof( int16_t ), 1, 1, "res/unclr_dec_hyst.x" );
713 : #endif
714 :
715 110182 : return;
716 : }
717 :
718 :
719 : /*-------------------------------------------------------------------*
720 : * Function xtalk_classifier_td()
721 : *
722 : * Classify current TD frame as cross-talk frame (1) or normal stereo frame (0)
723 : *-------------------------------------------------------------------*/
724 :
725 4138 : void xtalk_classifier_td(
726 : CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */
727 : )
728 : {
729 : int16_t i, ind, edge_type;
730 : float score, fvn[SSC_MAX_NFEA];
731 : float edge, edge_0_1, wedge, scr_min, scr_max, wrelE;
732 :
733 : #ifdef DEBUG_MODE_TD
734 : int16_t dec;
735 : #endif
736 :
737 4138 : STEREO_CLASSIF_HANDLE hStereoClassif = hCPE->hStereoClassif;
738 :
739 4138 : set_f( fvn, -1.0f, SSC_MAX_NFEA );
740 :
741 : /* calcualte raw score based on LR */
742 4138 : score = XTALK_INTERCEPT_TD;
743 74484 : for ( i = 0; i < SIZE_XTALK_ISEL_TD; i++ )
744 : {
745 70346 : ind = xtalk_isel_td[i];
746 :
747 : /* mean & std removal */
748 70346 : fvn[i] = ( hStereoClassif->xtalk_fv[ind] - xtalk_mean_td[i] ) / xtalk_scale_td[i];
749 :
750 : /* LR */
751 70346 : score += fvn[i] * xtalk_coef_td[i];
752 : }
753 :
754 : /* normalize raw score to -1:+1 */
755 4138 : if ( score > XTALK_SCORE_THR_TD_UP )
756 : {
757 1217 : score = 1.0f;
758 : }
759 2921 : else if ( score < -XTALK_SCORE_THR_TD_DN )
760 : {
761 137 : score = -1.0f;
762 : }
763 2784 : else if ( score > 0.0f )
764 : {
765 1888 : score /= XTALK_SCORE_THR_TD_UP;
766 : }
767 : else
768 : {
769 896 : score /= XTALK_SCORE_THR_TD_DN;
770 : }
771 :
772 4138 : if ( hCPE->last_element_mode == IVAS_CPE_DFT )
773 : {
774 : /* overwrite score if we have just switched from DFT stereo */
775 61 : score = hStereoClassif->xtalk_score;
776 : }
777 : else
778 : {
779 4077 : hStereoClassif->xtalk_score = score;
780 : }
781 :
782 4138 : if ( !hStereoClassif->vad_flag_glob )
783 : {
784 : /* reset score to 0 in inactive segments */
785 220 : score = 0;
786 : }
787 :
788 : #ifdef DEBUG_MODE_TD
789 : /* raw decision */
790 : dec = score > 0;
791 : #endif
792 :
793 : /* weight raw score with relative energy */
794 4138 : wrelE = lin_interp( hStereoClassif->relE_0_1, 0.5f, 0.95f, 0.9f, 0.0f, 1 );
795 4138 : hStereoClassif->xtalk_score_wrelE = wrelE * hStereoClassif->xtalk_score_wrelE + ( 1 - wrelE ) * score;
796 4138 : score = hStereoClassif->xtalk_score_wrelE;
797 :
798 : /* rising edge detector on raw score -> yields 1 if strong rising edge is detected in the raw score buffer */
799 4138 : mvr2r( &hStereoClassif->xtalk_score_buf[0], &hStereoClassif->xtalk_score_buf[1], XTALK_SCORE_BUF_LEN - 1 );
800 4138 : hStereoClassif->xtalk_score_buf[0] = score;
801 :
802 4138 : minimum( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, &scr_min );
803 4138 : maximum( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, &scr_max );
804 :
805 : #ifdef DEBUG_MODE_TD
806 : edge_type = 0;
807 : edge_detect( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, -0.2f, 1.0f, &edge, &edge_type );
808 : edge_0_1 = lin_interp( 1 - edge, 1.0f, 1.0f, 0.6f, 0.0f, 1 );
809 : dbgwrite( &score, sizeof( float ), 1, 1, "res/score.x" );
810 : dbgwrite( &edge_0_1, sizeof( float ), 1, 1, "res/edge_0_1.x" );
811 : #endif
812 :
813 4138 : if ( ( ( scr_min < 0.0f && scr_max > 0.2f ) || ( scr_max - scr_min > 0.5f ) ) )
814 : {
815 : /* test rising edge (use 0 as edge_type because of newer->older buffer samples ordering) */
816 978 : edge_type = 0;
817 978 : edge_detect( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, -0.2f, 1.0f, &edge, &edge_type );
818 :
819 978 : if ( edge_type == 0 && edge < 0.3f )
820 : {
821 : /* normalize edge to 0-1 interval */
822 619 : edge_0_1 = lin_interp( 1 - edge, 1.0f, 1.0f, 0.6f, 0.0f, 1 );
823 : }
824 : else
825 : {
826 359 : edge_0_1 = 0;
827 : }
828 : }
829 : else
830 : {
831 3160 : edge_0_1 = 0.0f;
832 : }
833 :
834 :
835 : /* weight raw score based on rising edge detector */
836 4138 : wedge = lin_interp( edge_0_1, 0.0f, 0.9f, 1.0f, 0.5f, 1 );
837 :
838 4138 : hStereoClassif->xtalk_wscore = wedge * hStereoClassif->xtalk_wscore + ( 1 - wedge ) * score;
839 :
840 4138 : if ( !hStereoClassif->vad_flag_glob )
841 : {
842 220 : hStereoClassif->xtalk_wscore = 0;
843 : }
844 :
845 : /* binary decision w. hysteresis (switch the decision only when coder_type is GC, UC or IC) */
846 4138 : if ( ( hStereoClassif->unclr_decision == 0 && hStereoClassif->xtalk_decision == 0 && hStereoClassif->xtalk_wscore > 0.03f ) /*|| (hStereoClassif->xtalk_decision == 1 && hStereoClassif->xtalk_wscore < 0.00f)*/ && ( hStereoClassif->unclr_sw_enable_cnt[0] > 0 || hStereoClassif->unclr_sw_enable_cnt[1] > 0 ) )
847 : {
848 : /* let's switch the binary decision */
849 15 : hStereoClassif->xtalk_decision = !hStereoClassif->xtalk_decision;
850 : }
851 :
852 : #ifdef DEBUG_MODE_TD
853 : dbgwrite( &hStereoClassif->xtalk_wscore, sizeof( float ), 1, 1, "res/xtalk_wscore.x" );
854 : dbgwrite( &dec, sizeof( int16_t ), 1, 1, "res/xtalk_dec.x" );
855 : dbgwrite( &hStereoClassif->xtalk_decision, sizeof( int16_t ), 1, 1, "res/xtalk_dec_hyst.x" );
856 : #endif
857 :
858 4138 : return;
859 : }
860 :
861 :
862 : /*-------------------------------------------------------------------*
863 : * Function xtalk_classifier_dft()
864 : *
865 : * Classify current DFT frame as cross-talk frame (1) or normal stereo frame (0)
866 : *-------------------------------------------------------------------*/
867 :
868 128227 : void xtalk_classifier_dft(
869 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
870 : const int16_t itd, /* i : ITD from DFT stereo - used as a feature */
871 : const float gcc_phat[] /* i : GPHAT cross-channel correlation function*/
872 : )
873 : {
874 : int16_t i, ind, itd2, thr;
875 : float score, m1, m2;
876 : STEREO_CLASSIF_HANDLE hStereoClassif;
877 : ITD_DATA_HANDLE hItd;
878 : float fvn[SSC_MAX_NFEA], edge, edge_0_1, wedge;
879 : float ratio_m1_m2, m2_m2, d_itd2, itd1_flip;
880 : float scr_min, scr_max;
881 : #ifdef DEBUG_MODE_TD
882 : int16_t dec;
883 : #endif
884 :
885 128227 : hStereoClassif = hCPE->hStereoClassif;
886 128227 : hItd = ( hCPE->hStereoDft != NULL ) ? hCPE->hStereoDft->hItd : hCPE->hStereoMdct->hItd;
887 :
888 128227 : m1 = 0.0f;
889 128227 : m2 = 0.0f;
890 :
891 128227 : itd2 = 0;
892 128227 : thr = ( hCPE->element_brate >= IVAS_32k ? 2 : 1 ) * CLASSIFIER_ITD_THRES;
893 :
894 128227 : if ( itd > thr )
895 : {
896 18009 : m1 = fabsf( gcc_phat[itd + XTALK_PHAT_LEN] );
897 18009 : m2 = fabsf( gcc_phat[0] );
898 18009 : itd2 = -XTALK_PHAT_LEN;
899 3398608 : for ( i = 1; i < XTALK_PHAT_LEN - thr; i++ )
900 : {
901 3380599 : if ( fabsf( gcc_phat[i] ) > m2 )
902 : {
903 146522 : itd2 = -XTALK_PHAT_LEN + i;
904 146522 : m2 = fabsf( gcc_phat[i] );
905 : }
906 : }
907 : }
908 110218 : else if ( itd < -thr )
909 : {
910 12286 : m1 = fabsf( gcc_phat[itd + XTALK_PHAT_LEN] );
911 12286 : m2 = fabsf( gcc_phat[XTALK_PHAT_LEN + thr + 1] );
912 12286 : itd2 = thr + 1;
913 2308712 : for ( i = XTALK_PHAT_LEN + thr + 2; i < 2 * XTALK_PHAT_LEN + 1; i++ )
914 : {
915 2296426 : if ( fabsf( gcc_phat[i] ) > m2 )
916 : {
917 63046 : itd2 = -XTALK_PHAT_LEN + i;
918 63046 : m2 = fabsf( gcc_phat[i] );
919 : }
920 : }
921 : }
922 :
923 128227 : ratio_m1_m2 = fabsf( m1 * m2 ) / fabsf( m1 + m2 + 1.0f );
924 128227 : m2_m2 = hItd->prev_m2 * m2;
925 128227 : d_itd2 = (float) abs( itd2 - hItd->prev_itd2 );
926 128227 : itd1_flip = (float) ( max( itd, hItd->prev_itd1 ) * ( -min( itd, hItd->prev_itd1 ) ) );
927 :
928 :
929 128227 : hStereoClassif->xtalk_fv[E_gphat_d_itd2] = d_itd2;
930 128227 : hStereoClassif->xtalk_fv[E_gphat_itd1_flip] = itd1_flip;
931 128227 : hStereoClassif->xtalk_fv[E_gphat_ratio_m1_m2] = ratio_m1_m2 * hStereoClassif->prev_ratio_m1_m2;
932 128227 : hStereoClassif->xtalk_fv[E_gphat_m2_m2] = m2_m2;
933 :
934 128227 : hStereoClassif->prev_ratio_m1_m2 = ratio_m1_m2;
935 :
936 128227 : set_f( fvn, -1.0f, SSC_MAX_NFEA );
937 :
938 : /* calcualte raw score based on LR */
939 128227 : score = XTALK_INTERCEPT_DFT;
940 1538724 : for ( i = 0; i < SIZE_XTALK_ISEL_DFT; i++ )
941 : {
942 1410497 : ind = xtalk_isel_dft[i];
943 :
944 : /* mean & std removal */
945 1410497 : fvn[i] = ( hStereoClassif->xtalk_fv[ind] - xtalk_mean_dft[i] ) / xtalk_scale_dft[i];
946 :
947 : /* LR */
948 1410497 : score += fvn[i] * xtalk_coef_dft[i];
949 : }
950 :
951 : /* normalize score to -1:+1 */
952 128227 : if ( score > XTALK_SCORE_THR_DFT )
953 : {
954 2989 : score = 1.0f;
955 : }
956 125238 : else if ( score < -XTALK_SCORE_THR_DFT )
957 : {
958 11412 : score = -1.0f;
959 : }
960 : else
961 : {
962 113826 : score /= XTALK_SCORE_THR_DFT;
963 : }
964 :
965 : /* raw score */
966 128227 : hStereoClassif->xtalk_score = score;
967 :
968 128227 : if ( !hStereoClassif->vad_flag_glob )
969 : {
970 29440 : score = 0;
971 : }
972 :
973 : #ifdef DEBUG_MODE_TD
974 : /* raw decision */
975 : dec = score > 0;
976 : #endif
977 :
978 :
979 : /* rising edge detector on raw score -> yields 1 if strong rising edge is detected in the given buffer */
980 128227 : mvr2r( &hStereoClassif->xtalk_score_buf[0], &hStereoClassif->xtalk_score_buf[1], XTALK_SCORE_BUF_LEN - 1 );
981 128227 : hStereoClassif->xtalk_score_buf[0] = score;
982 :
983 128227 : minimum( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, &scr_min );
984 128227 : maximum( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, &scr_max );
985 :
986 : #ifdef DEBUG_MODE_TD
987 : edge = redge_detect( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, -0.2f, 1.0f );
988 : edge_0_1 = lin_interp( 1 - edge, 1.0f, 1.0f, 0.83f, 0.0f, 1 );
989 : dbgwrite( &score, sizeof( float ), 1, 1, "res/score.x" );
990 : dbgwrite( &edge_0_1, sizeof( float ), 1, 1, "res/edge_0_1.x" );
991 : #endif
992 :
993 128227 : if ( scr_min < 0.2f && scr_max > 0.0f )
994 : {
995 27324 : edge = redge_detect( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, -0.2f, 1.0f );
996 27324 : edge_0_1 = lin_interp( 1 - edge, 1.0f, 1.0f, 0.83f, 0.0f, 1 );
997 : }
998 : else
999 : {
1000 100903 : edge_0_1 = 0.0f;
1001 : }
1002 :
1003 :
1004 : /* weight raw score based on rising edge detector */
1005 128227 : wedge = lin_interp( edge_0_1, 0.0f, 0.95f, 1.0f, 0.3f, 1 );
1006 128227 : hStereoClassif->xtalk_wscore = wedge * hStereoClassif->xtalk_wscore + ( 1 - wedge ) * score;
1007 :
1008 128227 : if ( ( itd == 0 ) || ( hCPE->hCoreCoder[0]->vad_flag == 0 ) )
1009 : {
1010 73983 : hStereoClassif->xtalk_decision = 0;
1011 : }
1012 54244 : else if ( hCPE->element_brate >= IVAS_24k4 &&
1013 32030 : hStereoClassif->xtalk_decision == 0 && ( ( m1 * 0.8 < m2 && hItd->prev_m1 * 0.8 < hItd->prev_m2 && abs( itd2 - hItd->prev_itd2 ) < 4 && m1 > 0.15 && hItd->prev_m1 > 0.15 ) || ( hStereoClassif->xtalk_wscore > 0.8 ) || ( itd > thr && hItd->prev_itd1 < -thr && hStereoClassif->silence_flag == 0 ) || ( hItd->prev_itd1 > thr && itd < -thr && hStereoClassif->silence_flag == 0 ) ) &&
1014 2596 : hCPE->hCoreCoder[0]->vad_flag == 1 && hCPE->hCoreCoder[0]->flag_noisy_speech_snr == 0 && hCPE->hCoreCoder[0]->hNoiseEst->aEn_inac_cnt > 15 )
1015 : {
1016 0 : hStereoClassif->xtalk_decision = 1;
1017 : #ifdef DEBUG_MODE_TD
1018 : printf( "\nSwitch DFT-stereo -> TD-LR on frame %d\n", frame );
1019 : #endif
1020 : }
1021 54244 : else if ( hCPE->element_brate >= IVAS_16k4 && hStereoClassif->xtalk_decision == 0 && abs( itd ) > STEREO_DFT_ITD_MAX && ( hCPE->hCoreCoder[0]->lp_speech - hCPE->hCoreCoder[0]->lp_noise ) > 25.0f )
1022 : {
1023 69 : hStereoClassif->xtalk_decision = 1;
1024 : }
1025 :
1026 : #ifdef DEBUG_MODE_TD
1027 : dbgwrite( &hStereoClassif->xtalk_wscore, sizeof( float ), 1, 1, "res/xtalk_wscore.x" );
1028 : dbgwrite( &dec, sizeof( int16_t ), 1, 1, "res/xtalk_dec.x" );
1029 : dbgwrite( &hStereoClassif->xtalk_decision, sizeof( int16_t ), 1, 1, "res/xtalk_dec_hyst.x" );
1030 : #endif
1031 :
1032 : /* updates */
1033 128227 : hItd->prev_m1 = m1;
1034 128227 : hItd->prev_m2 = m2;
1035 128227 : hItd->prev_itd1 = itd;
1036 128227 : hItd->prev_itd2 = itd2;
1037 :
1038 128227 : return;
1039 : }
1040 :
1041 :
/*-------------------------------------------------------------------*
 * Function rc_filter()
 *
 * Cascade of first-order recursive smoothers sharing one coefficient:
 * stage 0 is fed with x, every following stage is fed with the freshly
 * updated output of the stage before it. y[] holds the state of each
 * stage and is updated in place. Stage 0 is always updated, also when
 * order is smaller than 1.
 *-------------------------------------------------------------------*/

static void rc_filter(
    const float x,       /* i  : new input sample                          */
    float *y,            /* i/o: per-stage filter states / outputs         */
    const int16_t order, /* i  : number of cascaded stages                 */
    const float tau      /* i  : weight given to the previous stage state  */
)
{
    int16_t stage;
    float stage_in;

    stage = 0;
    stage_in = x;

    do
    {
        y[stage] = tau * y[stage] + ( 1 - tau ) * stage_in;

        /* the updated output drives the next stage */
        stage_in = y[stage];
        stage++;
    } while ( stage < order );

    return;
}
1064 :
1065 :
/*-------------------------------------------------------------------*
 * Function edge_detect()
 *
 * Rising/falling edge detection algorithm
 * Fits straight ramps between inp_max and inp_min of increasing length to the
 * start of the input buffer and reports the smallest mean squared fitting error
 * (0 = perfect match) together with the type of ramp that produced it.
 * "Falling"/"rising" refer to increasing buffer index.
 * Set edge_type to 0/1/2 when calling this function to specify the edge type you want to detect. The returned value will be modified
 * according to the edge type detected (-1 indicates that no tested ramp got below the initial error of 1e7)
 *
 * The error of each candidate ramp is accumulated in a scalar, so the supported
 * buffer length is not limited by any scratch array size.
 *-------------------------------------------------------------------*/

static void edge_detect(
    const float *inp,    /* i  : input buffer                                                     */
    const int16_t len,   /* i  : length of the input buffer                                       */
    const float inp_min, /* i  : minimum value for edge detection                                 */
    const float inp_max, /* i  : maximum value for edge detection                                 */
    float *edge_str,     /* o  : edge strength (from 0 to Inf)                                    */
    int16_t *edge_type   /* i/o: edge type (to be) detected: 0 = falling, 1 = rising, 2 = both    */
)
{
    int16_t i, j, et;
    float y, err, edge_slope, mse;
    float edge_min, err0;

    et = -1;
    edge_min = 1e7f;

    if ( *edge_type == 0 || *edge_type == 2 )
    {
        /* falling edge detection */

        /* set error at 0th index */
        if ( inp[0] > inp_max )
        {
            err0 = 0.0f;
        }
        else
        {
            /* inhibits edge smearing effect */
            err = inp[0] - inp_max;
            err0 = err * err;
        }

        /* test edges on intervals from 2 to len */
        for ( i = 1; i < len; i++ )
        {
            /* ramp goes from inp_max at index 0 down to inp_min at index i */
            edge_slope = ( inp_max - inp_min ) / i;
            mse = err0;
            for ( j = 1; j <= i; j++ )
            {
                y = inp_max - edge_slope * j;
                err = y - check_bounds( inp[j], inp_min, inp_max );
                mse += err * err;
            }

            mse /= i + 1;

            if ( mse < edge_min )
            {
                edge_min = mse;
                et = 0;
            }
        }
    }

    if ( *edge_type == 1 || *edge_type == 2 )
    {
        /* rising edge detection */

        /* set error at 0th index */
        if ( inp[0] < inp_min )
        {
            err0 = 0.0f;
        }
        else
        {
            /* inhibits edge smearing effect */
            err = inp[0] - inp_min;
            err0 = err * err;
        }

        /* NOTE(review): unlike the falling branch, i runs up to len inclusive and the inner sum stops at j = i - 1 */
        /* (the ramp end point itself is not compared); loop bounds are kept as they were - confirm this is intended */
        for ( i = 1; i <= len; i++ )
        {
            /* ramp starts at inp_min at index 0 and rises with slope ( inp_max - inp_min ) / i */
            edge_slope = ( inp_max - inp_min ) / i;
            mse = err0;
            for ( j = 1; j < i; j++ )
            {
                y = inp_min + edge_slope * j;
                err = y - check_bounds( inp[j], inp_min, inp_max );
                mse += err * err;
            }

            mse /= i + 1;

            /* in "both" mode the rising ramp only wins when it is strictly better than the best falling one */
            if ( mse < edge_min )
            {
                edge_min = mse;
                et = 1;
            }
        }
    }

    *edge_str = edge_min;
    *edge_type = et;

    return;
}
1172 :
1173 :
/*-------------------------------------------------------------------*
 * Function redge_detect()
 *
 * Rising edge detection algorithm
 * The buffer is ordered from newest to oldest value, so a rising edge in time
 * appears as a ramp going from inp_max at index 0 down towards inp_min.
 * Ramps of increasing length are fitted to the start of the buffer and the
 * smallest mean squared fitting error is returned: a value close to 0 indicates
 * a strong rising edge. When len is smaller than 2 no ramp is tested and the
 * initial value 1e7 is returned.
 *
 * The error of each candidate ramp is accumulated in a scalar, so the supported
 * buffer length is not limited by any scratch array size.
 *-------------------------------------------------------------------*/

/*! r: minimum mean squared error of the fitted rising edge (0 = perfect match) */
static float redge_detect(
    const float *inp,    /* i  : input buffer (ordered from newest to oldest values) */
    const int16_t len,   /* i  : length of the input buffer                          */
    const float inp_min, /* i  : minimum value for edge detection                    */
    const float inp_max  /* i  : maximum value for edge detection                    */
)
{
    int16_t i, j;
    float y, err, edge_slope, mse;
    float edge_min, err0;

    edge_min = 1e7f;

    /* error at 0th index: no penalty when the newest value exceeds inp_max */
    if ( inp[0] > inp_max )
    {
        err0 = 0.0f;
    }
    else
    {
        err = inp[0] - inp_max;
        err0 = err * err;
    }

    /* test rising edges on intervals from 2 to len */
    for ( i = 1; i < len; i++ )
    {
        edge_slope = ( inp_max - inp_min ) / i;
        mse = err0;

        /* the ramp end point (index i) is not part of the sum */
        for ( j = 1; j < i; j++ )
        {
            y = inp_max - edge_slope * j;
            if ( inp[j] == inp[j - 1] && inp[j] == inp_max )
            {
                /* we are saturated at inp_max */
                err = 0.0f;
            }
            else if ( inp[j] < inp_min )
            {
                /* we are below inp_min */
                err = y - inp_min;
            }
            else
            {
                err = y - inp[j];
            }
            err = err * err;
            mse += err;
        }

        mse /= i + 1;

        if ( mse < edge_min )
        {
            edge_min = mse;
        }
    }

    return edge_min;
}
|