LCOV - code coverage report
Current view: top level - lib_enc - ivas_stereo_classifier.c (source / functions) Hit Total Coverage
Test: Coverage on main -- long test vectors @ 9b04ec3cb36f5e8dc438cf854fa3e349998fa1e9 Lines: 402 419 95.9 %
Date: 2025-10-31 05:43:07 Functions: 10 10 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : #include <stdint.h>
      34             : #include <math.h>
      35             : #include "options.h"
      36             : #include "cnst.h"
      37             : #include "rom_com.h"
      38             : #include "prot.h"
      39             : #include "ivas_prot.h"
      40             : #include "ivas_rom_com.h"
      41             : #include "ivas_rom_enc.h"
      42             : #include "ivas_cnst.h"
      43             : #ifdef DEBUGGING
      44             : #include "debug.h"
      45             : #endif
      46             : #include "wmc_auto.h"
      47             : 
      48             : 
      49             : /*-------------------------------------------------------------------*
      50             :  * Local constants
      51             :  *-------------------------------------------------------------------*/
      52             : /* NOTE(review): the constants without an original comment are annotated from their names only; none of them is used in select_stereo_mode() or stereo_classifier_init() - confirm against the code that uses them */
      53             : #define RC_FACT_UP            0.3f /* presumably a factor used with rc_filter() for a rising input - TODO confirm */
      54             : #define RC_FACT_DOWN          0.7f /* presumably a factor used with rc_filter() for a falling input - TODO confirm */
      55             : #define UNCLR_SCORE_THR       4.0f /* presumably the decision threshold on the UNCLR classifier score - TODO confirm */
      56             : #define XTALK_SCORE_THR_DFT   4.0f /* presumably the xtalk score threshold in DFT stereo - TODO confirm */
      57             : #define XTALK_SCORE_THR_TD_UP 3.0f /* presumably the xtalk score threshold in TD stereo, upward direction - TODO confirm */
      58             : #define XTALK_SCORE_THR_TD_DN 4.0f /* presumably the xtalk score threshold in TD stereo, downward direction - TODO confirm */
      59             : /* presumably the intercept (bias) terms of the UNCLR and xtalk classifier models, one per stereo mode (TD, DFT) - TODO confirm */
      60             : #define UNCLR_INTERCEPT_TD  0.780313f
      61             : #define UNCLR_INTERCEPT_DFT 1.226513f
      62             : #define XTALK_INTERCEPT_TD  -1.770983f
      63             : #define XTALK_INTERCEPT_DFT -0.758556f
      64             : 
      65             : #define EDGE_MAX_LEN  30 /* maximum length of buffer for edge detection */
      66             : #define REDGE_MAX_LEN 30 /* maximum length of buffer for rising edge detection */
      67             : 
      68             : #define CLASSIFIER_ITD_THRES 8 /* ITD threshold in samples that enables classifier to switch */
      69             : 
      70             : 
      71             : /*-------------------------------------------------------------------*
      72             :  * Local function prototypes
      73             :  *-------------------------------------------------------------------*/
      74             : /* file-local (static) helpers; NOTE(review): the notes below are read off the signatures only - confirm against the definitions */
      75             : static void rc_filter( const float x, float *y, const int16_t order, const float tau );
      76             : /* edge_detect() returns void; edge_str and edge_type are its only non-const pointer arguments, so presumably its outputs - TODO confirm */
      77             : static void edge_detect( const float *inp, const int16_t len, const float inp_min, const float inp_max, float *edge_str, int16_t *edge_type );
      78             : /* redge_detect() takes the same input arguments as edge_detect() but returns a single float */
      79             : static float redge_detect( const float *inp, const int16_t len, const float inp_min, const float inp_max );
      80             : 
      81             : 
      82             : /*-------------------------------------------------------------------*
      83             :  * Function select_stereo_mode()
      84             :  *
      85             :  * Select stereo technology based on output of stereo classifiers: starts from hCPE->element_mode and overrides it with IVAS_CPE_MDCT, IVAS_CPE_DFT or IVAS_CPE_TD depending on element bitrate, IVAS format and the unclr/xtalk decisions
      86             :  *-------------------------------------------------------------------*/
      87             : /* Side effects: updates is_speech, unclr_decision, xtalk_decision, lrtd_mode and prev_lrtd_mode in hCPE->hStereoClassif, resets classifier state when the element mode changes, and sets td_active and first_SID_after_TD in hCPE->hStereoCng when TD stereo is selected with DTX on */
      88             : /*! r: element mode */
      89     2059089 : int16_t select_stereo_mode(
      90             :     CPE_ENC_HANDLE hCPE,          /* i/o: CPE encoder structure       */
      91             :     const IVAS_FORMAT ivas_format /* i  : IVAS format                 */
      92             : )
      93             : {
      94             :     int16_t element_mode;
      95             :     STEREO_CLASSIF_HANDLE hStereoClassif;
      96             :     int16_t is_speech;             /* binary flag derived from the smoothed hStereoClassif->is_speech */
      97             :     int16_t stereo_switching_flag; /* 1: classifier decisions may change the element mode, 0: they may not */
      98             : 
      99             :     /* initialization */
     100     2059089 :     element_mode = hCPE->element_mode;
     101     2059089 :     hStereoClassif = hCPE->hStereoClassif;
     102             : 
     103             :     /* set binary flag to prevent LRTD mode on music */
     104     2059089 :     hStereoClassif->is_speech = 0.97f * hStereoClassif->is_speech + 0.03f * hCPE->hCoreCoder[0]->hSpMusClas->past_dlp[0]; /* recursive average of past_dlp[0] of the first core coder */
     105     2059089 :     is_speech = ( hStereoClassif->is_speech < 1.0f && hCPE->hCoreCoder[0]->hSpMusClas->wdlp_xtalk < 0.0f );
     106             : 
     107             :     /* set binary flag indicating LRTD mode based on unclr/xtalk classifiers' decisions */
     108     2059089 :     hStereoClassif->prev_lrtd_mode = hStereoClassif->lrtd_mode;
     109     2059089 :     hStereoClassif->unclr_decision = ( hStereoClassif->unclr_decision && hCPE->hCoreCoder[0]->flag_noisy_speech_snr == 0 && hCPE->element_brate > IVAS_16k4 ); /* the UNCLR decision is dropped when flag_noisy_speech_snr is set or the bitrate is not above IVAS_16k4 */
     110     2059089 :     hStereoClassif->lrtd_mode = ( ( hStereoClassif->unclr_decision | hStereoClassif->xtalk_decision ) && is_speech ); /* bitwise OR of the two integer flags: non-zero exactly when either one is non-zero */
     111             :     /* switching is allowed by default; it is disabled at MDCT-stereo bitrates, for MASA/MASA_ISM below MASA_STEREO_MIN_BITRATE and, with DEBUGGING, when stereo_mode_cmdl selects DFT or TD */
     112     2059089 :     stereo_switching_flag = 1;
     113             : 
     114     2059089 :     if ( hCPE->element_brate >= MIN_BRATE_MDCT_STEREO || ( ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) && hCPE->element_brate < MASA_STEREO_MIN_BITRATE )
     115             : #ifdef DEBUGGING
     116             :          || ( hCPE->stereo_mode_cmdl == IVAS_CPE_DFT || hCPE->stereo_mode_cmdl == IVAS_CPE_TD )
     117             : #endif
     118             :     )
     119             :     {
     120     1427092 :         stereo_switching_flag = 0;
     121             :     }
     122             :     /* at or above MIN_BRATE_MDCT_STEREO always use MDCT stereo; in the first frame after MDCT stereo default to DFT unless the test below selects TD directly */
     123     2059089 :     if ( hCPE->element_brate >= MIN_BRATE_MDCT_STEREO )
     124             :     {
     125     1306850 :         hStereoClassif->prev_lrtd_mode = 0;
     126     1306850 :         hStereoClassif->lrtd_mode = 0;
     127     1306850 :         element_mode = IVAS_CPE_MDCT;
     128             :     }
     129      752239 :     else if ( hCPE->element_brate < MIN_BRATE_MDCT_STEREO && hCPE->last_element_mode == IVAS_CPE_MDCT ) /* the bitrate test is always true here: the branch above handled element_brate >= MIN_BRATE_MDCT_STEREO */
     130             :     {
     131       17195 :         hStereoClassif->lrtd_mode = 0;
     132       17195 :         element_mode = IVAS_CPE_DFT;
     133             :         /* go directly from MDCT stereo to TD (LRTD) stereo when switching is allowed, the bitrate is above IVAS_13k2, the speech/music and VAD conditions hold and either sw_uncorr or xtalk_decision is set */
     134       17195 :         if ( stereo_switching_flag == 1 && hCPE->element_brate > IVAS_13k2 && hCPE->hCoreCoder[0]->hSpMusClas->past_dlp[0] < 1.0f && hCPE->hCoreCoder[0]->hSpMusClas->wdlp_xtalk < -0.01f && hCPE->hCoreCoder[0]->vad_flag == 1 && ( hCPE->hStereoMdct->sw_uncorr || hStereoClassif->xtalk_decision ) )
     135             :         {
     136         193 :             hStereoClassif->lrtd_mode = 1;
     137         193 :             element_mode = IVAS_CPE_TD;
     138             :         }
     139             :     }
     140             : 
     141             :     /* set the element mode */
     142     2059089 :     if ( hStereoClassif->lrtd_mode == 1 && stereo_switching_flag == 1 )
     143             :     {
     144       24408 :         element_mode = IVAS_CPE_TD;
     145             :     }
     146     2034681 :     else if ( element_mode < IVAS_CPE_MDCT ) /* NOTE(review): relies on the ordering of the element-mode constants to leave IVAS_CPE_MDCT (and anything above it) untouched - confirm */
     147             :     {
     148      727831 :         if ( stereo_switching_flag == 0 )
     149             :         {
     150      120242 :             if ( ( ivas_format == MASA_FORMAT || ivas_format == MASA_ISM_FORMAT ) && hCPE->element_brate < MASA_STEREO_MIN_BITRATE ) /* NOTE(review): without DEBUGGING this looks always true here, since the only other way to clear stereo_switching_flag also sets element_mode to IVAS_CPE_MDCT - confirm */
     151             :             {
     152      120242 :                 element_mode = IVAS_CPE_DFT;
     153             :             }
     154             : #ifdef DEBUGGING
     155             :             if ( hCPE->stereo_mode_cmdl > 1 )
     156             :             {
     157             :                 element_mode = hCPE->stereo_mode_cmdl;
     158             :             }
     159             : #endif
     160             :         }
     161      607589 :         else if ( element_mode == IVAS_CPE_TD )
     162             :         {
     163         463 :             if ( hCPE->hStereoTD->prev_fr_LRTD_TD_dec > 0 && is_speech )
     164             :             {
     165             :                 /* if unclr_decision goes from 1->0 on active content, continue in LRTD mode */
     166         309 :                 hStereoClassif->lrtd_mode = 1;
     167             :             }
     168         154 :             else if ( stereo_switching_flag == 1 ) /* always true here: the == 0 case is taken by the first branch of the enclosing chain */
     169             :             {
     170         154 :                 element_mode = IVAS_CPE_DFT;
     171             :             }
     172             :         }
     173      607126 :         else if ( stereo_switching_flag == 1 ) /* always true here: the == 0 case is taken by the first branch of this chain */
     174             :         {
     175      607126 :             element_mode = IVAS_CPE_DFT;
     176             :         }
     177             :     }
     178             : 
     179             :     /* switch from LRTD to DFT when xtalk_decision goes from 0->1 (note: this special case is not handled in the xtalk classifier) */
     180     2059089 :     if ( hCPE->last_element_mode == IVAS_CPE_TD && element_mode == IVAS_CPE_TD && hStereoClassif->xtalk_decision == 1 )
     181             :     {
     182        7564 :         if ( hCPE->hStereoTD->prev_fr_LRTD_TD_dec == 0 && hCPE->hStereoTD->tdm_FD2LRTD_SW_cnt > 15 && hCPE->hStereoTD->tdm_last_LRTD_frame_cnt > 3 && hCPE->hCoreCoder[0]->clas < VOICED_CLAS && ( hCPE->element_brate >= IVAS_16k4 || hStereoClassif->xtalk_wscore < 0.01f ) )
     183             :         {
     184          87 :             if ( stereo_switching_flag == 1 )
     185             :             {
     186          87 :                 element_mode = IVAS_CPE_DFT;
     187             :             }
     188          87 :             hStereoClassif->xtalk_decision = 0; /* the classifier flags are cleared even when the element mode itself is not switched */
     189          87 :             hStereoClassif->lrtd_mode = 0;
     190             :         }
     191             :     }
     192             :     /* the element mode changes in this frame: reset the classifier state (fully when the last mode was neither DFT nor TD, otherwise only the feature vectors and unclr_corrLagMax_prev) */
     193     2059089 :     if ( hCPE->last_element_mode != element_mode )
     194             :     {
     195       35133 :         if ( hCPE->last_element_mode != IVAS_CPE_DFT && hCPE->last_element_mode != IVAS_CPE_TD )
     196       17195 :         {
     197       17195 :             int16_t lrtd_mode = hStereoClassif->lrtd_mode; /* saved because stereo_classifier_init() clears lrtd_mode */
     198             : 
     199             :             /* reset stereo classifier when switching from MDCT stereo to Unified stereo */
     200       17195 :             stereo_classifier_init( hCPE->hStereoClassif );
     201             : 
     202       17195 :             hStereoClassif->lrtd_mode = lrtd_mode;
     203             :         }
     204             :         else
     205             :         {
     206             :             /* reset UNCLR classifier parameters */
     207       17938 :             set_f( hStereoClassif->unclr_fv, -1.0f, SSC_MAX_NFEA );
     208       17938 :             hStereoClassif->unclr_corrLagMax_prev = 0;
     209             : 
     210             :             /* reset xtalk classifier parameters */
     211       17938 :             set_f( hStereoClassif->xtalk_fv, -1.0f, SSC_MAX_NFEA );
     212             :         }
     213             :     }
     214             : #ifdef DEBUG_MODE_TD
     215             :     dbgwrite( &hStereoClassif->unclr_decision, sizeof( int16_t ), 1, L_FRAME16k, "res/unclr_decision.enc" );
     216             :     dbgwrite( &hStereoClassif->xtalk_decision, sizeof( int16_t ), 1, L_FRAME16k, "res/xtalk_decision.enc" );
     217             :     dbgwrite( &hCPE->hCoreCoder[0]->sp_aud_decision0, sizeof( int16_t ), 1, L_FRAME16k, "res/sp_aud_class.enc" );
     218             :     dbgwrite( &hCPE->hCoreCoder[0]->last_core, sizeof( int16_t ), 1, L_FRAME16k, "res/last_core.enc" );
     219             : #endif
     220             :     /* flag TD stereo to the stereo CNG state when DTX is enabled on the first core coder */
     221     2059089 :     if ( element_mode == IVAS_CPE_TD && hCPE->hCoreCoder[0]->Opt_DTX_ON )
     222             :     {
     223        6500 :         hCPE->hStereoCng->td_active = 1;
     224        6500 :         hCPE->hStereoCng->first_SID_after_TD = 1;
     225             :     }
     226             : 
     227     2059089 :     return ( element_mode );
     228             : }
     229             : 
     230             : 
     231             : /*-------------------------------------------------------------------*
     232             :  * Function stereo_classifier_init()
     233             :  *
     234             :  * Initialize stereo classifier handle: reset the stored per-channel features, energy trackers, score buffers and the UNCLR/xtalk/LRTD decisions to their start-up values
     235             :  *-------------------------------------------------------------------*/
     236             : /* Only writes through hStereoClassif; select_stereo_mode() also calls it when the last element mode was neither DFT nor TD */
     237       47462 : void stereo_classifier_init(
     238             :     STEREO_CLASSIF_HANDLE hStereoClassif /* i/o: stereo classifier structure */
     239             : )
     240             : {
     241             :     /* initialization of features for xtalk classifier and UNCLR classifier */
     242       47462 :     hStereoClassif->clas_ch1 = 0;
     243       47462 :     set_s( hStereoClassif->pitch_ch1, 0, 3 );
     244       47462 :     set_f( hStereoClassif->voicing_ch1, 0.0f, 3 );
     245       47462 :     hStereoClassif->cor_map_sum_ch1 = 0.0f;
     246       47462 :     set_f( hStereoClassif->lsf_ch1, 0.0f, M );
     247       47462 :     hStereoClassif->lepsP_ch1 = 0.0f;
     248       47462 :     hStereoClassif->dE1_ch1 = 0.0f;
     249       47462 :     hStereoClassif->dE1_ch2 = 0.0f;
     250       47462 :     hStereoClassif->nchar_ch1 = 0.0f;
     251       47462 :     hStereoClassif->nchar_ch2 = 0.0f;
     252       47462 :     hStereoClassif->non_sta_ch1 = 0.0f;
     253       47462 :     hStereoClassif->sp_div_ch1 = 0.0f;
     254       47462 :     hStereoClassif->ps_diff_ch1 = 0.0f;
     255       47462 :     hStereoClassif->ps_diff_ch2 = 0.0f;
     256       47462 :     hStereoClassif->ps_sta_ch1 = 0.0f;
     257       47462 :     hStereoClassif->ps_sta_ch2 = 0.0f;
     258       47462 :     hStereoClassif->prev_g_IPD = 0.5f; /* starts at 0.5f, not 0 */
     259       47462 :     hStereoClassif->prev_IPD = 0.0f;
     260       47462 :     hStereoClassif->prev_ratio_m1_m2 = 0.0f;
     261       47462 :     set_f( hStereoClassif->xtalk_score_buf, 0.0f, XTALK_SCORE_BUF_LEN );
     262       47462 :     hStereoClassif->ratio_L = 0.5f; /* starts at 0.5f, not 0 */
     263       47462 :     hStereoClassif->vad_flag_glob = 0;
     264       47462 :     hStereoClassif->vad_relE = 0;
     265       47462 :     hStereoClassif->is_speech = 0.0f; /* smoothed value that select_stereo_mode() updates every frame */
     266             : 
     267       47462 :     set_s( hStereoClassif->aEn_raw, 0, CPE_CHANNELS );
     268             :     /* energy bounds and relative-energy histories */
     269       47462 :     hStereoClassif->Etot_dn = 0.0f;
     270       47462 :     hStereoClassif->Etot_up = 0.0f;
     271             : 
     272       47462 :     set_f( hStereoClassif->relE_buf, 0.0f, UNCLR_L_RELE );
     273       47462 :     set_f( hStereoClassif->Etot_buf, 0.0f, UNCLR_L_ETOT );
     274       47462 :     set_f( hStereoClassif->unclr_relE_0_1_LT, 0.0f, UNCLR_RC_ORDER );
     275             : 
     276       47462 :     hStereoClassif->unclr_sw_enable_cnt[0] = 0;
     277       47462 :     hStereoClassif->unclr_sw_enable_cnt[1] = 0;
     278             :     /* UNCLR classifier state */
     279       47462 :     hStereoClassif->unclr_decision = 0;
     280       47462 :     hStereoClassif->unclr_wscore = 0.0f;
     281             : 
     282       47462 :     set_f( hStereoClassif->unclr_fv, -1.0f, SSC_MAX_NFEA ); /* same -1.0f fill that select_stereo_mode() uses for its partial reset */
     283       47462 :     hStereoClassif->unclr_corrLagMax_prev = 0;
     284       47462 :     hStereoClassif->ave_ener_L = 0;
     285       47462 :     hStereoClassif->ave_ener_R = 0;
     286       47462 :     hStereoClassif->relE_0_1 = 0.01f;
     287       47462 :     hStereoClassif->relE_0_1_LT = 0.01f;
     288             :     /* xtalk classifier state */
     289       47462 :     set_f( hStereoClassif->xtalk_fv, -1.0f, SSC_MAX_NFEA ); /* same -1.0f fill that select_stereo_mode() uses for its partial reset */
     290       47462 :     hStereoClassif->xtalk_wscore = 0.0f;
     291       47462 :     hStereoClassif->xtalk_decision = 0;
     292       47462 :     hStereoClassif->xtalk_score_wrelE = 0.0f;
     293             :     /* LRTD mode flags; select_stereo_mode() saves and restores lrtd_mode around its call to this function */
     294       47462 :     hStereoClassif->lrtd_mode = 0;
     295       47462 :     hStereoClassif->prev_lrtd_mode = 0;
     296             : 
     297       47462 :     hStereoClassif->silence_flag = 0;
     298             : 
     299       47462 :     return;
     300             : }
     301             : 
     302             : 
     303             : /*-----------------------------------------------------------------*
     304             :  * stereo_classifier_features()
     305             :  *
     306             :  * Collect features for stereo classifiers
     307             :  *-----------------------------------------------------------------*/
     308             : 
     309    10785106 : void stereo_classifier_features(
     310             :     STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure                     */
     311             :     const int16_t idchan,                 /* i  : channel ID                                      */
     312             :     const int16_t element_mode,           /* i  : element mode                                    */
     313             :     const int16_t vad_flag,               /* i  : VAD flag                                        */
     314             :     const float lsf_new[],                /* i  : LSFs at the end of the frame                    */
     315             :     const float epsP[],                   /* i  : LP analysis residual energies for each iteration*/
     316             :     const int16_t pitch[],                /* i  : open-loop pitch values for quantiz.             */
     317             :     const float voicing[],                /* i  : OL maximum normalized correlation               */
     318             :     const float cor_map_sum,              /* i  : speech/music clasif. parameter                  */
     319             :     const float non_staX,                 /* i  : unbound non-stationarity for sp/mu clas.        */
     320             :     const float sp_div,                   /* i  : spectral diversity feature                      */
     321             :     const int16_t clas                    /* i  : signal class                                    */
     322             : )
     323             : {
     324             :     int16_t i, clas_ch2;
     325             :     float lepsP_ch2, ener_l, ener_r;
     326             : 
     327             :     /* combine VAD flags from both channels */
     328    10785106 :     if ( idchan == 0 )
     329             :     {
     330     5756464 :         hStereoClassif->vad_flag_glob = vad_flag;
     331             :     }
     332             :     else
     333             :     {
     334     5028642 :         hStereoClassif->vad_flag_glob |= vad_flag;
     335             :     }
     336             : 
     337    10785106 :     if ( ( element_mode == IVAS_CPE_DFT && idchan == 0 ) || ( element_mode == IVAS_CPE_TD && idchan == 1 ) )
     338             :     {
     339             :         /* update Etot_up and Etot_dn based on aEn */
     340      752336 :         ener_l = 10.0f * log10f( hStereoClassif->ave_ener_L + 1.0f );
     341      752336 :         ener_r = 10.0f * log10f( hStereoClassif->ave_ener_R + 1.0f );
     342      752336 :         mvr2r( &hStereoClassif->Etot_buf[0], &hStereoClassif->Etot_buf[1], UNCLR_L_ETOT - 1 );
     343      752336 :         hStereoClassif->Etot_buf[0] = max( 0, max( ener_l, ener_r ) );
     344             : 
     345      752336 :         if ( hStereoClassif->aEn_raw[0] == 6 || ( element_mode == IVAS_CPE_TD && hStereoClassif->aEn_raw[1] == 6 ) )
     346             :         {
     347             :             /* active signal, update upper bound */
     348      538464 :             if ( hStereoClassif->Etot_buf[0] < hStereoClassif->Etot_up )
     349             :             {
     350             :                 /* energy decreases -> slower update */
     351      387338 :                 hStereoClassif->Etot_up = 0.99f * hStereoClassif->Etot_up + 0.01f * hStereoClassif->Etot_buf[0];
     352             :             }
     353             :             else
     354             :             {
     355             :                 /* energy increases -> faster update */
     356      151126 :                 hStereoClassif->Etot_up = 0.95f * hStereoClassif->Etot_up + 0.05f * hStereoClassif->Etot_buf[0];
     357             :             }
     358             :         }
     359      213872 :         else if ( ( element_mode == IVAS_CPE_DFT && hStereoClassif->aEn_raw[0] == 0 ) ||
     360        2095 :                   ( element_mode == IVAS_CPE_TD && hStereoClassif->aEn_raw[0] == 0 && hStereoClassif->aEn_raw[1] == 0 ) )
     361             :         {
     362             :             /* inactive signal, update lower bound */
     363      112424 :             if ( hStereoClassif->Etot_buf[0] < hStereoClassif->Etot_dn )
     364             :             {
     365             :                 /* energy decreases -> faster update */
     366       44083 :                 hStereoClassif->Etot_dn = 0.9f * hStereoClassif->Etot_dn + 0.1f * hStereoClassif->Etot_buf[0];
     367             :             }
     368             :             else
     369             :             {
     370             :                 /* energy increases -> slower update */
     371       68341 :                 hStereoClassif->Etot_dn = 0.95f * hStereoClassif->Etot_dn + 0.05f * hStereoClassif->Etot_buf[0];
     372             :             }
     373             : 
     374      112424 :             if ( hStereoClassif->Etot_dn < 30.0f )
     375             :             {
     376             :                 /* do not decrease below lower bound threshold */
     377       20651 :                 hStereoClassif->Etot_dn = 30.0f;
     378             :             }
     379             :         }
     380             : 
     381             :         /* upper bound is too low and close to lower bound -> update it */
     382      752336 :         if ( hStereoClassif->Etot_up < hStereoClassif->Etot_dn + 20.0f )
     383             :         {
     384      168683 :             hStereoClassif->Etot_up = hStereoClassif->Etot_dn + 20.0f;
     385             :         }
     386             : 
     387             :         /* normalize Etot to (0,1) */
     388      752336 :         hStereoClassif->relE_0_1 = lin_interp( hStereoClassif->Etot_buf[0], hStereoClassif->Etot_dn, 0.0f, hStereoClassif->Etot_up, 0.9f, 1 );
     389             : 
     390             :         /* update relE_buf */
     391      752336 :         mvr2r( &hStereoClassif->relE_buf[0], &hStereoClassif->relE_buf[1], UNCLR_L_RELE - 1 );
     392      752336 :         hStereoClassif->relE_buf[0] = hStereoClassif->relE_0_1;
     393             : 
     394      752336 :         if ( hStereoClassif->relE_0_1 >= hStereoClassif->relE_buf[1] )
     395             :         {
     396      483298 :             hStereoClassif->relE_0_1_LT = 0.9f * hStereoClassif->relE_0_1_LT + 0.1f * hStereoClassif->relE_0_1;
     397             :         }
     398             :         else
     399             :         {
     400      269038 :             hStereoClassif->relE_0_1_LT = 0.95f * hStereoClassif->relE_0_1_LT + 0.05f * hStereoClassif->relE_0_1;
     401             :         }
     402             : 
     403             :         /* estimate VAD flag based on relative energy */
     404      752336 :         if ( hStereoClassif->relE_0_1_LT < 0.1f )
     405             :         {
     406      145897 :             hStereoClassif->vad_relE = 0;
     407             :         }
     408             :         else
     409             :         {
     410      606439 :             hStereoClassif->vad_relE = 1;
     411             :         }
     412             : 
     413             :         /* combine classical VAD flag with VAD flag based on relative energy */
     414      752336 :         hStereoClassif->vad_flag_glob &= hStereoClassif->vad_relE;
     415             :     }
     416             : 
     417    10785106 :     if ( idchan == 0 )
     418             :     {
     419     5756464 :         mvr2r( lsf_new, hStereoClassif->lsf_ch1, M );
     420     5756464 :         hStereoClassif->lepsP_ch1 = logf( epsP[13] + 1e-5f ) - logf( epsP[0] + 1e-5f );
     421             : 
     422     5756464 :         hStereoClassif->xtalk_fv[E_lsf_1] = lsf_new[0];
     423     5756464 :         hStereoClassif->xtalk_fv[E_lsf_4] = lsf_new[3];
     424     5756464 :         hStereoClassif->xtalk_fv[E_lsf_9] = lsf_new[8];
     425     5756464 :         hStereoClassif->xtalk_fv[E_lsf_14] = lsf_new[13];
     426     5756464 :         hStereoClassif->xtalk_fv[E_lepsP_13] = hStereoClassif->lepsP_ch1;
     427             :     }
     428             :     else
     429             :     {
     430     5028642 :         hStereoClassif->xtalk_fv[E_sum_d_LSF] = 0;
     431    85486914 :         for ( i = 0; i < M; i++ )
     432             :         {
     433    80458272 :             hStereoClassif->xtalk_fv[E_sum_d_LSF] += fabsf( lsf_new[i] - hStereoClassif->lsf_ch1[i] );
     434             :         }
     435             : 
     436     5028642 :         if ( hStereoClassif->vad_flag_glob && ( hStereoClassif->ratio_L > 0.0f && hStereoClassif->ratio_L < 1.0f ) )
     437             :         {
     438     4644985 :             hStereoClassif->xtalk_fv[E_sum_d_LSF] = 0.0f;
     439             :         }
     440     5028642 :         lepsP_ch2 = logf( epsP[13] + 1e-5f ) - logf( epsP[0] + 1e-5f );
     441     5028642 :         hStereoClassif->xtalk_fv[E_d_lepsP_13] = fabsf( hStereoClassif->lepsP_ch1 - lepsP_ch2 );
     442             :     }
     443             : 
     444    10785106 :     if ( idchan == 0 )
     445             :     {
     446     5756464 :         mvs2s( pitch, hStereoClassif->pitch_ch1, 3 );
     447     5756464 :         mvr2r( voicing, hStereoClassif->voicing_ch1, 3 );
     448             : 
     449     5756464 :         hStereoClassif->xtalk_fv[E_pitch] = 1 / 3.0f * ( pitch[0] + pitch[1] + pitch[2] );
     450     5756464 :         hStereoClassif->xtalk_fv[E_voicing] = 1 / 3.0f * ( voicing[0] + voicing[1] + voicing[2] );
     451             :     }
     452             :     else
     453             :     {
     454     5028642 :         hStereoClassif->xtalk_fv[E_d_pitch] = 0;
     455     5028642 :         hStereoClassif->xtalk_fv[E_d_voicing] = 0;
     456    20114568 :         for ( i = 0; i < 3; i++ )
     457             :         {
     458    15085926 :             hStereoClassif->xtalk_fv[E_d_pitch] += (float) abs( pitch[i] - hStereoClassif->pitch_ch1[i] );
     459    15085926 :             hStereoClassif->xtalk_fv[E_d_voicing] += fabsf( voicing[i] - hStereoClassif->voicing_ch1[i] );
     460             :         }
     461     5028642 :         hStereoClassif->xtalk_fv[E_d_pitch] /= 3.0f;
     462     5028642 :         hStereoClassif->xtalk_fv[E_d_voicing] /= 3.0f;
     463             :     }
     464             : 
     465    10785106 :     if ( idchan == 0 )
     466             :     {
     467     5756464 :         hStereoClassif->cor_map_sum_ch1 = cor_map_sum;
     468     5756464 :         hStereoClassif->non_sta_ch1 = non_staX;
     469     5756464 :         hStereoClassif->sp_div_ch1 = sp_div;
     470             : 
     471     5756464 :         hStereoClassif->xtalk_fv[E_cor_map_sum] = cor_map_sum;
     472     5756464 :         hStereoClassif->xtalk_fv[E_nchar] = logf( hStereoClassif->nchar_ch1 + 1.0f );
     473     5756464 :         hStereoClassif->xtalk_fv[E_non_sta] = non_staX;
     474     5756464 :         hStereoClassif->xtalk_fv[E_sp_div] = logf( sp_div + 1.0f );
     475             :     }
     476             :     else
     477             :     {
     478     5028642 :         hStereoClassif->xtalk_fv[E_d_cor_map_sum] = fabsf( hStereoClassif->cor_map_sum_ch1 - cor_map_sum );
     479     5028642 :         hStereoClassif->xtalk_fv[E_d_nchar] = fabsf( logf( hStereoClassif->nchar_ch1 + 1.0f ) - logf( hStereoClassif->nchar_ch2 + 1.0f ) );
     480     5028642 :         hStereoClassif->xtalk_fv[E_d_non_sta] = fabsf( hStereoClassif->non_sta_ch1 - non_staX );
     481     5028642 :         hStereoClassif->xtalk_fv[E_d_sp_div] = fabsf( logf( hStereoClassif->sp_div_ch1 + 1.0f ) - logf( sp_div + 1.0f ) );
     482             :     }
     483             : 
     484    10785106 :     if ( idchan == 0 )
     485             :     {
     486     5756464 :         hStereoClassif->xtalk_fv[E_dE1] = logf( hStereoClassif->dE1_ch1 + 1.0f );
     487             :     }
     488             :     else
     489             :     {
     490     5028642 :         hStereoClassif->xtalk_fv[E_d_dE1] = fabsf( logf( hStereoClassif->dE1_ch1 + 1.0f ) - logf( hStereoClassif->dE1_ch2 + 1.0f ) );
     491             :     }
     492             : 
     493    10785106 :     if ( idchan == 0 )
     494             :     {
     495     5756464 :         if ( clas > VOICED_CLAS )
     496             :         {
     497      221606 :             hStereoClassif->clas_ch1 = VOICED_CLAS;
     498             :         }
     499     5534858 :         else if ( clas < VOICED_CLAS )
     500             :         {
     501     3916231 :             hStereoClassif->clas_ch1 = UNVOICED_CLAS;
     502             :         }
     503             :         else
     504             :         {
     505     1618627 :             hStereoClassif->clas_ch1 = clas;
     506             :         }
     507             : 
     508     5756464 :         hStereoClassif->xtalk_fv[E_clas] = hStereoClassif->clas_ch1;
     509             :     }
     510             :     else
     511             :     {
     512     5028642 :         if ( clas > VOICED_CLAS )
     513             :         {
     514      160439 :             clas_ch2 = VOICED_CLAS;
     515             :         }
     516     4868203 :         else if ( clas < VOICED_CLAS )
     517             :         {
     518     3774374 :             clas_ch2 = UNVOICED_CLAS;
     519             :         }
     520             :         else
     521             :         {
     522     1093829 :             clas_ch2 = clas;
     523             :         }
     524             : 
     525     5028642 :         hStereoClassif->xtalk_fv[E_d_clas] = (float) abs( hStereoClassif->clas_ch1 - clas_ch2 );
     526             :     }
     527             : 
     528    10785106 :     if ( idchan == 0 )
     529             :     {
     530     5756464 :         hStereoClassif->xtalk_fv[E_ps_diff] = hStereoClassif->ps_diff_ch1;
     531     5756464 :         hStereoClassif->xtalk_fv[E_ps_sta] = hStereoClassif->ps_sta_ch1;
     532             :     }
     533             :     else
     534             :     {
     535     5028642 :         hStereoClassif->xtalk_fv[E_d_ps_diff] = fabsf( hStereoClassif->ps_diff_ch1 - hStereoClassif->ps_diff_ch2 );
     536     5028642 :         hStereoClassif->xtalk_fv[E_d_ps_sta] = fabsf( hStereoClassif->ps_sta_ch1 - hStereoClassif->ps_sta_ch2 );
     537             :     }
     538             : 
     539    10785106 :     return;
     540             : }
     541             : 
     542             : 
     543             : /*-------------------------------------------------------------------*
     544             :  * Function unclr_classifier_td()
     545             :  *
     546             :  * Classify current TD frame as uncorrelated L/R (1) or normal stereo (0)
     547             :  *-------------------------------------------------------------------*/
     548             : 
     549       24514 : void unclr_classifier_td(
     550             :     CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure       */
     551             : )
     552             : {
     553             :     int16_t i, ind;
     554             :     float relE_ST, edge, edge_0_1;
     555             :     float score, fvn[SSC_MAX_NFEA];
     556             : #ifdef DEBUG_MODE_TD
     557             :     int16_t dec;
     558             : #endif
     559       24514 :     STEREO_CLASSIF_HANDLE hStereoClassif = hCPE->hStereoClassif;
     560             : 
     561       24514 :     set_f( fvn, -1.0f, SSC_MAX_NFEA );
     562             : 
     563             :     /* calculate raw score based on LR */
     564       24514 :     score = UNCLR_INTERCEPT_TD;
     565      269654 :     for ( i = 0; i < SIZE_UNCLR_ISEL_TD; i++ )
     566             :     {
     567      245140 :         ind = unclr_isel_td[i];
     568             : 
     569             :         /* mean & std removal */
     570      245140 :         fvn[i] = ( hStereoClassif->unclr_fv[ind] - unclr_mean_td[i] ) / unclr_scale_td[i];
     571             : 
     572             :         /* LR */
     573      245140 :         score += fvn[i] * unclr_coef_td[i];
     574             :     }
     575             : 
     576             : #ifdef DEBUG_MODE_TD
     577             :     /* raw decision */
     578             :     dec = score > 0;
     579             : #endif
     580             : 
     581             :     /* normalize score to -1:+1 */
     582       24514 :     if ( score > UNCLR_SCORE_THR )
     583             :     {
     584        9655 :         score = UNCLR_SCORE_THR;
     585             :     }
     586       14859 :     else if ( score < -UNCLR_SCORE_THR )
     587             :     {
     588        2288 :         score = -UNCLR_SCORE_THR;
     589             :     }
     590       24514 :     score /= 2 * UNCLR_SCORE_THR;
     591             : 
     592             :     /* weight raw score with relative energy */
     593       24514 :     score *= hStereoClassif->relE_0_1;
     594             : 
     595             :     /* rising edge detection on relE */
     596       24514 :     relE_ST = mean( hStereoClassif->relE_buf, UNCLR_L_RELE );
     597       24514 :     if ( hStereoClassif->relE_0_1 > relE_ST )
     598             :     {
     599       11079 :         rc_filter( hStereoClassif->relE_0_1, hStereoClassif->unclr_relE_0_1_LT, UNCLR_RC_ORDER, RC_FACT_UP );
     600             :     }
     601             :     else
     602             :     {
     603       13435 :         rc_filter( hStereoClassif->relE_0_1, hStereoClassif->unclr_relE_0_1_LT, UNCLR_RC_ORDER, RC_FACT_DOWN );
     604             :     }
     605             : 
     606       24514 :     edge = hStereoClassif->relE_0_1 - hStereoClassif->unclr_relE_0_1_LT[UNCLR_RC_ORDER - 1];
     607       24514 :     edge_0_1 = lin_interp( edge, 0.0f, 0.95f, 1.0f, 0.9f, 1 );
     608             : 
     609             :     /* LT average */
     610       24514 :     hStereoClassif->unclr_wscore = edge_0_1 * hStereoClassif->unclr_wscore + ( 1 - edge_0_1 ) * score;
     611             : 
     612             :     /* binary decision w. hysteresis (switch the decision only when coder_type is GC, UC or IC) */
     613       24514 :     if ( ( ( hStereoClassif->unclr_decision == 0 && hStereoClassif->unclr_wscore > 0.1f ) || ( hStereoClassif->unclr_decision == 1 && hStereoClassif->unclr_wscore < -0.07f ) ) && ( hStereoClassif->unclr_sw_enable_cnt[0] > 0 || hStereoClassif->unclr_sw_enable_cnt[1] > 0 ) )
     614             :     {
     615             :         /* let's switch the binary decision */
     616         115 :         hStereoClassif->unclr_decision = !hStereoClassif->unclr_decision;
     617             :     }
     618             : 
     619             : #ifdef DEBUG_MODE_TD
     620             :     dbgwrite( &dec, sizeof( int16_t ), 1, 1, "res/unclr_dec.x" );
     621             :     dbgwrite( &hStereoClassif->unclr_wscore, sizeof( float ), 1, 1, "res/unclr_wscore.x" );
     622             :     dbgwrite( &hStereoClassif->unclr_decision, sizeof( int16_t ), 1, 1, "res/unclr_dec_hyst.x" );
     623             : #endif
     624             : 
     625       24514 :     return;
     626             : }
     627             : 
     628             : 
     629             : /*-------------------------------------------------------------------*
     630             :  * Function unclr_classifier_dft()
     631             :  *
     632             :  * Classifies current DFT frame as uncorrelated L/R (1) or normal stereo (0)
     633             :  *-------------------------------------------------------------------*/
     634             : 
     635      727822 : void unclr_classifier_dft(
     636             :     CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure       */
     637             : )
     638             : {
     639             :     int16_t i, ind;
     640             :     float edge, relE_ST, edge_0_1;
     641             :     float score, fvn[SSC_MAX_NFEA];
     642             : #ifdef DEBUG_MODE_TD
     643             :     int16_t dec;
     644             : #endif
     645             : 
     646      727822 :     STEREO_CLASSIF_HANDLE hStereoClassif = hCPE->hStereoClassif;
     647             : 
     648             :     /* calculate raw score based on LR */
     649      727822 :     score = UNCLR_INTERCEPT_DFT;
     650     6550398 :     for ( i = 0; i < SIZE_UNCLR_ISEL_DFT; i++ )
     651             :     {
     652     5822576 :         ind = unclr_isel_dft[i];
     653             : 
     654             :         /* mean & std removal */
     655     5822576 :         fvn[i] = ( hStereoClassif->unclr_fv[ind] - unclr_mean_dft[i] ) / unclr_scale_dft[i];
     656             : 
     657             :         /* LR */
     658     5822576 :         score += fvn[i] * unclr_coef_dft[i];
     659             :     }
     660             : 
     661             : #ifdef DEBUG_MODE_TD
     662             :     /* raw decision */
     663             :     dec = score > 0;
     664             : #endif
     665             : 
     666             :     /* normalize score to -1:+1 */
     667      727822 :     if ( score > UNCLR_SCORE_THR )
     668             :     {
     669       35963 :         score = UNCLR_SCORE_THR;
     670             :     }
     671      691859 :     else if ( score < -UNCLR_SCORE_THR )
     672             :     {
     673      455044 :         score = -UNCLR_SCORE_THR;
     674             :     }
     675      727822 :     score /= 2 * UNCLR_SCORE_THR;
     676             : 
     677             :     /* weight raw score with relative energy */
     678      727822 :     score *= hStereoClassif->relE_0_1;
     679             : 
     680      727822 :     if ( !hStereoClassif->vad_flag_glob )
     681             :     {
     682      184760 :         score = 0;
     683             :     }
     684             : 
     685             :     /* rising edge detector on relE */
     686      727822 :     relE_ST = mean( hStereoClassif->relE_buf, UNCLR_L_RELE );
     687      727822 :     if ( hStereoClassif->relE_0_1 > relE_ST )
     688             :     {
     689      336898 :         rc_filter( hStereoClassif->relE_0_1, hStereoClassif->unclr_relE_0_1_LT, UNCLR_RC_ORDER, RC_FACT_UP );
     690             :     }
     691             :     else
     692             :     {
     693      390924 :         rc_filter( hStereoClassif->relE_0_1, hStereoClassif->unclr_relE_0_1_LT, UNCLR_RC_ORDER, RC_FACT_DOWN );
     694             :     }
     695             : 
     696      727822 :     edge = hStereoClassif->relE_0_1 - hStereoClassif->unclr_relE_0_1_LT[UNCLR_RC_ORDER - 1];
     697      727822 :     edge_0_1 = lin_interp( edge, 0.0f, 0.95f, 1.0f, 0.9f, 1 );
     698             : 
     699             :     /* LT average */
     700      727822 :     hStereoClassif->unclr_wscore = edge_0_1 * hStereoClassif->unclr_wscore + ( 1 - edge_0_1 ) * score;
     701             : 
     702             :     /* binary decision w. hysteresis (switch the decision only when coder_type is GC, UC or IC) */
     703      727822 :     if ( ( ( hStereoClassif->unclr_decision == 0 && hStereoClassif->unclr_wscore > 0.1f ) || ( hStereoClassif->unclr_decision == 1 && hStereoClassif->unclr_wscore < -0.07f ) ) && ( hStereoClassif->unclr_sw_enable_cnt[0] > 0 ) )
     704             :     {
     705             :         /* let's switch the binary decision */
     706       11662 :         hStereoClassif->unclr_decision = !hStereoClassif->unclr_decision;
     707             :     }
     708             : 
     709             : #ifdef DEBUG_MODE_TD
     710             :     dbgwrite( &dec, sizeof( int16_t ), 1, 1, "res/unclr_dec.x" );
     711             :     dbgwrite( &hStereoClassif->unclr_wscore, sizeof( float ), 1, 1, "res/unclr_wscore.x" );
     712             :     dbgwrite( &hStereoClassif->unclr_decision, sizeof( int16_t ), 1, 1, "res/unclr_dec_hyst.x" );
     713             : #endif
     714             : 
     715      727822 :     return;
     716             : }
     717             : 
     718             : 
     719             : /*-------------------------------------------------------------------*
     720             :  * Function xtalk_classifier_td()
     721             :  *
     722             :  * Classify current TD frame as cross-talk frame (1) or normal stereo frame (0)
     723             :  *-------------------------------------------------------------------*/
     724             : 
     725       24514 : void xtalk_classifier_td(
     726             :     CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure       */
     727             : )
     728             : {
     729             :     int16_t i, ind, edge_type;
     730             :     float score, fvn[SSC_MAX_NFEA];
     731             :     float edge, edge_0_1, wedge, scr_min, scr_max, wrelE;
     732             : 
     733             : #ifdef DEBUG_MODE_TD
     734             :     int16_t dec;
     735             : #endif
     736             : 
     737       24514 :     STEREO_CLASSIF_HANDLE hStereoClassif = hCPE->hStereoClassif;
     738             : 
     739       24514 :     set_f( fvn, -1.0f, SSC_MAX_NFEA );
     740             : 
     741             :     /* calculate raw score based on LR */
     742       24514 :     score = XTALK_INTERCEPT_TD;
     743      441252 :     for ( i = 0; i < SIZE_XTALK_ISEL_TD; i++ )
     744             :     {
     745      416738 :         ind = xtalk_isel_td[i];
     746             : 
     747             :         /* mean & std removal */
     748      416738 :         fvn[i] = ( hStereoClassif->xtalk_fv[ind] - xtalk_mean_td[i] ) / xtalk_scale_td[i];
     749             : 
     750             :         /* LR */
     751      416738 :         score += fvn[i] * xtalk_coef_td[i];
     752             :     }
     753             : 
     754             :     /* normalize raw score to -1:+1 */
     755       24514 :     if ( score > XTALK_SCORE_THR_TD_UP )
     756             :     {
     757        2808 :         score = 1.0f;
     758             :     }
     759       21706 :     else if ( score < -XTALK_SCORE_THR_TD_DN )
     760             :     {
     761        3564 :         score = -1.0f;
     762             :     }
     763       18142 :     else if ( score > 0.0f )
     764             :     {
     765        8963 :         score /= XTALK_SCORE_THR_TD_UP;
     766             :     }
     767             :     else
     768             :     {
     769        9179 :         score /= XTALK_SCORE_THR_TD_DN;
     770             :     }
     771             : 
     772       24514 :     if ( hCPE->last_element_mode == IVAS_CPE_DFT )
     773             :     {
     774             :         /* overwrite score if we have just switched from DFT stereo */
     775         319 :         score = hStereoClassif->xtalk_score;
     776             :     }
     777             :     else
     778             :     {
     779       24195 :         hStereoClassif->xtalk_score = score;
     780             :     }
     781             : 
     782       24514 :     if ( !hStereoClassif->vad_flag_glob )
     783             :     {
     784             :         /* reset score to 0 in inactive segments */
     785        2358 :         score = 0;
     786             :     }
     787             : 
     788             : #ifdef DEBUG_MODE_TD
     789             :     /* raw decision */
     790             :     dec = score > 0;
     791             : #endif
     792             : 
     793             :     /* weight raw score with relative energy */
     794       24514 :     wrelE = lin_interp( hStereoClassif->relE_0_1, 0.5f, 0.95f, 0.9f, 0.0f, 1 );
     795       24514 :     hStereoClassif->xtalk_score_wrelE = wrelE * hStereoClassif->xtalk_score_wrelE + ( 1 - wrelE ) * score;
     796       24514 :     score = hStereoClassif->xtalk_score_wrelE;
     797             : 
     798             :     /* rising edge detector on raw score -> yields 1 if strong rising edge is detected in the raw score buffer */
     799       24514 :     mvr2r( &hStereoClassif->xtalk_score_buf[0], &hStereoClassif->xtalk_score_buf[1], XTALK_SCORE_BUF_LEN - 1 );
     800       24514 :     hStereoClassif->xtalk_score_buf[0] = score;
     801             : 
     802       24514 :     minimum( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, &scr_min );
     803       24514 :     maximum( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, &scr_max );
     804             : 
     805             : #ifdef DEBUG_MODE_TD
     806             :     edge_type = 0;
     807             :     edge_detect( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, -0.2f, 1.0f, &edge, &edge_type );
     808             :     edge_0_1 = lin_interp( 1 - edge, 1.0f, 1.0f, 0.6f, 0.0f, 1 );
     809             :     dbgwrite( &score, sizeof( float ), 1, 1, "res/score.x" );
     810             :     dbgwrite( &edge_0_1, sizeof( float ), 1, 1, "res/edge_0_1.x" );
     811             : #endif
     812             : 
     813       24514 :     if ( ( ( scr_min < 0.0f && scr_max > 0.2f ) || ( scr_max - scr_min > 0.5f ) ) )
     814             :     {
     815             :         /* test rising edge (use 0 as edge_type because of newer->older buffer samples ordering) */
     816        4469 :         edge_type = 0;
     817        4469 :         edge_detect( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, -0.2f, 1.0f, &edge, &edge_type );
     818             : 
     819        4469 :         if ( edge_type == 0 && edge < 0.3f )
     820             :         {
     821             :             /* normalize edge to 0-1 interval */
     822        2443 :             edge_0_1 = lin_interp( 1 - edge, 1.0f, 1.0f, 0.6f, 0.0f, 1 );
     823             :         }
     824             :         else
     825             :         {
     826        2026 :             edge_0_1 = 0;
     827             :         }
     828             :     }
     829             :     else
     830             :     {
     831       20045 :         edge_0_1 = 0.0f;
     832             :     }
     833             : 
     834             : 
     835             :     /* weight raw score based on rising edge detector */
     836       24514 :     wedge = lin_interp( edge_0_1, 0.0f, 0.9f, 1.0f, 0.5f, 1 );
     837             : 
     838       24514 :     hStereoClassif->xtalk_wscore = wedge * hStereoClassif->xtalk_wscore + ( 1 - wedge ) * score;
     839             : 
     840       24514 :     if ( !hStereoClassif->vad_flag_glob )
     841             :     {
     842        2358 :         hStereoClassif->xtalk_wscore = 0;
     843             :     }
     844             : 
     845             :     /* binary decision w. hysteresis (switch the decision only when coder_type is GC, UC or IC) */
     846       24514 :     if ( ( hStereoClassif->unclr_decision == 0 && hStereoClassif->xtalk_decision == 0 && hStereoClassif->xtalk_wscore > 0.03f ) /*|| (hStereoClassif->xtalk_decision == 1 && hStereoClassif->xtalk_wscore < 0.00f)*/ && ( hStereoClassif->unclr_sw_enable_cnt[0] > 0 || hStereoClassif->unclr_sw_enable_cnt[1] > 0 ) )
     847             :     {
     848             :         /* let's switch the binary decision */
     849          78 :         hStereoClassif->xtalk_decision = !hStereoClassif->xtalk_decision;
     850             :     }
     851             : 
     852             : #ifdef DEBUG_MODE_TD
     853             :     dbgwrite( &hStereoClassif->xtalk_wscore, sizeof( float ), 1, 1, "res/xtalk_wscore.x" );
     854             :     dbgwrite( &dec, sizeof( int16_t ), 1, 1, "res/xtalk_dec.x" );
     855             :     dbgwrite( &hStereoClassif->xtalk_decision, sizeof( int16_t ), 1, 1, "res/xtalk_dec_hyst.x" );
     856             : #endif
     857             : 
     858       24514 :     return;
     859             : }
     860             : 
     861             : 
     862             : /*-------------------------------------------------------------------*
     863             :  * Function xtalk_classifier_dft()
     864             :  *
     865             :  * Classify current DFT frame as cross-talk frame (1) or normal stereo frame (0)
     866             :  *-------------------------------------------------------------------*/
     867             : 
     868      868212 : void xtalk_classifier_dft(
     869             :     CPE_ENC_HANDLE hCPE,   /* i/o: CPE encoder structure                   */
     870             :     const int16_t itd,     /* i  : ITD from DFT stereo - used as a feature */
     871             :     const float gcc_phat[] /* i  : GPHAT cross-channel correlation function*/
     872             : )
     873             : {
     874             :     int16_t i, ind, itd2, thr;
     875             :     float score, m1, m2;
     876             :     STEREO_CLASSIF_HANDLE hStereoClassif;
     877             :     ITD_DATA_HANDLE hItd;
     878             :     float fvn[SSC_MAX_NFEA], edge, edge_0_1, wedge;
     879             :     float ratio_m1_m2, m2_m2, d_itd2, itd1_flip;
     880             :     float scr_min, scr_max;
     881             : #ifdef DEBUG_MODE_TD
     882             :     int16_t dec;
     883             : #endif
     884             : 
     885      868212 :     hStereoClassif = hCPE->hStereoClassif;
     886      868212 :     hItd = ( hCPE->hStereoDft != NULL ) ? hCPE->hStereoDft->hItd : hCPE->hStereoMdct->hItd;
     887             : 
     888      868212 :     m1 = 0.0f;
     889      868212 :     m2 = 0.0f;
     890             : 
     891      868212 :     itd2 = 0;
     892      868212 :     thr = ( hCPE->element_brate >= IVAS_32k ? 2 : 1 ) * CLASSIFIER_ITD_THRES;
     893             : 
     894      868212 :     if ( itd > thr )
     895             :     {
     896       37449 :         m1 = fabsf( gcc_phat[itd + XTALK_PHAT_LEN] );
     897       37449 :         m2 = fabsf( gcc_phat[0] );
     898       37449 :         itd2 = -XTALK_PHAT_LEN;
     899     7061056 :         for ( i = 1; i < XTALK_PHAT_LEN - thr; i++ )
     900             :         {
     901     7023607 :             if ( fabsf( gcc_phat[i] ) > m2 )
     902             :             {
     903      292800 :                 itd2 = -XTALK_PHAT_LEN + i;
     904      292800 :                 m2 = fabsf( gcc_phat[i] );
     905             :             }
     906             :         }
     907             :     }
     908      830763 :     else if ( itd < -thr )
     909             :     {
     910       48745 :         m1 = fabsf( gcc_phat[itd + XTALK_PHAT_LEN] );
     911       48745 :         m2 = fabsf( gcc_phat[XTALK_PHAT_LEN + thr + 1] );
     912       48745 :         itd2 = thr + 1;
     913     9190904 :         for ( i = XTALK_PHAT_LEN + thr + 2; i < 2 * XTALK_PHAT_LEN + 1; i++ )
     914             :         {
     915     9142159 :             if ( fabsf( gcc_phat[i] ) > m2 )
     916             :             {
     917      222038 :                 itd2 = -XTALK_PHAT_LEN + i;
     918      222038 :                 m2 = fabsf( gcc_phat[i] );
     919             :             }
     920             :         }
     921             :     }
     922             : 
     923      868212 :     ratio_m1_m2 = fabsf( m1 * m2 ) / fabsf( m1 + m2 + 1.0f );
     924      868212 :     m2_m2 = hItd->prev_m2 * m2;
     925      868212 :     d_itd2 = (float) abs( itd2 - hItd->prev_itd2 );
     926      868212 :     itd1_flip = (float) ( max( itd, hItd->prev_itd1 ) * ( -min( itd, hItd->prev_itd1 ) ) );
     927             : 
     928             : 
     929      868212 :     hStereoClassif->xtalk_fv[E_gphat_d_itd2] = d_itd2;
     930      868212 :     hStereoClassif->xtalk_fv[E_gphat_itd1_flip] = itd1_flip;
     931      868212 :     hStereoClassif->xtalk_fv[E_gphat_ratio_m1_m2] = ratio_m1_m2 * hStereoClassif->prev_ratio_m1_m2;
     932      868212 :     hStereoClassif->xtalk_fv[E_gphat_m2_m2] = m2_m2;
     933             : 
     934      868212 :     hStereoClassif->prev_ratio_m1_m2 = ratio_m1_m2;
     935             : 
     936      868212 :     set_f( fvn, -1.0f, SSC_MAX_NFEA );
     937             : 
     938             :     /* calculate raw score based on LR */
     939      868212 :     score = XTALK_INTERCEPT_DFT;
     940    10418544 :     for ( i = 0; i < SIZE_XTALK_ISEL_DFT; i++ )
     941             :     {
     942     9550332 :         ind = xtalk_isel_dft[i];
     943             : 
     944             :         /* mean & std removal */
     945     9550332 :         fvn[i] = ( hStereoClassif->xtalk_fv[ind] - xtalk_mean_dft[i] ) / xtalk_scale_dft[i];
     946             : 
     947             :         /* LR */
     948     9550332 :         score += fvn[i] * xtalk_coef_dft[i];
     949             :     }
     950             : 
     951             :     /* normalize score to -1:+1 */
     952      868212 :     if ( score > XTALK_SCORE_THR_DFT )
     953             :     {
     954        5370 :         score = 1.0f;
     955             :     }
     956      862842 :     else if ( score < -XTALK_SCORE_THR_DFT )
     957             :     {
     958      179070 :         score = -1.0f;
     959             :     }
     960             :     else
     961             :     {
     962      683772 :         score /= XTALK_SCORE_THR_DFT;
     963             :     }
     964             : 
     965             :     /* raw score */
     966      868212 :     hStereoClassif->xtalk_score = score;
     967             : 
     968      868212 :     if ( !hStereoClassif->vad_flag_glob )
     969             :     {
     970      201427 :         score = 0;
     971             :     }
     972             : 
     973             : #ifdef DEBUG_MODE_TD
     974             :     /* raw decision */
     975             :     dec = score > 0;
     976             : #endif
     977             : 
     978             : 
     979             :     /* rising edge detector on raw score -> yields 1 if strong rising edge is detected in the given buffer */
     980      868212 :     mvr2r( &hStereoClassif->xtalk_score_buf[0], &hStereoClassif->xtalk_score_buf[1], XTALK_SCORE_BUF_LEN - 1 );
     981      868212 :     hStereoClassif->xtalk_score_buf[0] = score;
     982             : 
     983      868212 :     minimum( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, &scr_min );
     984      868212 :     maximum( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, &scr_max );
     985             : 
     986             : #ifdef DEBUG_MODE_TD
     987             :     edge = redge_detect( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, -0.2f, 1.0f );
     988             :     edge_0_1 = lin_interp( 1 - edge, 1.0f, 1.0f, 0.83f, 0.0f, 1 );
     989             :     dbgwrite( &score, sizeof( float ), 1, 1, "res/score.x" );
     990             :     dbgwrite( &edge_0_1, sizeof( float ), 1, 1, "res/edge_0_1.x" );
     991             : #endif
     992             : 
     993      868212 :     if ( scr_min < 0.2f && scr_max > 0.0f )
     994             :     {
     995       88688 :         edge = redge_detect( hStereoClassif->xtalk_score_buf, XTALK_SCORE_BUF_LEN, -0.2f, 1.0f );
     996       88688 :         edge_0_1 = lin_interp( 1 - edge, 1.0f, 1.0f, 0.83f, 0.0f, 1 );
     997             :     }
     998             :     else
     999             :     {
    1000      779524 :         edge_0_1 = 0.0f;
    1001             :     }
    1002             : 
    1003             : 
    1004             :     /* weight raw score based on rising edge detector */
    1005      868212 :     wedge = lin_interp( edge_0_1, 0.0f, 0.95f, 1.0f, 0.3f, 1 );
    1006      868212 :     hStereoClassif->xtalk_wscore = wedge * hStereoClassif->xtalk_wscore + ( 1 - wedge ) * score;
    1007             : 
    1008      868212 :     if ( ( itd == 0 ) || ( hCPE->hCoreCoder[0]->vad_flag == 0 ) )
    1009             :     {
    1010      679174 :         hStereoClassif->xtalk_decision = 0;
    1011             :     }
    1012      189038 :     else if ( hCPE->element_brate >= IVAS_24k4 &&
    1013      118834 :               hStereoClassif->xtalk_decision == 0 && ( ( m1 * 0.8 < m2 && hItd->prev_m1 * 0.8 < hItd->prev_m2 && abs( itd2 - hItd->prev_itd2 ) < 4 && m1 > 0.15 && hItd->prev_m1 > 0.15 ) || ( hStereoClassif->xtalk_wscore > 0.8 ) || ( itd > thr && hItd->prev_itd1 < -thr && hStereoClassif->silence_flag == 0 ) || ( hItd->prev_itd1 > thr && itd < -thr && hStereoClassif->silence_flag == 0 ) ) &&
    1014        5137 :               hCPE->hCoreCoder[0]->vad_flag == 1 && hCPE->hCoreCoder[0]->flag_noisy_speech_snr == 0 && hCPE->hCoreCoder[0]->hNoiseEst->aEn_inac_cnt > 15 )
    1015             :     {
    1016          30 :         hStereoClassif->xtalk_decision = 1;
    1017             : #ifdef DEBUG_MODE_TD
    1018             :         printf( "\nSwitch DFT-stereo -> TD-LR on frame %d\n", frame );
    1019             : #endif
    1020             :     }
    1021      189008 :     else if ( hCPE->element_brate >= IVAS_16k4 && hStereoClassif->xtalk_decision == 0 && abs( itd ) > STEREO_DFT_ITD_MAX && ( hCPE->hCoreCoder[0]->lp_speech - hCPE->hCoreCoder[0]->lp_noise ) > 25.0f )
    1022             :     {
    1023         166 :         hStereoClassif->xtalk_decision = 1;
    1024             :     }
    1025             : 
    1026             : #ifdef DEBUG_MODE_TD
    1027             :     dbgwrite( &hStereoClassif->xtalk_wscore, sizeof( float ), 1, 1, "res/xtalk_wscore.x" );
    1028             :     dbgwrite( &dec, sizeof( int16_t ), 1, 1, "res/xtalk_dec.x" );
    1029             :     dbgwrite( &hStereoClassif->xtalk_decision, sizeof( int16_t ), 1, 1, "res/xtalk_dec_hyst.x" );
    1030             : #endif
    1031             : 
    1032             :     /* updates */
    1033      868212 :     hItd->prev_m1 = m1;
    1034      868212 :     hItd->prev_m2 = m2;
    1035      868212 :     hItd->prev_itd1 = itd;
    1036      868212 :     hItd->prev_itd2 = itd2;
    1037             : 
    1038      868212 :     return;
    1039             : }
    1040             : 
    1041             : 
    1042             : /*-------------------------------------------------------------------*
    1043             :  * Function rc_filter()
    1044             :  *
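    1045             :  * Cascade of 'order' first-order recursive smoothers updated in place: y[0] = tau*y[0] + (1-tau)*x, then each y[i] = tau*y[i] + (1-tau)*y[i-1] using the already-updated y[i-1]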
    1046             :  *-------------------------------------------------------------------*/
    1047             : 
    1048      752336 : static void rc_filter(
    1049             :     const float x,
    1050             :     float *y,
    1051             :     const int16_t order,
    1052             :     const float tau )
    1053             : {
    1054             :     int16_t i;
    1055             : 
    1056      752336 :     y[0] = tau * y[0] + ( 1 - tau ) * x;
    1057    15046720 :     for ( i = 1; i < order; i++ )
    1058             :     {
    1059    14294384 :         y[i] = tau * y[i] + ( 1 - tau ) * y[i - 1];
    1060             :     }
    1061             : 
    1062      752336 :     return;
    1063             : }
    1064             : 
    1065             : 
    1066             : /*-------------------------------------------------------------------*
    1067             :  * Function edge_detect()
    1068             :  *
    1069             :  * Rising/falling edge detection algorithm
    1070             :  * Analyzes the input buffer and outputs strength and type of the detected edge (rising or falling)
    1071             :  * Set edge_type to 0/1/2 when calling this function to specify the edge type you want to detect. The returned value will be modified
    1072             :  * according to the edge type detected (-1 indicates that no edge has been detected)
    1073             :  *-------------------------------------------------------------------*/
    1074             : 
    1075        4469 : static void edge_detect(
    1076             :     const float *inp,    /* i  : input buffer                       */
    1077             :     const int16_t len,   /* i  : length of the input buffer         */
    1078             :     const float inp_min, /* i  : minimum value for edge detection   */
    1079             :     const float inp_max, /* i  : maximum value for edge detection   */
    1080             :     float *edge_str,     /* o  : edge strength (from 0 to Inf)      */
    1081             :     int16_t *edge_type   /* i/o: edge type (to be) detected: 0 = falling, 1 = rising, 2 = both */
    1082             : )
    1083             : {
    1084             :     int16_t i, j, et;
    1085             :     float y, err, edge_slope, edge[EDGE_MAX_LEN];
    1086             :     float edge_min, err0;
    1087             : 
    1088        4469 :     et = -1;
    1089        4469 :     edge_min = 1e7f;
    1090             : 
    1091        4469 :     if ( *edge_type == 0 || *edge_type == 2 )
    1092             :     {
    1093             :         /* falling edge detection */
    1094        4469 :         set_f( edge, 0.0f, EDGE_MAX_LEN );
    1095             : 
    1096             :         /* set error at 0th index */
    1097        4469 :         if ( inp[0] > inp_max )
    1098             :         {
    1099           0 :             err0 = 0.0f;
    1100             :         }
    1101             :         else
    1102             :         {
    1103             :             /* inhibits edge smearing effect */
    1104        4469 :             err0 = powf( inp[0] - inp_max, 2 );
    1105             :         }
    1106             : 
    1107             :         /* test edges on intervals from 2 to len */
    1108       22345 :         for ( i = 1; i < len; i++ )
    1109             :         {
    1110       17876 :             edge_slope = ( inp_max - inp_min ) / i;
    1111       17876 :             edge[i] = err0;
    1112       62566 :             for ( j = 1; j <= i; j++ )
    1113             :             {
    1114       44690 :                 y = inp_max - edge_slope * j;
    1115       44690 :                 err = y - check_bounds( inp[j], inp_min, inp_max );
    1116       44690 :                 edge[i] += err * err;
    1117             :             }
    1118             : 
    1119       17876 :             edge[i] /= i + 1;
    1120             : 
    1121       17876 :             if ( edge[i] < edge_min )
    1122             :             {
    1123       14078 :                 edge_min = edge[i];
    1124       14078 :                 et = 0;
    1125             :             }
    1126             :         }
    1127             :     }
    1128             : 
    1129        4469 :     if ( *edge_type == 1 || *edge_type == 2 )
    1130             :     {
    1131             :         /* rising edge detection */
    1132           0 :         set_f( edge, 0.0f, EDGE_MAX_LEN );
    1133             : 
    1134             :         /* set error at 0th index */
    1135           0 :         if ( inp[0] < inp_min )
    1136             :         {
    1137           0 :             err0 = 0.0f;
    1138             :         }
    1139             :         else
    1140             :         {
    1141             :             /* inhibits edge smearing effect */
    1142           0 :             err0 = powf( inp[0] - inp_min, 2 );
    1143             :         }
    1144             : 
    1145             :         /* test edges on intervals from 2 to len */
    1146           0 :         for ( i = 1; i <= len; i++ )
    1147             :         {
    1148           0 :             edge_slope = ( inp_max - inp_min ) / i;
    1149           0 :             edge[i] = err0;
    1150           0 :             for ( j = 1; j < i; j++ )
    1151             :             {
    1152           0 :                 y = inp_min + edge_slope * j;
    1153           0 :                 err = y - check_bounds( inp[j], inp_min, inp_max );
    1154           0 :                 edge[i] += err * err;
    1155             :             }
    1156             : 
    1157           0 :             edge[i] /= i + 1;
    1158             : 
    1159           0 :             if ( edge[i] < edge_min )
    1160             :             {
    1161           0 :                 edge_min = edge[i];
    1162           0 :                 et = 1;
    1163             :             }
    1164             :         }
    1165             :     }
    1166             : 
    1167        4469 :     *edge_str = edge_min;
    1168        4469 :     *edge_type = et;
    1169             : 
    1170        4469 :     return;
    1171             : }
    1172             : 
    1173             : 
    1174             : /*-------------------------------------------------------------------*
    1175             :  * Function redge_detect()
    1176             :  *
    1177             :  * Rising edge detection algorithm
    1178             :  * Analyzes the input buffer and outputs value close to 1 when it detects strong rising edge
    1179             :  *-------------------------------------------------------------------*/
    1180             : 
    1181             : /*! r: rising edge strength normalized to 0-1 */
    1182       88688 : static float redge_detect(
    1183             :     const float *inp,    /* i  : input buffer (ordered from newest to oldest values)*/
    1184             :     const int16_t len,   /* i  : length of the input buffer                         */
    1185             :     const float inp_min, /* i  : minimum value for edge detection                   */
    1186             :     const float inp_max  /* i  : maximum value for edge detection                   */
    1187             : )
    1188             : {
    1189             :     int16_t i, j;
    1190             :     float y, err, edge_slope, edge[REDGE_MAX_LEN];
    1191             :     float edge_min, err0;
    1192             : 
    1193       88688 :     set_f( edge, 0.0f, REDGE_MAX_LEN );
    1194       88688 :     edge_min = 1e7f;
    1195             : 
    1196             :     /* test rising edges on intervals from 2 to len */
    1197       88688 :     if ( inp[0] > inp_max )
    1198             :     {
    1199           0 :         err0 = 0.0f;
    1200             :     }
    1201             :     else
    1202             :     {
    1203       88688 :         err0 = powf( inp[0] - inp_max, 2 );
    1204             :     }
    1205      443440 :     for ( i = 1; i < len; i++ )
    1206             :     {
    1207      354752 :         edge_slope = ( inp_max - inp_min ) / i;
    1208      354752 :         edge[i] = err0;
    1209      886880 :         for ( j = 1; j < i; j++ )
    1210             :         {
    1211      532128 :             y = inp_max - edge_slope * j;
    1212      532128 :             if ( inp[j] == inp[j - 1] && inp[j] == inp_max )
    1213             :             {
    1214             :                 /* we are saturated at inp_max */
    1215        1087 :                 err = 0.0f;
    1216             :             }
    1217      531041 :             else if ( inp[j] < inp_min )
    1218             :             {
    1219             :                 /* we are below inp_min */
    1220       46716 :                 err = y - inp_min;
    1221             :             }
    1222             :             else
    1223             :             {
    1224      484325 :                 err = y - inp[j];
    1225             :             }
    1226      532128 :             err = err * err;
    1227      532128 :             edge[i] += err;
    1228             :         }
    1229             : 
    1230      354752 :         edge[i] /= i + 1;
    1231             : 
    1232      354752 :         if ( edge[i] < edge_min )
    1233             :         {
    1234      323553 :             edge_min = edge[i];
    1235             :         }
    1236             :     }
    1237             : 
    1238       88688 :     return edge_min;
    1239             : }

Generated by: LCOV version 1.14