LCOV - code coverage report
Current view: top level - lib_enc - speech_music_classif.c (source / functions) Hit Total Coverage
Test: Coverage on main -- short test vectors @ 6c9ddc4024a9c0e1ecb8f643f114a84a0e26ec6b Lines: 1050 1151 91.2 %
Date: 2025-05-23 08:37:30 Functions: 17 17 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : #include <assert.h>
      38             : #include <stdint.h>
      39             : #include "options.h"
      40             : #ifdef DEBUGGING
      41             : #include "debug.h"
      42             : #endif
      43             : #include <math.h>
      44             : #include "cnst.h"
      45             : #include "prot.h"
      46             : #include "ivas_prot.h"
      47             : #include "rom_enc.h"
      48             : #include "rom_com.h" /* Common static table prototypes         */
      49             : #include "wmc_auto.h"
      50             : 
      51             : 
      52             : /*---------------------------------------------------------------------*
      53             :  * Local constants (preprocessor macros defined in this source file)
      54             :  *---------------------------------------------------------------------*/
      55             : 
      56             : #define ATT_SEG_LEN       ( L_FRAME / ATT_NSEG ) /* L_FRAME divided by the number of segments ATT_NSEG */
      57             : #define ATT_3LSUB_POS     ( 3 * ATT_NSEG / NB_SUBFR )
      58             : #define ATT_3LSUB_POS_16k ( int16_t )( ( 4.0f * ATT_NSEG / (float) NB_SUBFR16k ) + 0.5f ) /* adding 0.5f before the int16_t cast rounds to the nearest integer */
      59             : 
      60             : #define THR_CORR_PEAK 0.95f
      61             : #define TON_FACT      0.95f
      62             : #define TON_ALPHA     0.95f
      63             : 
      64             : #define DLP_BIAS 0.138121f
      65             : 
      66             : #define THR_MASS_MAX     0.85f
      67             : #define THR_MASS_MIN     0.75f
      68             : #define THR_MASS_STEP_UP 0.01f
      69             : #define THR_MASS_STEP_DN 0.02f
      70             : 
      71             : 
      72             : /*---------------------------------------------------------------------*
      73             :  * Local function prototypes (static helpers, visible only inside this file)
      74             :  *---------------------------------------------------------------------*/
      75             : 
      76             : static void spec_analysis( float *Bin_E, float *p2v_map );
      77             : 
      78             : static void flux( float *Bin_E, float *p2v_map, float *old_Bin_E, float *buf_flux, int16_t attack_hangover, float dec_mov );
      79             : 
      80             : static void tonal_dist( float *p2v_map, float *buf_pkh, float *buf_Ntonal, float *buf_Ntonal2, float *buf_Ntonal_lf );
      81             : 
      82             : static int16_t mode_decision( Encoder_State *st, int16_t len, float *dec_mov, float *buf_flux, float *buf_epsP_tilt, float *buf_pkh, float *buf_cor_map_sum, float *buf_Ntonal, float *buf_Ntonal2, float *buf_Ntonal_lf, float *buf_dlp );
      83             : 
      84             : static void var_cor_calc( const float old_corr, float *mold_corr, float var_cor_t[], int16_t *high_stable_cor );
      85             : 
      86             : static int16_t attack_det( const float *inp, const int16_t last_clas, const int16_t localVAD, const int16_t coder_type, const int32_t total_brate, const int16_t element_mode, const int16_t clas, float finc_prev[], float *lt_finc, int16_t *last_strong_attack );
      87             : 
      88             : static float tonal_det( const float S[], int16_t vad_flag, float tod_S_map_lt[], float *tod_thr_lt, float *tod_weight, float *tod_S_mass_prev, float *tod_S_mass_lt );
      89             : 
      90             : static void tonal_context_improv( Encoder_State *st, const float PS[], const float voi_fv, const float cor_map_sum_fv, const float LPCErr ); /* context-based improvement of 1st and 2nd stage decision on stable tonal signals */
      91             : 
      92             : static void order_spectrum( float *vec, const int16_t len );
      93             : 
      94             : static void detect_sparseness( Encoder_State *st, const int16_t localVAD_HE_SAD, const float voi_fv ); /* called at 13.2 kbps (WB/SWB) to avoid LR-MDCT on sparse spectra */
      95             : 
      96             : static int16_t sp_mus_classif_1st( Encoder_State *st, const int16_t localVAD_HE_SAD, const float lsp_new[M], const float cor_map_sum, const float epsP[M + 1], const float PS[], float non_sta, float relE, float *voi_fv, float *cor_map_sum_fv, float *LPCErr, int16_t *high_lpn_flag ); /* 1st stage classification based on the GMM model; returns 1-music, 0-speech or noise */
      97             : 
      98             : static void sp_mus_classif_2nd( Encoder_State *st, const float Etot, int16_t *attack_flag, const float *inp ); /* 2nd stage classification (rewrite music to speech in onsets) */
      99             : 
     100             : static void music_mixed_classif_improv( Encoder_State *st, const float *new_inp, const float *epsP, const float etot, const float old_cor, const float cor_map_sum ); /* improvement of the 1st stage decision for mixed/music content */
     101             : 
     102             : 
     103             : /*---------------------------------------------------------------------*
     104             :  * speech_music_clas_init()
     105             :  *
     106             :  * Initialization of speech/music classifier: writes start-up values to the state variables, history buffers and counters reached through hSpMusClas
     107             :  *---------------------------------------------------------------------*/
     108             : 
     109        9534 : void speech_music_clas_init(
     110             :     SP_MUS_CLAS_HANDLE hSpMusClas /* i/o: speech/music classifier handle   */
     111             : )
     112             : {
     113             :     int16_t i;
     114             : 
     115        9534 :     set_f( hSpMusClas->FV_st, 0.0f, N_SMC_FEATURES );
     116             : 
     117        9534 :     hSpMusClas->inact_cnt = 0;
     118        9534 :     set_s( hSpMusClas->past_dec, 0, HANG_LEN - 1 );
     119        9534 :     set_f( hSpMusClas->past_dlp, 0, HANG_LEN - 1 );
     120        9534 :     set_f( hSpMusClas->past_dlp_mean_ST, 0, HANG_LEN - 1 );
     121        9534 :     hSpMusClas->dlp_mean_ST = 0.0f;
     122        9534 :     hSpMusClas->dlp_mean_LT = 0.0f;
     123        9534 :     hSpMusClas->dlp_var_LT = 0.0f;
     124             : 
     125      152544 :     for ( i = 0; i < N_SMC_FEATURES; i++ )
     126             :     {
     127      143010 :         hSpMusClas->prev_FV[i] = 0.5f * hout_intervals[2 * i] + 0.5f * hout_intervals[2 * i + 1]; /* midpoint of the i-th pair of entries in hout_intervals */
     128             :     }
     129             : 
     130      152544 :     for ( i = 0; i < NB_BANDS_SPMUS; i++ )
     131             :     {
     132      143010 :         hSpMusClas->past_log_enr[i] = logf( E_MIN ); /* start every band at the logarithm of E_MIN */
     133             :     }
     134             : 
     135        9534 :     hSpMusClas->sp_mus_state = -8;
     136        9534 :     hSpMusClas->wdrop = 0.0f;
     137        9534 :     hSpMusClas->wrise = 0.0f;
     138        9534 :     hSpMusClas->wdlp_0_95_sp = 0.0f;
     139        9534 :     hSpMusClas->wdlp_xtalk = 0.0f;
     140        9534 :     set_f( hSpMusClas->last_lsp, 0.0f, M_LSP_SPMUS );
     141        9534 :     hSpMusClas->last_cor_map_sum = 0.0f;
     142        9534 :     hSpMusClas->last_non_sta = 0.0f;
     143        9534 :     set_f( hSpMusClas->past_PS, 0.0f, HIGHEST_FBIN - LOWEST_FBIN );
     144        9534 :     hSpMusClas->past_ps_diff = 0;
     145        9534 :     hSpMusClas->past_epsP2 = 01; /* NOTE(review): 01 is an octal literal equal to 1, while past_epsP on the next line starts at 0 - confirm the value 1 is intended before touching it */
     146        9534 :     hSpMusClas->past_epsP = 0;
     147        9534 :     hSpMusClas->flag_spitch_cnt = 0;
     148             : 
     149        9534 :     hSpMusClas->gsc_thres[0] = TH_0_MIN;
     150        9534 :     hSpMusClas->gsc_thres[1] = TH_1_MIN;
     151        9534 :     hSpMusClas->gsc_thres[2] = TH_2_MIN;
     152        9534 :     hSpMusClas->gsc_thres[3] = TH_3_MIN;
     153        9534 :     set_f( hSpMusClas->gsc_lt_diff_etot, 0.0f, MAX_LT );
     154        9534 :     hSpMusClas->gsc_mem_etot = 0.0f;
     155        9534 :     hSpMusClas->gsc_last_music_flag = 0;
     156        9534 :     hSpMusClas->gsc_nb_thr_1 = 0;
     157        9534 :     hSpMusClas->gsc_nb_thr_3 = 0;
     158        9534 :     hSpMusClas->mold_corr = 0.9f;
     159        9534 :     hSpMusClas->mean_avr_dyn = 0.5f;
     160        9534 :     hSpMusClas->last_sw_dyn = 10.0f;
     161             : 
     162        9534 :     hSpMusClas->relE_attack_cnt = 0;
     163        9534 :     hSpMusClas->prev_relE = 0.0f;
     164        9534 :     hSpMusClas->prev_Etot = 0.0f;
     165        9534 :     hSpMusClas->prev_vad = 0;
     166        9534 :     hSpMusClas->vad_0_1_cnt = 0;
     167        9534 :     hSpMusClas->relE_attack_sum = 0;
     168             : 
     169             :     /* speech/music classifier improvement */
     170      581574 :     for ( i = 0; i < BUF_LEN; i++ )
     171             :     {
     172      572040 :         hSpMusClas->buf_flux[i] = -100; /* only the flux history starts at -100; the other histories in this loop start at 0 */
     173      572040 :         hSpMusClas->buf_pkh[i] = 0;
     174      572040 :         hSpMusClas->buf_epsP_tilt[i] = 0;
     175      572040 :         hSpMusClas->buf_cor_map_sum[i] = 0;
     176      572040 :         hSpMusClas->buf_Ntonal[i] = 0;
     177      572040 :         hSpMusClas->buf_Ntonal2[i] = 0;
     178      572040 :         hSpMusClas->buf_Ntonal_lf[i] = 0;
     179             :     }
     180             : 
     181        9534 :     set_f( hSpMusClas->lpe_buf, 0, HANG_LEN_INIT );
     182        9534 :     set_f( hSpMusClas->voicing_buf, 0, HANG_LEN_INIT );
     183        9534 :     hSpMusClas->gsc_hangover = 0;
     184        9534 :     set_f( hSpMusClas->sparse_buf, 0, HANG_LEN_INIT );
     185        9534 :     set_f( hSpMusClas->hf_spar_buf, 0, HANG_LEN_INIT );
     186        9534 :     hSpMusClas->LT_sparse = 0.0f;
     187        9534 :     hSpMusClas->gsc_cnt = 0;
     188        9534 :     hSpMusClas->last_vad_spa = 0;
     189             : 
     190        9534 :     set_f( hSpMusClas->old_Bin_E, 0.0f, 3 * N_OLD_BIN_E );
     191        9534 :     set_f( hSpMusClas->buf_etot, 0, 4 );  /* NOTE(review): literal length 4 - presumably the declared size of buf_etot; confirm against the struct */
     192        9534 :     set_f( hSpMusClas->buf_dlp, 0, 10 );  /* NOTE(review): literal length 10 - presumably the declared size of buf_dlp; confirm against the struct */
     193             : 
     194        9534 :     hSpMusClas->UV_cnt1 = 300;
     195        9534 :     hSpMusClas->LT_UV_cnt1 = 250.0f;
     196        9534 :     hSpMusClas->onset_cnt = 0;
     197        9534 :     hSpMusClas->attack_hangover = 0;
     198        9534 :     hSpMusClas->dec_mov = 0.0f;
     199        9534 :     hSpMusClas->dec_mov1 = 0.0f;
     200        9534 :     hSpMusClas->mov_log_max_spl = 200.0f;
     201        9534 :     hSpMusClas->old_lt_diff[0] = 0.0f;
     202        9534 :     hSpMusClas->old_lt_diff[1] = 0.0f;
     203             : 
     204        9534 :     set_f( hSpMusClas->finc_prev, 0.0f, ATT_NSEG );
     205        9534 :     hSpMusClas->lt_finc = 0.0f;
     206        9534 :     hSpMusClas->last_strong_attack = 0;
     207        9534 :     hSpMusClas->tdm_lt_Etot = 0.01f;
     208        9534 :     set_f( hSpMusClas->tod_lt_Bin_E, 0.0f, TOD_NSPEC );
     209        9534 :     set_f( hSpMusClas->tod_S_map_lt, 0.0f, TOD_NSPEC );
     210        9534 :     hSpMusClas->tod_thr_lt = TOD_THR_MASS;
     211        9534 :     hSpMusClas->tod_weight = 0.0f;
     212        9534 :     hSpMusClas->tod_S_mass_prev = 0.0f;
     213        9534 :     hSpMusClas->tod_S_mass_lt = 0.0f;
     214             : 
     215             :     /* speech/music classification */
     216        9534 :     set_s( hSpMusClas->lt_old_mode, 1, 3 );
     217        9534 :     hSpMusClas->lt_voicing = 0.5f;
     218        9534 :     hSpMusClas->lt_corr = 0.5f;
     219        9534 :     hSpMusClas->lt_tonality = 0;
     220        9534 :     set_s( hSpMusClas->lt_corr_pitch, 0, 3 );
     221        9534 :     hSpMusClas->lt_hangover = 0;
     222        9534 :     hSpMusClas->lowrate_pitchGain = 0;
     223             : 
     224        9534 :     hSpMusClas->lt_music_hangover = 0;
     225        9534 :     set_f( hSpMusClas->tonality2_buf, 0, HANG_LEN_INIT );
     226        9534 :     set_f( hSpMusClas->tonality3_buf, 0, HANG_LEN_INIT );
     227        9534 :     set_f( hSpMusClas->LPCErr_buf, 0, HANG_LEN_INIT );
     228        9534 :     hSpMusClas->lt_music_state = 0;
     229        9534 :     hSpMusClas->lt_speech_state = 0;
     230        9534 :     hSpMusClas->lt_speech_hangover = 0;
     231             : 
     232        9534 :     hSpMusClas->lt_dec_thres = 10.0f;
     233        9534 :     hSpMusClas->ener_RAT = 0.0f;
     234             : 
     235        9534 :     hSpMusClas->high_stable_cor = 0;
     236        9534 :     set_f( hSpMusClas->var_cor_t, 0.0f, VAR_COR_LEN );
     237             : 
     238        9534 :     hSpMusClas->lps = 0.0f;
     239        9534 :     hSpMusClas->lpm = 0.0f;
     240        9534 :     hSpMusClas->lpn = 0.0f;
     241             : 
     242        9534 :     return;
     243             : }
     244             : 
     245             : 
     246             : /*---------------------------------------------------------------------*
     247             :  * speech_music_classif()
     248             :  *
     249             :  * Speech/music classification
     250             :  *
     251             :  * The following technologies are used based on the outcome of the sp/mus classifier
     252             :  * sp_aud_decision1  sp_aud_decision2
     253             :  *       0                 0             use ACELP (+TD BWE)
     254             :  *       1                 0             use ACELP (+FD BWE) or HQ/LR-MDCT depending on bitrate
     255             :  *       1                 1             use GSC (+FD BWE) or HQ/LR-MDCT depending on bitrate
     256             :  *
     257             :  *       0                 1             exceptionally use GSC (+FD BWE) instead of LR-MDCT at 13.2 kbps (WB/SWB) for sparse spectra
     258             :  *---------------------------------------------------------------------*/
     259             : 
     260             : /*! r: none (void); the 1st stage decision (1-music, 0-speech or noise) is stored in st->sp_aud_decision1 */
     261        3100 : void speech_music_classif(
     262             :     Encoder_State *st,             /* i/o: state structure                                 */
     263             :     const float *new_inp,          /* i  : new input signal                                */
     264             :     const float *inp,              /* i  : input signal to locate attack position          */
     265             :     const int16_t localVAD_HE_SAD, /* i  : HE-SAD flag without hangover                    */
     266             :     const float lsp_new[M],        /* i  : LSPs in current frame                           */
     267             :     const float cor_map_sum,       /* i  : correlation map sum (from multi-harmonic anal.) */
     268             :     const float epsP[M + 1],       /* i  : LP prediction error                             */
     269             :     const float PS[],              /* i  : energy spectrum                                 */
     270             :     const float Etot,              /* i  : total frame energy                              */
     271             :     const float old_cor,           /* i  : max correlation from previous frame             */
     272             :     int16_t *attack_flag,          /* o  : attack flag (GSC or TC)                         */
     273             :     const float non_sta,           /* i  : unbound non-stationarity for sp/mus classifier  */
     274             :     const float relE,              /* i  : relative frame energy                           */
     275             :     int16_t *high_lpn_flag,        /* o  : sp/mus LPN flag                                 */
     276             :     const int16_t flag_spitch      /* i  : flag to indicate very short stable pitch        */
     277             : )
     278             : {
     279             :     float voi_fv, cor_map_sum_fv, LPCErr;
     280             : 
     281             :     /* 1st stage speech/music classification based on the GMM model */
     282        3100 :     st->sp_aud_decision1 = sp_mus_classif_1st( st, localVAD_HE_SAD, lsp_new, cor_map_sum, epsP, PS, non_sta, relE, &voi_fv, &cor_map_sum_fv, &LPCErr, high_lpn_flag );
     283             : 
     284        3100 :     if ( st->codec_mode == MODE1 || st->sr_core == INT_FS_12k8 )
     285             :     {
     286             : 
     287             :         /* Improvement of the 1st stage decision for mixed/music content */
     288        2050 :         if ( !st->Opt_SC_VBR && ( st->total_brate != ACELP_24k40 ) )
     289             :         {
     290        2050 :             music_mixed_classif_improv( st, new_inp, epsP, Etot, old_cor, cor_map_sum );
     291             :         }
     292             : 
     293        2050 :         st->sp_aud_decision0 = st->sp_aud_decision1; /* keeps the value decision1 has at this point; decision1 may still be reset further down (e.g. by the lp_noise override) */
     294             : 
     295             :         /* 2nd stage speech/music classification (rewrite music to speech in onsets) */
     296        2050 :         st->sp_aud_decision2 = st->sp_aud_decision1;
     297             : 
     298        2050 :         if ( st->bwidth > NB )
     299             :         {
     300        2050 :             sp_mus_classif_2nd( st, Etot, attack_flag, inp );
     301             : 
     302        2050 :             if ( flag_spitch && st->bwidth == WB && st->total_brate < ACELP_13k20 )
     303             :             {
     304             :                 /* avoid switch to AUDIO/MUSIC class for very short stable high pitch
     305             :                    and/or stable pitch with high correlation at low bitrates*/
     306           0 :                 st->sp_aud_decision2 = 0;
     307             :             }
     308             :         }
     309             : 
     310             :         /* Context-based improvement of 1st and 2nd stage decision on stable tonal signals */
     311        2050 :         if ( !st->Opt_SC_VBR && st->total_brate != ACELP_24k40 )
     312             :         {
     313        2050 :             tonal_context_improv( st, PS, voi_fv, cor_map_sum_fv, LPCErr );
     314             :         }
     315             : 
     316             :         /* Avoid using LR-MDCT on sparse spectra, use GSC instead at 13.2 kbps (WB/SWB) */
     317        2050 :         if ( !st->Opt_SC_VBR && st->total_brate == ACELP_13k20 && st->vad_flag == 1 && ( st->bwidth == WB || st->bwidth == SWB ) )
     318             :         {
     319        1042 :             detect_sparseness( st, localVAD_HE_SAD, voi_fv );
     320             :         }
     321             : 
     322             :         /* override speech/music classification to ACELP when background noise level reaches certain level */
     323             :         /* this is a patch against mis-classifications during active noisy speech segments */
     324        2050 :         if ( st->lp_noise > 12.0f )
     325             :         {
     326           0 :             st->sp_aud_decision1 = 0;
     327           0 :             st->sp_aud_decision2 = 0;
     328             :         }
     329             : 
     330             :         /* set GSC noisy speech flag on unvoiced SWB segments (the condition requires lp_noise > 12.0f, in which case sp_aud_decision1 was already forced to 0 just above) */
     331        2050 :         st->GSC_noisy_speech = 0;
     332        2050 :         if ( st->vad_flag == 1 && st->total_brate >= ACELP_13k20 && st->total_brate < ACELP_24k40 &&
     333        1042 :              st->lp_noise > 12.0f && st->sp_aud_decision1 == 0 && st->bwidth >= SWB &&
     334           0 :              st->coder_type_raw == UNVOICED )
     335             :         {
     336           0 :             st->GSC_noisy_speech = 1;
     337             :         }
     338             : 
     339             :         /* Select AUDIO frames (MODE1 only; in DEBUGGING builds st->force == 1 selects AUDIO unconditionally and st->force == -1 falls back to the classifier decision) */
     340             : #ifdef DEBUGGING
     341             :         if ( st->codec_mode == MODE1 && ( st->force == 1 || ( st->force == -1 && ( st->sp_aud_decision2 || st->GSC_noisy_speech ) ) ) )
     342             : #else
     343        2050 :         if ( st->codec_mode == MODE1 && ( st->sp_aud_decision2 || st->GSC_noisy_speech ) )
     344             : #endif
     345             :         {
     346         634 :             st->coder_type = AUDIO;
     347         634 :             st->hGSCEnc->noise_lev = NOISE_LEVEL_SP0;
     348             :         }
     349             :     }
     350             :     else
     351             :     {
     352        1050 :         st->sp_aud_decision0 = st->sp_aud_decision1; /* in this branch only the 1st stage decision is stored; sp_aud_decision2 is not written here */
     353             :     }
     354             : 
     355             : 
     356        3100 :     return;
     357             : }
     358             : 
     359             : 
     360             : /*---------------------------------------------------------------------*
     361             :  * sp_mus_classif_1st()
     362             :  *
     363             :  * 1st stage speech/music classification (based on the GMM model)
     364             :  *---------------------------------------------------------------------*/
     365             : 
     366             : /*! r: decision flag (1-music, 0-speech or noise) */
     367        3100 : static int16_t sp_mus_classif_1st(
     368             :     Encoder_State *st,             /* i/o: state structure                                 */
     369             :     const int16_t localVAD_HE_SAD, /* i  : local VAD HE flag                               */
     370             :     const float lsp_new[M],        /* i  : LSPs in current frame                           */
     371             :     const float cor_map_sum,       /* i  : correlation map sum (from multi-harmonic anal.) */
     372             :     const float epsP[M + 1],       /* i  : LP prediction error                             */
     373             :     const float PS[],              /* i  : energy spectrum                                 */
     374             :     float non_sta,                 /* i  : unbound non-stationarity                        */
     375             :     float relE,                    /* i  : relative frame energy                           */
     376             :     float *voi_fv,                 /* o  : scaled voicing feature                          */
     377             :     float *cor_map_sum_fv,         /* o  : scaled correlation map feature                  */
     378             :     float *LPCErr,                 /* o  : scaled LP prediction error feature              */
     379             :     int16_t *high_lpn_flag         /* o  : sp/mus LPN flag                                 */
     380             : )
     381             : {
     382             :     int16_t i, k, p, dec, vad;
     383             :     float dlp, ftmp, lepsP1, sum_PS, ps_diff, ps_sta, wrelE, wdrop, wght, mx;
     384        3100 :     float FV[N_FEATURES], *pFV = FV, PS_norm[128], dPS[128], lsp[M];
     385        3100 :     float pys, pym, xm[N_FEATURES], py, lps = 0, lpm = 0;
     386             :     const float *pSF;
     387        3100 :     float pyn, lpn = 0;
     388             : 
     389        3100 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
     390             : 
     391             :     /*------------------------------------------------------------------*
     392             :      * Initialization
     393             :      *------------------------------------------------------------------*/
     394             : 
     395        3100 :     vad = localVAD_HE_SAD;
     396             : 
     397             :     /*------------------------------------------------------------------*
     398             :      * Preparation of the feature vector
     399             :      *------------------------------------------------------------------*/
     400             : 
     401             :     /* [0] OL pitch */
     402        3100 :     if ( st->tc_cnt == 1 || st->tc_cnt == 2 )
     403             :     {
     404         262 :         *pFV++ = (float) st->pitch[2];
     405             :     }
     406             :     else
     407             :     {
     408        2838 :         *pFV++ = (float) ( st->pitch[0] + st->pitch[1] + st->pitch[2] ) / 3.0f;
     409             :     }
     410             : 
     411             :     /* [1] voicing */
     412        3100 :     if ( st->tc_cnt == 1 || st->tc_cnt == 2 )
     413             :     {
     414         262 :         *pFV++ = st->voicing[2];
     415             :     }
     416             :     else
     417             :     {
     418        2838 :         *pFV++ = (float) ( st->voicing[0] + st->voicing[1] + st->voicing[2] ) / 3.0f;
     419             :     }
     420             : 
     421             :     /* [2,3,4,5,6] LSFs */
     422        3100 :     mvr2r( lsp_new, lsp, M );
     423             : 
     424        3100 :     ftmp = (float) acos( lsp[1] );
     425        3100 :     *pFV++ = ftmp + hSpMusClas->last_lsp[1];
     426        3100 :     hSpMusClas->last_lsp[1] = ftmp;
     427             : 
     428        3100 :     ftmp = (float) acos( lsp[2] );
     429        3100 :     *pFV++ = ftmp + hSpMusClas->last_lsp[2];
     430        3100 :     hSpMusClas->last_lsp[2] = ftmp;
     431             : 
     432        3100 :     ftmp = (float) acos( lsp[3] );
     433        3100 :     *pFV++ = ftmp + hSpMusClas->last_lsp[3];
     434        3100 :     hSpMusClas->last_lsp[3] = ftmp;
     435             : 
     436        3100 :     ftmp = (float) acos( lsp[4] );
     437        3100 :     *pFV++ = ftmp + hSpMusClas->last_lsp[4];
     438        3100 :     hSpMusClas->last_lsp[4] = ftmp;
     439             : 
     440        3100 :     ftmp = (float) acos( lsp[5] );
     441        3100 :     *pFV++ = ftmp + hSpMusClas->last_lsp[5];
     442        3100 :     hSpMusClas->last_lsp[5] = ftmp;
     443             : 
     444             :     /* [7] cor_map_sum */
     445        3100 :     *pFV++ = cor_map_sum + hSpMusClas->last_cor_map_sum;
     446        3100 :     hSpMusClas->last_cor_map_sum = cor_map_sum;
     447             : 
     448             :     /* [8] non_sta */
     449        3100 :     *pFV++ = non_sta + hSpMusClas->last_non_sta;
     450        3100 :     hSpMusClas->last_non_sta = non_sta;
     451             : 
     452             :     /* [9] epsP */
     453        3100 :     if ( st->bwidth == NB )
     454             :     {
     455             :         /* do not take into account (statistics are too different) */
     456           0 :         *pFV++ = -1.647f;
     457             :     }
     458             :     else
     459             :     {
     460        3100 :         lepsP1 = logf( epsP[1] + 1e-5f );
     461        3100 :         ftmp = logf( epsP[13] ) - lepsP1;
     462        3100 :         *pFV++ = ftmp + hSpMusClas->past_epsP2;
     463        3100 :         hSpMusClas->past_epsP2 = ftmp;
     464             :     }
     465             : 
     466             :     /* calculation of differential normalized power spectrum */
     467        3100 :     sum_PS = 1e-5f;
     468      210800 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
     469             :     {
     470      207700 :         sum_PS += PS[i];
     471             :     }
     472             : 
     473      210800 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
     474             :     {
     475      207700 :         PS_norm[i] = PS[i] / sum_PS;
     476      207700 :         dPS[i] = fabsf( PS_norm[i] - hSpMusClas->past_PS[i - LOWEST_FBIN] );
     477             :     }
     478             : 
     479             :     /* [10] ps_diff (spectral difference) */
     480        3100 :     ps_diff = 0;
     481      210800 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
     482             :     {
     483      207700 :         ps_diff += dPS[i];
     484             :     }
     485             : 
     486        3100 :     ps_diff = logf( ps_diff + 1e-5f );
     487        3100 :     *pFV++ = ps_diff + hSpMusClas->past_ps_diff;
     488        3100 :     hSpMusClas->past_ps_diff = ps_diff;
     489             : 
     490             :     /* [11] ps_sta (spectral stationarity) */
     491        3100 :     ps_sta = 0;
     492      210800 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
     493             :     {
     494      207700 :         mx = PS_norm[i] > hSpMusClas->past_PS[i - LOWEST_FBIN] ? PS_norm[i] : hSpMusClas->past_PS[i - LOWEST_FBIN];
     495      207700 :         ps_sta += mx / ( dPS[i] + 1e-5f );
     496             :     }
     497             : 
     498        3100 :     *pFV++ = logf( ps_sta + 1e-5f );
     499        3100 :     mvr2r( &PS_norm[LOWEST_FBIN], hSpMusClas->past_PS, HIGHEST_FBIN - LOWEST_FBIN );
     500             : 
     501             :     /*------------------------------------------------------------------*
     502             :      * Scaling of the feature vector
     503             :      *------------------------------------------------------------------*/
     504             : 
     505        3100 :     pFV = FV;
     506        3100 :     if ( st->bwidth == NB )
     507             :     {
     508           0 :         pSF = SF_8k;
     509             :     }
     510             :     else
     511             :     {
     512        3100 :         pSF = SF;
     513             :     }
     514             : 
     515       40300 :     for ( i = 0; i < N_FEATURES; i++, pFV++, pSF += 2 )
     516             :     {
     517       37200 :         *pFV = pSF[0] * *pFV + pSF[1];
     518             :     }
     519             : 
     520             :     /* store some scaled parameters for later correction of the 1st stage speech/music classification */
     521        3100 :     *voi_fv = FV[1];
     522        3100 :     *cor_map_sum_fv = FV[7];
     523        3100 :     *LPCErr = FV[9];
     524             : 
     525             :     /*------------------------------------------------------------------*
     526             :      * Calculation of posterior probability
     527             :      * Log-probability
     528             :      *------------------------------------------------------------------*/
     529             : 
     530        3100 :     pys = pym = pyn = 1e-5f;
     531             : 
     532             :     /* run loop for all mixtures (for each mixture, calculate the probability of speech or noise and the probability of music) */
     533       21700 :     for ( k = 0; k < N_MIXTURES; k++ )
     534             :     {
     535             :         /* active frames - calculate the probability of speech */
     536      241800 :         for ( p = 0; p < N_FEATURES; p++ )
     537             :         {
     538      223200 :             xm[p] = FV[p] - m_speech[k * N_FEATURES + p];
     539             :         }
     540             : 
     541       18600 :         py = lvm_speech[k] + dot_product_mat( xm, &invV_speech[k * N_FEATURES * N_FEATURES], N_FEATURES );
     542       18600 :         pys += expf( py );
     543             :         /* inactive frames - calculate the probability of noise */
     544      241800 :         for ( p = 0; p < N_FEATURES; p++ )
     545             :         {
     546      223200 :             xm[p] = FV[p] - m_noise[k * N_FEATURES + p];
     547             :         }
     548             : 
     549       18600 :         py = lvm_noise[k] + dot_product_mat( xm, &invV_noise[k * N_FEATURES * N_FEATURES], N_FEATURES );
     550       18600 :         pyn += expf( py );
     551             : 
     552             :         /* either active or inactive frames - calculate the probability of music */
     553      241800 :         for ( p = 0; p < N_FEATURES; p++ )
     554             :         {
     555      223200 :             xm[p] = FV[p] - m_music[k * N_FEATURES + p];
     556             :         }
     557             : 
     558       18600 :         py = lvm_music[k] + dot_product_mat( xm, &invV_music[k * N_FEATURES * N_FEATURES], N_FEATURES );
     559       18600 :         pym += expf( py );
     560             :     }
     561             : 
     562             :     /* calculate log-probability */
     563        3100 :     lps = logf( pys ) - 0.5f * N_FEATURES * logf( PI2 );
     564        3100 :     lpm = logf( pym ) - 0.5f * N_FEATURES * logf( PI2 );
     565        3100 :     lpn = logf( pyn ) - 0.5f * N_FEATURES * logf( PI2 );
     566             : 
     567        3100 :     *high_lpn_flag = 0;
     568        3100 :     if ( lpn > lps && lpn > lpm )
     569             :     {
     570          43 :         *high_lpn_flag = 1;
     571             :     }
     572             : 
     573        3100 :     if ( !vad )
     574             :     {
     575             :         /* artificially increase log-probability of noise */
     576          84 :         lps = lpn * 1.2f;
     577             :     }
     578             : 
     579        3100 :     hSpMusClas->lpm = lpm;
     580        3100 :     hSpMusClas->lps = lps;
     581             : 
     582             :     /* determine HQ Generic speech class */
     583        3100 :     if ( st->hHQ_core != NULL )
     584             :     {
     585        3100 :         if ( lps > lpm + 0.5f )
     586             :         {
     587        1414 :             st->hHQ_core->hq_generic_speech_class = 1;
     588             :         }
     589             :         else
     590             :         {
     591        1686 :             st->hHQ_core->hq_generic_speech_class = 0;
     592             :         }
     593             :     }
     594             : 
     595             :     /*------------------------------------------------------------------*
     596             :      * State machine (sp_mus_state < 0 .. inactive, > 0 .. entry, = 0 .. active )
     597             :      *------------------------------------------------------------------*/
     598             : 
     599        3100 :     if ( vad )
     600             :     {
     601        3016 :         if ( relE < -20 || ( lps <= -5 && lpm <= -5 ) )
     602             :         {
     603         397 :             if ( hSpMusClas->sp_mus_state > 0 )
     604             :             {
     605          71 :                 if ( hSpMusClas->sp_mus_state < HANG_LEN )
     606             :                 {
     607             :                     /* energy is too low but we are in entry period -> reset the inactive counter to allow new entry later */
     608           9 :                     hSpMusClas->inact_cnt = 0;
     609             :                 }
     610             : 
     611             :                 /* energy is too low -> we are going to instable state */
     612          71 :                 hSpMusClas->sp_mus_state = 0;
     613             :             }
     614         326 :             else if ( hSpMusClas->sp_mus_state > -HANG_LEN )
     615             :             {
     616             :                 /* energy is still too low -> we are still in instable state */
     617         154 :                 hSpMusClas->sp_mus_state--;
     618             :             }
     619             :         }
     620        2619 :         else if ( hSpMusClas->sp_mus_state <= 0 )
     621             :         {
     622          71 :             if ( hSpMusClas->inact_cnt == 0 )
     623             :             {
     624             : 
     625          24 :                 hSpMusClas->sp_mus_state = 1;
     626             :             }
     627             :             else
     628             :             {
     629             : 
     630          47 :                 hSpMusClas->sp_mus_state = HANG_LEN;
     631             :             }
     632             : 
     633          71 :             hSpMusClas->inact_cnt = 12;
     634             :         }
     635        2548 :         else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
     636             :         {
     637             :             /* we are inside an entry period -> increment the counter of entry frames */
     638         129 :             hSpMusClas->sp_mus_state++;
     639             :         }
     640             : 
     641        3016 :         if ( hSpMusClas->sp_mus_state < 0 && hSpMusClas->inact_cnt > 0 )
     642             :         {
     643         196 :             hSpMusClas->inact_cnt--;
     644             :         }
     645             :     }
     646             :     else
     647             :     {
     648          84 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
     649             :         {
     650           0 :             hSpMusClas->inact_cnt = 0;
     651             :         }
     652          84 :         else if ( hSpMusClas->inact_cnt > 0 )
     653             :         {
     654          26 :             hSpMusClas->inact_cnt--;
     655             :         }
     656             : 
     657          84 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
     658             :         {
     659           0 :             hSpMusClas->sp_mus_state = -HANG_LEN;
     660             :         }
     661          84 :         else if ( hSpMusClas->sp_mus_state > 0 )
     662             :         {
     663           0 :             hSpMusClas->sp_mus_state = -1;
     664             :         }
     665          84 :         else if ( hSpMusClas->sp_mus_state > -HANG_LEN )
     666             :         {
     667             :             /* we are in inactive state */
     668          45 :             hSpMusClas->sp_mus_state--;
     669             :         }
     670             :     }
     671             : 
     672             :     /*------------------------------------------------------------------*
     673             :      * Decision without hangover
     674             :      * Weighted decision
     675             :      *------------------------------------------------------------------*/
     676             : 
     677             :     /* decision without hangover (0 - speech/noise, 1 - music) */
     678        3100 :     dec = lpm > lps;
     679        3100 :     dlp = lpm - lps;
     680             : 
     681        3100 :     if ( !vad )
     682             :     {
     683          84 :         dec = 0;
     684          84 :         dlp = 0;
     685             :     }
     686             : 
     687             :     /* calculate weight based on relE (close to 0.01 in low-E regions, close to 1 in high-E regions) */
     688        3100 :     wrelE = 1.0f + relE / 15;
     689             : 
     690        3100 :     if ( wrelE > 1.0f )
     691             :     {
     692        1164 :         wrelE = 1.0f;
     693             :     }
     694        1936 :     else if ( wrelE < 0.01f )
     695             :     {
     696         665 :         wrelE = 0.01f;
     697             :     }
     698             : 
     699             :     /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */
     700        3100 :     if ( dlp < 0 && dlp < hSpMusClas->past_dlp[0] )
     701             :     {
     702         881 :         if ( hSpMusClas->past_dlp[0] > 0 )
     703             :         {
     704         280 :             hSpMusClas->wdrop = -dlp;
     705             :         }
     706             :         else
     707             :         {
     708         601 :             hSpMusClas->wdrop += hSpMusClas->past_dlp[0] - dlp;
     709             :         }
     710             :     }
     711             :     else
     712             :     {
     713        2219 :         hSpMusClas->wdrop = 0;
     714             :     }
     715             : 
     716        3100 :     wdrop = hSpMusClas->wdrop / 20;
     717             : 
     718        3100 :     if ( wdrop > 1.0f )
     719             :     {
     720           0 :         wdrop = 1.0f;
     721             :     }
     722        3100 :     else if ( wdrop < 0.1f )
     723             :     {
     724        2627 :         wdrop = 0.1f;
     725             :     }
     726             : 
     727             :     /* combine weights into one */
     728        3100 :     wght = wrelE * wdrop;
     729        3100 :     if ( wght < 0.01f )
     730             :     {
     731         727 :         wght = 0.01f;
     732             :     }
     733             : 
     734             :     /* calculate weighted decision */
     735        3100 :     hSpMusClas->wdlp_0_95_sp = wght * dlp + ( 1 - wght ) * hSpMusClas->wdlp_0_95_sp;
     736             : 
     737        3100 :     if ( hSpMusClas->sp_mus_state == -HANG_LEN )
     738             :     {
     739         230 :         hSpMusClas->wdlp_0_95_sp = 0;
     740             :     }
     741             : 
     742             :     /*------------------------------------------------------------------*
     743             :      * Final speech/music decision
     744             :      *------------------------------------------------------------------*/
     745             : 
     746        3100 :     if ( !vad && hSpMusClas->sp_mus_state == -HANG_LEN )
     747             :     {
     748             :         /* inactive state */
     749          43 :         dec = 0;
     750             :     }
     751        3057 :     else if ( hSpMusClas->sp_mus_state <= 0 )
     752             :     {
     753             :         /* transition from active to inactive state or instable state */
     754         438 :         dec = hSpMusClas->past_dec[0];
     755             :     }
     756        2619 :     else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
     757             :     {
     758             :         /* entry state -> final decision is calculated based on weighted average of past non-binary decisions */
     759         138 :         ftmp = w_spmus[hSpMusClas->sp_mus_state - 1][0] * dlp;
     760         138 :         ftmp += dotp( &w_spmus[hSpMusClas->sp_mus_state - 1][1], hSpMusClas->past_dlp, HANG_LEN - 1 );
     761         138 :         dec = ftmp > 2.0f;
     762             :     }
     763             :     else
     764             :     {
     765             :         /* stable active state */
     766        2481 :         if ( hSpMusClas->wdlp_0_95_sp > 0 && hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 )
     767             :         {
     768             :             /* switching from speech to music */
     769          17 :             dec = 1;
     770             :         }
     771        2464 :         else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp < 0 )
     772             :         {
     773             :             /* switching from music to speech */
     774          17 :             dec = 0;
     775             :         }
     776             :         else
     777             :         {
     778        2447 :             dec = hSpMusClas->past_dec[0];
     779             :         }
     780             :     }
     781             : 
     782             :     /*------------------------------------------------------------------*
     783             :      * Updates
     784             :      *------------------------------------------------------------------*/
     785             : 
     786             :     /* update buffer of past non-binary decisions */
     787        3100 :     mvr2r( &hSpMusClas->past_dlp[0], &hSpMusClas->past_dlp[1], HANG_LEN - 2 );
     788        3100 :     hSpMusClas->past_dlp[0] = dlp;
     789             : 
     790             :     /* update buffer of past binary decisions */
     791        3100 :     mvs2s( &hSpMusClas->past_dec[0], &hSpMusClas->past_dec[1], HANG_LEN - 2 );
     792        3100 :     hSpMusClas->past_dec[0] = dec;
     793             : 
     794        3100 :     return dec;
     795             : }
     796             : 
     797             : 
     798             : /*---------------------------------------------------------------------*
     799             :  * sp_mus_classif_2nd()
     800             :  *
     801             :  * 2nd stage speech/music classifier (convert music to speech for onsets); may also force TC coding and reports attacks via *attack_flag
     802             :  *---------------------------------------------------------------------*/
     803             : 
     804        2050 : static void sp_mus_classif_2nd(
     805             :     Encoder_State *st,    /* i/o: encoder state structure     */
     806             :     const float Etot,     /* i  : total frame energy          */
     807             :     int16_t *attack_flag, /* i/o: attack flag (GSC or TC)     */
     808             :     const float *inp      /* i  : input signal                */
     809             : )
     810             : {
     811             :     int16_t attack; /* value returned by attack_det(), compared against segment-position thresholds below */
     812        2050 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
     813             : 
     814             :     /* initialization */
     815        2050 :     *attack_flag = 0;
     816             : 
     817             :     /* signal stability estimation */
     818        2050 :     stab_est( Etot, hSpMusClas->gsc_lt_diff_etot, &hSpMusClas->gsc_mem_etot, &hSpMusClas->gsc_nb_thr_3, &hSpMusClas->gsc_nb_thr_1, hSpMusClas->gsc_thres, &hSpMusClas->gsc_last_music_flag, st->vad_flag );
     819             : 
     820             :     /* calculate variance of correlation */
     821        2050 :     var_cor_calc( st->old_corr, &hSpMusClas->mold_corr, hSpMusClas->var_cor_t, &hSpMusClas->high_stable_cor );
     822             : 
     823             :     /* attack detection (st->clas is passed for both last_clas and clas, element mode is fixed to EVS_MONO) */
     824        2050 :     attack = attack_det( inp, st->clas, st->localVAD, st->coder_type, st->total_brate, EVS_MONO, st->clas, hSpMusClas->finc_prev, &hSpMusClas->lt_finc, &hSpMusClas->last_strong_attack );
     825             : 
     826             :     /* change decision from music to speech in certain special cases */
     827        2050 :     if ( st->sp_aud_decision1 == 1 )
     828             :     {
     829         677 :         if ( hSpMusClas->ener_RAT < 0.18f && hSpMusClas->lt_dec_thres > 15.0f )
     830             :         {
     831             :             /* strong music decision but almost no content below 1kHz */
     832           0 :             st->sp_aud_decision2 = 0;
     833             :         }
     834         677 :         else if ( hSpMusClas->high_stable_cor && st->pitch[0] >= 130 )
     835             :         {
     836             :             /* prevent GSC in highly correlated signal with low energy variation */
     837             :             /* this is basically a patch against bassoon-type of music */
     838           0 :             st->sp_aud_decision2 = 0;
     839             : 
     840           0 :             if ( st->codec_mode == MODE1 && st->coder_type == TRANSITION )
     841             :             {
     842           0 :                 st->coder_type = GENERIC;
     843             :             }
     844             :         }
     845         677 :         else if ( hSpMusClas->gsc_lt_diff_etot[MAX_LT - 1] > 4.5f && ( hSpMusClas->gsc_lt_diff_etot[MAX_LT - 1] - hSpMusClas->gsc_lt_diff_etot[MAX_LT - 2] > 10.0f ) )
     846             :         {
     847          21 :             if ( st->tc_cnt == 1 )
     848             :             {
     849             :                 /* do TC coding instead of GC/VC if onset has been already declared before */
     850           0 :                 st->sp_aud_decision2 = 0;
     851             : 
     852           0 :                 if ( st->codec_mode == MODE1 )
     853             :                 {
     854           0 :                     st->coder_type = TRANSITION;
     855             :                 }
     856             :             }
     857             :             else
     858             :             {
     859          21 :                 if ( attack >= ATT_3LSUB_POS )
     860             :                 {
     861             :                     /* do TC coding if attack is located in the last subframe */
     862           6 :                     st->sp_aud_decision2 = 0;
     863           6 :                     *attack_flag = attack + 1; /* attack position offset by one, so a reported attack is never 0 */
     864             : 
     865           6 :                     if ( st->codec_mode == MODE1 )
     866             :                     {
     867           6 :                         st->coder_type = TRANSITION;
     868             :                     }
     869             :                 }
     870          15 :                 else if ( attack >= ATT_SEG_LEN / 2 )
     871             :                 {
     872             :                     /* do GSC coding if attack is located after the first quarter of the first subframe */
     873             :                     /* (pre-echo will be treated at the decoder side) */
     874           0 :                     st->sp_aud_decision2 = 1;
     875           0 :                     *attack_flag = 31; /* NOTE(review): fixed value 31 marks this GSC attack case; its meaning is defined by the consumer of attack_flag - confirm */
     876             :                 }
     877             :             }
     878             :         }
     879             :     }
     880        1373 :     else if ( st->localVAD == 1 && st->coder_type == GENERIC && ( ( attack >= ATT_3LSUB_POS && st->total_brate < ACELP_24k40 ) || ( attack >= ATT_3LSUB_POS_16k && st->total_brate >= ACELP_24k40 && st->total_brate < ACELP_48k ) ) )
     881             :     {
     882             :         /* do TC coding if attack is located in the last subframe */
     883          19 :         *attack_flag = attack + 1; /* attack position offset by one, so a reported attack is never 0 */
     884             : 
     885          19 :         if ( st->codec_mode == MODE1 )
     886             :         {
     887          19 :             st->coder_type = TRANSITION;
     888             :         }
     889             :     }
     890             : 
     891        2050 :     return;
     892             : }
     893             : 
     894             : 
     895             : /*---------------------------------------------------------------------*
     896             :  * tonal_det()
     897             :  *
     898             :  * Tonal detector based on spectral stability and harmonicity
     899             :  *---------------------------------------------------------------------*/
     900             : 
     901      414383 : static float tonal_det(
     902             :     const float S[],        /* i  : spectrum values; the first TOD_NSPEC entries are read                 */
     903             :     int16_t vad_flag,       /* i  : VAD flag, smoothed into the adaptive weight                           */
     904             :     float tod_S_map_lt[],   /* i/o: long-term smoothed copy of S[] (TOD_NSPEC entries)                    */
     905             :     float *tod_thr_lt,      /* i/o: adaptive decision threshold, limited by THR_MASS_MIN and THR_MASS_MAX */
     906             :     float *tod_weight,      /* i/o: adaptive smoothing weight, limited by 1 - TON_ALPHA and TON_ALPHA     */
     907             :     float *tod_S_mass_prev, /* i/o: mean of tod_S_map_lt[] computed by the previous call                  */
     908             :     float *tod_S_mass_lt )  /* i/o: long-term smoothed mean of tod_S_map_lt[]                             */
     909             : {
     910             :     int16_t i;
     911             :     float S_mass, alpha; /* S_mass: mean of the smoothed spectrum; alpha: weight given to the old long-term value */
     912             : 
     913             :     /* update the adaptive weight */
     914      414383 :     *tod_weight = TON_ALPHA * *tod_weight + ( 1 - TON_ALPHA ) * vad_flag;
     915      414383 :     if ( *tod_weight > TON_ALPHA )
     916             :     {
     917      268725 :         *tod_weight = TON_ALPHA;
     918             :     }
     919      145658 :     else if ( *tod_weight < ( 1 - TON_ALPHA ) )
     920             :     {
     921       29279 :         *tod_weight = 1 - TON_ALPHA;
     922             :     }
     923             : 
     924             :     /* calculate LT spectral correlation in each band up to 4KHz */
     925      414383 :     S_mass = 0.0f;
     926    33565023 :     for ( i = 0; i < TOD_NSPEC; i++ )
     927             :     {
     928    33150640 :         tod_S_map_lt[i] = *tod_weight * tod_S_map_lt[i] + ( 1 - *tod_weight ) * S[i];
     929             : 
     930    33150640 :         S_mass += tod_S_map_lt[i];
     931             :     }
     932      414383 :     S_mass /= TOD_NSPEC;
     933             : 
     934      414383 :     if ( S_mass > *tod_S_mass_prev )
     935             :     {
     936      201357 :         alpha = 0.7f; /* rising mass: keep more of the old long-term value */
     937             :     }
     938             :     else
     939             :     {
     940      213026 :         alpha = 0.3f; /* falling or equal mass: follow the new value more closely */
     941             :     }
     942      414383 :     *tod_S_mass_prev = S_mass;
     943      414383 :     *tod_S_mass_lt = alpha * *tod_S_mass_lt + ( 1 - alpha ) * S_mass;
     944      414383 :     S_mass = *tod_S_mass_lt; /* from here on S_mass is the long-term smoothed value */
     945             : 
     946             :     /* updating adaptive decision threshold */
     947      414383 :     if ( S_mass > *tod_thr_lt )
     948             :     {
     949        3580 :         *tod_thr_lt -= THR_MASS_STEP_DN;
     950             :     }
     951             :     else
     952             :     {
     953      410803 :         *tod_thr_lt += THR_MASS_STEP_UP;
     954             :     }
     955             : 
     956      414383 :     if ( *tod_thr_lt > THR_MASS_MAX )
     957             :     {
     958      409801 :         *tod_thr_lt = THR_MASS_MAX;
     959             :     }
     960             : 
     961      414383 :     if ( *tod_thr_lt < THR_MASS_MIN )
     962             :     {
     963        3026 :         *tod_thr_lt = THR_MASS_MIN;
     964             :     }
     965             : 
     966      414383 :     return S_mass; /* equals the updated *tod_S_mass_lt */
     967             : }
     968             : 
     969             : /*---------------------------------------------------------------------*
     970             :  * var_cor_calc()
     971             :  *
     972             :  * Calculate variance of correlation
     973             :  *---------------------------------------------------------------------*/
     974             : 
     975      416433 : static void var_cor_calc(
     976             :     const float old_corr,      /* i  : correlation value to insert into the history                  */
     977             :     float *mold_corr,          /* i/o: running average of the correlation                            */
     978             :     float var_cor_t[],         /* i/o: history of correlation values (VAR_COR_LEN entries, newest first) */
     979             :     int16_t *high_stable_cor ) /* o  : 1 for a highly-correlated stable signal, 0 otherwise          */
     980             : {
     981             :     int16_t i;
     982             :     float var_cor;
     983             : 
     984             :     /* update buffer of old correlation values */
     985     4164330 :     for ( i = VAR_COR_LEN - 1; i > 0; i-- )
     986             :     {
     987     3747897 :         var_cor_t[i] = var_cor_t[i - 1];
     988             :     }
     989      416433 :     var_cor_t[i] = old_corr; /* i == 0 after the loop: the new value becomes the first entry */
     990             : 
     991             :     /* calculate variance of correlation */
     992      416433 :     var_cor = var( var_cor_t, VAR_COR_LEN );
     993             : 
     994             :     /* set flag in case of highly-correlated stable signal (uses the average from before this call's update) */
     995      416433 :     if ( *mold_corr > 0.8f && var_cor < 5e-4f )
     996             :     {
     997        8550 :         *high_stable_cor = 1;
     998             :     }
     999             :     else
    1000             :     {
    1001      407883 :         *high_stable_cor = 0;
    1002             :     }
    1003             : 
    1004             :     /* update average correlation */
    1005      416433 :     *mold_corr = 0.1f * old_corr + 0.9f * *mold_corr;
    1006             : 
    1007      416433 :     return;
    1008             : }
    1009             : 
    1010             : /*---------------------------------------------------------------------*
    1011             :  * attack_det()
    1012             :  *
    1013             :  * Attack detection
    1014             :  *---------------------------------------------------------------------*/
    1015             : 
    1016      416433 : static int16_t attack_det(
    1017             :     const float *inp,           /* i  : input signal                           */
    1018             :     const int16_t last_clas,    /* i  : last signal clas                       */
    1019             :     const int16_t localVAD,     /* i  : local VAD flag                         */
    1020             :     const int16_t coder_type,   /* i  : coder type                             */
    1021             :     const int32_t total_brate,  /* i  : total bitrate                          */
    1022             :     const int16_t element_mode, /* i  : IVAS element mode                      */
    1023             :     const int16_t clas,         /* i  : signal class                           */
    1024             :     float finc_prev[],          /* i/o: previous finc                          */
    1025             :     float *lt_finc,             /* i/o: long-term mean finc                    */
    1026             :     int16_t *last_strong_attack /* i/o: last strong attack flag                */
    1027             : )
    1028             : {
    1029             :     int16_t i, attack;
    1030             :     float etmp, etmp2, finc[ATT_NSEG];
    1031             :     int16_t att_3lsub_pos;
    1032             :     int16_t attack1;
    1033             : 
    1034      416433 :     att_3lsub_pos = ATT_3LSUB_POS;
    1035      416433 :     if ( total_brate >= ACELP_24k40 )
    1036             :     {
    1037        1000 :         att_3lsub_pos = ATT_3LSUB_POS_16k;
    1038             :     }
    1039             : 
    1040             :     /* compute energy per section */
    1041    13742289 :     for ( i = 0; i < ATT_NSEG; i++ )
    1042             :     {
    1043    13325856 :         finc[i] = sum2_f( inp + i * ATT_SEG_LEN, ATT_SEG_LEN );
    1044             :     }
    1045             : 
    1046      416433 :     attack = maximum( finc, ATT_NSEG, &etmp );
    1047      416433 :     attack1 = attack;
    1048             : 
    1049      416433 :     if ( localVAD == 1 && coder_type == GENERIC )
    1050             :     {
    1051             :         /* compute mean energy in the first three subframes */
    1052      209252 :         etmp = mean( finc, att_3lsub_pos );
    1053             : 
    1054             :         /* compute mean energy after the attack */
    1055      209252 :         etmp2 = mean( finc + attack, ATT_NSEG - attack );
    1056             : 
    1057             :         /* and compare them */
    1058      209252 :         if ( etmp * 8 > etmp2 )
    1059             :         {
    1060             :             /* stop, if the attack is not sufficiently strong */
    1061      202823 :             attack = 0;
    1062             :         }
    1063             : 
    1064      209252 :         if ( last_clas == VOICED_CLAS && etmp * 20 > etmp2 )
    1065             :         {
    1066             :             /* stop, if the signal was voiced and the attack is not sufficiently strong */
    1067       49980 :             attack = 0;
    1068             :         }
    1069             : 
    1070             :         /* compare wrt. other sections (reduces miss-classification) */
    1071      209252 :         if ( attack > 0 )
    1072             :         {
    1073        5824 :             etmp2 = finc[attack];
    1074             : 
    1075      119270 :             for ( i = 2; i < att_3lsub_pos - 2; i++ )
    1076             :             {
    1077      113892 :                 if ( finc[i] * 2.0f > etmp2 )
    1078             :                 {
    1079             :                     /* stop, if the attack is not sufficiently strong */
    1080         446 :                     attack = 0;
    1081         446 :                     break;
    1082             :                 }
    1083             :             }
    1084             :         }
    1085             : 
    1086      209252 :         if ( attack == 0 && element_mode > EVS_MONO && ( clas < VOICED_TRANSITION || clas == ONSET ) )
    1087             :         {
    1088      135235 :             mvr2r( finc, finc_prev, attack1 );
    1089             : 
    1090             :             /* compute mean energy before the attack */
    1091      135235 :             etmp = mean( finc_prev, ATT_NSEG );
    1092             : 
    1093      135235 :             etmp2 = finc[attack1];
    1094             : 
    1095      135235 :             if ( ( etmp * 16 < etmp2 ) || ( etmp * 12 < etmp2 && last_clas == UNVOICED_CLAS ) )
    1096             :             {
    1097        5020 :                 attack = attack1;
    1098             :             }
    1099             : 
    1100      135235 :             if ( 20 * *lt_finc > etmp2 || *last_strong_attack )
    1101             :             {
    1102      127360 :                 attack = 0;
    1103             :             }
    1104             :         }
    1105             : 
    1106      209252 :         *last_strong_attack = attack;
    1107             :     }
    1108             : 
    1109             :     /* compare wrt. other sections (reduces misclassification) */
    1110      207181 :     else if ( attack > 0 )
    1111             :     {
    1112     2551047 :         for ( i = 2; i < att_3lsub_pos - 2; i++ )
    1113             :         {
    1114     2476273 :             if ( i != attack && finc[i] * 1.3f > finc[attack] )
    1115             :             {
    1116             :                 /* stop, if the attack is not sufficiently strong */
    1117      122805 :                 attack = 0;
    1118      122805 :                 break;
    1119             :             }
    1120             :         }
    1121      197579 :         *last_strong_attack = 0;
    1122             :     }
    1123             : 
    1124             :     /* updates */
    1125      416433 :     mvr2r( finc, finc_prev, ATT_NSEG );
    1126      416433 :     *lt_finc = 0.95f * *lt_finc + 0.05f * mean( finc, ATT_NSEG );
    1127             : 
    1128      416433 :     return attack;
    1129             : }
    1130             : 
    1131             : /*---------------------------------------------------------------------*
    1132             :  * ivas_smc_gmm()
    1133             :  *
    1134             :  * 1st stage of the speech/music classification (based on the GMM model)
    1135             :  *---------------------------------------------------------------------*/
    1136             : 
    1137             : /*! r: S/M decision (0=speech or noise,1=unclear,2=music) */
    1138     1150734 : int16_t ivas_smc_gmm(
    1139             :     Encoder_State *st,                    /* i/o: state structure                                     */
    1140             :     STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure                         */
    1141             :     const int16_t localVAD_HE_SAD,        /* i  : HE-SAD flag without hangover                        */
    1142             :     const float Etot,                     /* i  : total frame energy                                  */
    1143             :     const float lsp_new[M],               /* i  : LSPs in current frame                               */
    1144             :     const float cor_map_sum,              /* i  : correlation map sum (from multi-harmonic anal.)     */
    1145             :     const float epsP[M + 1],              /* i  : LP prediciton error                                 */
    1146             :     const float PS[],                     /* i  : energy spectrum                                     */
    1147             :     const float non_sta,                  /* i  : unbound non-stationarity                            */
    1148             :     const float relE,                     /* i  : relative frame energy                               */
    1149             :     int16_t *high_lpn_flag,               /* i/o: sp/mus LPN flag                                     */
    1150             :     const int16_t flag_spitch             /* i  : flag to indicate very short stable pitch            */
    1151             : )
    1152             : {
    1153             :     int16_t i, m, dec;
    1154             :     int16_t flag_odv;
    1155             :     float lps, lpm, lpn;
    1156             :     float ps[N_SMC_MIXTURES], pm[N_SMC_MIXTURES], pn[N_SMC_MIXTURES];
    1157             :     float fvm[N_PCA_COEF], lprob;
    1158             :     float dlp, ftmp, sum_PS, ps_diff, ps_sta, wrelE, wdrop, wght;
    1159             :     float wrise;
    1160             :     float dlp_mean2var;
    1161             :     float FV[N_SMC_FEATURES], *pFV, PS_norm[128], dPS[128];
    1162             :     const float *pODV;
    1163             :     float *pFV_st, smc_st_mean_fact;
    1164             :     int16_t relE_attack_flag;
    1165             :     int16_t j, len;
    1166             :     const float *pt_mel_fb;
    1167             :     float melS[NB_MEL_BANDS], mfcc[NB_MEL_BANDS];
    1168             :     int16_t odv_cnt;
    1169             :     int16_t i_out[N_SMC_FEATURES], *p_out;
    1170             : 
    1171             :     /*------------------------------------------------------------------*
    1172             :      * Initialization
    1173             :      *------------------------------------------------------------------*/
    1174             : 
    1175     1150734 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    1176             : 
    1177             :     /*------------------------------------------------------------------*
    1178             :      * State machine (sp_mus_state: -8 = INACTIVE, -7:-1 = UNSTABLE, 0:7 = ENTRY, 8 = STABLE )
    1179             :      *------------------------------------------------------------------*/
    1180             : 
    1181     1150734 :     if ( localVAD_HE_SAD )
    1182             :     {
    1183      974933 :         if ( relE < -20 )
    1184             :         {
    1185       99681 :             if ( hSpMusClas->sp_mus_state > 0 )
    1186             :             {
    1187       10654 :                 if ( hSpMusClas->sp_mus_state < HANG_LEN )
    1188             :                 {
    1189             :                     /* energy is too low but we are in entry period -> reset the inactive counter to allow new entry later */
    1190        2326 :                     hSpMusClas->inact_cnt = 0;
    1191             :                 }
    1192             : 
    1193             :                 /* energy is too low -> we are going to unstable state */
    1194       10654 :                 hSpMusClas->sp_mus_state = 0;
    1195             :             }
    1196       89027 :             else if ( hSpMusClas->sp_mus_state > -HANG_LEN )
    1197             :             {
    1198             :                 /* energy is still too low -> we are still in unstable state */
    1199       29447 :                 hSpMusClas->sp_mus_state--;
    1200             :             }
    1201             :         }
    1202      875252 :         else if ( hSpMusClas->sp_mus_state <= 0 )
    1203             :         {
    1204       22078 :             if ( hSpMusClas->inact_cnt == 0 )
    1205             :             {
    1206             : 
    1207       13799 :                 hSpMusClas->sp_mus_state = 1;
    1208             :             }
    1209             :             else
    1210             :             {
    1211             : 
    1212        8279 :                 hSpMusClas->sp_mus_state = HANG_LEN;
    1213             :             }
    1214             : 
    1215       22078 :             hSpMusClas->inact_cnt = 12;
    1216             :         }
    1217      853174 :         else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1218             :         {
    1219             :             /* we are inside an entry period -> increment the counter of entry frames */
    1220       65577 :             hSpMusClas->sp_mus_state++;
    1221             :         }
    1222             : 
    1223      974933 :         if ( hSpMusClas->sp_mus_state < 0 && hSpMusClas->inact_cnt > 0 )
    1224             :         {
    1225       30363 :             hSpMusClas->inact_cnt--;
    1226             :         }
    1227             :     }
    1228             :     else
    1229             :     {
    1230      175801 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1231             :         {
    1232        1057 :             hSpMusClas->inact_cnt = 0;
    1233             :         }
    1234      174744 :         else if ( hSpMusClas->inact_cnt > 0 )
    1235             :         {
    1236       23520 :             hSpMusClas->inact_cnt--;
    1237             :         }
    1238             : 
    1239      175801 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1240             :         {
    1241        1057 :             hSpMusClas->sp_mus_state = -HANG_LEN;
    1242             :         }
    1243      174744 :         else if ( hSpMusClas->sp_mus_state > 0 )
    1244             :         {
    1245        3479 :             hSpMusClas->sp_mus_state = -1;
    1246             :         }
    1247      171265 :         else if ( hSpMusClas->sp_mus_state > -HANG_LEN )
    1248             :         {
    1249             :             /* we are in inactive state */
    1250       15599 :             hSpMusClas->sp_mus_state--;
    1251             :         }
    1252             :     }
    1253             : 
    1254             :     /* detect attacks based on relE */
    1255     1150734 :     if ( relE > hSpMusClas->prev_relE )
    1256             :     {
    1257      488181 :         hSpMusClas->relE_attack_sum += relE - hSpMusClas->prev_relE;
    1258             :     }
    1259             :     else
    1260             :     {
    1261      662553 :         hSpMusClas->relE_attack_sum = 0;
    1262             :     }
    1263     1150734 :     hSpMusClas->prev_relE = relE;
    1264             : 
    1265             :     /* update counter from last VAD 0->1 change */
    1266     1150734 :     if ( hSpMusClas->prev_vad == 0 && localVAD_HE_SAD == 1 )
    1267             :     {
    1268       15822 :         hSpMusClas->vad_0_1_cnt = 1;
    1269             :     }
    1270     1134912 :     else if ( localVAD_HE_SAD == 1 && hSpMusClas->vad_0_1_cnt > 0 && hSpMusClas->vad_0_1_cnt < 50 )
    1271             :     {
    1272      248931 :         hSpMusClas->vad_0_1_cnt++;
    1273             :     }
    1274             :     else
    1275             :     {
    1276      885981 :         hSpMusClas->vad_0_1_cnt = 0;
    1277             :     }
    1278     1150734 :     hSpMusClas->prev_vad = localVAD_HE_SAD;
    1279             : 
    1280     1150734 :     if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && hSpMusClas->relE_attack_sum > 5.0f )
    1281             :     {
    1282       23128 :         hSpMusClas->relE_attack_cnt++;
    1283             : 
    1284             :         /* set flag only in the first X frames in a series */
    1285       23128 :         if ( hSpMusClas->relE_attack_cnt > 0 && hSpMusClas->relE_attack_cnt < 3 )
    1286             :         {
    1287       16591 :             relE_attack_flag = 1;
    1288             :         }
    1289             :         else
    1290             :         {
    1291        6537 :             relE_attack_flag = 0;
    1292             :         }
    1293             :     }
    1294             :     else
    1295             :     {
    1296     1127606 :         hSpMusClas->relE_attack_cnt = 0;
    1297     1127606 :         relE_attack_flag = 0;
    1298             :     }
    1299             : 
    1300     1150734 :     hSpMusClas->prev_Etot = Etot;
    1301             : 
    1302             :     /*------------------------------------------------------------------*
    1303             :      * Preparation of the feature vector
    1304             :      *------------------------------------------------------------------*/
    1305             : 
    1306     1150734 :     pFV = FV;
    1307             : 
    1308             :     /* [0] OL pitch */
    1309     1150734 :     if ( relE_attack_flag || st->tc_cnt == 1 || st->tc_cnt == 2 )
    1310             :     {
    1311      115893 :         *pFV++ = (float) st->pitch[2];
    1312             :     }
    1313             :     else
    1314             :     {
    1315     1034841 :         *pFV++ = (float) ( st->pitch[0] + st->pitch[1] + st->pitch[2] ) / 3.0f;
    1316             :     }
    1317             : 
    1318             :     /* [1] voicing */
    1319     1150734 :     if ( relE_attack_flag || st->tc_cnt == 1 || st->tc_cnt == 2 )
    1320             :     {
    1321      115893 :         *pFV++ = st->voicing[2];
    1322             :     }
    1323             :     else
    1324             :     {
    1325     1034841 :         *pFV++ = ( st->voicing[0] + st->voicing[1] + st->voicing[2] ) / 3.0f;
    1326             :     }
    1327             : 
    1328             :     /* [2,3,4,5,6] LSFs */
    1329     1150734 :     *pFV++ = acosf( lsp_new[2] );
    1330     1150734 :     *pFV++ = acosf( lsp_new[3] );
    1331     1150734 :     *pFV++ = acosf( lsp_new[4] );
    1332     1150734 :     *pFV++ = acosf( lsp_new[5] );
    1333     1150734 :     *pFV++ = acosf( lsp_new[6] );
    1334             : 
    1335             :     /* [7] cor_map_sum */
    1336     1150734 :     *pFV++ = cor_map_sum;
    1337             : 
    1338             :     /* [8] non_sta */
    1339     1150734 :     *pFV++ = non_sta;
    1340             : 
    1341             :     /* [9] epsP */
    1342     1150734 :     *pFV++ = logf( epsP[14] + 1e-5f ) - logf( epsP[0] + 1e-5f );
    1343             : 
    1344             :     /* [10,11,12] MFCCs */
    1345     1150734 :     set_zero( melS, NB_MEL_BANDS );
    1346     1150734 :     pt_mel_fb = mel_fb;
    1347    47180094 :     for ( i = 0; i < NB_MEL_BANDS; i++ )
    1348             :     {
    1349    46029360 :         j = mel_fb_start[i];
    1350    46029360 :         len = mel_fb_len[i];
    1351    46029360 :         melS[i] = logf( dotp( &PS[j], pt_mel_fb, len ) + 1e-5f );
    1352    46029360 :         pt_mel_fb += len;
    1353             :     }
    1354             : 
    1355     1150734 :     v_mult_mat( mfcc, melS, dct_mtx, NB_MEL_BANDS, NB_MEL_COEF );
    1356             : 
    1357     1150734 :     *pFV++ = mfcc[2];
    1358     1150734 :     *pFV++ = mfcc[6];
    1359     1150734 :     *pFV++ = mfcc[12];
    1360             : 
    1361             :     /* calculation of differential normalized power spectrum */
    1362     1150734 :     sum_PS = 1e-5f;
    1363    78249912 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
    1364             :     {
    1365    77099178 :         sum_PS += PS[i];
    1366             :     }
    1367             : 
    1368    78249912 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
    1369             :     {
    1370    77099178 :         PS_norm[i] = PS[i] / sum_PS;
    1371    77099178 :         dPS[i] = fabsf( PS_norm[i] - hSpMusClas->past_PS[i - LOWEST_FBIN] );
    1372             :     }
    1373             : 
    1374             :     /* [13] ps_diff (spectral difference) */
    1375     1150734 :     ps_diff = 0;
    1376    78249912 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
    1377             :     {
    1378    77099178 :         ps_diff += dPS[i];
    1379             :     }
    1380             : 
    1381     1150734 :     *pFV++ = ps_diff;
    1382             : 
    1383             :     /* [14] ps_sta (spectral stationarity) */
    1384     1150734 :     ps_sta = 0;
    1385    78249912 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
    1386             :     {
    1387    77099178 :         if ( PS_norm[i] > hSpMusClas->past_PS[i - LOWEST_FBIN] )
    1388             :         {
    1389    36353323 :             ps_sta += PS_norm[i] / ( dPS[i] + 1e-5f );
    1390             :         }
    1391             :         else
    1392             :         {
    1393    40745855 :             ps_sta += hSpMusClas->past_PS[i - LOWEST_FBIN] / ( dPS[i] + 1e-5f );
    1394             :         }
    1395             :     }
    1396             : 
    1397     1150734 :     *pFV++ = logf( ps_sta + 1e-5f );
    1398     1150734 :     mvr2r( &PS_norm[LOWEST_FBIN], hSpMusClas->past_PS, HIGHEST_FBIN - LOWEST_FBIN );
    1399             : 
    1400             :     /* save ps_diff and ps_sta features for XTALK and UNCLR classifier */
    1401     1150734 :     if ( hStereoClassif != NULL )
    1402             :     {
    1403      782031 :         if ( st->idchan == 0 )
    1404             :         {
    1405      420855 :             hStereoClassif->ps_diff_ch1 = ps_diff;
    1406      420855 :             hStereoClassif->ps_sta_ch1 = logf( ps_sta + 1e-5f );
    1407             :         }
    1408             :         else
    1409             :         {
    1410      361176 :             hStereoClassif->ps_diff_ch2 = ps_diff;
    1411      361176 :             hStereoClassif->ps_sta_ch2 = logf( ps_sta + 1e-5f );
    1412             :         }
    1413             :     }
    1414             : 
    1415             :     /*------------------------------------------------------------------*
    1416             :      * Outlier detection based on feature histograms
    1417             :      *------------------------------------------------------------------*/
    1418             : 
    1419     1150734 :     flag_odv = 0;
    1420     1150734 :     if ( localVAD_HE_SAD )
    1421             :     {
    1422      974933 :         pFV = FV;
    1423      974933 :         pODV = hout_intervals;
    1424      974933 :         p_out = i_out;
    1425      974933 :         odv_cnt = 0;
    1426    15598928 :         for ( i = 0; i < N_SMC_FEATURES; i++ )
    1427             :         {
    1428    14623995 :             if ( *pFV < pODV[0] || *pFV > pODV[1] )
    1429             :             {
    1430        2602 :                 *p_out++ = i;
    1431        2602 :                 odv_cnt++;
    1432             :             }
    1433             : 
    1434    14623995 :             pFV++;
    1435    14623995 :             pODV += 2;
    1436             :         }
    1437             : 
    1438             :         /* set outlier flag */
    1439      974933 :         if ( odv_cnt >= 2 )
    1440             :         {
    1441         587 :             flag_odv = 1;
    1442             : 
    1443             :             /* replace outlying features with values from the previous frame */
    1444        2096 :             for ( i = 0; i < odv_cnt; i++ )
    1445             :             {
    1446        1509 :                 FV[i_out[i]] = hSpMusClas->prev_FV[i_out[i]];
    1447             :             }
    1448             :         }
    1449             :     }
    1450             : 
    1451             :     /*------------------------------------------------------------------*
    1452             :      * Adaptive short-term mean filter on feature vector
    1453             :      *------------------------------------------------------------------*/
    1454             : 
    1455     1150734 :     pFV = FV;
    1456     1150734 :     pFV_st = hSpMusClas->FV_st;
    1457     1150734 :     smc_st_mean_fact = SMC_ST_MEAN_FACT;
    1458    18411744 :     for ( i = 0; i < N_SMC_FEATURES; i++ )
    1459             :     {
    1460    17261010 :         *pFV_st = smc_st_mean_fact * ( *pFV_st ) + ( 1 - smc_st_mean_fact ) * ( *pFV );
    1461             : 
    1462    17261010 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && ( relE_attack_flag || flag_odv ) )
    1463             :         {
    1464             :             /* strong attack or outlier frame during entry state -> features cannot be trusted but there is also no useful past info -> */
    1465             :             /* -> do whatever you want because dlp will be reset to 0 anyway */
    1466      248910 :             pFV++;
    1467      248910 :             pFV_st++;
    1468             :         }
    1469    17012100 :         else if ( hSpMusClas->sp_mus_state == HANG_LEN && ( st->tc_cnt == 1 || st->tc_cnt == 2 ) )
    1470             :         {
    1471             :             /* energy attack in stable state -> use current features instead of the long-term average */
    1472     1360095 :             pFV++;
    1473     1360095 :             pFV_st++;
    1474             :         }
    1475             :         else
    1476             :         {
    1477    15652005 :             *pFV++ = *pFV_st++;
    1478             :         }
    1479             :     }
    1480             : 
    1481             :     /* update */
    1482     1150734 :     mvr2r( FV, hSpMusClas->prev_FV, N_SMC_FEATURES );
    1483             : 
    1484             :     /*------------------------------------------------------------------*
    1485             :      * Non-linear power transformation (boxcox) on certain features
    1486             :      *------------------------------------------------------------------*/
    1487             : 
    1488     1150734 :     pFV = FV;
    1489    18411744 :     for ( i = 0; i < N_SMC_FEATURES; i++ )
    1490             :     {
    1491    17261010 :         if ( bcox_lmbd[i] != 0 )
    1492             :         {
    1493     3452202 :             *pFV -= bcox_add_cnst[i];
    1494     3452202 :             if ( *pFV < 1 )
    1495             :             {
    1496      112160 :                 *pFV = 1;
    1497             :             }
    1498     3452202 :             *pFV = ( powf( *pFV, bcox_lmbd[i] ) - 1 ) / bcox_lmbd[i];
    1499             :         }
    1500             : 
    1501    17261010 :         pFV++;
    1502             :     }
    1503             : 
    1504             :     /*------------------------------------------------------------------*
    1505             :      * Scaling of the feature vector
    1506             :      * PCA
    1507             :      *------------------------------------------------------------------*/
    1508             : 
    1509     1150734 :     pFV = FV;
    1510    18411744 :     for ( i = 0; i < N_SMC_FEATURES; i++ )
    1511             :     {
    1512             :         /* Standard scaler - mean and variance normalization */
    1513    17261010 :         *pFV = ( *pFV - sm_means[i] ) / sm_scale[i];
    1514    17261010 :         pFV++;
    1515             : 
    1516             :         /* MinMax scaler - mean and variance normalization */
    1517             :         /**pFV = *pFV * sm_scale[i] + sm_min[i];*/
    1518             :         /*pFV++;*/
    1519             :     }
    1520             : 
    1521             :     /* PCA */
    1522     1150734 :     v_sub( FV, pca_mean_, FV, N_SMC_FEATURES );
    1523     1150734 :     v_mult_mat( FV, FV, pca_components_, N_SMC_FEATURES, N_PCA_COEF );
    1524             : 
    1525             :     /*------------------------------------------------------------------*
    1526             :      * Calculation of posterior probability
    1527             :      * Log-probability
    1528             :      *------------------------------------------------------------------*/
    1529             : 
    1530             :     /* run loop for all mixtures (for each mixture, calculate the probability of speech, music and noise) */
    1531     1150734 :     lps = lpm = lpn = 0;
    1532     8055138 :     for ( m = 0; m < N_SMC_MIXTURES; m++ )
    1533             :     {
    1534     6904404 :         v_sub( FV, &means_speech[m * N_PCA_COEF], fvm, N_PCA_COEF );
    1535     6904404 :         lprob = dot_product_cholesky( fvm, &prec_chol_speech[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
    1536     6904404 :         ps[m] = logf( weights_speech[m] ) + log_det_chol_speech[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob;
    1537             : 
    1538     6904404 :         v_sub( FV, &means_music[m * N_PCA_COEF], fvm, N_PCA_COEF );
    1539     6904404 :         lprob = dot_product_cholesky( fvm, &prec_chol_music[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
    1540     6904404 :         pm[m] = logf( weights_music[m] ) + log_det_chol_music[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob;
    1541             : 
    1542     6904404 :         v_sub( FV, &means_noise[m * N_PCA_COEF], fvm, N_PCA_COEF );
    1543     6904404 :         lprob = dot_product_cholesky( fvm, &prec_chol_noise[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
    1544     6904404 :         pn[m] = logf( weights_noise[m] ) + log_det_chol_noise[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob;
    1545             :     }
    1546             : 
    1547     1150734 :     lps = logsumexp( ps, N_SMC_MIXTURES );
    1548     1150734 :     lpm = logsumexp( pm, N_SMC_MIXTURES );
    1549     1150734 :     lpn = logsumexp( pn, N_SMC_MIXTURES );
    1550             : 
    1551     1150734 :     *high_lpn_flag = 0;
    1552     1150734 :     if ( lpn > lps && lpn > lpm )
    1553             :     {
    1554      141950 :         *high_lpn_flag = 1;
    1555             :     }
    1556             : 
    1557     1150734 :     hSpMusClas->lpm = lpm;
    1558     1150734 :     hSpMusClas->lps = lps;
    1559     1150734 :     hSpMusClas->lpn = lpn;
    1560             : 
    1561             :     /* determine HQ Generic speech class */
    1562     1150734 :     if ( st->hHQ_core != NULL )
    1563             :     {
    1564      421725 :         if ( lps > lpm + 0.5f )
    1565             :         {
    1566      160042 :             st->hHQ_core->hq_generic_speech_class = 1;
    1567             :         }
    1568             :         else
    1569             :         {
    1570      261683 :             st->hHQ_core->hq_generic_speech_class = 0;
    1571             :         }
    1572             :     }
    1573             : 
    1574             :     /*------------------------------------------------------------------*
    1575             :      * Decision without hangover
    1576             :      * Weighted decision
    1577             :      *------------------------------------------------------------------*/
    1578             : 
    1579             :     /* decision without hangover (0 - speech/noise, 1 - music) */
    1580     1150734 :     if ( !localVAD_HE_SAD || Etot < 10 || ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && ( relE_attack_flag || flag_odv ) ) )
    1581             :     {
    1582      214380 :         dlp = 0;
    1583             :     }
    1584             :     else
    1585             :     {
    1586      936354 :         dlp = lpm - lps + DLP_BIAS;
    1587             : 
    1588      936354 :         if ( dlp > 30.0f )
    1589             :         {
    1590       33541 :             dlp = 30.0f;
    1591             :         }
    1592      902813 :         else if ( dlp < -30.0f )
    1593             :         {
    1594           0 :             dlp = -30.0f;
    1595             :         }
    1596             :     }
    1597             : 
    1598     1150734 :     dec = dlp > 0;
    1599             : 
    1600             :     /* calculate weight based on relE (higher relE -> lower weight, lower relE -> higher weight) */
    1601     1150734 :     wrelE = lin_interp( relE, 15.0f, 0.9f, -15.0f, 0.99f, 1 );
    1602             : 
    1603             :     /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */
    1604     1150734 :     hSpMusClas->dlp_mean_ST = 0.8f * hSpMusClas->dlp_mean_ST + 0.2f * dlp;
    1605     1150734 :     hSpMusClas->lt_dec_thres = hSpMusClas->dlp_mean_ST;
    1606             : 
    1607     1150734 :     if ( dlp < 0 && dlp < hSpMusClas->dlp_mean_ST )
    1608             :     {
    1609      258685 :         if ( hSpMusClas->dlp_mean_ST > 0 )
    1610             :         {
    1611       77455 :             hSpMusClas->wdrop = -dlp;
    1612             :         }
    1613      181230 :         else if ( hSpMusClas->wdrop > 0 )
    1614             :         {
    1615       42931 :             hSpMusClas->wdrop += hSpMusClas->dlp_mean_ST - dlp;
    1616             :         }
    1617             :     }
    1618             :     else
    1619             :     {
    1620      892049 :         hSpMusClas->wdrop = 0;
    1621             :     }
    1622             : 
    1623     1150734 :     wdrop = lin_interp( hSpMusClas->wdrop, 15.0f, 0.7f, 0.0f, 1.0f, 1 );
    1624             : 
    1625             :     /* calculate weight based on rises of dlp (close to 1 during sudden rise of dlp, close to 0 otherwise) */
    1626     1150734 :     if ( hSpMusClas->sp_mus_state == HANG_LEN && hSpMusClas->dlp_mean_ST > 0 && hSpMusClas->dlp_mean_ST > hSpMusClas->past_dlp_mean_ST[0] )
    1627             :     {
    1628      251990 :         if ( hSpMusClas->past_dlp_mean_ST[0] < 0 )
    1629             :         {
    1630       14052 :             hSpMusClas->wrise = hSpMusClas->dlp_mean_ST;
    1631             :         }
    1632      237938 :         else if ( hSpMusClas->wrise > 0 )
    1633             :         {
    1634       34759 :             hSpMusClas->wrise += hSpMusClas->dlp_mean_ST - hSpMusClas->past_dlp_mean_ST[0];
    1635             :         }
    1636             :     }
    1637             :     else
    1638             :     {
    1639      898744 :         hSpMusClas->wrise = 0;
    1640             :     }
    1641             : 
    1642     1150734 :     wrise = lin_interp( hSpMusClas->wrise, 5.0f, 0.95f, 0.0f, 1.0f, 1 );
    1643             : 
    1644             :     /* combine weights into one */
    1645     1150734 :     wght = wrelE * wdrop * wrise;
    1646             : 
    1647             :     /* ratio of delta means vs. delta variances */
    1648     1150734 :     if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1649             :     {
    1650       71537 :         hSpMusClas->dlp_mean_LT = dlp;
    1651       71537 :         hSpMusClas->dlp_var_LT = 0;
    1652             :     }
    1653             : 
    1654     1150734 :     hSpMusClas->dlp_mean_LT = 0.9f * hSpMusClas->dlp_mean_LT + 0.1f * dlp;
    1655     1150734 :     ftmp = dlp - hSpMusClas->dlp_mean_LT;
    1656     1150734 :     hSpMusClas->dlp_var_LT = 0.9f * hSpMusClas->dlp_var_LT + 0.1f * ( ftmp * ftmp );
    1657             : 
    1658     1150734 :     if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1659             :     {
    1660       71537 :         dlp_mean2var = 0;
    1661             :     }
    1662             :     else
    1663             :     {
    1664     1079197 :         dlp_mean2var = fabsf( hSpMusClas->dlp_mean_LT ) / ( sqrtf( fabsf( hSpMusClas->dlp_var_LT ) ) + 1.0f );
    1665             :     }
    1666             : 
    1667     1150734 :     if ( dlp_mean2var > 15.0f )
    1668             :     {
    1669             :         /* decrease the weight little bit when the classifier indicates "strong speech" or "strong music" */
    1670        3225 :         wght *= 0.9f;
    1671             :     }
    1672             : 
    1673     1150734 :     if ( wght > 1.0f )
    1674             :     {
    1675           0 :         wght = 1.0f;
    1676             :     }
    1677     1150734 :     else if ( wght < 0.01f )
    1678             :     {
    1679           0 :         wght = 0.01f;
    1680             :     }
    1681             : 
    1682     1150734 :     if ( Etot < 10 )
    1683             :     {
    1684             :         /* silence */
    1685      135252 :         wght = 0.92f;
    1686             :     }
    1687             : 
    1688             :     /* calculate weighted decision */
    1689     1150734 :     hSpMusClas->wdlp_0_95_sp = wght * hSpMusClas->wdlp_0_95_sp + ( 1 - wght ) * dlp;
    1690             : 
    1691             :     /* xtalk classifier: apply long hysteresis to prevent LRTD on music */
    1692     1150734 :     hSpMusClas->wdlp_xtalk = 0.995f * hSpMusClas->wdlp_xtalk + 0.005f * dlp;
    1693             : 
    1694             :     /*------------------------------------------------------------------*
    1695             :      * Final speech/music decision
    1696             :      *------------------------------------------------------------------*/
    1697             : 
    1698     1150734 :     if ( flag_spitch )
    1699             :     {
    1700       39748 :         hSpMusClas->flag_spitch_cnt = 5;
    1701             :     }
    1702     1110986 :     else if ( hSpMusClas->flag_spitch_cnt > 0 )
    1703             :     {
    1704        5871 :         hSpMusClas->flag_spitch_cnt--;
    1705             :     }
    1706             : 
    1707     1150734 :     if ( Etot < 10 )
    1708             :     {
    1709             :         /* silence */
    1710      135252 :         dec = 0;
    1711             :     }
    1712     1015482 :     else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1713             :     {
    1714             :         /* entry state -> final decision is calculated based on weighted average of past non-binary decisions */
    1715       71537 :         ftmp = w_spmus[hSpMusClas->sp_mus_state - 1][0] * dlp;
    1716       71537 :         ftmp += dotp( &w_spmus[hSpMusClas->sp_mus_state - 1][1], hSpMusClas->past_dlp, HANG_LEN - 1 );
    1717       71537 :         if ( ftmp > 2.0f )
    1718             :         {
    1719       35380 :             if ( dlp > 2.0f )
    1720             :             {
    1721       24507 :                 dec = 2;
    1722             :             }
    1723             :             else
    1724             :             {
    1725       10873 :                 dec = 1;
    1726             :             }
    1727             :         }
    1728             :         else
    1729             :         {
    1730       36157 :             dec = 0;
    1731             :         }
    1732             :     }
    1733             :     else
    1734             :     {
    1735             :         /* stable active state */
    1736      943945 :         if ( hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 &&
    1737      471305 :              ( ( hSpMusClas->flag_spitch_cnt > 0 && hSpMusClas->wdlp_0_95_sp > 3.4f ) || ( hSpMusClas->flag_spitch_cnt == 0 && hSpMusClas->wdlp_0_95_sp > 2.1f ) ) )
    1738             :         {
    1739             :             /* switching from speech to unclear */
    1740        1879 :             dec = 1;
    1741             :         }
    1742      942066 :         else if ( hSpMusClas->past_dec[0] == 0 && hSpMusClas->vad_0_1_cnt < 50 && hSpMusClas->relE_attack_sum == 0.0f && hSpMusClas->wdlp_0_95_sp > 1.0f )
    1743             :         {
    1744             :             /* switch from speech to unclear also during slowly rising weak music onsets */
    1745        3431 :             dec = 1;
    1746             :         }
    1747      938635 :         else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp > 2.5f )
    1748             :         {
    1749             :             /* switching from unclear to music */
    1750        4227 :             dec = 2;
    1751             :         }
    1752      934408 :         else if ( hSpMusClas->past_dec[0] == 2 && hSpMusClas->past_dec[1] == 2 && hSpMusClas->past_dec[2] == 2 && hSpMusClas->wdlp_0_95_sp < -1.0f )
    1753             :         {
    1754             :             /* switching from music to unclear */
    1755        2466 :             dec = 1;
    1756             :         }
    1757      931942 :         else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp < -2.5f )
    1758             :         {
    1759             :             /* switching from unclear to speech */
    1760        2542 :             dec = 0;
    1761             :         }
    1762             :         else
    1763             :         {
    1764      929400 :             dec = hSpMusClas->past_dec[0];
    1765             :         }
    1766             :     }
    1767             : 
    1768             :     /*------------------------------------------------------------------*
    1769             :      * raw S/M decision based on smoothed GMM score
    1770             :      *------------------------------------------------------------------*/
    1771             : 
    1772     1150734 :     if ( dec == 0 || st->hSpMusClas->wdlp_0_95_sp <= 0 )
    1773             :     {
    1774      696332 :         st->sp_aud_decision0 = 0;
    1775      696332 :         st->sp_aud_decision1 = 0;
    1776             :     }
    1777             :     else
    1778             :     {
    1779      454402 :         st->sp_aud_decision0 = 1;
    1780      454402 :         st->sp_aud_decision1 = 1;
    1781             :     }
    1782             : 
    1783             :     /*------------------------------------------------------------------*
    1784             :      * Updates
    1785             :      *------------------------------------------------------------------*/
    1786             : 
    1787             :     /* update buffer of past non-binary decisions */
    1788     1150734 :     mvr2r( &hSpMusClas->past_dlp[0], &hSpMusClas->past_dlp[1], HANG_LEN - 2 );
    1789     1150734 :     hSpMusClas->past_dlp[0] = dlp;
    1790             : 
    1791     1150734 :     mvr2r( &hSpMusClas->past_dlp_mean_ST[0], &hSpMusClas->past_dlp_mean_ST[1], HANG_LEN - 2 );
    1792     1150734 :     hSpMusClas->past_dlp_mean_ST[0] = hSpMusClas->dlp_mean_ST;
    1793             : 
    1794             :     /* update buffer of past binary decisions */
    1795     1150734 :     mvs2s( &hSpMusClas->past_dec[0], &hSpMusClas->past_dec[1], HANG_LEN - 2 );
    1796     1150734 :     hSpMusClas->past_dec[0] = dec;
    1797             : 
    1798             : #ifdef DEBUG_MODE_INFO
    1799             :     dbgwrite( &st->hSpMusClas->wdlp_0_95_sp, sizeof( float ), 1, 1, "res/wdlp_0_95_sp.x" );
    1800             : #endif
    1801             : 
    1802     1150734 :     return dec;
    1803             : }
    1804             : 
    1805             : /*---------------------------------------------------------------------*
    1806             :  * ivas_smc_mode_selection()
    1807             :  *
    1808             :  * 2nd stage speech/music classifier (select coding mode (ACELP, GSC and TCX) based on S/M classification)
    1809             :  * output (sp_aud_decision1 - sp_aud_decision2 -> coding mode):
    1810             :  * 0 - 0 -> ACELP
    1811             :  * 1 - 0 -> GSC
    1812             :  * 1 - 1 -> TCX
                     :  *
                     :  * Processing order in this function:
                     :  *   1. reset *attack_flag and st->sp_aud_decision2 to 0
                     :  *   2. update the helper detectors (stab_est, var_cor_calc, attack_det, tonal_det)
                     :  *   3. update the smoothed spectrum tod_lt_Bin_E and derive S_p2a = S_max - S_ave
                     :  *   4. initial 3-way selection (always writes both sp_aud_decision1 and sp_aud_decision2)
                     :  *   5. GSC -> ACELP/TCX overrides, then GSC -> ACELP TC overrides on attacks/onsets
                     :  *   6. GENERIC + attack -> TRANSITION override (forces ACELP)
                     :  *   7. optional forced modes (only compiled with DEBUGGING)
                     :  *   8. GSC_noisy_speech, GSC_IVAS_mode, coder_type = AUDIO and noise_lev updates
                     :  *
                     :  * Besides the two decision flags, the function may modify st->coder_type,
                     :  * st->GSC_noisy_speech, st->GSC_IVAS_mode and st->hGSCEnc->noise_lev.
    1813             :  *---------------------------------------------------------------------*/
    1814             : 
    1815      414383 : void ivas_smc_mode_selection(
    1816             :     Encoder_State *st,           /* i/o: encoder state structure                 */
    1817             :     const int32_t element_brate, /* i  : element bitrate                         */
    1818             :     int16_t smc_dec,             /* i  : raw decision of the 1st stage classifier*/
    1819             :     const float relE,            /* i  : relative frame energy                   */
    1820             :     const float Etot,            /* i  : total frame energy                      */
    1821             :     int16_t *attack_flag,        /* i/o: attack flag (GSC or TC)                 */
    1822             :     const float *inp,            /* i  : input signal                            */
    1823             :     const float S_map[],         /* i  : short-term correlation map              */
    1824             :     const int16_t flag_spitch    /* i  : flag to indicate very short stable pitch*/
    1825             : )
    1826             : {
    1827             :     int16_t attack;
    1828             :     float ton;
    1829             :     int16_t i;
    1830             :     float S_p2a, S_max, S_ave;
    1831             :     float thr_sp2a;
    1832             : 
    1833      414383 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    1834             : 
    1835             :     /* initialization */
    1836      414383 :     *attack_flag = 0;
    1837      414383 :     st->sp_aud_decision2 = 0;
    1838             : 
    1839             :     /* signal stability estimation */
    1840      414383 :     stab_est( Etot, hSpMusClas->gsc_lt_diff_etot, &hSpMusClas->gsc_mem_etot, &hSpMusClas->gsc_nb_thr_3, &hSpMusClas->gsc_nb_thr_1, hSpMusClas->gsc_thres, &hSpMusClas->gsc_last_music_flag, st->vad_flag );
    1841             : 
    1842             :     /* calculate variance of correlation */
    1843      414383 :     var_cor_calc( st->old_corr, &hSpMusClas->mold_corr, hSpMusClas->var_cor_t, &hSpMusClas->high_stable_cor );
    1844             : 
    1845             :     /* attack detection; NOTE(review): st->clas is passed as both the 2nd and the 7th argument - confirm against the attack_det() prototype */
    1846      414383 :     attack = attack_det( inp, st->clas, st->localVAD, st->coder_type, 0, st->element_mode, st->clas, hSpMusClas->finc_prev, &hSpMusClas->lt_finc, &hSpMusClas->last_strong_attack );
    1847             : 
    1848             :     /* tonal detector */
    1849      414383 :     ton = tonal_det( S_map, st->vad_flag, hSpMusClas->tod_S_map_lt, &hSpMusClas->tod_thr_lt, &hSpMusClas->tod_weight, &hSpMusClas->tod_S_mass_prev, &hSpMusClas->tod_S_mass_lt );
    1850             : 
    1851             : 
    1852             :     /* calculate spectral peak-to-average ratio: smooth st->Bin_E into tod_lt_Bin_E with factor P2A_FACT, then S_p2a = S_max - S_ave (a difference, not a quotient) */
    1853    33565023 :     for ( i = 0; i < TOD_NSPEC; i++ )
    1854             :     {
    1855    33150640 :         st->hSpMusClas->tod_lt_Bin_E[i] = P2A_FACT * st->hSpMusClas->tod_lt_Bin_E[i] + ( 1 - P2A_FACT ) * st->Bin_E[i];
    1856             :     }
    1857             : 
    1858      414383 :     maximum( st->hSpMusClas->tod_lt_Bin_E, TOD_NSPEC, &S_max );
    1859      414383 :     S_ave = sum_f( st->hSpMusClas->tod_lt_Bin_E, TOD_NSPEC ) / TOD_NSPEC;
    1860      414383 :     S_p2a = S_max - S_ave;
    1861             : 
    1862      414383 :     if ( element_brate <= IVAS_16k4 ) /* the peakiness threshold depends on the element bitrate */
    1863             :     {
    1864      135734 :         thr_sp2a = THR_P2A_HIGH;
    1865             :     }
    1866             :     else
    1867             :     {
    1868      278649 :         thr_sp2a = THR_P2A;
    1869             :     }
    1870             : 
    1871             :     /* initial 3-way selection of coding modes (ACELP/GSC/TCX); every branch writes both decision flags */
    1872      414383 :     if ( relE > -10.0f && ( S_p2a > thr_sp2a || ton > hSpMusClas->tod_thr_lt ) )
    1873             :     {
    1874             :         /* select TCX to encode extremely peaky signals or strongly tonal signals */
    1875       19590 :         st->sp_aud_decision1 = 1;
    1876       19590 :         st->sp_aud_decision2 = 1;
    1877             :     }
    1878      394793 :     else if ( smc_dec == SPEECH )
    1879             :     {
    1880             :         /* select ACELP to encode speech */
    1881      153265 :         st->sp_aud_decision1 = 0;
    1882      153265 :         st->sp_aud_decision2 = 0;
    1883             :     }
    1884      241528 :     else if ( smc_dec == SPEECH_OR_MUSIC )
    1885             :     {
    1886             :         /* select GSC to encode "unclear" segments (classifier's score on the borderline) */
    1887        6438 :         st->sp_aud_decision1 = 1;
    1888        6438 :         st->sp_aud_decision2 = 0;
    1889             :     }
    1890             :     else
    1891             :     {
    1892             :         /* select TCX to encode music */
    1893      235090 :         st->sp_aud_decision1 = 1;
    1894      235090 :         st->sp_aud_decision2 = 1;
    1895             :     }
    1896             : 
    1897             :     /* change decision from GSC to ACELP/TCX in some special cases */
    1898      414383 :     if ( st->sp_aud_decision1 == 1 && st->sp_aud_decision2 == 0 )
    1899             :     {
    1900        6438 :         if ( hSpMusClas->ener_RAT < 0.18f && hSpMusClas->lt_dec_thres > 15.0f )
    1901             :         {
    1902             :             /* prevent GSC on strong music with almost no content below 1kHz */
    1903           4 :             st->sp_aud_decision2 = 1;
    1904             :         }
    1905        6434 :         else if ( flag_spitch )
    1906             :         {
    1907             :             /* prevent GSC on signals with very short and stable high pitch period */
    1908         120 :             if ( hSpMusClas->wdlp_0_95_sp < 2.5f )
    1909             :             {
    1910             :                 /* select ACELP instead */
    1911         116 :                 st->sp_aud_decision1 = 0;
    1912             :             }
    1913             :             else
    1914             :             {
    1915             :                 /* select TCX instead */
    1916           4 :                 st->sp_aud_decision2 = 1;
    1917             :             }
    1918             :         }
    1919        6314 :         else if ( hSpMusClas->high_stable_cor && st->pitch[0] >= 130 )
    1920             :         {
    1921             :             /* prevent GSC in highly correlated signal with low energy variation */
    1922             :             /* this is basically a patch against bassoon-type of music */
    1923           0 :             st->sp_aud_decision2 = 1;
    1924             :         }
    1925             :     }
    1926             : 
    1927             :     /* change decision from GSC to ACELP TC during attacks/onsets (only evaluated if GSC is still selected at this point) */
    1928      414383 :     if ( st->sp_aud_decision1 == 1 && st->sp_aud_decision2 == 0 )
    1929             :     {
    1930        6314 :         if ( ( hSpMusClas->gsc_lt_diff_etot[MAX_LT - 1] > 4.5f ) &&
    1931         559 :              ( hSpMusClas->gsc_lt_diff_etot[MAX_LT - 1] - hSpMusClas->gsc_lt_diff_etot[MAX_LT - 2] > 10.0f ) )
    1932             :         {
    1933         123 :             if ( st->tc_cnt == 1 )
    1934             :             {
    1935             :                 /* do ACELP TC coding instead of GC/VC if onset has been already declared before */
    1936          47 :                 st->sp_aud_decision1 = 0;
    1937          47 :                 st->coder_type = TRANSITION;
    1938             :             }
    1939             :             else
    1940             :             {
    1941          76 :                 if ( attack >= ATT_3LSUB_POS )
    1942             :                 {
    1943             :                     /* do ACELP TC coding also if attack is located in the last subframe */
    1944          16 :                     st->sp_aud_decision1 = 0;
    1945          16 :                     *attack_flag = attack + 1;
    1946          16 :                     st->coder_type = TRANSITION;
    1947             :                 }
    1948          60 :                 else if ( attack >= ATT_SEG_LEN / 2 )
    1949             :                 {
    1950             :                     /* do GSC coding if attack is located after the first quarter of the first subframe */
    1951             :                     /* (pre-echo will be treated at the decoder side) */
    1952           1 :                     *attack_flag = 31; /* NOTE(review): dead store - overwritten by the next statement; confirm which value is intended */
    1953           1 :                     *attack_flag = attack + 1;
    1954             :                 }
    1955             :             }
    1956             :         }
    1957             :     }
    1958             : 
    1959      414383 :     if ( st->localVAD == 1 && st->coder_type == GENERIC && attack > 0 /*&& *attack_flag < 32*/ /*&& st->tc_cnt != 2*/ && !( st->sp_aud_decision2 == 1 && ton > 0.65f ) )
    1960             :     {
    1961             :         /* change ACELP coder_type to TC if attack has been detected; this clears both decision flags, i.e. forces ACELP */
    1962        6751 :         st->sp_aud_decision1 = 0;
    1963        6751 :         st->sp_aud_decision2 = 0;
    1964             : 
    1965        6751 :         st->coder_type = TRANSITION;
    1966        6751 :         *attack_flag = attack + 1;
    1967             :     }
    1968             : 
    1969             : #ifdef DEBUGGING
    1970             :     if ( st->idchan == 0 && st->coder_type != INACTIVE )
    1971             :     {
    1972             :         if ( st->force == FORCE_GSC && element_brate < IVAS_24k4 )
    1973             :         {
    1974             :             /* enforce GSC */
    1975             :             st->sp_aud_decision1 = 1;
    1976             :             st->sp_aud_decision2 = 0;
    1977             :         }
    1978             :         else if ( st->force == FORCE_SPEECH && ( st->sp_aud_decision1 == 1 || st->sp_aud_decision2 == 1 ) )
    1979             :         {
    1980             :             if ( element_brate < IVAS_24k4 )
    1981             :             {
    1982             :                 /* convert TCX to GSC */
    1983             :                 st->sp_aud_decision1 = 1;
    1984             :                 st->sp_aud_decision2 = 0;
    1985             :             }
    1986             :             else
    1987             :             {
    1988             :                 /* convert TCX to ACELP */
    1989             :                 st->sp_aud_decision1 = 0;
    1990             :                 st->sp_aud_decision2 = 0;
    1991             :             }
    1992             :         }
    1993             :         else if ( st->force == FORCE_MUSIC )
    1994             :         {
    1995             :             /* enforce TCX */
    1996             :             st->sp_aud_decision1 = 1;
    1997             :             st->sp_aud_decision2 = 1;
    1998             :         }
    1999             :     }
    2000             : #endif
    2001             : 
    2002             :     /* set GSC noisy speech flag on unvoiced SWB segments */
    2003      414383 :     st->GSC_noisy_speech = 0;
    2004      414383 :     if ( st->vad_flag == 1 && element_brate <= IVAS_16k4 && st->lp_noise > 30.0f && st->sp_aud_decision1 == 0 && st->bwidth >= SWB && st->coder_type_raw == UNVOICED )
    2005             :     {
    2006        1230 :         st->GSC_noisy_speech = 1;
    2007             :     }
    2008             : 
    2009             :     /* set GSC submode; this branch also re-evaluates GSC_noisy_speech and overwrites the value set above */
    2010      414383 :     if ( st->element_mode > EVS_MONO && ( st->sp_aud_decision1 == 1 && st->sp_aud_decision2 == 0 ) && st->total_brate > STEREO_GSC_BIT_RATE_ALLOC ) /* below STEREO_GSC_BIT_RATE_ALLOC, fall back on normal GSC */
    2011             :     {
    2012        5237 :         st->GSC_IVAS_mode = 1;
    2013        5237 :         if ( st->hSpMusClas->wdlp_0_95_sp > 0.0f )
    2014             :         {
    2015             :             /* music-like content */
    2016        3121 :             st->GSC_IVAS_mode = 3;
    2017             :         }
    2018        2116 :         else if ( st->tc_cnt > 0 )
    2019             :         {
    2020             :             /* likely presence of an onset, GSC bit allocation will be more focused on LF */
    2021         265 :             st->GSC_IVAS_mode = 2;
    2022             :         }
    2023             : 
    2024        5237 :         if ( st->coder_type_raw == UNVOICED && st->sp_aud_decision0 == 0 /*&& st->GSC_IVAS_mode < 3*/ )
    2025             :         {
    2026         113 :             st->GSC_noisy_speech = 1;
    2027             :         }
    2028             :         else
    2029             :         {
    2030        5124 :             st->GSC_noisy_speech = 0;
    2031             :         }
    2032             :     }
    2033             : 
    2034             :     /* set coder_type to AUDIO when GSC is selected (st->core will be set later in the decision matrix) */
    2035      414383 :     if ( ( st->sp_aud_decision1 == 1 && st->sp_aud_decision2 == 0 ) || st->GSC_noisy_speech )
    2036             :     {
    2037        7391 :         st->coder_type = AUDIO;
    2038        7391 :         if ( st->hGSCEnc != NULL && st->GSC_noisy_speech == 0 ) /* In case of GSC_noisy_speech, NOISE_LEVEL should remain at NOISE_LEVEL_SP3 */
    2039             :         {
    2040        6048 :             st->hGSCEnc->noise_lev = NOISE_LEVEL_SP0;
    2041             :         }
    2042             :     }
    2043             : 
    2044      414383 :     return;
    2045             : }
    2046             : 
    2047             : 
    2048             : /*------------------------------------------------------------------------*
    2049             :  * music_mixed_classif_improv()
    2050             :  *
    2051             :  * Improve 1st stage speech/music decision for mixed&music signals
                     :  *
                     :  * Returns early, after resetting dec_mov and dec_mov1 to 0.5, when
                     :  * st->vad_flag == 0 or st->lp_speech - st->lp_noise < 25.
                     :  * Otherwise the feature buffers in st->hSpMusClas are updated,
                     :  * mode_decision() is called and, if the final decision is 1,
                     :  * st->sp_aud_decision1 is set to 1. The flag is never cleared here.
                     :  *
                     :  * epsP is read at indices 1 .. 16.
    2052             :  *------------------------------------------------------------------------*/
    2053             : 
    2054        2050 : static void music_mixed_classif_improv(
    2055             :     Encoder_State *st,      /* i/o: Encoder state structure                         */
    2056             :     const float *new_inp,   /* i  : new input signal                                */
    2057             :     const float *epsP,      /* i  : LP prediction error                             */
    2058             :     const float etot,       /* i  : total frame energy                              */
    2059             :     const float old_cor,    /* i  : normalized correlation                          */
    2060             :     const float cor_map_sum /* i  : correlation map sum                             */
    2061             : )
    2062             : {
    2063             :     int16_t i, dec, len, percus_flag;
    2064             :     float p2v_map[128], ftmp, ftmp1, lt_diff, log_max_spl, epsP_tilt, max_spl;
    2065             : 
    2066        2050 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    2067             : 
    2068             :     /* find sample with maximum absolute amplitude (computed before the early return below; only used for log_max_spl) */
    2069        2050 :     max_spl = 0;
    2070      526850 :     for ( i = 0; i < L_FRAME; i++ )
    2071             :     {
    2072      524800 :         if ( fabs( new_inp[i] ) > max_spl ) /* NOTE(review): fabs() (double) here vs. fabsf() on the assignment below - consider fabsf() for consistency */
    2073             :         {
    2074       18405 :             max_spl = fabsf( new_inp[i] );
    2075             :         }
    2076             :     }
    2077             : 
    2078             :     /* music is considered only appearing in high SNR condition and active signal */
    2079        2050 :     if ( st->vad_flag == 0 || st->lp_speech - st->lp_noise < 25 )
    2080             :     {
    2081           8 :         hSpMusClas->dec_mov = 0.5f;
    2082           8 :         hSpMusClas->dec_mov1 = 0.5f;
    2083             : 
    2084           8 :         if ( st->vad_flag == 0 ) /* the onset counter is reset only on inactive frames, not on low SNR alone */
    2085             :         {
    2086           8 :             hSpMusClas->onset_cnt = 0;
    2087             :         }
    2088             : 
    2089           8 :         return;
    2090             :     }
    2091             : 
    2092        2042 :     hSpMusClas->onset_cnt++;
    2093             : 
    2094        2042 :     if ( hSpMusClas->onset_cnt > 9 ) /* saturate the counter at 9 */
    2095             :     {
    2096        2006 :         hSpMusClas->onset_cnt = 9;
    2097             :     }
    2098             : 
    2099        2042 :     if ( hSpMusClas->onset_cnt == 1 ) /* counter was 0 before the increment above */
    2100             :     {
    2101           4 :         set_f( hSpMusClas->buf_flux, -100, BUF_LEN ); /* negative entries are skipped by the "flux buffer status" scan further down */
    2102             :     }
    2103             : 
    2104             :     /* spectral analysis */
    2105        2042 :     spec_analysis( st->Bin_E, p2v_map );
    2106             : 
    2107             :     /* percussive music detection */
    2108        2042 :     log_max_spl = 20 * logf( max_spl + 0.0001f ); /* note: logf() is the natural logarithm */
    2109        2042 :     lt_diff = log_max_spl - hSpMusClas->mov_log_max_spl;
    2110             : 
    2111        8168 :     for ( i = 0; i < 3; i++ )
    2112             :     {
    2113        6126 :         hSpMusClas->buf_etot[i] = hSpMusClas->buf_etot[i + 1];
    2114             :     }
    2115        2042 :     hSpMusClas->buf_etot[i] = etot; /* i == 3 here: the current frame energy goes to the last slot */
    2116             : 
    2117        2042 :     percus_flag = 0;
    2118        2042 :     if ( hSpMusClas->buf_etot[1] - hSpMusClas->buf_etot[0] > 6 && hSpMusClas->buf_etot[2] < hSpMusClas->buf_etot[1] && hSpMusClas->buf_etot[1] - st->lp_speech > 3 )
    2119             :     {
    2120          15 :         if ( hSpMusClas->buf_etot[1] - hSpMusClas->buf_etot[3] > 3 && hSpMusClas->buf_etot[3] < hSpMusClas->buf_etot[2] && 0.5f * ( 0.5f * ( st->voicing[0] + st->voicing[1] ) + old_cor ) < 0.75f )
    2121             :         {
    2122           4 :             if ( hSpMusClas->dec_mov > 0.8f )
    2123             :             {
    2124           0 :                 percus_flag = 1;
    2125             :             }
    2126           4 :             else if ( old_cor < 0.75f && st->voicing[0] < 0.75f && st->voicing[1] < 0.75f && hSpMusClas->old_lt_diff[0] > 10 )
    2127             :             {
    2128           0 :                 percus_flag = 1;
    2129             :             }
    2130             :         }
    2131             :     }
    2132             : 
    2133             :     /* sound attack detection */
    2134        2042 :     if ( hSpMusClas->buf_etot[3] - hSpMusClas->buf_etot[2] > 6 && hSpMusClas->dec_mov > 0.9f && etot - st->lp_speech > 5 && hSpMusClas->old_lt_diff[0] > 5 )
    2135             :     {
    2136           0 :         hSpMusClas->attack_hangover = 3;
    2137             :     }
    2138             : 
    2139        2042 :     if ( st->voicing[0] > 0.9f && st->voicing[1] > 0.9f ) /* mov_log_max_spl is updated only on strongly voiced frames */
    2140             :     {
    2141         561 :         if ( log_max_spl > hSpMusClas->mov_log_max_spl )
    2142             :         {
    2143           8 :             hSpMusClas->mov_log_max_spl = 0.75f * hSpMusClas->mov_log_max_spl + ( 1 - 0.75f ) * log_max_spl; /* rising level: faster update */
    2144             :         }
    2145             :         else
    2146             :         {
    2147         553 :             hSpMusClas->mov_log_max_spl = 0.995f * hSpMusClas->mov_log_max_spl + ( 1 - 0.995f ) * log_max_spl; /* falling level: slower update */
    2148             :         }
    2149             :     }
    2150             : 
    2151        2042 :     hSpMusClas->old_lt_diff[0] = hSpMusClas->old_lt_diff[1];
    2152        2042 :     hSpMusClas->old_lt_diff[1] = lt_diff;
    2153             : 
    2154             :     /* calculate and buffer spectral energy fluctuation */
    2155        2042 :     flux( st->Bin_E, p2v_map, hSpMusClas->old_Bin_E, hSpMusClas->buf_flux, hSpMusClas->attack_hangover, hSpMusClas->dec_mov );
    2156             : 
    2157        2042 :     hSpMusClas->attack_hangover--; /* count the hangover down, floored at 0 below */
    2158        2042 :     if ( hSpMusClas->attack_hangover < 0 )
    2159             :     {
    2160        2042 :         hSpMusClas->attack_hangover = 0;
    2161             :     }
    2162             : 
    2163             :     /* identify flux buffer status: len = number of consecutive non-negative entries counted back from the end of buf_flux */
    2164        2042 :     len = 0;
    2165      117653 :     for ( i = BUF_LEN - 1; i >= 0 && hSpMusClas->buf_flux[i] >= 0; i-- )
    2166             :     {
    2167      115611 :         len++;
    2168             :     }
    2169             : 
    2170             :     /* reset flux buffer if percussive music is detected (the last len entries are set to 5) */
    2171        2042 :     if ( percus_flag == 1 )
    2172             :     {
    2173           0 :         set_f( &hSpMusClas->buf_flux[BUF_LEN - len], 5, len );
    2174             :     }
    2175             : 
    2176             :     /* calculate and buffer the tilt of residual LP analysis energies (reads epsP[1] .. epsP[16]) */
    2177        2042 :     ftmp = 0.00001f; /* small positive start value keeps the divisor below strictly positive */
    2178        2042 :     ftmp1 = 0;
    2179       32672 :     for ( i = 1; i < 16; i++ )
    2180             :     {
    2181       30630 :         ftmp += epsP[i] * epsP[i];
    2182       30630 :         ftmp1 += epsP[i] * epsP[i + 1];
    2183             :     }
    2184             : 
    2185        2042 :     epsP_tilt = ftmp1 / ftmp;
    2186             : 
    2187      122520 :     for ( i = 0; i < BUF_LEN - 1; i++ )
    2188             :     {
    2189      120478 :         hSpMusClas->buf_epsP_tilt[i] = hSpMusClas->buf_epsP_tilt[i + 1];
    2190             :     }
    2191        2042 :     hSpMusClas->buf_epsP_tilt[i] = epsP_tilt; /* i == BUF_LEN - 1 here */
    2192             : 
    2193             :     /* calculate and buffer highband spectral peakness */
    2194        2042 :     tonal_dist( p2v_map, hSpMusClas->buf_pkh, hSpMusClas->buf_Ntonal, hSpMusClas->buf_Ntonal2, hSpMusClas->buf_Ntonal_lf );
    2195             : 
    2196             :     /* buffer sum of correlation map */
    2197      122520 :     for ( i = 0; i < BUF_LEN - 1; i++ )
    2198             :     {
    2199      120478 :         hSpMusClas->buf_cor_map_sum[i] = hSpMusClas->buf_cor_map_sum[i + 1];
    2200             :     }
    2201        2042 :     hSpMusClas->buf_cor_map_sum[i] = cor_map_sum; /* i == BUF_LEN - 1 here */
    2202             : 
    2203             :     /* buffer voicing metric (10 entries; the newest value lps - lpm is stored at index 9) */
    2204       20420 :     for ( i = 0; i < 9; i++ )
    2205             :     {
    2206       18378 :         hSpMusClas->buf_dlp[i] = hSpMusClas->buf_dlp[i + 1];
    2207             :     }
    2208        2042 :     hSpMusClas->buf_dlp[i] = hSpMusClas->lps - hSpMusClas->lpm;
    2209             : 
    2210             :     /* classification */
    2211        2042 :     dec = mode_decision( st, len, &hSpMusClas->dec_mov, hSpMusClas->buf_flux, hSpMusClas->buf_epsP_tilt, hSpMusClas->buf_pkh, hSpMusClas->buf_cor_map_sum, hSpMusClas->buf_Ntonal, hSpMusClas->buf_Ntonal2, hSpMusClas->buf_Ntonal_lf, hSpMusClas->buf_dlp );
    2212             : 
    2213             :     /* update long term moving average of the classification decisions (only once more than 30 valid flux entries are buffered) */
    2214        2042 :     if ( len > 30 )
    2215             :     {
    2216        1922 :         hSpMusClas->dec_mov = 0.97f * hSpMusClas->dec_mov + ( 1 - 0.97f ) * dec;
    2217        1922 :         hSpMusClas->dec_mov1 = 0.97f * hSpMusClas->dec_mov1 + ( 1 - 0.97f ) * dec;
    2218             :     }
    2219             : 
    2220             :     /* update long-term unvoiced counter; NOTE(review): index 59 is hard-coded - presumably BUF_LEN - 1 (newest entry), confirm */
    2221        2042 :     if ( ( st->coder_type_raw == UNVOICED || st->coder_type_raw == INACTIVE ) && etot > 1.5f && hSpMusClas->buf_Ntonal2[59] < 2 )
    2222             :     {
    2223          76 :         hSpMusClas->UV_cnt1 -= 8;
    2224             :     }
    2225             :     else
    2226             :     {
    2227        1966 :         hSpMusClas->UV_cnt1++;
    2228             :     }
    2229             : 
    2230        2042 :     if ( hSpMusClas->UV_cnt1 > 300 ) /* keep the counter within [0, 300] */
    2231             :     {
    2232        1358 :         hSpMusClas->UV_cnt1 = 300;
    2233             :     }
    2234         684 :     else if ( hSpMusClas->UV_cnt1 < 0 )
    2235             :     {
    2236           0 :         hSpMusClas->UV_cnt1 = 0;
    2237             :     }
    2238             : 
    2239        2042 :     hSpMusClas->LT_UV_cnt1 = 0.9f * hSpMusClas->LT_UV_cnt1 + 0.1f * hSpMusClas->UV_cnt1;
    2240             : 
    2241             :     /* revert classification decision due to long-term unvoiced counter */
    2242        2042 :     if ( dec == 1 && hSpMusClas->dec_mov1 < 0.2f && hSpMusClas->LT_UV_cnt1 < 200 )
    2243             :     {
    2244           0 :         dec = 0;
    2245             :     }
    2246             : 
    2247             :     /* overwrite 1st stage speech/music decision to music (the flag is only raised here, never cleared) */
    2248        2042 :     if ( dec == 1 )
    2249             :     {
    2250         387 :         st->sp_aud_decision1 = 1;
    2251             :     }
    2252             : 
    2253        2042 :     return;
    2254             : }
    2255             : 
    2256             : 
    2257             : /*---------------------------------------------------------------------*
    2258             :  * spec_analysis()
    2259             :  *
    2260             :  * Spectral analysis for mixed/music classification improvement
                     :  *
                     :  * Finds the strict local maxima (peaks) and strict local minima (valleys)
                     :  * of Bin_E. For each peak that is matched to a pair of consecutive valleys,
                     :  * the distance 2 * peak - left_valley - right_valley is written to p2v_map
                     :  * at the bin of the peak. All other entries of p2v_map[0 .. L_FFT/2 - 2]
                     :  * are set to 0.
    2261             :  *---------------------------------------------------------------------*/
    2262             : 
    2263        2042 : static void spec_analysis(
    2264             :     float *Bin_E,  /* i  : log energy spectrum of the current frame        */
    2265             :     float *p2v_map /* o  : spectral peakiness map (L_FFT/2 - 1 entries are written) */
    2266             : )
    2267             : {
    2268             :     int16_t i, k, m;
    2269             :     float peak[L_FFT / 4 + 1]; /* values of the detected peaks */
    2270             :     float valley[L_FFT / 4 + 1]; /* values of the detected valleys */
    2271             :     int16_t peak_idx[L_FFT / 4 + 1]; /* bin indices of the peaks, terminated by -1 */
    2272             :     int16_t valey_idx[L_FFT / 4 + 1]; /* bin indices of the valleys */
    2273             :     float p2v[L_FFT / 4 + 1]; /* peak-to-valley distance of each matched peak */
    2274             : 
    2275             :     /* find spectral peaks: strict local maxima of Bin_E, k counts them */
    2276        2042 :     k = 0;
    2277      257292 :     for ( i = 1; i < L_FFT / 2 - 2; i++ )
    2278             :     {
    2279      255250 :         if ( Bin_E[i] > Bin_E[i - 1] && Bin_E[i] > Bin_E[i + 1] )
    2280             :         {
    2281       68279 :             peak[k] = Bin_E[i];
    2282       68279 :             peak_idx[k] = i;
    2283       68279 :             k++;
    2284             :         }
    2285             :     }
    2286        2042 :     assert( k + 1 < L_FFT / 4 + 1 ); /* the two terminating entries written below must fit into peak_idx[] */
    2287        2042 :     peak_idx[k] = -1; /* -1 is never greater than a valley index, so the matching loop below stops advancing once all peaks are used */
    2288        2042 :     peak_idx[k + 1] = -1;
    2289             : 
    2290        2042 :     if ( k == 0 ) /* no peak found: output an all-zero map */
    2291             :     {
    2292           0 :         for ( i = 0; i < L_FFT / 2 - 1; i++ )
    2293             :         {
    2294           0 :             p2v_map[i] = 0;
    2295             :         }
    2296             : 
    2297           0 :         return;
    2298             :     }
    2299             : 
    2300             :     /* find spectral valleys: m counts them (the closing valley is stored but not counted) */
    2301        2042 :     m = 0;
    2302        2042 :     if ( Bin_E[0] < Bin_E[1] ) /* bin 0 is taken as a valley when the spectrum rises from it */
    2303             :     {
    2304        1111 :         valley[0] = Bin_E[0];
    2305        1111 :         valey_idx[0] = 0;
    2306        1111 :         m++;
    2307             :     }
    2308             : 
    2309        2042 :     k = L_FFT / 2 - 2; /* k is reused: from here on it is the bin of the closing valley */
    2310        3590 :     for ( i = L_FFT / 2 - 3; i >= 0 && Bin_E[i + 1] > Bin_E[i]; i-- ) /* walk down while Bin_E keeps rising towards higher bins */
    2311             :     {
    2312        1548 :         k = i; /* ends at the first bin of the final rising run */
    2313             :     }
    2314             : 
    2315      255744 :     for ( i = 1; i < k; i++ ) /* strict local minima below the closing valley */
    2316             :     {
    2317      253702 :         if ( Bin_E[i] < Bin_E[i - 1] && Bin_E[i] < Bin_E[i + 1] )
    2318             :         {
    2319       67168 :             valley[m] = Bin_E[i];
    2320       67168 :             valey_idx[m] = i;
    2321       67168 :             m++;
    2322             :         }
    2323             :     }
    2324             : 
    2325        2042 :     valley[m] = Bin_E[k]; /* closing valley, read as valley[i + 1] in the last iteration below */
    2326        2042 :     valey_idx[m] = k;
    2327             :     /* NOTE(review): k below only advances on a match, so a peak lying below the first valley (e.g. when Bin_E[0] == Bin_E[1]) would block all later peaks - confirm this is intended */
    2328             :     /* find spectral peak to valley distances */
    2329        2042 :     k = 0; /* k is reused again: index of the next peak to be matched */
    2330       70321 :     for ( i = 0; i < m; i++ ) /* i runs over pairs of consecutive valleys (i, i + 1) */
    2331             :     {
    2332       68279 :         if ( peak_idx[k] > valey_idx[i] && peak_idx[k] < valey_idx[i + 1] )
    2333             :         {
    2334       68279 :             p2v[k] = 2 * peak[k] - valley[i] - valley[i + 1];
    2335       68279 :             k++;
    2336             :         }
    2337             :     }
    2338             : 
    2339      261376 :     for ( i = 0; i < L_FFT / 2 - 1; i++ ) /* clear the map before the peak bins are filled in */
    2340             :     {
    2341      259334 :         p2v_map[i] = 0;
    2342             :     }
    2343             : 
    2344       70321 :     for ( i = 0; i < k; i++ ) /* k is the number of matched peaks */
    2345             :     {
    2346       68279 :         p2v_map[peak_idx[i]] = p2v[i];
    2347             :     }
    2348             : 
    2349        2042 :     return;
    2350             : }
    2351             : 
    2352             : /*---------------------------------------------------------------------*
    2353             :  * flux()
    2354             :  *
    2355             :  * Calculation of spectral flux
                     :  *
                     :  * The flux is the mean absolute difference between Bin_E and the first
                     :  * (oldest) block of old_Bin_E, taken over those of the first N_OLD_BIN_E
                     :  * bins whose p2v_map entry is non-zero. Afterwards old_Bin_E is shifted by
                     :  * one block and the current spectrum is appended. The flux value is pushed
                     :  * into buf_flux only when attack_hangover <= 0.
    2356             :  *---------------------------------------------------------------------*/
    2357             : 
    2358        2042 : static void flux(
    2359             :     float *Bin_E,            /* i  : log energy spectrum of the current frame        */
    2360             :     float *p2v_map,          /* i  : spectral peakiness map                          */
    2361             :     float *old_Bin_E,        /* i/o: log energy spectrum of the frame 60ms ago; accessed as 3 consecutive blocks of N_OLD_BIN_E values */
    2362             :     float *buf_flux,         /* i/o: buffer storing spectral energy fluctuation (BUF_LEN entries are accessed) */
    2363             :     int16_t attack_hangover, /* i  : hangover preventing flux buffering (passed by value, not modified) */
    2364             :     float dec_mov            /* i  : moving average of classifier decision (passed by value, not modified) */
    2365             : )
    2366             : {
    2367             :     int16_t i;
    2368             :     float *pt1, *pt2, *pt3, *pt4, *pt5, *pt6;
    2369             :     float flux; /* result of this frame; shadows the function name inside this scope */
    2370             :     int16_t cnt; /* number of bins that contributed to the sum */
    2371             : 
    2372             :     /* calculate flux */
    2373        2042 :     flux = 0;
    2374        2042 :     cnt = 0;
    2375       87806 :     for ( i = 0; i < N_OLD_BIN_E; i++ )
    2376             :     {
    2377       85764 :         if ( p2v_map[i] != 0 ) /* only bins marked in the peakiness map are used */
    2378             :         {
    2379       21213 :             flux += fabsf( Bin_E[i] - old_Bin_E[i] );
    2380       21213 :             cnt++;
    2381             :         }
    2382             :     }
    2383             : 
    2384        2042 :     if ( cnt == 0 )
    2385             :     {
    2386           0 :         flux = 5; /* fixed value when no bin was marked; also avoids the division by zero below */
    2387             :     }
    2388             :     else
    2389             :     {
    2390        2042 :         flux = flux / (float) cnt;
    2391             :     }
    2392             : 
    2393        2042 :     if ( flux > 20 && dec_mov > 0.8f ) /* limit the flux to 20 while the decision average is above 0.8 */
    2394             :     {
    2395          48 :         flux = 20;
    2396             :     }
    2397             : 
    2398             :     /* update old Bin_E buffer: block 2 -> block 1, block 3 -> block 2, current frame -> block 3 */
    2399        2042 :     pt1 = old_Bin_E;
    2400        2042 :     pt2 = old_Bin_E + N_OLD_BIN_E;
    2401        2042 :     pt3 = Bin_E;
    2402        2042 :     pt4 = old_Bin_E + N_OLD_BIN_E;
    2403        2042 :     pt5 = old_Bin_E + 2 * N_OLD_BIN_E;
    2404        2042 :     pt6 = old_Bin_E + 2 * N_OLD_BIN_E;
    2405             : 
    2406       87806 :     for ( i = 0; i < N_OLD_BIN_E; i++ )
    2407             :     {
    2408       85764 :         *pt1++ = *pt2++; /* each source element is read before the same position is overwritten by a later statement */
    2409       85764 :         *pt4++ = *pt5++;
    2410       85764 :         *pt6++ = *pt3++;
    2411             :     }
    2412             : 
    2413             :     /* update flux buffer (left unchanged while attack_hangover is positive) */
    2414        2042 :     if ( attack_hangover <= 0 )
    2415             :     {
    2416      122520 :         for ( i = 0; i < BUF_LEN - 1; i++ )
    2417             :         {
    2418      120478 :             buf_flux[i] = buf_flux[i + 1];
    2419             :         }
    2420             : 
    2421        2042 :         buf_flux[i] = flux; /* i == BUF_LEN - 1 here: newest value goes to the end */
    2422             :     }
    2423             : 
    2424        2042 :     return;
    2425             : }
    2426             : 
    2427             : 
    2428             : /*---------------------------------------------------------------------*
    2429             :  * tonal_dist()
    2430             :  *
    2431             :  * Calculation of spectral distance
                     :  *
                     :  * Derives four per-frame features from p2v_map[0 .. 126] and appends them
                     :  * to the feature buffers, which are shifted by one entry first:
                     :  *   Ntonal    - number of bins with p2v_map > 55
                     :  *   Ntonal2   - number of bins with p2v_map > 80
                     :  *   Ntonal_lf - number of bins with p2v_map > 80 among bins 0 .. 63
                     :  *   pk        - sum of p2v_map over bins 64 .. 126
    2432             :  *---------------------------------------------------------------------*/
    2433             : 
    2434        2042 : static void tonal_dist(
    2435             :     float *p2v_map,      /* i  : spectral peakiness map                          */
    2436             :     float *buf_pkh,      /* i/o: buffer storing highband spectral peakiness      */
    2437             :     float *buf_Ntonal,   /* i/o: buffer storing No.of 1st spectral tone (bins with p2v_map > 55) */
    2438             :     float *buf_Ntonal2,  /* i/o: buffer storing No.of 2nd spectral tone (bins with p2v_map > 80) */
    2439             :     float *buf_Ntonal_lf /* i/o: buffer storing No.of 2nd spectral tone in bins 0..63 (a count is stored here, not a ratio) */
    2440             : )
    2441             : {
    2442             :     int16_t i;
    2443             :     float pk;
    2444             :     int16_t Ntonal;
    2445             :     int16_t Ntonal2;
    2446             :     int16_t Ntonal_lf;
    2447             : 
    2448             :     /* find number of tonals, number of tonals at low-band,
    2449             :     spectral peakiness at high-band */
    2450        2042 :     pk = 0;
    2451        2042 :     Ntonal = 0;
    2452        2042 :     Ntonal2 = 0;
    2453        2042 :     Ntonal_lf = 0;
    2454      132730 :     for ( i = 0; i < 64; i++ ) /* lower part of the map: bins 0 .. 63 */
    2455             :     {
    2456      130688 :         if ( p2v_map[i] > 55 )
    2457             :         {
    2458        9999 :             Ntonal++;
    2459             :         }
    2460             : 
    2461      130688 :         if ( p2v_map[i] > 80 )
    2462             :         {
    2463        5854 :             Ntonal2++;
    2464        5854 :             Ntonal_lf++; /* only incremented in this lower-bin loop */
    2465             :         }
    2466             :     }
    2467             : 
    2468      130688 :     for ( i = 64; i < 127; i++ ) /* upper part of the map: bins 64 .. 126 */
    2469             :     {
    2470      128646 :         if ( p2v_map[i] != 0 )
    2471             :         {
    2472       35136 :             pk += p2v_map[i]; /* peakiness is accumulated over the upper bins only */
    2473             :         }
    2474             : 
    2475      128646 :         if ( p2v_map[i] > 55 )
    2476             :         {
    2477        4382 :             Ntonal++;
    2478             :         }
    2479             : 
    2480      128646 :         if ( p2v_map[i] > 80 )
    2481             :         {
    2482        1596 :             Ntonal2++;
    2483             :         }
    2484             :     }
    2485             : 
    2486             :     /* update buffers: drop the oldest entry, the newest goes to index BUF_LEN - 1 */
    2487      122520 :     for ( i = 0; i < BUF_LEN - 1; i++ )
    2488             :     {
    2489      120478 :         buf_pkh[i] = buf_pkh[i + 1];
    2490      120478 :         buf_Ntonal[i] = buf_Ntonal[i + 1];
    2491      120478 :         buf_Ntonal2[i] = buf_Ntonal2[i + 1];
    2492      120478 :         buf_Ntonal_lf[i] = buf_Ntonal_lf[i + 1];
    2493             :     }
    2494             : 
    2495        2042 :     buf_pkh[i] = pk; /* i == BUF_LEN - 1 after the loop */
    2496        2042 :     buf_Ntonal[i] = (float) Ntonal;
    2497        2042 :     buf_Ntonal2[i] = (float) Ntonal2;
    2498        2042 :     buf_Ntonal_lf[i] = (float) Ntonal_lf;
    2499             : 
    2500        2042 :     return;
    2501             : }
    2502             : 
    2503             : 
    2504             : /*---------------------------------------------------------------------*
    2505             :  * mode_decision()
    2506             :  *
    2507             :  * Decision about internal mode of the mixed/music classifier improvement
                     :  *
                     :  * Returns the mode as 0 or 1. The starting value is ( *dec_mov > 0.5 ) and it
                     :  * is refined from the feature buffers depending on how many frames (len)
                     :  * have been buffered:
                     :  *   len <= 5      - the starting value is returned unchanged
                     :  *   5 < len < 10  - decision from statistics over the last len entries
                     :  *   len >= 10     - decision from the last 10 entries, then from growing
                     :  *                   windows of 10 .. len - 1 entries, and finally from the
                     :  *                   tonal counters once len == BUF_LEN
                     :  * The two early exits on the last 10 entries also overwrite *dec_mov
                     :  * (with 1 and 0 respectively).
    2508             :  *---------------------------------------------------------------------*/
    2509             : 
    2510        2042 : static int16_t mode_decision(
    2511             :     Encoder_State *st, /* i  : encoder state structure (only hSpMusClas->lps and hSpMusClas->lpm are read here) */
    2512             :     int16_t len,            /* i  : buffering status                                */
    2513             :     float *dec_mov,         /* i/o: moving average of classifier decision           */
    2514             :     float *buf_flux,        /* i  : buffer storing spectral energy fluctuation      */
    2515             :     float *buf_epsP_tilt,   /* i  : buffer storing LP prediction error tilt         */
    2516             :     float *buf_pkh,         /* i  : buffer storing highband spectral peakiness      */
    2517             :     float *buf_cor_map_sum, /* i  : buffer storing correlation map sum              */
    2518             :     float *buf_Ntonal,      /* i  : buffer storing No.of 1st spectral tone          */
    2519             :     float *buf_Ntonal2,     /* i  : buffer storing No.of 2nd spectral tone          */
    2520             :     float *buf_Ntonal_lf,   /* i  : buffer storing low band spectral tone count (the ratio is formed in this function) */
    2521             :     float *buf_dlp          /* i  : buffer storing voicing estimate (entries 0..9 are read) */
    2522             : )
    2523             : {
    2524             :     int16_t mode;
    2525             :     int16_t i;
    2526             :     int16_t voiced_cnt; /* number of inspected buf_dlp entries that are > 0 */
    2527             :     float M_pkh;
    2528             :     float M_cor_map_sum;
    2529             :     float M_Ntonal;
    2530             :     float M_flux;
    2531             :     float V_epsP_tilt;
    2532             :     float lf_Ntonal_ratio;
    2533             : 
    2534        2042 :     mode = *dec_mov > 0.5f; /* starting value: thresholded moving average of past decisions */
    2535             : 
    2536        2042 :     if ( len <= 5 ) /* too few buffered frames: keep the starting value */
    2537             :     {
    2538          20 :         return ( mode );
    2539             :     }
    2540        2022 :     else if ( len < 10 ) /* 6 .. 9 buffered frames: statistics over the last len entries */
    2541             :     {
    2542          16 :         M_pkh = mean( buf_pkh + BUF_LEN - len, len );
    2543          16 :         M_cor_map_sum = mean( buf_cor_map_sum + BUF_LEN - len, len );
    2544          16 :         M_Ntonal = mean( buf_Ntonal + BUF_LEN - len, len );
    2545          16 :         V_epsP_tilt = var( buf_epsP_tilt + BUF_LEN - len, len );
    2546             : 
    2547          16 :         voiced_cnt = 0;
    2548         112 :         for ( i = 9; i > 3; i-- ) /* inspects the six entries buf_dlp[4 .. 9] */
    2549             :         {
    2550          96 :             if ( buf_dlp[i] > 0.0f )
    2551             :             {
    2552           4 :                 voiced_cnt++;
    2553             :             }
    2554             :         }
    2555             : 
    2556          16 :         if ( ( M_pkh > 1100 || V_epsP_tilt < 0.00008f || M_cor_map_sum > 100 ) && voiced_cnt < 4 )
    2557             :         {
    2558           1 :             mode = 1;
    2559             :         }
    2560          15 :         else if ( M_Ntonal > 27 && voiced_cnt < 4 )
    2561             :         {
    2562           0 :             mode = 1;
    2563             :         }
    2564             :     }
    2565             :     else
    2566             :     {
    2567        2006 :         voiced_cnt = 0;
    2568       22066 :         for ( i = 0; i < 10; i++ ) /* inspects all ten entries buf_dlp[0 .. 9] */
    2569             :         {
    2570       20060 :             if ( buf_dlp[i] > 0.0f )
    2571             :             {
    2572       10018 :                 voiced_cnt++;
    2573             :             }
    2574             :         }
    2575             : 
    2576        2006 :         M_flux = mean( &buf_flux[BUF_LEN - 10], 10 ); /* statistics over the 10 most recent entries */
    2577        2006 :         M_pkh = mean( buf_pkh + BUF_LEN - 10, 10 );
    2578        2006 :         M_cor_map_sum = mean( buf_cor_map_sum + BUF_LEN - 10, 10 );
    2579        2006 :         V_epsP_tilt = var( buf_epsP_tilt + BUF_LEN - 10, 10 );
    2580             :         /* NOTE(review): the literal indices 55 and 59 below look like BUF_LEN - 5 and BUF_LEN - 1 - confirm BUF_LEN == 60 */
    2581        2006 :         if ( ( M_flux < 8.5f || ( V_epsP_tilt < 0.001f && M_flux < 12.0f ) || M_pkh > 1050 || M_cor_map_sum > 100 ) && voiced_cnt < 3 && mean( &buf_flux[55], 5 ) < 15 )
    2582             :         {
    2583         240 :             mode = 1;
    2584         240 :             *dec_mov = 1; /* decisive exit: the moving average is forced to 1 as well */
    2585         240 :             return ( mode );
    2586             :         }
    2587             : 
    2588        1766 :         if ( M_flux > 16.0f || ( M_flux > 15 && voiced_cnt > 2 ) || mean( &buf_flux[55], 5 ) > 19.0f || ( buf_flux[59] >= 20 && st->hSpMusClas->lps - st->hSpMusClas->lpm > 0 ) )
    2589             :         {
    2590        1545 :             *dec_mov = 0; /* decisive exit: the moving average is forced to 0 as well */
    2591        1545 :             mode = 0;
    2592        1545 :             return ( mode );
    2593             :         }
    2594             :         /* undecided so far: re-evaluate over windows of the last i entries; NOTE(review): the thresholds below depend on len, not on i, so only the window length changes per iteration - confirm intended */
    2595        4580 :         for ( i = 10; i < len; i++ )
    2596             :         {
    2597        4492 :             M_flux = mean( &buf_flux[BUF_LEN - i], i );
    2598        4492 :             M_pkh = mean( buf_pkh + BUF_LEN - i, i );
    2599        4492 :             M_cor_map_sum = mean( buf_cor_map_sum + BUF_LEN - i, i );
    2600        4492 :             V_epsP_tilt = var( buf_epsP_tilt + BUF_LEN - i, i );
    2601             : 
    2602        4492 :             if ( ( ( M_flux < 12 + 0.05f * ( len - 10 ) && mean( &buf_flux[BUF_LEN - 10], 10 ) < 15 ) || V_epsP_tilt < 0.0001f + 0.000018f * ( len - 10 ) || M_pkh > 1050 - 5.0f * ( len - 10 ) || M_cor_map_sum > 95 - 0.3f * ( len - 10 ) ) && voiced_cnt < 3 )
    2603             :             {
    2604         133 :                 mode = 1; /* unlike the exits above, *dec_mov is not modified here */
    2605         133 :                 return ( mode );
    2606             :             }
    2607             :         }
    2608             : 
    2609          88 :         if ( len == BUF_LEN ) /* buffers completely filled: use the tonal counters over the whole buffer */
    2610             :         {
    2611          87 :             M_Ntonal = mean( buf_Ntonal, BUF_LEN );
    2612          87 :             lf_Ntonal_ratio = sum_f( buf_Ntonal_lf, BUF_LEN ) / ( sum_f( buf_Ntonal2, BUF_LEN ) + 0.0001f ); /* 0.0001f keeps the denominator non-zero when buf_Ntonal2 sums to 0 */
    2613             : 
    2614          87 :             if ( M_Ntonal > 18 || lf_Ntonal_ratio < 0.2f )
    2615             :             {
    2616           0 :                 mode = 1;
    2617             :             }
    2618          87 :             else if ( M_Ntonal < 1 )
    2619             :             {
    2620           0 :                 mode = 0;
    2621             :             }
    2622             :         }
    2623             :     }
    2624             : 
    2625         104 :     return ( mode );
    2626             : }
    2627             : 
    2628             : 
    2629             : /*----------------------------------------------------------------------------------*
    2630             :  * tonal_context_improv()
    2631             :  *
    2632             :  * Context-based improvement of 1st/2nd stage speech/music decision on stable tonal signals
                     :  *
                     :  * Computes a tonality measure in three bands of PS[0 .. 79], updates the
                     :  * long-term voicing/correlation/tonality/pitch context in st->hSpMusClas and
                     :  * may overwrite st->sp_aud_decision1 and st->sp_aud_decision2 in three places:
                     :  * the context test with its hangover, and the final decision driven by the
                     :  * "strong music" and "strong speech" state machines.
    2633             :  *----------------------------------------------------------------------------------*/
    2634             : 
    2635        2050 : static void tonal_context_improv(
    2636             :     Encoder_State *st,          /* i/o: encoder state structure                       */
    2637             :     const float PS[],           /* i  : energy spectrum (the first 80 values are used) */
    2638             :     const float voi_fv,         /* i  : scaled voicing feature                        */
    2639             :     const float cor_map_sum_fv, /* i  : scaled correlation map feature                */
    2640             :     const float LPCErr          /* i  : scaled LP prediction error feature            */
    2641             : )
    2642             : {
    2643             :     int16_t lt_pitch_diff;
    2644             :     float sort_max, sort_avg, sort_val[80];
    2645             :     float tonality, tonality1, tonality2, tonality3, t2, t3, tL, err, cor, dft;
    2646             : 
    2647        2050 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    2648             : 
    2649             :     /* reset in case of codec mode switching */
    2650        2050 :     if ( st->last_codec_mode == MODE2 )
    2651             :     {
    2652         275 :         set_f( hSpMusClas->tonality2_buf, 0, HANG_LEN_INIT );
    2653         275 :         set_f( hSpMusClas->tonality3_buf, 0, HANG_LEN_INIT );
    2654         275 :         set_f( hSpMusClas->LPCErr_buf, 0, HANG_LEN_INIT );
    2655         275 :         hSpMusClas->lt_music_hangover = 0;
    2656         275 :         hSpMusClas->lt_music_state = 0;
    2657         275 :         hSpMusClas->lt_speech_state = 0;
    2658         275 :         hSpMusClas->lt_speech_hangover = 0;
    2659             :     }
    2660             : 
    2661             :     /* estimate maximum tonality in bands [0-1 kHz], [1-2kHz] and [2-4kHz] */
    2662        2050 :     mvr2r( PS, sort_val, 80 ); /* work on a copy, PS itself is const */
    2663             : 
    2664             :     /* tonality in band 0-1 kHz */
    2665        2050 :     v_sort( sort_val, 0, 19 ); /* presumably sorts sort_val[0..19] in ascending order - verify against v_sort() */
    2666        2050 :     sort_max = sort_val[19];
    2667        2050 :     sort_avg = sum_f( &sort_val[0], 10 ); /* despite the name this looks like a sum, not a mean, of the first 10 sorted values - verify against sum_f() */
    2668        2050 :     tonality1 = sort_max / sort_avg; /* NOTE(review): no guard against sort_avg == 0 - confirm PS cannot be all-zero in this range */
    2669             : 
    2670             :     /* tonality in band 1-2 kHz */
    2671        2050 :     v_sort( sort_val, 20, 39 );
    2672        2050 :     sort_max = sort_val[39];
    2673        2050 :     sort_avg = sum_f( &sort_val[20], 10 );
    2674        2050 :     tonality2 = sort_max / sort_avg;
    2675             : 
    2676             :     /* tonality in band 2-4 kHz */
    2677        2050 :     v_sort( sort_val, 40, 79 );
    2678        2050 :     sort_max = sort_val[79];
    2679        2050 :     sort_avg = sum_f( &sort_val[40], 20 ); /* this band has 40 values, the first 20 sorted ones are accumulated */
    2680        2050 :     tonality3 = sort_max / sort_avg;
    2681             : 
    2682        2050 :     tonality = max( max( tonality1, tonality2 ), tonality3 );
    2683             :     /* long-term parameters: the new value is weighted 0.9 in the first branch and 0.3 (0.5 for tonality) otherwise */
    2684        2050 :     if ( st->hVAD->hangover_cnt == 10 && st->vad_flag == 1 )
    2685             :     {
    2686             :         /* long-term voicing parameter */
    2687          10 :         hSpMusClas->lt_voicing = 0.1f * hSpMusClas->lt_voicing + 0.9f * *st->voicing;
    2688             : 
    2689             :         /* long-term correlation value */
    2690          10 :         hSpMusClas->lt_corr = 0.1f * hSpMusClas->lt_corr + 0.9f * st->old_corr;
    2691             : 
    2692             :         /* long-term tonality measure */
    2693          10 :         hSpMusClas->lt_tonality = 0.1f * hSpMusClas->lt_tonality + 0.9f * tonality;
    2694             :     }
    2695             :     else
    2696             :     {
    2697             :         /* long-term voicing parameter */
    2698        2040 :         hSpMusClas->lt_voicing = 0.7f * hSpMusClas->lt_voicing + 0.3f * *st->voicing;
    2699             : 
    2700             :         /* long-term correlation value */
    2701        2040 :         hSpMusClas->lt_corr = 0.7f * hSpMusClas->lt_corr + 0.3f * st->old_corr;
    2702             : 
    2703             :         /* long-term tonality measure */
    2704        2040 :         hSpMusClas->lt_tonality = 0.5f * hSpMusClas->lt_tonality + 0.5f * tonality;
    2705             :     }
    2706             : 
    2707             :     /* pitch difference w.r.t to past 3 frames: sum of absolute differences to st->pitch[0] */
    2708        2050 :     lt_pitch_diff = (int16_t) abs( hSpMusClas->lt_corr_pitch[0] - st->pitch[0] );
    2709        2050 :     lt_pitch_diff += (int16_t) abs( hSpMusClas->lt_corr_pitch[1] - st->pitch[0] );
    2710        2050 :     lt_pitch_diff += (int16_t) abs( hSpMusClas->lt_corr_pitch[2] - st->pitch[0] );
    2711             :     /* shift the pitch history and append the current value */
    2712        2050 :     hSpMusClas->lt_corr_pitch[0] = hSpMusClas->lt_corr_pitch[1];
    2713        2050 :     hSpMusClas->lt_corr_pitch[1] = hSpMusClas->lt_corr_pitch[2];
    2714        2050 :     hSpMusClas->lt_corr_pitch[2] = st->pitch[0];
    2715             :     /* shift the decision history; entry [2] is written at the end of this function */
    2716        2050 :     hSpMusClas->lt_old_mode[0] = hSpMusClas->lt_old_mode[1];
    2717        2050 :     hSpMusClas->lt_old_mode[1] = hSpMusClas->lt_old_mode[2];
    2718             :     /* context test: decision1 is set, all three bands are strongly tonal and at least one long-term voicing/correlation/pitch-gain condition holds */
    2719        2727 :     if ( st->sp_aud_decision1 == 1 &&
    2720        1224 :          ( min( min( tonality1, tonality2 ), tonality3 ) > 50.0f ) &&
    2721          51 :          ( tonality1 + tonality2 > 200.0f && tonality2 + tonality3 > 200.0f && tonality1 + tonality3 > 200.0f ) &&
    2722          33 :          ( hSpMusClas->lt_tonality < 20000.0f ) &&
    2723          33 :          ( ( hSpMusClas->lt_tonality > 1000 && max( hSpMusClas->lt_voicing, *st->voicing ) > 0.99f ) ||
    2724          33 :            ( hSpMusClas->lt_tonality > 1500 && hSpMusClas->lt_corr > 0.99f ) ||
    2725          33 :            ( hSpMusClas->lt_tonality > 3000 && hSpMusClas->lowrate_pitchGain > 0.96f ) ||
    2726          19 :            ( lt_pitch_diff == 0 && hSpMusClas->lowrate_pitchGain > 0.89f ) ) )
    2727             :     {
    2728           0 :         if ( sum_s( hSpMusClas->lt_old_mode, 2 ) < 2 ) /* the first two history entries are not both 1, assuming they only hold 0 or 1 */
    2729             :         {
    2730             :             /* probably speech - change the decision to speech */
    2731           0 :             st->sp_aud_decision1 = 0;
    2732           0 :             st->sp_aud_decision2 = 0;
    2733             : 
    2734           0 :             if ( hSpMusClas->lt_hangover == 0 ) /* start the hangover only if none is running */
    2735             :             {
    2736           0 :                 hSpMusClas->lt_hangover = 6;
    2737             :             }
    2738             :         }
    2739             :     }
    2740             :     else
    2741             :     {
    2742             :         /* not speech, but still in the hangover period - change the decision to speech */
    2743        2050 :         if ( hSpMusClas->lt_hangover > 0 )
    2744             :         {
    2745           0 :             st->sp_aud_decision1 = 0;
    2746           0 :             st->sp_aud_decision2 = 0;
    2747           0 :             hSpMusClas->lt_hangover--;
    2748             :         }
    2749             :     }
    2750             : 
    2751             :     /* calculate standard deviation of log-tonality (history buffers of HANG_LEN_INIT values, newest last) */
    2752        2050 :     mvr2r( hSpMusClas->tonality2_buf + 1, hSpMusClas->tonality2_buf, HANG_LEN_INIT - 1 );
    2753        2050 :     hSpMusClas->tonality2_buf[HANG_LEN_INIT - 1] = 0.2f * log10f( tonality2 );
    2754        2050 :     t2 = std_dev( hSpMusClas->tonality2_buf, HANG_LEN_INIT );
    2755             : 
    2756        2050 :     mvr2r( hSpMusClas->tonality3_buf + 1, hSpMusClas->tonality3_buf, HANG_LEN_INIT - 1 );
    2757        2050 :     hSpMusClas->tonality3_buf[HANG_LEN_INIT - 1] = 0.2f * log10f( tonality3 );
    2758        2050 :     t3 = std_dev( hSpMusClas->tonality3_buf, HANG_LEN_INIT );
    2759             : 
    2760        2050 :     tL = 0.2f * log10f( hSpMusClas->lt_tonality ); /* scaled log of the long-term tonality itself, not a standard deviation */
    2761             : 
    2762             :     /* calculate standard deviation of residual LP energy */
    2763        2050 :     mvr2r( hSpMusClas->LPCErr_buf + 1, hSpMusClas->LPCErr_buf, HANG_LEN_INIT - 1 );
    2764        2050 :     hSpMusClas->LPCErr_buf[HANG_LEN_INIT - 1] = LPCErr;
    2765        2050 :     err = std_dev( hSpMusClas->LPCErr_buf, HANG_LEN_INIT );
    2766             : 
    2767        2050 :     cor = max( voi_fv - cor_map_sum_fv, 0.0f ); /* difference of the two scaled features, floored at 0 */
    2768        2050 :     dft = 0.2f * fabsf( log10f( tonality2 ) - log10f( tonality3 ) ); /* scaled log-distance between the tonality of the two upper bands */
    2769             : 
    2770             :     /* state machine for strong music: each state change starts a hangover of 6 that must run down before the next change */
    2771        2050 :     if ( st->sp_aud_decision1 == 1 && hSpMusClas->lt_music_state == 0 && hSpMusClas->lt_music_hangover == 0 &&
    2772         540 :          t2 < 0.54f && t2 > 0.26f && t3 > 0.22f && tL < 0.54f && tL > 0.26f && err > 0.5f )
    2773             :     {
    2774           7 :         hSpMusClas->lt_music_state = 1;
    2775           7 :         hSpMusClas->lt_music_hangover = 6;
    2776             :     }
    2777        2043 :     else if ( hSpMusClas->lt_music_state == 1 && hSpMusClas->lt_music_hangover == 0 && t2 < 0.34 && t3 < 0.26f && tL < 0.45f ) /* NOTE(review): 0.34 has no f suffix, so this one comparison is evaluated in double - confirm intended */
    2778             :     {
    2779           6 :         hSpMusClas->lt_music_state = 0;
    2780           6 :         hSpMusClas->lt_music_hangover = 6;
    2781             :     }
    2782             : 
    2783        2050 :     if ( hSpMusClas->lt_music_hangover > 0 )
    2784             :     {
    2785          74 :         hSpMusClas->lt_music_hangover--;
    2786             :     }
    2787             : 
    2788             :     /* state machine for strong speech: same hangover scheme as for strong music */
    2789        2050 :     if ( st->sp_aud_decision1 == 1 && hSpMusClas->lt_speech_state == 0 && hSpMusClas->lt_speech_hangover == 0 &&
    2790         129 :          cor > 0.40f && dft < 0.1f && voi_fv > 2 * cor_map_sum_fv + 0.12f &&
    2791          17 :          t2 < cor && t3 < cor && tL < cor && cor_map_sum_fv < cor && voi_fv > cor && voi_fv > 0.76f )
    2792             :     {
    2793           7 :         hSpMusClas->lt_speech_state = 1;
    2794           7 :         hSpMusClas->lt_speech_hangover = 6;
    2795             :     }
    2796        2043 :     else if ( hSpMusClas->lt_speech_state == 1 && hSpMusClas->lt_speech_hangover == 0 && cor < 0.40f )
    2797             :     {
    2798           6 :         hSpMusClas->lt_speech_state = 0;
    2799           6 :         hSpMusClas->lt_speech_hangover = 6;
    2800             :     }
    2801             : 
    2802        2050 :     if ( hSpMusClas->lt_speech_hangover > 0 )
    2803             :     {
    2804          66 :         hSpMusClas->lt_speech_hangover--;
    2805             :     }
    2806             : 
    2807             :     /* final decision: the speech state is tested first, so it wins when both states are active */
    2808        2050 :     if ( st->sp_aud_decision1 == 1 && hSpMusClas->lt_speech_state == 1 )
    2809             :     {
    2810             :         /* strong speech - probably error in speech/music classification */
    2811          37 :         st->sp_aud_decision1 = 0;
    2812          37 :         st->sp_aud_decision2 = 0;
    2813             :     }
    2814        2013 :     else if ( st->sp_aud_decision1 == 0 && hSpMusClas->lt_music_state == 1 )
    2815             :     {
    2816             :         /* strong music - probably error in speech/music classification */
    2817           0 :         st->sp_aud_decision1 = 1;
    2818           0 :         st->sp_aud_decision2 = 1;
    2819             :     }
    2820             : 
    2821             :     /* update the buffer of past decisions with the final value of this frame */
    2822        2050 :     hSpMusClas->lt_old_mode[2] = st->sp_aud_decision1;
    2823             : 
    2824        2050 :     return;
    2825             : }
    2826             : 
    2827             : /*---------------------------------------------------------------------*
    2828             :  * detect_sparseness()
    2829             :  *
    2830             :  * Spectral sparseness analysis based on the spectrum stored in st->Bin_E
    2831             :  *---------------------------------------------------------------------*/
    2832             : 
    2833        1042 : static void detect_sparseness(
    2834             :     Encoder_State *st,             /* i/o: encoder state structure                */
    2835             :     const int16_t localVAD_HE_SAD, /* i  : HE-SAD flag without hangover           */
    2836             :     const float voi_fv             /* i  : scaled voicing feature                 */
    2837             : )
    2838             : {
    2839             :     float sum;
    2840             :     float ftmp;
    2841             :     float ftmp1;
    2842             :     float S1[128];
    2843             :     int16_t i, j;
    2844        1042 :     int16_t hb_sp_high_flag = 0;
    2845        1042 :     int16_t lb_sp_high_flag = 0;
    2846             :     float sumh;
    2847             :     float sparse;
    2848             :     float tmp_buf[4];
    2849        1042 :     float Mlpe = 0.0f;
    2850        1042 :     float Mv = 0.0f;
    2851             :     float Msp;
    2852             : 
    2853        1042 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    2854             : 
    2855        1042 :     mvr2r( st->Bin_E, S1, 128 );
    2856             : 
    2857        1042 :     sum = 0;
    2858       84402 :     for ( i = 0; i < 80; i++ )
    2859             :     {
    2860       83360 :         if ( S1[i] < 0 )
    2861             :         {
    2862       17654 :             S1[i] = 0;
    2863             :         }
    2864       83360 :         sum += S1[i];
    2865             :     }
    2866             : 
    2867        1042 :     sumh = 0;
    2868       51058 :     for ( i = 80; i < 128; i++ )
    2869             :     {
    2870       50016 :         if ( S1[i] < 0 )
    2871             :         {
    2872       13266 :             S1[i] = 0;
    2873             :         }
    2874       50016 :         sumh += S1[i];
    2875             :     }
    2876             : 
    2877        1042 :     sum += sumh;
    2878             : 
    2879             :     /* order spectral from max to min */
    2880        1042 :     order_spectrum( S1, 128 );
    2881             : 
    2882             :     /* calculate spectral sparseness in the range 0 - 6.4 kHz */
    2883        1042 :     j = 0;
    2884        1042 :     ftmp = 0.0f;
    2885        1042 :     ftmp1 = 0.75f * sum;
    2886       55534 :     for ( i = 0; i < 128; i++ )
    2887             :     {
    2888       55528 :         ftmp += S1[i];
    2889       55528 :         if ( ftmp > ftmp1 )
    2890             :         {
    2891        1036 :             j = i;
    2892        1036 :             break;
    2893             :         }
    2894             :     }
    2895             : 
    2896        8336 :     for ( i = 0; i < HANG_LEN_INIT - 1; i++ )
    2897             :     {
    2898        7294 :         hSpMusClas->sparse_buf[i] = hSpMusClas->sparse_buf[i + 1];
    2899             :     }
    2900             : 
    2901        1042 :     sparse = (float) j;
    2902        1042 :     hSpMusClas->sparse_buf[i] = sparse;
    2903             : 
    2904        1042 :     if ( st->bwidth == WB )
    2905             :     {
    2906           0 :         Msp = mean( hSpMusClas->sparse_buf, 8 );
    2907             : 
    2908             :         /* find long-term smoothed sparseness */
    2909           0 :         if ( hSpMusClas->last_vad_spa == 0 )
    2910             :         {
    2911           0 :             set_f( &hSpMusClas->sparse_buf[0], sparse, HANG_LEN_INIT - 1 );
    2912           0 :             hSpMusClas->LT_sparse = sparse;
    2913             :         }
    2914             :         else
    2915             :         {
    2916           0 :             set_f( tmp_buf, 0.0f, 4 );
    2917             : 
    2918           0 :             for ( i = 0; i < HANG_LEN_INIT; i++ )
    2919             :             {
    2920           0 :                 for ( j = 0; j < 4; j++ )
    2921             :                 {
    2922           0 :                     if ( hSpMusClas->sparse_buf[i] > tmp_buf[j] )
    2923             :                     {
    2924           0 :                         mvr2r( &tmp_buf[j], &tmp_buf[j + 1], 3 - j );
    2925           0 :                         tmp_buf[j] = hSpMusClas->sparse_buf[i];
    2926           0 :                         break;
    2927             :                     }
    2928             :                 }
    2929             :             }
    2930             : 
    2931           0 :             ftmp = 0.25f * ( HANG_LEN_INIT * Msp - sum_f( tmp_buf, 4 ) ) - hSpMusClas->LT_sparse;
    2932             : 
    2933           0 :             hSpMusClas->LT_sparse = hSpMusClas->LT_sparse + 0.25f * ftmp;
    2934             :         }
    2935             : 
    2936             :         /* find high-band sparseness */
    2937           0 :         mvr2r( st->Bin_E + 80, S1, 48 );
    2938           0 :         order_spectrum( S1, 48 );
    2939             : 
    2940           0 :         for ( i = 0; i < HANG_LEN_INIT - 1; i++ )
    2941             :         {
    2942           0 :             hSpMusClas->hf_spar_buf[i] = hSpMusClas->hf_spar_buf[i + 1];
    2943             :         }
    2944           0 :         hSpMusClas->hf_spar_buf[i] = sum_f( S1, 5 ) / ( sumh + 0.1f );
    2945           0 :         if ( mean( hSpMusClas->hf_spar_buf, 8 ) > 0.2f )
    2946             :         {
    2947           0 :             hb_sp_high_flag = 1;
    2948             :         }
    2949             : 
    2950             :         /* find low-band sparseness */
    2951           0 :         mvr2r( st->Bin_E, S1, 60 );
    2952           0 :         order_spectrum( S1, 60 );
    2953             : 
    2954           0 :         if ( sum_f( S1, 5 ) / sum_f( S1, 60 ) > 0.18f )
    2955             :         {
    2956           0 :             lb_sp_high_flag = 1;
    2957             :         }
    2958             : 
    2959             :         /* find smoothed linear prediction efficiency */
    2960           0 :         for ( i = 0; i < 7; i++ )
    2961             :         {
    2962           0 :             hSpMusClas->lpe_buf[i] = hSpMusClas->lpe_buf[i + 1];
    2963             :         }
    2964             : 
    2965           0 :         hSpMusClas->lpe_buf[i] = hSpMusClas->past_epsP2;
    2966           0 :         Mlpe = mean( hSpMusClas->lpe_buf, 8 );
    2967             : 
    2968             :         /* find smoothed voicing */
    2969           0 :         for ( i = 0; i < HANG_LEN_INIT - 1; i++ )
    2970             :         {
    2971           0 :             hSpMusClas->voicing_buf[i] = hSpMusClas->voicing_buf[i + 1];
    2972             :         }
    2973             : 
    2974           0 :         hSpMusClas->voicing_buf[i] = voi_fv;
    2975           0 :         Mv = mean( hSpMusClas->voicing_buf, 8 );
    2976             :     }
    2977             : 
    2978             :     /* avoid using LR-MDCT on sparse spectra */
    2979        1042 :     if ( st->sp_aud_decision1 == 1 )
    2980             :     {
    2981         308 :         if ( st->bwidth == WB )
    2982             :         {
    2983           0 :             ftmp = 90;
    2984             :         }
    2985             :         else
    2986             :         {
    2987         308 :             ftmp = 91;
    2988             :         }
    2989         308 :         if ( sparse > ftmp )
    2990             :         {
    2991           0 :             st->sp_aud_decision1 = 0;
    2992           0 :             st->sp_aud_decision2 = 1;
    2993           0 :             hSpMusClas->gsc_hangover = 1;
    2994             :         }
    2995         308 :         else if ( hSpMusClas->gsc_hangover == 1 )
    2996             :         {
    2997           0 :             if ( sparse > 85 )
    2998             :             {
    2999           0 :                 st->sp_aud_decision1 = 0;
    3000           0 :                 st->sp_aud_decision2 = 1;
    3001             :             }
    3002           0 :             else if ( fabs( sparse - mean( &hSpMusClas->sparse_buf[HANG_LEN_INIT - 1 - hSpMusClas->gsc_cnt], hSpMusClas->gsc_cnt ) ) < 7.0f )
    3003             :             {
    3004           0 :                 st->sp_aud_decision1 = 0;
    3005           0 :                 st->sp_aud_decision2 = 1;
    3006             :             }
    3007             :         }
    3008             : 
    3009         308 :         if ( st->bwidth == WB )
    3010             :         {
    3011           0 :             if ( hSpMusClas->LT_sparse > 60 && sparse > 50 && Mlpe < -1.3f && Mv > 0.85f &&
    3012           0 :                  lb_sp_high_flag == 0 && ( ( hb_sp_high_flag == 0 && sumh > 0.15f * sum ) || sumh <= 0.15f * sum ) )
    3013             :             {
    3014           0 :                 st->sp_aud_decision1 = 0;
    3015           0 :                 st->sp_aud_decision2 = 1;
    3016           0 :                 hSpMusClas->gsc_hangover = 1;
    3017             :             }
    3018           0 :             else if ( hSpMusClas->gsc_hangover == 1 && !( st->sp_aud_decision1 == 0 && st->sp_aud_decision2 == 1 ) )
    3019             :             {
    3020           0 :                 if ( fabs( sparse - mean( &hSpMusClas->sparse_buf[HANG_LEN_INIT - 1 - hSpMusClas->gsc_cnt], hSpMusClas->gsc_cnt ) ) < 7.0f )
    3021             :                 {
    3022           0 :                     st->sp_aud_decision1 = 0;
    3023           0 :                     st->sp_aud_decision2 = 1;
    3024             :                 }
    3025             :             }
    3026             :         }
    3027             :     }
    3028             : 
    3029             :     /* update the counter of consecutive GSC frames with sparse spectrum */
    3030        1042 :     if ( st->sp_aud_decision1 == 0 && st->sp_aud_decision2 == 1 )
    3031             :     {
    3032           0 :         ( hSpMusClas->gsc_cnt )++;
    3033           0 :         if ( hSpMusClas->gsc_cnt > 7 )
    3034             :         {
    3035           0 :             hSpMusClas->gsc_cnt = 7;
    3036             :         }
    3037             :     }
    3038             :     else
    3039             :     {
    3040        1042 :         hSpMusClas->gsc_cnt = 0;
    3041        1042 :         hSpMusClas->gsc_hangover = 0;
    3042             :     }
    3043             : 
    3044        1042 :     hSpMusClas->last_vad_spa = localVAD_HE_SAD;
    3045             : 
    3046        1042 :     return;
    3047             : }
    3048             : 
    3049             : 
    3050             : /*---------------------------------------------------------------------*
    3051             :  * order_spectrum()
    3052             :  * Reorders vec[0..len-1] in place from the largest to the smallest value
    3053             :  * Pass i scans vec[i..len-i-1] and moves its largest value to vec[i] and its smallest to vec[len-i-1]
    3054             :  *---------------------------------------------------------------------*/
    3055             : 
    3056        1042 : static void order_spectrum(
    3057             :     float *vec,         /* i/o: values to be ordered, overwritten with the ordered values */
    3058             :     const int16_t len ) /* i  : number of values in vec */
    3059             : {
    3060             :     int16_t i, j, imax, imin;
    3061             :     float temp;
    3062             :     /* len / 2 passes; each pass fixes one position at the front and one at the back of the remaining range */
    3063       67730 :     for ( i = 0; i < len / 2; i++ )
    3064             :     {
    3065       66688 :         imax = i;
    3066       66688 :         imin = i;
    3067     4401408 :         for ( j = i; j < len - i; j++ )
    3068             :         {
    3069     4334720 :             if ( vec[j] > vec[imax] )
    3070             :             {
    3071      178842 :                 imax = j;
    3072             :             }
    3073             :             else
    3074             :             {
    3075     4155878 :                 if ( vec[j] < vec[imin] )
    3076             :                 {
    3077      249826 :                     imin = j;
    3078             :                 }
    3079             :             }
    3080             :         }
    3081             :         /* move the largest value of the range to its front */
    3082       66688 :         temp = vec[i];
    3083       66688 :         vec[i] = vec[imax];
    3084       66688 :         vec[imax] = temp;
    3085             :         /* if the smallest value was at position i, the swap above has just moved it to position imax */
    3086       66688 :         if ( imin == i )
    3087             :         {
    3088       11864 :             imin = imax;
    3089             :         }
    3090             :         /* move the smallest value of the range to its back */
    3091       66688 :         temp = vec[len - i - 1];
    3092       66688 :         vec[len - i - 1] = vec[imin];
    3093       66688 :         vec[imin] = temp;
    3094             :     }
    3095             : 
    3096        1042 :     return;
    3097             : }

Generated by: LCOV version 1.14