LCOV - Coverage on main -- long test vectors @ efe53129c9ed87a5067dd0a8fb9dca41db9c4add

LCOV - code coverage report

Current view:	top level - lib_enc - speech_music_classif.c (source / functions)		Hit	Total	Coverage
Test:	Coverage on main -- long test vectors @ efe53129c9ed87a5067dd0a8fb9dca41db9c4add	Lines:	1134	1151	98.5 %
Date:	2026-02-12 08:06:16	Functions:	17	17	100.0 %

          Line data    Source code

       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2026 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : #include <assert.h>
      38             : #include <stdint.h>
      39             : #include "options.h"
      40             : #ifdef DEBUGGING
      41             : #include "debug.h"
      42             : #endif
      43             : #include <math.h>
      44             : #include "cnst.h"
      45             : #include "prot.h"
      46             : #include "ivas_prot.h"
      47             : #include "rom_enc.h"
      48             : #include "rom_com.h" /* Common static table prototypes         */
      49             : #include "wmc_auto.h"
      50             : 
      51             : 
      52             : /*---------------------------------------------------------------------*
      53             :  * Local constants (macros private to this source file)
      54             :  *---------------------------------------------------------------------*/
      55             : 
      56             : #define ATT_SEG_LEN       ( L_FRAME / ATT_NSEG ) /* L_FRAME divided by ATT_NSEG */
      57             : #define ATT_3LSUB_POS     ( 3 * ATT_NSEG / NB_SUBFR ) /* same ratio form as the _16k variant below, but without the +0.5f rounding term */
      58             : #define ATT_3LSUB_POS_16k ( int16_t )( ( 4.0f * ATT_NSEG / (float) NB_SUBFR16k ) + 0.5f ) /* evaluated in float; adding 0.5f before the int16_t cast rounds a positive value to nearest */
      59             : 
      60             : #define THR_CORR_PEAK 0.95f
      61             : #define TON_FACT      0.95f
      62             : #define TON_ALPHA     0.95f
      63             : 
      64             : #define DLP_BIAS 0.138121f
      65             : 
      66             : #define THR_MASS_MAX     0.85f
      67             : #define THR_MASS_MIN     0.75f
      68             : #define THR_MASS_STEP_UP 0.01f
      69             : #define THR_MASS_STEP_DN 0.02f
      70             : 
      71             : 
      72             : /*---------------------------------------------------------------------*
      73             :  * Local function prototypes (forward declarations of the static helpers of this file)
      74             :  *---------------------------------------------------------------------*/
      75             : 
      76             : static void spec_analysis( float *Bin_E, float *p2v_map );
      77             : 
      78             : static void flux( float *Bin_E, float *p2v_map, float *old_Bin_E, float *buf_flux, int16_t attack_hangover, float dec_mov );
      79             : 
      80             : static void tonal_dist( float *p2v_map, float *buf_pkh, float *buf_Ntonal, float *buf_Ntonal2, float *buf_Ntonal_lf );
      81             : 
      82             : static int16_t mode_decision( Encoder_State *st, int16_t len, float *dec_mov, float *buf_flux, float *buf_epsP_tilt, float *buf_pkh, float *buf_cor_map_sum, float *buf_Ntonal, float *buf_Ntonal2, float *buf_Ntonal_lf, float *buf_dlp );
      83             : 
      84             : static void var_cor_calc( const float old_corr, float *mold_corr, float var_cor_t[], int16_t *high_stable_cor );
      85             : 
      86             : static int16_t attack_det( const float *inp, const int16_t last_clas, const int16_t localVAD, const int16_t coder_type, const int32_t total_brate, const int16_t element_mode, const int16_t clas, float finc_prev[], float *lt_finc, int16_t *last_strong_attack );
      87             : 
      88             : static float tonal_det( const float S[], int16_t vad_flag, float tod_S_map_lt[], float *tod_thr_lt, float *tod_weight, float *tod_S_mass_prev, float *tod_S_mass_lt );
      89             : 
      90             : static void tonal_context_improv( Encoder_State *st, const float PS[], const float voi_fv, const float cor_map_sum_fv, const float LPCErr );
      91             : 
      92             : static void order_spectrum( float *vec, const int16_t len );
      93             : 
      94             : static void detect_sparseness( Encoder_State *st, const int16_t localVAD_HE_SAD, const float voi_fv );
      95             : 
      96             : static int16_t sp_mus_classif_1st( Encoder_State *st, const int16_t localVAD_HE_SAD, const float lsp_new[M], const float cor_map_sum, const float epsP[M + 1], const float PS[], float non_sta, float relE, float *voi_fv, float *cor_map_sum_fv, float *LPCErr, int16_t *high_lpn_flag );
      97             : 
      98             : static void sp_mus_classif_2nd( Encoder_State *st, const float Etot, int16_t *attack_flag, const float *inp );
      99             : 
     100             : static void music_mixed_classif_improv( Encoder_State *st, const float *new_inp, const float *epsP, const float etot, const float old_cor, const float cor_map_sum );
     101             : 
     102             : 
     103             : /*---------------------------------------------------------------------*
     104             :  * speech_music_clas_init()
     105             :  *
     106             :  * Initialization of speech/music classifier: assigns a start-up value to the fields of the handle passed in (scalars, buffers and counters); returns nothing
     107             :  *---------------------------------------------------------------------*/
     108             : 
     109      129201 : void speech_music_clas_init(
     110             :     SP_MUS_CLAS_HANDLE hSpMusClas /* i/o: speech/music classifier handle   */
     111             : )
     112             : {
     113             :     int16_t i;
     114             : 
     115      129201 :     set_f( hSpMusClas->FV_st, 0.0f, N_SMC_FEATURES );
     116             : 
     117      129201 :     hSpMusClas->inact_cnt = 0;
     118      129201 :     set_s( hSpMusClas->past_dec, 0, HANG_LEN - 1 );
     119      129201 :     set_f( hSpMusClas->past_dlp, 0, HANG_LEN - 1 );
     120      129201 :     set_f( hSpMusClas->past_dlp_mean_ST, 0, HANG_LEN - 1 );
     121      129201 :     hSpMusClas->dlp_mean_ST = 0.0f;
     122      129201 :     hSpMusClas->dlp_mean_LT = 0.0f;
     123      129201 :     hSpMusClas->dlp_var_LT = 0.0f;
     124             : 
     125     2067216 :     for ( i = 0; i < N_SMC_FEATURES; i++ )
     126             :     {
     127     1938015 :         hSpMusClas->prev_FV[i] = 0.5f * hout_intervals[2 * i] + 0.5f * hout_intervals[2 * i + 1]; /* mean of the pair hout_intervals[2*i], hout_intervals[2*i+1] */
     128             :     }
     129             : 
     130     2067216 :     for ( i = 0; i < NB_BANDS_SPMUS; i++ )
     131             :     {
     132     1938015 :         hSpMusClas->past_log_enr[i] = logf( E_MIN ); /* every band starts at the log of E_MIN */
     133             :     }
     134             : 
     135      129201 :     hSpMusClas->sp_mus_state = -8;
     136      129201 :     hSpMusClas->wdrop = 0.0f;
     137      129201 :     hSpMusClas->wrise = 0.0f;
     138      129201 :     hSpMusClas->wdlp_0_95_sp = 0.0f;
     139      129201 :     hSpMusClas->wdlp_xtalk = 0.0f;
     140      129201 :     set_f( hSpMusClas->last_lsp, 0.0f, M_LSP_SPMUS );
     141      129201 :     hSpMusClas->last_cor_map_sum = 0.0f;
     142      129201 :     hSpMusClas->last_non_sta = 0.0f;
     143      129201 :     set_f( hSpMusClas->past_PS, 0.0f, HIGHEST_FBIN - LOWEST_FBIN );
     144      129201 :     hSpMusClas->past_ps_diff = 0;
     145      129201 :     hSpMusClas->past_epsP2 = 01; /* NOTE(review): 01 is an octal integer literal with value 1 - confirm that 1 (and not 0) is the intended start value */
     146      129201 :     hSpMusClas->past_epsP = 0;
     147      129201 :     hSpMusClas->flag_spitch_cnt = 0;
     148             : 
     149      129201 :     hSpMusClas->gsc_thres[0] = TH_0_MIN;
     150      129201 :     hSpMusClas->gsc_thres[1] = TH_1_MIN;
     151      129201 :     hSpMusClas->gsc_thres[2] = TH_2_MIN;
     152      129201 :     hSpMusClas->gsc_thres[3] = TH_3_MIN;
     153      129201 :     set_f( hSpMusClas->gsc_lt_diff_etot, 0.0f, MAX_LT );
     154      129201 :     hSpMusClas->gsc_mem_etot = 0.0f;
     155      129201 :     hSpMusClas->gsc_last_music_flag = 0;
     156      129201 :     hSpMusClas->gsc_nb_thr_1 = 0;
     157      129201 :     hSpMusClas->gsc_nb_thr_3 = 0;
     158      129201 :     hSpMusClas->mold_corr = 0.9f;
     159      129201 :     hSpMusClas->mean_avr_dyn = 0.5f;
     160      129201 :     hSpMusClas->last_sw_dyn = 10.0f;
     161             : 
     162      129201 :     hSpMusClas->relE_attack_cnt = 0;
     163      129201 :     hSpMusClas->prev_relE = 0.0f;
     164      129201 :     hSpMusClas->prev_Etot = 0.0f;
     165      129201 :     hSpMusClas->prev_vad = 0;
     166      129201 :     hSpMusClas->vad_0_1_cnt = 0;
     167      129201 :     hSpMusClas->relE_attack_sum = 0;
     168             : 
     169             :     /* speech/music classifier improvement: buffers of BUF_LEN entries each */
     170     7881261 :     for ( i = 0; i < BUF_LEN; i++ )
     171             :     {
     172     7752060 :         hSpMusClas->buf_flux[i] = -100; /* the only buffer in this loop that does not start at 0 */
     173     7752060 :         hSpMusClas->buf_pkh[i] = 0;
     174     7752060 :         hSpMusClas->buf_epsP_tilt[i] = 0;
     175     7752060 :         hSpMusClas->buf_cor_map_sum[i] = 0;
     176     7752060 :         hSpMusClas->buf_Ntonal[i] = 0;
     177     7752060 :         hSpMusClas->buf_Ntonal2[i] = 0;
     178     7752060 :         hSpMusClas->buf_Ntonal_lf[i] = 0;
     179             :     }
     180             : 
     181      129201 :     set_f( hSpMusClas->lpe_buf, 0, HANG_LEN_INIT );
     182      129201 :     set_f( hSpMusClas->voicing_buf, 0, HANG_LEN_INIT );
     183      129201 :     hSpMusClas->gsc_hangover = 0;
     184      129201 :     set_f( hSpMusClas->sparse_buf, 0, HANG_LEN_INIT );
     185      129201 :     set_f( hSpMusClas->hf_spar_buf, 0, HANG_LEN_INIT );
     186      129201 :     hSpMusClas->LT_sparse = 0.0f;
     187      129201 :     hSpMusClas->gsc_cnt = 0;
     188      129201 :     hSpMusClas->last_vad_spa = 0;
     189             : 
     190      129201 :     set_f( hSpMusClas->old_Bin_E, 0.0f, 3 * N_OLD_BIN_E );
     191      129201 :     set_f( hSpMusClas->buf_etot, 0, 4 ); /* NOTE(review): hard-coded length 4 - assumes buf_etot has at least 4 elements; confirm against the struct declaration */
     192      129201 :     set_f( hSpMusClas->buf_dlp, 0, 10 ); /* NOTE(review): hard-coded length 10 - assumes buf_dlp has at least 10 elements; confirm against the struct declaration */
     193             : 
     194      129201 :     hSpMusClas->UV_cnt1 = 300;
     195      129201 :     hSpMusClas->LT_UV_cnt1 = 250.0f;
     196      129201 :     hSpMusClas->onset_cnt = 0;
     197      129201 :     hSpMusClas->attack_hangover = 0;
     198      129201 :     hSpMusClas->dec_mov = 0.0f;
     199      129201 :     hSpMusClas->dec_mov1 = 0.0f;
     200      129201 :     hSpMusClas->mov_log_max_spl = 200.0f;
     201      129201 :     hSpMusClas->old_lt_diff[0] = 0.0f;
     202      129201 :     hSpMusClas->old_lt_diff[1] = 0.0f;
     203             : 
     204      129201 :     set_f( hSpMusClas->finc_prev, 0.0f, ATT_NSEG );
     205      129201 :     hSpMusClas->lt_finc = 0.0f;
     206      129201 :     hSpMusClas->last_strong_attack = 0;
     207      129201 :     hSpMusClas->tdm_lt_Etot = 0.01f;
     208      129201 :     set_f( hSpMusClas->tod_lt_Bin_E, 0.0f, TOD_NSPEC );
     209      129201 :     set_f( hSpMusClas->tod_S_map_lt, 0.0f, TOD_NSPEC );
     210      129201 :     hSpMusClas->tod_thr_lt = TOD_THR_MASS;
     211      129201 :     hSpMusClas->tod_weight = 0.0f;
     212      129201 :     hSpMusClas->tod_S_mass_prev = 0.0f;
     213      129201 :     hSpMusClas->tod_S_mass_lt = 0.0f;
     214             : 
     215             :     /* speech/music classification */
     216      129201 :     set_s( hSpMusClas->lt_old_mode, 1, 3 );
     217      129201 :     hSpMusClas->lt_voicing = 0.5f;
     218      129201 :     hSpMusClas->lt_corr = 0.5f;
     219      129201 :     hSpMusClas->lt_tonality = 0;
     220      129201 :     set_s( hSpMusClas->lt_corr_pitch, 0, 3 );
     221      129201 :     hSpMusClas->lt_hangover = 0;
     222      129201 :     hSpMusClas->lowrate_pitchGain = 0;
     223             : 
     224      129201 :     hSpMusClas->lt_music_hangover = 0;
     225      129201 :     set_f( hSpMusClas->tonality2_buf, 0, HANG_LEN_INIT );
     226      129201 :     set_f( hSpMusClas->tonality3_buf, 0, HANG_LEN_INIT );
     227      129201 :     set_f( hSpMusClas->LPCErr_buf, 0, HANG_LEN_INIT );
     228      129201 :     hSpMusClas->lt_music_state = 0;
     229      129201 :     hSpMusClas->lt_speech_state = 0;
     230      129201 :     hSpMusClas->lt_speech_hangover = 0;
     231             : 
     232      129201 :     hSpMusClas->lt_dec_thres = 10.0f;
     233      129201 :     hSpMusClas->ener_RAT = 0.0f;
     234             : 
     235      129201 :     hSpMusClas->high_stable_cor = 0;
     236      129201 :     set_f( hSpMusClas->var_cor_t, 0.0f, VAR_COR_LEN );
     237             : 
     238      129201 :     hSpMusClas->lps = 0.0f;
     239      129201 :     hSpMusClas->lpm = 0.0f;
     240      129201 :     hSpMusClas->lpn = 0.0f;
     241             : 
     242      129201 :     return;
     243             : }
     244             : 
     245             : 
     246             : /*---------------------------------------------------------------------*
     247             :  * speech_music_classif()
     248             :  *
     249             :  * Speech/music classification: runs the 1st stage classifier and, for MODE1 or a core rate of INT_FS_12k8, the refinement stages below
     250             :  *
     251             :  * The following technologies are used based on the outcome of the sp/mus classifier
     252             :  * sp_aud_decision1  sp_aud_decision2
     253             :  *       0                 0             use ACELP (+TD BWE)
     254             :  *       1                 0             use ACELP (+FD BWE) or HQ/LR-MDCT depending on bitrate
     255             :  *       1                 1             use GSC (+FD BWE) or HQ/LR-MDCT depending on bitrate
     256             :  *
     257             :  *       0                 1             exceptionally use GSC (+FD BWE) instead of LR-MDCT at 13.2 kbps (WB/SWB) for sparse spectra
     258             :  *---------------------------------------------------------------------*/
     259             : 
     260             : /* No return value (void): the decisions are written to st->sp_aud_decision0 and st->sp_aud_decision1, and to st->sp_aud_decision2 in the MODE1 / INT_FS_12k8 branch */
     261       83858 : void speech_music_classif(
     262             :     Encoder_State *st,             /* i/o: state structure                                 */
     263             :     const float *new_inp,          /* i  : new input signal                                */
     264             :     const float *inp,              /* i  : input signal to locate attack position          */
     265             :     const int16_t localVAD_HE_SAD, /* i  : HE-SAD flag without hangover                    */
     266             :     const float lsp_new[M],        /* i  : LSPs in current frame                           */
     267             :     const float cor_map_sum,       /* i  : correlation map sum (from multi-harmonic anal.) */
     268             :     const float epsP[M + 1],       /* i  : LP prediction error                             */
     269             :     const float PS[],              /* i  : energy spectrum                                 */
     270             :     const float Etot,              /* i  : total frame energy                              */
     271             :     const float old_cor,           /* i  : max correlation from previous frame             */
     272             :     int16_t *attack_flag,          /* o  : attack flag (GSC or TC)                         */
     273             :     const float non_sta,           /* i  : unbound non-stationarity for sp/mus classifier  */
     274             :     const float relE,              /* i  : relative frame energy                           */
     275             :     int16_t *high_lpn_flag,        /* o  : sp/mus LPN flag                                 */
     276             :     const int16_t flag_spitch      /* i  : flag to indicate very short stable pitch        */
     277             : )
     278             : {
     279             :     float voi_fv, cor_map_sum_fv, LPCErr;
     280             : 
     281             :     /* 1st stage speech/music classification based on the GMM model */
     282       83858 :     st->sp_aud_decision1 = sp_mus_classif_1st( st, localVAD_HE_SAD, lsp_new, cor_map_sum, epsP, PS, non_sta, relE, &voi_fv, &cor_map_sum_fv, &LPCErr, high_lpn_flag );
     283             : 
     284       83858 :     if ( st->codec_mode == MODE1 || st->sr_core == INT_FS_12k8 )
     285             :     {
     286             : 
     287             :         /* Improvement of the 1st stage decision for mixed/music content */
     288       55265 :         if ( !st->Opt_SC_VBR && ( st->total_brate != ACELP_24k40 ) )
     289             :         {
     290       52755 :             music_mixed_classif_improv( st, new_inp, epsP, Etot, old_cor, cor_map_sum );
     291             :         }
     292             : 
     293       55265 :         st->sp_aud_decision0 = st->sp_aud_decision1;
     294             : 
     295             :         /* 2nd stage speech/music classification (rewrite music to speech in onsets) */
     296       55265 :         st->sp_aud_decision2 = st->sp_aud_decision1;
     297             : 
     298       55265 :         if ( st->bwidth > NB )
     299             :         {
     300       47715 :             sp_mus_classif_2nd( st, Etot, attack_flag, inp );
     301             : 
     302       47715 :             if ( flag_spitch && st->bwidth == WB && st->total_brate < ACELP_13k20 )
     303             :             {
     304             :                 /* avoid switch to AUDIO/MUSIC class for very short stable high pitch
     305             :                    and/or stable pitch with high correlation at low bitrates */
     306          56 :                 st->sp_aud_decision2 = 0;
     307             :             }
     308             :         }
     309             : 
     310             :         /* Context-based improvement of 1st and 2nd stage decision on stable tonal signals */
     311       55265 :         if ( !st->Opt_SC_VBR && st->total_brate != ACELP_24k40 )
     312             :         {
     313       52755 :             tonal_context_improv( st, PS, voi_fv, cor_map_sum_fv, LPCErr );
     314             :         }
     315             : 
     316             :         /* Avoid using LR-MDCT on sparse spectra, use GSC instead at 13.2 kbps (WB/SWB) */
     317       55265 :         if ( !st->Opt_SC_VBR && st->total_brate == ACELP_13k20 && st->vad_flag == 1 && ( st->bwidth == WB || st->bwidth == SWB ) )
     318             :         {
     319       16594 :             detect_sparseness( st, localVAD_HE_SAD, voi_fv );
     320             :         }
     321             : 
     322             :         /* override speech/music classification to ACELP when the background noise level (st->lp_noise) exceeds 12.0f */
     323             :         /* this is a patch against mis-classifications during active noisy speech segments */
     324       55265 :         if ( st->lp_noise > 12.0f )
     325             :         {
     326       16504 :             st->sp_aud_decision1 = 0;
     327       16504 :             st->sp_aud_decision2 = 0;
     328             :         }
     329             : 
     330             :         /* set GSC noisy speech flag on unvoiced segments with bwidth >= SWB (requires vad_flag == 1, lp_noise > 12.0f, sp_aud_decision1 == 0 and ACELP_13k20 <= total_brate < ACELP_24k40) */
     331       55265 :         st->GSC_noisy_speech = 0;
     332       55265 :         if ( st->vad_flag == 1 && st->total_brate >= ACELP_13k20 && st->total_brate < ACELP_24k40 &&
     333       18887 :              st->lp_noise > 12.0f && st->sp_aud_decision1 == 0 && st->bwidth >= SWB &&
     334        1896 :              st->coder_type_raw == UNVOICED )
     335             :         {
     336         236 :             st->GSC_noisy_speech = 1;
     337             :         }
     338             : 
     339             :         /* Select AUDIO frames */
     340             : #ifdef DEBUGGING
     341             :         if ( st->codec_mode == MODE1 && ( st->force == 1 || ( st->force == -1 && ( st->sp_aud_decision2 || st->GSC_noisy_speech ) ) ) )
     342             : #else
     343       55265 :         if ( st->codec_mode == MODE1 && ( st->sp_aud_decision2 || st->GSC_noisy_speech ) )
     344             : #endif
     345             :         {
     346       15582 :             st->coder_type = AUDIO;
     347       15582 :             st->hGSCEnc->noise_lev = NOISE_LEVEL_SP0; /* NOTE(review): st->hGSCEnc is dereferenced without a NULL check - presumably always allocated when codec_mode == MODE1; confirm */
     348             :         }
     349             :     }
     350             :     else
     351             :     {
     352       28593 :         st->sp_aud_decision0 = st->sp_aud_decision1; /* in this branch only the 1st stage decision is copied; sp_aud_decision2 is not written */
     353             :     }
     354             : 
     355             : 
     356       83858 :     return;
     357             : }
     358             : 
     359             : 
     360             : /*---------------------------------------------------------------------*
     361             :  * sp_mus_classif_1st()
     362             :  *
     363             :  * 1st stage speech/music classification (based on the GMM model)
     364             :  *---------------------------------------------------------------------*/
     365             : 
     366             : /*! r: decision flag (1-music, 0-speech or noise) */
     367       83858 : static int16_t sp_mus_classif_1st(
     368             :     Encoder_State *st,             /* i/o: state structure                                 */
     369             :     const int16_t localVAD_HE_SAD, /* i  : local VAD HE flag                               */
     370             :     const float lsp_new[M],        /* i  : LSPs in current frame                           */
     371             :     const float cor_map_sum,       /* i  : correlation map sum (from multi-harmonic anal.) */
     372             :     const float epsP[M + 1],       /* i  : LP prediciton error                             */
     373             :     const float PS[],              /* i  : energy spectrum                                 */
     374             :     float non_sta,                 /* i  : unbound non-stationarity                        */
     375             :     float relE,                    /* i  : relative frame energy                           */
     376             :     float *voi_fv,                 /* o  : scaled voicing feature                          */
     377             :     float *cor_map_sum_fv,         /* o  : scaled correlation map feature                  */
     378             :     float *LPCErr,                 /* o  : scaled LP prediction error feature              */
     379             :     int16_t *high_lpn_flag         /* o  : sp/mus LPN flag                                 */
     380             : )
     381             : {
     382             :     int16_t i, k, p, dec, vad;
     383             :     float dlp, ftmp, lepsP1, sum_PS, ps_diff, ps_sta, wrelE, wdrop, wght, mx;
     384       83858 :     float FV[N_FEATURES], *pFV = FV, PS_norm[128], dPS[128], lsp[M];
     385       83858 :     float pys, pym, xm[N_FEATURES], py, lps = 0, lpm = 0;
     386             :     const float *pSF;
     387       83858 :     float pyn, lpn = 0;
     388             : 
     389       83858 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
     390             : 
     391             :     /*------------------------------------------------------------------*
     392             :      * Initialization
     393             :      *------------------------------------------------------------------*/
     394             : 
     395       83858 :     vad = localVAD_HE_SAD;
     396             : 
     397             :     /*------------------------------------------------------------------*
     398             :      * Preparation of the feature vector
     399             :      *------------------------------------------------------------------*/
     400             : 
     401             :     /* [0] OL pitch */
     402       83858 :     if ( st->tc_cnt == 1 || st->tc_cnt == 2 )
     403             :     {
     404        6816 :         *pFV++ = (float) st->pitch[2];
     405             :     }
     406             :     else
     407             :     {
     408       77042 :         *pFV++ = (float) ( st->pitch[0] + st->pitch[1] + st->pitch[2] ) / 3.0f;
     409             :     }
     410             : 
     411             :     /* [1] voicing */
     412       83858 :     if ( st->tc_cnt == 1 || st->tc_cnt == 2 )
     413             :     {
     414        6816 :         *pFV++ = st->voicing[2];
     415             :     }
     416             :     else
     417             :     {
     418       77042 :         *pFV++ = (float) ( st->voicing[0] + st->voicing[1] + st->voicing[2] ) / 3.0f;
     419             :     }
     420             : 
     421             :     /* [2,3,4,5,6] LSFs */
     422       83858 :     mvr2r( lsp_new, lsp, M );
     423             : 
     424       83858 :     ftmp = (float) acos( lsp[1] );
     425       83858 :     *pFV++ = ftmp + hSpMusClas->last_lsp[1];
     426       83858 :     hSpMusClas->last_lsp[1] = ftmp;
     427             : 
     428       83858 :     ftmp = (float) acos( lsp[2] );
     429       83858 :     *pFV++ = ftmp + hSpMusClas->last_lsp[2];
     430       83858 :     hSpMusClas->last_lsp[2] = ftmp;
     431             : 
     432       83858 :     ftmp = (float) acos( lsp[3] );
     433       83858 :     *pFV++ = ftmp + hSpMusClas->last_lsp[3];
     434       83858 :     hSpMusClas->last_lsp[3] = ftmp;
     435             : 
     436       83858 :     ftmp = (float) acos( lsp[4] );
     437       83858 :     *pFV++ = ftmp + hSpMusClas->last_lsp[4];
     438       83858 :     hSpMusClas->last_lsp[4] = ftmp;
     439             : 
     440       83858 :     ftmp = (float) acos( lsp[5] );
     441       83858 :     *pFV++ = ftmp + hSpMusClas->last_lsp[5];
     442       83858 :     hSpMusClas->last_lsp[5] = ftmp;
     443             : 
     444             :     /* [7] cor_map_sum */
     445       83858 :     *pFV++ = cor_map_sum + hSpMusClas->last_cor_map_sum;
     446       83858 :     hSpMusClas->last_cor_map_sum = cor_map_sum;
     447             : 
     448             :     /* [8] non_sta */
     449       83858 :     *pFV++ = non_sta + hSpMusClas->last_non_sta;
     450       83858 :     hSpMusClas->last_non_sta = non_sta;
     451             : 
     452             :     /* [9] epsP */
     453       83858 :     if ( st->bwidth == NB )
     454             :     {
     455             :         /* do not take into account (statistics are too different) */
     456        7550 :         *pFV++ = -1.647f;
     457             :     }
     458             :     else
     459             :     {
     460       76308 :         lepsP1 = logf( epsP[1] + 1e-5f );
     461       76308 :         ftmp = logf( epsP[13] ) - lepsP1;
     462       76308 :         *pFV++ = ftmp + hSpMusClas->past_epsP2;
     463       76308 :         hSpMusClas->past_epsP2 = ftmp;
     464             :     }
     465             : 
     466             :     /* calculation of differential normalized power spectrum */
     467       83858 :     sum_PS = 1e-5f;
     468     5702344 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
     469             :     {
     470     5618486 :         sum_PS += PS[i];
     471             :     }
     472             : 
     473     5702344 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
     474             :     {
     475     5618486 :         PS_norm[i] = PS[i] / sum_PS;
     476     5618486 :         dPS[i] = fabsf( PS_norm[i] - hSpMusClas->past_PS[i - LOWEST_FBIN] );
     477             :     }
     478             : 
     479             :     /* [10] ps_diff (spectral difference) */
     480       83858 :     ps_diff = 0;
     481     5702344 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
     482             :     {
     483     5618486 :         ps_diff += dPS[i];
     484             :     }
     485             : 
     486       83858 :     ps_diff = logf( ps_diff + 1e-5f );
     487       83858 :     *pFV++ = ps_diff + hSpMusClas->past_ps_diff;
     488       83858 :     hSpMusClas->past_ps_diff = ps_diff;
     489             : 
     490             :     /* [11] ps_sta (spectral stationarity) */
     491       83858 :     ps_sta = 0;
     492     5702344 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
     493             :     {
     494     5618486 :         mx = PS_norm[i] > hSpMusClas->past_PS[i - LOWEST_FBIN] ? PS_norm[i] : hSpMusClas->past_PS[i - LOWEST_FBIN];
     495     5618486 :         ps_sta += mx / ( dPS[i] + 1e-5f );
     496             :     }
     497             : 
     498       83858 :     *pFV++ = logf( ps_sta + 1e-5f );
     499       83858 :     mvr2r( &PS_norm[LOWEST_FBIN], hSpMusClas->past_PS, HIGHEST_FBIN - LOWEST_FBIN );
     500             : 
     501             :     /*------------------------------------------------------------------*
     502             :      * Scaling of the feature vector
     503             :      *------------------------------------------------------------------*/
     504             : 
     505       83858 :     pFV = FV;
     506       83858 :     if ( st->bwidth == NB )
     507             :     {
     508        7550 :         pSF = SF_8k;
     509             :     }
     510             :     else
     511             :     {
     512       76308 :         pSF = SF;
     513             :     }
     514             : 
     515     1090154 :     for ( i = 0; i < N_FEATURES; i++, pFV++, pSF += 2 )
     516             :     {
     517     1006296 :         *pFV = pSF[0] * *pFV + pSF[1];
     518             :     }
     519             : 
     520             :     /* store some scaled parameters for later correction of the 1st stage speech/music classification */
     521       83858 :     *voi_fv = FV[1];
     522       83858 :     *cor_map_sum_fv = FV[7];
     523       83858 :     *LPCErr = FV[9];
     524             : 
     525             :     /*------------------------------------------------------------------*
     526             :      * Calculation of posterior probability
     527             :      * Log-probability
     528             :      *------------------------------------------------------------------*/
     529             : 
     530       83858 :     pys = pym = pyn = 1e-5f;
     531             : 
     532             :     /* run loop for all mixtures (for each mixture, calculate the probability of speech or noise and the probability of music) */
     533      587006 :     for ( k = 0; k < N_MIXTURES; k++ )
     534             :     {
     535             :         /* active frames - calculate the probability of speech */
     536     6540924 :         for ( p = 0; p < N_FEATURES; p++ )
     537             :         {
     538     6037776 :             xm[p] = FV[p] - m_speech[k * N_FEATURES + p];
     539             :         }
     540             : 
     541      503148 :         py = lvm_speech[k] + dot_product_mat( xm, &invV_speech[k * N_FEATURES * N_FEATURES], N_FEATURES );
     542      503148 :         pys += expf( py );
     543             :         /* inactive frames - calculate the probability of noise */
     544     6540924 :         for ( p = 0; p < N_FEATURES; p++ )
     545             :         {
     546     6037776 :             xm[p] = FV[p] - m_noise[k * N_FEATURES + p];
     547             :         }
     548             : 
     549      503148 :         py = lvm_noise[k] + dot_product_mat( xm, &invV_noise[k * N_FEATURES * N_FEATURES], N_FEATURES );
     550      503148 :         pyn += expf( py );
     551             : 
     552             :         /* either active or inactive frames - calculate the probability of music */
     553     6540924 :         for ( p = 0; p < N_FEATURES; p++ )
     554             :         {
     555     6037776 :             xm[p] = FV[p] - m_music[k * N_FEATURES + p];
     556             :         }
     557             : 
     558      503148 :         py = lvm_music[k] + dot_product_mat( xm, &invV_music[k * N_FEATURES * N_FEATURES], N_FEATURES );
     559      503148 :         pym += expf( py );
     560             :     }
     561             : 
     562             :     /* calculate log-probability */
     563       83858 :     lps = logf( pys ) - 0.5f * N_FEATURES * logf( PI2 );
     564       83858 :     lpm = logf( pym ) - 0.5f * N_FEATURES * logf( PI2 );
     565       83858 :     lpn = logf( pyn ) - 0.5f * N_FEATURES * logf( PI2 );
     566             : 
     567       83858 :     *high_lpn_flag = 0;
     568       83858 :     if ( lpn > lps && lpn > lpm )
     569             :     {
     570       14367 :         *high_lpn_flag = 1;
     571             :     }
     572             : 
     573       83858 :     if ( !vad )
     574             :     {
     575             :         /* artificially increase log-probability of noise */
     576       10764 :         lps = lpn * 1.2f;
     577             :     }
     578             : 
     579       83858 :     hSpMusClas->lpm = lpm;
     580       83858 :     hSpMusClas->lps = lps;
     581             : 
     582             :     /* determine HQ Generic speech class */
     583       83858 :     if ( st->hHQ_core != NULL )
     584             :     {
     585       83858 :         if ( lps > lpm + 0.5f )
     586             :         {
     587       39034 :             st->hHQ_core->hq_generic_speech_class = 1;
     588             :         }
     589             :         else
     590             :         {
     591       44824 :             st->hHQ_core->hq_generic_speech_class = 0;
     592             :         }
     593             :     }
     594             : 
     595             :     /*------------------------------------------------------------------*
     596             :      * State machine (sp_mus_state < 0 .. inactive, > 0 .. entry, = 0 .. active )
     597             :      *------------------------------------------------------------------*/
     598             : 
     599       83858 :     if ( vad )
     600             :     {
     601       73094 :         if ( relE < -20 || ( lps <= -5 && lpm <= -5 ) )
     602             :         {
     603        7329 :             if ( hSpMusClas->sp_mus_state > 0 )
     604             :             {
     605        1280 :                 if ( hSpMusClas->sp_mus_state < HANG_LEN )
     606             :                 {
     607             :                     /* energy is too low but we are in entry period -> reset the inactive counter to allow new entry later */
     608         103 :                     hSpMusClas->inact_cnt = 0;
     609             :                 }
     610             : 
     611             :                 /* energy is too low -> we are going to instable state */
     612        1280 :                 hSpMusClas->sp_mus_state = 0;
     613             :             }
     614        6049 :             else if ( hSpMusClas->sp_mus_state > -HANG_LEN )
     615             :             {
     616             :                 /* energy is still too low -> we are still in instable state */
     617        2916 :                 hSpMusClas->sp_mus_state--;
     618             :             }
     619             :         }
     620       65765 :         else if ( hSpMusClas->sp_mus_state <= 0 )
     621             :         {
     622        1597 :             if ( hSpMusClas->inact_cnt == 0 )
     623             :             {
     624             : 
     625         497 :                 hSpMusClas->sp_mus_state = 1;
     626             :             }
     627             :             else
     628             :             {
     629             : 
     630        1100 :                 hSpMusClas->sp_mus_state = HANG_LEN;
     631             :             }
     632             : 
     633        1597 :             hSpMusClas->inact_cnt = 12;
     634             :         }
     635       64168 :         else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
     636             :         {
     637             :             /* we are inside an entry period -> increment the counter of entry frames */
     638        2557 :             hSpMusClas->sp_mus_state++;
     639             :         }
     640             : 
     641       73094 :         if ( hSpMusClas->sp_mus_state < 0 && hSpMusClas->inact_cnt > 0 )
     642             :         {
     643        3472 :             hSpMusClas->inact_cnt--;
     644             :         }
     645             :     }
     646             :     else
     647             :     {
     648       10764 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
     649             :         {
     650          50 :             hSpMusClas->inact_cnt = 0;
     651             :         }
     652       10714 :         else if ( hSpMusClas->inact_cnt > 0 )
     653             :         {
     654        1941 :             hSpMusClas->inact_cnt--;
     655             :         }
     656             : 
     657       10764 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
     658             :         {
     659          50 :             hSpMusClas->sp_mus_state = -HANG_LEN;
     660             :         }
     661       10714 :         else if ( hSpMusClas->sp_mus_state > 0 )
     662             :         {
     663         264 :             hSpMusClas->sp_mus_state = -1;
     664             :         }
     665       10450 :         else if ( hSpMusClas->sp_mus_state > -HANG_LEN )
     666             :         {
     667             :             /* we are in inactive state */
     668        1244 :             hSpMusClas->sp_mus_state--;
     669             :         }
     670             :     }
     671             : 
     672             :     /*------------------------------------------------------------------*
     673             :      * Decision without hangover
     674             :      * Weighted decision
     675             :      *------------------------------------------------------------------*/
     676             : 
     677             :     /* decision without hangover (0 - speech/noise, 1 - music) */
     678       83858 :     dec = lpm > lps;
     679       83858 :     dlp = lpm - lps;
     680             : 
     681       83858 :     if ( !vad )
     682             :     {
     683       10764 :         dec = 0;
     684       10764 :         dlp = 0;
     685             :     }
     686             : 
     687             :     /* calculate weight based on relE (close to 0.01 in low-E regions, close to 1 in high-E regions) */
     688       83858 :     wrelE = 1.0f + relE / 15;
     689             : 
     690       83858 :     if ( wrelE > 1.0f )
     691             :     {
     692       29024 :         wrelE = 1.0f;
     693             :     }
     694       54834 :     else if ( wrelE < 0.01f )
     695             :     {
     696       16772 :         wrelE = 0.01f;
     697             :     }
     698             : 
     699             :     /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */
     700       83858 :     if ( dlp < 0 && dlp < hSpMusClas->past_dlp[0] )
     701             :     {
     702       19499 :         if ( hSpMusClas->past_dlp[0] > 0 )
     703             :         {
     704        6833 :             hSpMusClas->wdrop = -dlp;
     705             :         }
     706             :         else
     707             :         {
     708       12666 :             hSpMusClas->wdrop += hSpMusClas->past_dlp[0] - dlp;
     709             :         }
     710             :     }
     711             :     else
     712             :     {
     713       64359 :         hSpMusClas->wdrop = 0;
     714             :     }
     715             : 
     716       83858 :     wdrop = hSpMusClas->wdrop / 20;
     717             : 
     718       83858 :     if ( wdrop > 1.0f )
     719             :     {
     720           0 :         wdrop = 1.0f;
     721             :     }
     722       83858 :     else if ( wdrop < 0.1f )
     723             :     {
     724       73413 :         wdrop = 0.1f;
     725             :     }
     726             : 
     727             :     /* combine weights into one */
     728       83858 :     wght = wrelE * wdrop;
     729       83858 :     if ( wght < 0.01f )
     730             :     {
     731       18515 :         wght = 0.01f;
     732             :     }
     733             : 
     734             :     /* calculate weighted decision */
     735       83858 :     hSpMusClas->wdlp_0_95_sp = wght * dlp + ( 1 - wght ) * hSpMusClas->wdlp_0_95_sp;
     736             : 
     737       83858 :     if ( hSpMusClas->sp_mus_state == -HANG_LEN )
     738             :     {
     739       12770 :         hSpMusClas->wdlp_0_95_sp = 0;
     740             :     }
     741             : 
     742             :     /*------------------------------------------------------------------*
     743             :      * Final speech/music decision
     744             :      *------------------------------------------------------------------*/
     745             : 
     746       83858 :     if ( !vad && hSpMusClas->sp_mus_state == -HANG_LEN )
     747             :     {
     748             :         /* inactive state */
     749        9402 :         dec = 0;
     750             :     }
     751       74456 :     else if ( hSpMusClas->sp_mus_state <= 0 )
     752             :     {
     753             :         /* transition from active to inactive state or instable state */
     754        8691 :         dec = hSpMusClas->past_dec[0];
     755             :     }
     756       65765 :     else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
     757             :     {
     758             :         /* entry state -> final decision is calculated based on weighted average of past non-binary decisions */
     759        2710 :         ftmp = w_spmus[hSpMusClas->sp_mus_state - 1][0] * dlp;
     760        2710 :         ftmp += dotp( &w_spmus[hSpMusClas->sp_mus_state - 1][1], hSpMusClas->past_dlp, HANG_LEN - 1 );
     761        2710 :         dec = ftmp > 2.0f;
     762             :     }
     763             :     else
     764             :     {
     765             :         /* stable active state */
     766       63055 :         if ( hSpMusClas->wdlp_0_95_sp > 0 && hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 )
     767             :         {
     768             :             /* switching from speech to music */
     769         492 :             dec = 1;
     770             :         }
     771       62563 :         else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp < 0 )
     772             :         {
     773             :             /* switching from music to speech */
     774         456 :             dec = 0;
     775             :         }
     776             :         else
     777             :         {
     778       62107 :             dec = hSpMusClas->past_dec[0];
     779             :         }
     780             :     }
     781             : 
     782             :     /*------------------------------------------------------------------*
     783             :      * Updates
     784             :      *------------------------------------------------------------------*/
     785             : 
     786             :     /* update buffer of past non-binary decisions */
     787       83858 :     mvr2r( &hSpMusClas->past_dlp[0], &hSpMusClas->past_dlp[1], HANG_LEN - 2 );
     788       83858 :     hSpMusClas->past_dlp[0] = dlp;
     789             : 
     790             :     /* update buffer of past binary decisions */
     791       83858 :     mvs2s( &hSpMusClas->past_dec[0], &hSpMusClas->past_dec[1], HANG_LEN - 2 );
     792       83858 :     hSpMusClas->past_dec[0] = dec;
     793             : 
     794       83858 :     return dec;
     795             : }
     796             : 
     797             : 
     798             : /*---------------------------------------------------------------------*
     799             :  * sp_mus_classif_2nd()
     800             :  *
     801             :  * 2nd stage speech/music classifier (convert music to speech for onsets): refines st->sp_aud_decision2 when the 1st stage chose music (st->sp_aud_decision1 == 1), may switch st->coder_type in MODE1, and reports detected attacks through *attack_flag
     802             :  *---------------------------------------------------------------------*/
     803             : 
     804       47715 : static void sp_mus_classif_2nd(
     805             :     Encoder_State *st,    /* i/o: encoder state structure (sp_aud_decision2 and coder_type may be modified) */
     806             :     const float Etot,     /* i  : total frame energy          */
     807             :     int16_t *attack_flag, /* o  : attack flag, reset to 0 on entry; attack + 1 when TC coding is selected, 31 when GSC coding is selected */
     808             :     const float *inp      /* i  : input signal                */
     809             : )
     810             : {
     811             :     int16_t attack;
     812       47715 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
     813             : 
     814             :     /* initialization (no attack signalled unless one of the branches below sets it) */
     815       47715 :     *attack_flag = 0;
     816             : 
     817             :     /* signal stability estimation */
     818       47715 :     stab_est( Etot, hSpMusClas->gsc_lt_diff_etot, &hSpMusClas->gsc_mem_etot, &hSpMusClas->gsc_nb_thr_3, &hSpMusClas->gsc_nb_thr_1, hSpMusClas->gsc_thres, &hSpMusClas->gsc_last_music_flag, st->vad_flag );
     819             : 
     820             :     /* calculate variance of correlation (updates hSpMusClas->high_stable_cor used below) */
     821       47715 :     var_cor_calc( st->old_corr, &hSpMusClas->mold_corr, hSpMusClas->var_cor_t, &hSpMusClas->high_stable_cor );
     822             : 
     823             :     /* attack detection (st->clas is passed as both last_clas and clas; the element mode is fixed to EVS_MONO) */
     824       47715 :     attack = attack_det( inp, st->clas, st->localVAD, st->coder_type, st->total_brate, EVS_MONO, st->clas, hSpMusClas->finc_prev, &hSpMusClas->lt_finc, &hSpMusClas->last_strong_attack );
     825             : 
     826             :     /* change decision from music to speech in certain special cases (only when the 1st stage decision is music) */
     827       47715 :     if ( st->sp_aud_decision1 == 1 )
     828             :     {
     829       17624 :         if ( hSpMusClas->ener_RAT < 0.18f && hSpMusClas->lt_dec_thres > 15.0f )
     830             :         {
     831             :             /* strong music decision but almost no content below 1kHz */
     832           0 :             st->sp_aud_decision2 = 0;
     833             :         }
     834       17624 :         else if ( hSpMusClas->high_stable_cor && st->pitch[0] >= 130 )
     835             :         {
     836             :             /* prevent GSC in highly correlated signal with low energy variation */
     837             :             /* this is basically a patch against bassoon-type of music */
     838          53 :             st->sp_aud_decision2 = 0;
     839             : 
     840          53 :             if ( st->codec_mode == MODE1 && st->coder_type == TRANSITION )
     841             :             {
     842           0 :                 st->coder_type = GENERIC;
     843             :             }
     844             :         }
     845       17571 :         else if ( hSpMusClas->gsc_lt_diff_etot[MAX_LT - 1] > 4.5f && ( hSpMusClas->gsc_lt_diff_etot[MAX_LT - 1] - hSpMusClas->gsc_lt_diff_etot[MAX_LT - 2] > 10.0f ) )
     846             :         {
     847         307 :             if ( st->tc_cnt == 1 )
     848             :             {
     849             :                 /* do TC coding instead of GC/VC if onset has been already declared before */
     850          14 :                 st->sp_aud_decision2 = 0;
     851             : 
     852          14 :                 if ( st->codec_mode == MODE1 )
     853             :                 {
     854          14 :                     st->coder_type = TRANSITION;
     855             :                 }
     856             :             }
     857             :             else
     858             :             {
     859         293 :                 if ( attack >= ATT_3LSUB_POS )
     860             :                 {
     861             :                     /* do TC coding if attack is located in the last subframe */
     862          85 :                     st->sp_aud_decision2 = 0;
     863          85 :                     *attack_flag = attack + 1;
     864             : 
     865          85 :                     if ( st->codec_mode == MODE1 )
     866             :                     {
     867          84 :                         st->coder_type = TRANSITION;
     868             :                     }
     869             :                 }
     870         208 :                 else if ( attack >= ATT_SEG_LEN / 2 )
     871             :                 {
     872             :                     /* do GSC coding if attack is located after the first quarter of the first subframe */
     873             :                     /* (pre-echo will be treated at the decoder side) */
     874           1 :                     st->sp_aud_decision2 = 1;
     875           1 :                     *attack_flag = 31;
     876             :                 }
     877             :             }
     878             :         }
     879             :     }
     880       30091 :     else if ( st->localVAD == 1 && st->coder_type == GENERIC && ( ( attack >= ATT_3LSUB_POS && st->total_brate < ACELP_24k40 ) || ( attack >= ATT_3LSUB_POS_16k && st->total_brate >= ACELP_24k40 && st->total_brate < ACELP_48k ) ) )
     881             :     {
     882             :         /* do TC coding if attack is located in the last subframe (the position threshold depends on the bitrate range) */
     883         424 :         *attack_flag = attack + 1;
     884             : 
     885         424 :         if ( st->codec_mode == MODE1 )
     886             :         {
     887         414 :             st->coder_type = TRANSITION;
     888             :         }
     889             :     }
     890             : 
     891       47715 :     return;
     892             : }
     893             : 
     894             : 
     895             : /*---------------------------------------------------------------------*
     896             :  * tonal_det()
     897             :  *
     898             :  * Tonal detector based on spectral stability and harmonicity: smooths S[] into the long-term map, averages the map over TOD_NSPEC bins, adapts the decision threshold and returns the long-term smoothed mean
     899             :  *---------------------------------------------------------------------*/
     900             : 
     901     3856787 : static float tonal_det(
     902             :     const float S[], /* i  : input values, the first TOD_NSPEC entries are read */
     903             :     int16_t vad_flag, /* i  : VAD flag, target value of the adaptive weight smoothing */
     904             :     float tod_S_map_lt[], /* i/o: long-term smoothed version of S[] (TOD_NSPEC entries) */
     905             :     float *tod_thr_lt, /* i/o: adaptive threshold, limited to the range THR_MASS_MIN .. THR_MASS_MAX */
     906             :     float *tod_weight, /* i/o: adaptive smoothing weight, at most TON_ALPHA and at least 1 - TON_ALPHA */
     907             :     float *tod_S_mass_prev, /* i/o: mean of the long-term map computed in the previous call */
     908             :     float *tod_S_mass_lt ) /* i/o: long-term smoothed mean of the map (also the return value) */
     909             : {
     910             :     int16_t i;
     911             :     float S_mass, alpha;
     912             : 
     913             :     /* update the adaptive weight (recursive average of vad_flag, then limited from both sides) */
     914     3856787 :     *tod_weight = TON_ALPHA * *tod_weight + ( 1 - TON_ALPHA ) * vad_flag;
     915     3856787 :     if ( *tod_weight > TON_ALPHA )
     916             :     {
     917     2231536 :         *tod_weight = TON_ALPHA;
     918             :     }
     919     1625251 :     else if ( *tod_weight < ( 1 - TON_ALPHA ) )
     920             :     {
     921      633983 :         *tod_weight = 1 - TON_ALPHA;
     922             :     }
     923             : 
     924             :     /* calculate LT spectral correlation in each band up to 4 kHz (recursive average of S[] weighted by *tod_weight) and accumulate its mean over TOD_NSPEC bins */
     925     3856787 :     S_mass = 0.0f;
     926   312399747 :     for ( i = 0; i < TOD_NSPEC; i++ )
     927             :     {
     928   308542960 :         tod_S_map_lt[i] = *tod_weight * tod_S_map_lt[i] + ( 1 - *tod_weight ) * S[i];
     929             : 
     930   308542960 :         S_mass += tod_S_map_lt[i];
     931             :     }
     932     3856787 :     S_mass /= TOD_NSPEC;
     933             : 
     934     3856787 :     if ( S_mass > *tod_S_mass_prev )
     935             :     {
     936     1820031 :         alpha = 0.7f; /* mean increased: the past long-term value gets the larger weight */
     937             :     }
     938             :     else
     939             :     {
     940     2036756 :         alpha = 0.3f; /* mean did not increase: the new mean gets the larger weight */
     941             :     }
     942     3856787 :     *tod_S_mass_prev = S_mass;
     943     3856787 :     *tod_S_mass_lt = alpha * *tod_S_mass_lt + ( 1 - alpha ) * S_mass;
     944     3856787 :     S_mass = *tod_S_mass_lt;
     945             : 
     946             :     /* updating adaptive decision threshold (subtract THR_MASS_STEP_DN when the smoothed mean exceeds it, add THR_MASS_STEP_UP otherwise, then limit to THR_MASS_MIN .. THR_MASS_MAX) */
     947     3856787 :     if ( S_mass > *tod_thr_lt )
     948             :     {
     949       69580 :         *tod_thr_lt -= THR_MASS_STEP_DN;
     950             :     }
     951             :     else
     952             :     {
     953     3787207 :         *tod_thr_lt += THR_MASS_STEP_UP;
     954             :     }
     955             : 
     956     3856787 :     if ( *tod_thr_lt > THR_MASS_MAX )
     957             :     {
     958     3776539 :         *tod_thr_lt = THR_MASS_MAX;
     959             :     }
     960             : 
     961     3856787 :     if ( *tod_thr_lt < THR_MASS_MIN )
     962             :     {
     963       63303 :         *tod_thr_lt = THR_MASS_MIN;
     964             :     }
     965             : 
     966     3856787 :     return S_mass;
     967             : }
     968             : 
     969             : /*---------------------------------------------------------------------*
     970             :  * var_cor_calc()
     971             :  *
     972             :  * Calculate variance of correlation over the last VAR_COR_LEN values and flag highly-correlated stable signals
     973             :  *---------------------------------------------------------------------*/
     974             : 
     975     3904502 : static void var_cor_calc(
     976             :     const float old_corr, /* i  : newest correlation value, stored at var_cor_t[0] */
     977             :     float *mold_corr, /* i/o: average correlation (0.9 * previous average + 0.1 * old_corr) */
     978             :     float var_cor_t[], /* i/o: buffer of the last VAR_COR_LEN correlation values, newest first */
     979             :     int16_t *high_stable_cor ) /* o  : 1 for a highly-correlated stable signal, 0 otherwise */
     980             : {
     981             :     int16_t i;
     982             :     float var_cor;
     983             : 
     984             :     /* update buffer of old correlation values (shift by one position; the loop ends with i == 0, so the newest value goes to index 0) */
     985    39045020 :     for ( i = VAR_COR_LEN - 1; i > 0; i-- )
     986             :     {
     987    35140518 :         var_cor_t[i] = var_cor_t[i - 1];
     988             :     }
     989     3904502 :     var_cor_t[i] = old_corr;
     990             : 
     991             :     /* calculate variance of correlation */
     992     3904502 :     var_cor = var( var_cor_t, VAR_COR_LEN );
     993             : 
     994             :     /* set flag in case of highly-correlated stable signal (tested against the average correlation before it is updated with old_corr below) */
     995     3904502 :     if ( *mold_corr > 0.8f && var_cor < 5e-4f )
     996             :     {
     997      309210 :         *high_stable_cor = 1;
     998             :     }
     999             :     else
    1000             :     {
    1001     3595292 :         *high_stable_cor = 0;
    1002             :     }
    1003             : 
    1004             :     /* update average correlation */
    1005     3904502 :     *mold_corr = 0.1f * old_corr + 0.9f * *mold_corr;
    1006             : 
    1007     3904502 :     return;
    1008             : }
    1009             : 
    1010             : /*---------------------------------------------------------------------*
    1011             :  * attack_det()
    1012             :  *
    1013             :  * Attack detection
    1014             :  *---------------------------------------------------------------------*/
    1015             : 
    1016     3904502 : static int16_t attack_det(
    1017             :     const float *inp,           /* i  : input signal                           */
    1018             :     const int16_t last_clas,    /* i  : last signal clas                       */
    1019             :     const int16_t localVAD,     /* i  : local VAD flag                         */
    1020             :     const int16_t coder_type,   /* i  : coder type                             */
    1021             :     const int32_t total_brate,  /* i  : total bitrate                          */
    1022             :     const int16_t element_mode, /* i  : IVAS element mode                      */
    1023             :     const int16_t clas,         /* i  : signal class                           */
    1024             :     float finc_prev[],          /* i/o: previous finc                          */
    1025             :     float *lt_finc,             /* i/o: long-term mean finc                    */
    1026             :     int16_t *last_strong_attack /* i/o: last strong attack flag                */
    1027             : )
    1028             : {
    1029             :     int16_t i, attack;
    1030             :     float etmp, etmp2, finc[ATT_NSEG];
    1031             :     int16_t att_3lsub_pos;
    1032             :     int16_t attack1;
    1033             : 
    1034     3904502 :     att_3lsub_pos = ATT_3LSUB_POS;
    1035     3904502 :     if ( total_brate >= ACELP_24k40 )
    1036             :     {
    1037       19544 :         att_3lsub_pos = ATT_3LSUB_POS_16k;
    1038             :     }
    1039             : 
    1040             :     /* compute energy per section */
    1041   128848566 :     for ( i = 0; i < ATT_NSEG; i++ )
    1042             :     {
    1043   124944064 :         finc[i] = sum2_f( inp + i * ATT_SEG_LEN, ATT_SEG_LEN );
    1044             :     }
    1045             : 
    1046     3904502 :     attack = maximum( finc, ATT_NSEG, &etmp );
    1047     3904502 :     attack1 = attack;
    1048             : 
    1049     3904502 :     if ( localVAD == 1 && coder_type == GENERIC )
    1050             :     {
    1051             :         /* compute mean energy in the first three subframes */
    1052     1525528 :         etmp = mean( finc, att_3lsub_pos );
    1053             : 
    1054             :         /* compute mean energy after the attack */
    1055     1525528 :         etmp2 = mean( finc + attack, ATT_NSEG - attack );
    1056             : 
    1057             :         /* and compare them */
    1058     1525528 :         if ( etmp * 8 > etmp2 )
    1059             :         {
    1060             :             /* stop, if the attack is not sufficiently strong */
    1061     1471978 :             attack = 0;
    1062             :         }
    1063             : 
    1064     1525528 :         if ( last_clas == VOICED_CLAS && etmp * 20 > etmp2 )
    1065             :         {
    1066             :             /* stop, if the signal was voiced and the attack is not sufficiently strong */
    1067      344469 :             attack = 0;
    1068             :         }
    1069             : 
    1070             :         /* compare wrt. other sections (reduces miss-classification) */
    1071     1525528 :         if ( attack > 0 )
    1072             :         {
    1073       47345 :             etmp2 = finc[attack];
    1074             : 
    1075      981351 :             for ( i = 2; i < att_3lsub_pos - 2; i++ )
    1076             :             {
    1077      936090 :                 if ( finc[i] * 2.0f > etmp2 )
    1078             :                 {
    1079             :                     /* stop, if the attack is not sufficiently strong */
    1080        2084 :                     attack = 0;
    1081        2084 :                     break;
    1082             :                 }
    1083             :             }
    1084             :         }
    1085             : 
    1086     1525528 :         if ( attack == 0 && element_mode > EVS_MONO && ( clas < VOICED_TRANSITION || clas == ONSET ) )
    1087             :         {
    1088     1018392 :             mvr2r( finc, finc_prev, attack1 );
    1089             : 
    1090             :             /* compute mean energy before the attack */
    1091     1018392 :             etmp = mean( finc_prev, ATT_NSEG );
    1092             : 
    1093     1018392 :             etmp2 = finc[attack1];
    1094             : 
    1095     1018392 :             if ( ( etmp * 16 < etmp2 ) || ( etmp * 12 < etmp2 && last_clas == UNVOICED_CLAS ) )
    1096             :             {
    1097       50951 :                 attack = attack1;
    1098             :             }
    1099             : 
    1100     1018392 :             if ( 20 * *lt_finc > etmp2 || *last_strong_attack )
    1101             :             {
    1102      948170 :                 attack = 0;
    1103             :             }
    1104             :         }
    1105             : 
    1106     1525528 :         *last_strong_attack = attack;
    1107             :     }
    1108             : 
    1109             :     /* compare wrt. other sections (reduces misclassification) */
    1110     2378974 :     else if ( attack > 0 )
    1111             :     {
    1112    26953439 :         for ( i = 2; i < att_3lsub_pos - 2; i++ )
    1113             :         {
    1114    26129981 :             if ( i != attack && finc[i] * 1.3f > finc[attack] )
    1115             :             {
    1116             :                 /* stop, if the attack is not sufficiently strong */
    1117     1319315 :                 attack = 0;
    1118     1319315 :                 break;
    1119             :             }
    1120             :         }
    1121     2142773 :         *last_strong_attack = 0;
    1122             :     }
    1123             : 
    1124             :     /* updates */
    1125     3904502 :     mvr2r( finc, finc_prev, ATT_NSEG );
    1126     3904502 :     *lt_finc = 0.95f * *lt_finc + 0.05f * mean( finc, ATT_NSEG );
    1127             : 
    1128     3904502 :     return attack;
    1129             : }
    1130             : 
    1131             : /*---------------------------------------------------------------------*
    1132             :  * ivas_smc_gmm()
    1133             :  *
    1134             :  * 1st stage of the speech/music classification (based on the GMM model)
    1135             :  *---------------------------------------------------------------------*/
    1136             : 
    1137             : /*! r: S/M decision (0=speech or noise,1=unclear,2=music) */
    1138    14464481 : int16_t ivas_smc_gmm(
    1139             :     Encoder_State *st,                    /* i/o: state structure                                     */
    1140             :     STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure                         */
    1141             :     const int16_t localVAD_HE_SAD,        /* i  : HE-SAD flag without hangover                        */
    1142             :     const float Etot,                     /* i  : total frame energy                                  */
    1143             :     const float lsp_new[M],               /* i  : LSPs in current frame                               */
    1144             :     const float cor_map_sum,              /* i  : correlation map sum (from multi-harmonic anal.)     */
    1145             :     const float epsP[M + 1],              /* i  : LP prediciton error                                 */
    1146             :     const float PS[],                     /* i  : energy spectrum                                     */
    1147             :     const float non_sta,                  /* i  : unbound non-stationarity                            */
    1148             :     const float relE,                     /* i  : relative frame energy                               */
    1149             :     int16_t *high_lpn_flag,               /* i/o: sp/mus LPN flag                                     */
    1150             :     const int16_t flag_spitch             /* i  : flag to indicate very short stable pitch            */
    1151             : )
    1152             : {
    1153             :     int16_t i, m, dec;
    1154             :     int16_t flag_odv;
    1155             :     float lps, lpm, lpn;
    1156             :     float ps[N_SMC_MIXTURES], pm[N_SMC_MIXTURES], pn[N_SMC_MIXTURES];
    1157             :     float fvm[N_PCA_COEF], lprob;
    1158             :     float dlp, ftmp, sum_PS, ps_diff, ps_sta, wrelE, wdrop, wght;
    1159             :     float wrise;
    1160             :     float dlp_mean2var;
    1161             :     float FV[N_SMC_FEATURES], *pFV, PS_norm[128], dPS[128];
    1162             :     const float *pODV;
    1163             :     float *pFV_st, smc_st_mean_fact;
    1164             :     int16_t relE_attack_flag;
    1165             :     int16_t j, len;
    1166             :     const float *pt_mel_fb;
    1167             :     float melS[NB_MEL_BANDS], mfcc[NB_MEL_BANDS];
    1168             :     int16_t odv_cnt;
    1169             :     int16_t i_out[N_SMC_FEATURES], *p_out;
    1170             : 
    1171             :     /*------------------------------------------------------------------*
    1172             :      * Initialization
    1173             :      *------------------------------------------------------------------*/
    1174             : 
    1175    14464481 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    1176             : 
    1177             :     /*------------------------------------------------------------------*
    1178             :      * State machine (sp_mus_state: -8 = INACTIVE, -7:-1 = UNSTABLE, 0:7 = ENTRY, 8 = STABLE )
    1179             :      *------------------------------------------------------------------*/
    1180             : 
    1181    14464481 :     if ( localVAD_HE_SAD )
    1182             :     {
    1183    11481615 :         if ( relE < -20 )
    1184             :         {
    1185      636290 :             if ( hSpMusClas->sp_mus_state > 0 )
    1186             :             {
    1187      120154 :                 if ( hSpMusClas->sp_mus_state < HANG_LEN )
    1188             :                 {
    1189             :                     /* energy is too low but we are in entry period -> reset the inactive counter to allow new entry later */
    1190       28752 :                     hSpMusClas->inact_cnt = 0;
    1191             :                 }
    1192             : 
    1193             :                 /* energy is too low -> we are going to unstable state */
    1194      120154 :                 hSpMusClas->sp_mus_state = 0;
    1195             :             }
    1196      516136 :             else if ( hSpMusClas->sp_mus_state > -HANG_LEN )
    1197             :             {
    1198             :                 /* energy is still too low -> we are still in unstable state */
    1199      238861 :                 hSpMusClas->sp_mus_state--;
    1200             :             }
    1201             :         }
    1202    10845325 :         else if ( hSpMusClas->sp_mus_state <= 0 )
    1203             :         {
    1204      271829 :             if ( hSpMusClas->inact_cnt == 0 )
    1205             :             {
    1206             : 
    1207      160098 :                 hSpMusClas->sp_mus_state = 1;
    1208             :             }
    1209             :             else
    1210             :             {
    1211             : 
    1212      111731 :                 hSpMusClas->sp_mus_state = HANG_LEN;
    1213             :             }
    1214             : 
    1215      271829 :             hSpMusClas->inact_cnt = 12;
    1216             :         }
    1217    10573496 :         else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1218             :         {
    1219             :             /* we are inside an entry period -> increment the counter of entry frames */
    1220      652239 :             hSpMusClas->sp_mus_state++;
    1221             :         }
    1222             : 
    1223    11481615 :         if ( hSpMusClas->sp_mus_state < 0 && hSpMusClas->inact_cnt > 0 )
    1224             :         {
    1225      219718 :             hSpMusClas->inact_cnt--;
    1226             :         }
    1227             :     }
    1228             :     else
    1229             :     {
    1230     2982866 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1231             :         {
    1232       15404 :             hSpMusClas->inact_cnt = 0;
    1233             :         }
    1234     2967462 :         else if ( hSpMusClas->inact_cnt > 0 )
    1235             :         {
    1236      358710 :             hSpMusClas->inact_cnt--;
    1237             :         }
    1238             : 
    1239     2982866 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1240             :         {
    1241       15404 :             hSpMusClas->sp_mus_state = -HANG_LEN;
    1242             :         }
    1243     2967462 :         else if ( hSpMusClas->sp_mus_state > 0 )
    1244             :         {
    1245       48210 :             hSpMusClas->sp_mus_state = -1;
    1246             :         }
    1247     2919252 :         else if ( hSpMusClas->sp_mus_state > -HANG_LEN )
    1248             :         {
    1249             :             /* we are in inactive state */
    1250      246384 :             hSpMusClas->sp_mus_state--;
    1251             :         }
    1252             :     }
    1253             : 
    1254             :     /* detect attacks based on relE */
    1255    14464481 :     if ( relE > hSpMusClas->prev_relE )
    1256             :     {
    1257     5644410 :         hSpMusClas->relE_attack_sum += relE - hSpMusClas->prev_relE;
    1258             :     }
    1259             :     else
    1260             :     {
    1261     8820071 :         hSpMusClas->relE_attack_sum = 0;
    1262             :     }
    1263    14464481 :     hSpMusClas->prev_relE = relE;
    1264             : 
    1265             :     /* update counter from last VAD 0->1 change */
    1266    14464481 :     if ( hSpMusClas->prev_vad == 0 && localVAD_HE_SAD == 1 )
    1267             :     {
    1268      202231 :         hSpMusClas->vad_0_1_cnt = 1;
    1269             :     }
    1270    14262250 :     else if ( localVAD_HE_SAD == 1 && hSpMusClas->vad_0_1_cnt > 0 && hSpMusClas->vad_0_1_cnt < 50 )
    1271             :     {
    1272     2782228 :         hSpMusClas->vad_0_1_cnt++;
    1273             :     }
    1274             :     else
    1275             :     {
    1276    11480022 :         hSpMusClas->vad_0_1_cnt = 0;
    1277             :     }
    1278    14464481 :     hSpMusClas->prev_vad = localVAD_HE_SAD;
    1279             : 
    1280    14464481 :     if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && hSpMusClas->relE_attack_sum > 5.0f )
    1281             :     {
    1282      228700 :         hSpMusClas->relE_attack_cnt++;
    1283             : 
    1284             :         /* set flag only in the first X frames in a series */
    1285      228700 :         if ( hSpMusClas->relE_attack_cnt > 0 && hSpMusClas->relE_attack_cnt < 3 )
    1286             :         {
    1287      177236 :             relE_attack_flag = 1;
    1288             :         }
    1289             :         else
    1290             :         {
    1291       51464 :             relE_attack_flag = 0;
    1292             :         }
    1293             :     }
    1294             :     else
    1295             :     {
    1296    14235781 :         hSpMusClas->relE_attack_cnt = 0;
    1297    14235781 :         relE_attack_flag = 0;
    1298             :     }
    1299             : 
    1300    14464481 :     hSpMusClas->prev_Etot = Etot;
    1301             : 
    1302             :     /*------------------------------------------------------------------*
    1303             :      * Preparation of the feature vector
    1304             :      *------------------------------------------------------------------*/
    1305             : 
    1306    14464481 :     pFV = FV;
    1307             : 
    1308             :     /* [0] OL pitch */
    1309    14464481 :     if ( relE_attack_flag || st->tc_cnt == 1 || st->tc_cnt == 2 )
    1310             :     {
    1311     1000413 :         *pFV++ = (float) st->pitch[2];
    1312             :     }
    1313             :     else
    1314             :     {
    1315    13464068 :         *pFV++ = (float) ( st->pitch[0] + st->pitch[1] + st->pitch[2] ) / 3.0f;
    1316             :     }
    1317             : 
    1318             :     /* [1] voicing */
    1319    14464481 :     if ( relE_attack_flag || st->tc_cnt == 1 || st->tc_cnt == 2 )
    1320             :     {
    1321     1000413 :         *pFV++ = st->voicing[2];
    1322             :     }
    1323             :     else
    1324             :     {
    1325    13464068 :         *pFV++ = ( st->voicing[0] + st->voicing[1] + st->voicing[2] ) / 3.0f;
    1326             :     }
    1327             : 
    1328             :     /* [2,3,4,5,6] LSFs */
    1329    14464481 :     *pFV++ = acosf( lsp_new[2] );
    1330    14464481 :     *pFV++ = acosf( lsp_new[3] );
    1331    14464481 :     *pFV++ = acosf( lsp_new[4] );
    1332    14464481 :     *pFV++ = acosf( lsp_new[5] );
    1333    14464481 :     *pFV++ = acosf( lsp_new[6] );
    1334             : 
    1335             :     /* [7] cor_map_sum */
    1336    14464481 :     *pFV++ = cor_map_sum;
    1337             : 
    1338             :     /* [8] non_sta */
    1339    14464481 :     *pFV++ = non_sta;
    1340             : 
    1341             :     /* [9] epsP */
    1342    14464481 :     *pFV++ = logf( epsP[14] + 1e-5f ) - logf( epsP[0] + 1e-5f );
    1343             : 
    1344             :     /* [10,11,12] MFCCs */
    1345    14464481 :     set_zero( melS, NB_MEL_BANDS );
    1346    14464481 :     pt_mel_fb = mel_fb;
    1347   593043721 :     for ( i = 0; i < NB_MEL_BANDS; i++ )
    1348             :     {
    1349   578579240 :         j = mel_fb_start[i];
    1350   578579240 :         len = mel_fb_len[i];
    1351   578579240 :         melS[i] = logf( dotp( &PS[j], pt_mel_fb, len ) + 1e-5f );
    1352   578579240 :         pt_mel_fb += len;
    1353             :     }
    1354             : 
    1355    14464481 :     v_mult_mat( mfcc, melS, dct_mtx, NB_MEL_BANDS, NB_MEL_COEF );
    1356             : 
    1357    14464481 :     *pFV++ = mfcc[2];
    1358    14464481 :     *pFV++ = mfcc[6];
    1359    14464481 :     *pFV++ = mfcc[12];
    1360             : 
    1361             :     /* calculation of differential normalized power spectrum */
    1362    14464481 :     sum_PS = 1e-5f;
    1363   983584708 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
    1364             :     {
    1365   969120227 :         sum_PS += PS[i];
    1366             :     }
    1367             : 
    1368   983584708 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
    1369             :     {
    1370   969120227 :         PS_norm[i] = PS[i] / sum_PS;
    1371   969120227 :         dPS[i] = fabsf( PS_norm[i] - hSpMusClas->past_PS[i - LOWEST_FBIN] );
    1372             :     }
    1373             : 
    1374             :     /* [13] ps_diff (spectral difference) */
    1375    14464481 :     ps_diff = 0;
    1376   983584708 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
    1377             :     {
    1378   969120227 :         ps_diff += dPS[i];
    1379             :     }
    1380             : 
    1381    14464481 :     *pFV++ = ps_diff;
    1382             : 
    1383             :     /* [14] ps_sta (spectral stationarity) */
    1384    14464481 :     ps_sta = 0;
    1385   983584708 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
    1386             :     {
    1387   969120227 :         if ( PS_norm[i] > hSpMusClas->past_PS[i - LOWEST_FBIN] )
    1388             :         {
    1389   453570766 :             ps_sta += PS_norm[i] / ( dPS[i] + 1e-5f );
    1390             :         }
    1391             :         else
    1392             :         {
    1393   515549461 :             ps_sta += hSpMusClas->past_PS[i - LOWEST_FBIN] / ( dPS[i] + 1e-5f );
    1394             :         }
    1395             :     }
    1396             : 
    1397    14464481 :     *pFV++ = logf( ps_sta + 1e-5f );
    1398    14464481 :     mvr2r( &PS_norm[LOWEST_FBIN], hSpMusClas->past_PS, HIGHEST_FBIN - LOWEST_FBIN );
    1399             : 
    1400             :     /* save ps_diff and ps_sta features for XTALK and UNCLR classifier */
    1401    14464481 :     if ( hStereoClassif != NULL )
    1402             :     {
    1403    10805766 :         if ( st->idchan == 0 )
    1404             :         {
    1405     5766789 :             hStereoClassif->ps_diff_ch1 = ps_diff;
    1406     5766789 :             hStereoClassif->ps_sta_ch1 = logf( ps_sta + 1e-5f );
    1407             :         }
    1408             :         else
    1409             :         {
    1410     5038977 :             hStereoClassif->ps_diff_ch2 = ps_diff;
    1411     5038977 :             hStereoClassif->ps_sta_ch2 = logf( ps_sta + 1e-5f );
    1412             :         }
    1413             :     }
    1414             : 
    1415             :     /*------------------------------------------------------------------*
    1416             :      * Outlier detection based on feature histograms
    1417             :      *------------------------------------------------------------------*/
    1418             : 
    1419    14464481 :     flag_odv = 0;
    1420    14464481 :     if ( localVAD_HE_SAD )
    1421             :     {
    1422    11481615 :         pFV = FV;
    1423    11481615 :         pODV = hout_intervals;
    1424    11481615 :         p_out = i_out;
    1425    11481615 :         odv_cnt = 0;
    1426   183705840 :         for ( i = 0; i < N_SMC_FEATURES; i++ )
    1427             :         {
    1428   172224225 :             if ( *pFV < pODV[0] || *pFV > pODV[1] )
    1429             :             {
    1430      157710 :                 *p_out++ = i;
    1431      157710 :                 odv_cnt++;
    1432             :             }
    1433             : 
    1434   172224225 :             pFV++;
    1435   172224225 :             pODV += 2;
    1436             :         }
    1437             : 
    1438             :         /* set outlier flag */
    1439    11481615 :         if ( odv_cnt >= 2 )
    1440             :         {
    1441       43277 :             flag_odv = 1;
    1442             : 
    1443             :             /* replace outlying features with values from the previous frame */
    1444      139821 :             for ( i = 0; i < odv_cnt; i++ )
    1445             :             {
    1446       96544 :                 FV[i_out[i]] = hSpMusClas->prev_FV[i_out[i]];
    1447             :             }
    1448             :         }
    1449             :     }
    1450             : 
    1451             :     /*------------------------------------------------------------------*
    1452             :      * Adaptive short-term mean filter on feature vector
    1453             :      *------------------------------------------------------------------*/
    1454             : 
    1455    14464481 :     pFV = FV;
    1456    14464481 :     pFV_st = hSpMusClas->FV_st;
    1457    14464481 :     smc_st_mean_fact = SMC_ST_MEAN_FACT;
    1458   231431696 :     for ( i = 0; i < N_SMC_FEATURES; i++ )
    1459             :     {
    1460   216967215 :         *pFV_st = smc_st_mean_fact * ( *pFV_st ) + ( 1 - smc_st_mean_fact ) * ( *pFV );
    1461             : 
    1462   216967215 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && ( relE_attack_flag || flag_odv ) )
    1463             :         {
    1464             :             /* strong attack or outlier frame during entry state -> features cannot be trusted but there is also no useful past info -> */
    1465             :             /* -> do whatever you want because dlp will be reset to 0 anyway */
    1466     2678730 :             pFV++;
    1467     2678730 :             pFV_st++;
    1468             :         }
    1469   214288485 :         else if ( hSpMusClas->sp_mus_state == HANG_LEN && ( st->tc_cnt == 1 || st->tc_cnt == 2 ) )
    1470             :         {
    1471             :             /* energy attack in stable state -> use current features instead of the long-term average */
    1472    11542320 :             pFV++;
    1473    11542320 :             pFV_st++;
    1474             :         }
    1475             :         else
    1476             :         {
    1477   202746165 :             *pFV++ = *pFV_st++;
    1478             :         }
    1479             :     }
    1480             : 
    1481             :     /* update */
    1482    14464481 :     mvr2r( FV, hSpMusClas->prev_FV, N_SMC_FEATURES );
    1483             : 
    1484             :     /*------------------------------------------------------------------*
    1485             :      * Non-linear power transformation (boxcox) on certain features
    1486             :      *------------------------------------------------------------------*/
    1487             : 
    1488    14464481 :     pFV = FV;
    1489   231431696 :     for ( i = 0; i < N_SMC_FEATURES; i++ )
    1490             :     {
    1491   216967215 :         if ( bcox_lmbd[i] != 0 )
    1492             :         {
    1493    43393443 :             *pFV -= bcox_add_cnst[i];
    1494    43393443 :             if ( *pFV < 1 )
    1495             :             {
    1496     2713741 :                 *pFV = 1;
    1497             :             }
    1498    43393443 :             *pFV = ( powf( *pFV, bcox_lmbd[i] ) - 1 ) / bcox_lmbd[i];
    1499             :         }
    1500             : 
    1501   216967215 :         pFV++;
    1502             :     }
    1503             : 
    1504             :     /*------------------------------------------------------------------*
    1505             :      * Scaling of the feature vector
    1506             :      * PCA
    1507             :      *------------------------------------------------------------------*/
    1508             : 
    1509    14464481 :     pFV = FV;
    1510   231431696 :     for ( i = 0; i < N_SMC_FEATURES; i++ )
    1511             :     {
    1512             :         /* Standard scaler - mean and variance normalization */
    1513   216967215 :         *pFV = ( *pFV - sm_means[i] ) / sm_scale[i];
    1514   216967215 :         pFV++;
    1515             : 
    1516             :         /* MinMax scaler - mean and variance normalization */
    1517             :         /**pFV = *pFV * sm_scale[i] + sm_min[i];*/
    1518             :         /*pFV++;*/
    1519             :     }
    1520             : 
    1521             :     /* PCA */
    1522    14464481 :     v_sub( FV, pca_mean_, FV, N_SMC_FEATURES );
    1523    14464481 :     v_mult_mat( FV, FV, pca_components_, N_SMC_FEATURES, N_PCA_COEF );
    1524             : 
    1525             :     /*------------------------------------------------------------------*
    1526             :      * Calculation of posterior probability
    1527             :      * Log-probability
    1528             :      *------------------------------------------------------------------*/
    1529             : 
    1530             :     /* run loop for all mixtures (for each mixture, calculate the probability of speech, music and noise) */
    1531    14464481 :     lps = lpm = lpn = 0;
    1532   101251367 :     for ( m = 0; m < N_SMC_MIXTURES; m++ )
    1533             :     {
    1534    86786886 :         v_sub( FV, &means_speech[m * N_PCA_COEF], fvm, N_PCA_COEF );
    1535    86786886 :         lprob = dot_product_cholesky( fvm, &prec_chol_speech[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
    1536    86786886 :         ps[m] = logf( weights_speech[m] ) + log_det_chol_speech[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob;
    1537             : 
    1538    86786886 :         v_sub( FV, &means_music[m * N_PCA_COEF], fvm, N_PCA_COEF );
    1539    86786886 :         lprob = dot_product_cholesky( fvm, &prec_chol_music[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
    1540    86786886 :         pm[m] = logf( weights_music[m] ) + log_det_chol_music[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob;
    1541             : 
    1542    86786886 :         v_sub( FV, &means_noise[m * N_PCA_COEF], fvm, N_PCA_COEF );
    1543    86786886 :         lprob = dot_product_cholesky( fvm, &prec_chol_noise[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
    1544    86786886 :         pn[m] = logf( weights_noise[m] ) + log_det_chol_noise[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob;
    1545             :     }
    1546             : 
    1547    14464481 :     lps = logsumexp( ps, N_SMC_MIXTURES );
    1548    14464481 :     lpm = logsumexp( pm, N_SMC_MIXTURES );
    1549    14464481 :     lpn = logsumexp( pn, N_SMC_MIXTURES );
    1550             : 
    1551    14464481 :     *high_lpn_flag = 0;
    1552    14464481 :     if ( lpn > lps && lpn > lpm )
    1553             :     {
    1554     1736377 :         *high_lpn_flag = 1;
    1555             :     }
    1556             : 
    1557    14464481 :     hSpMusClas->lpm = lpm;
    1558    14464481 :     hSpMusClas->lps = lps;
    1559    14464481 :     hSpMusClas->lpn = lpn;
    1560             : 
    1561             :     /* determine HQ Generic speech class */
    1562    14464481 :     if ( st->hHQ_core != NULL )
    1563             :     {
    1564     3964156 :         if ( lps > lpm + 0.5f )
    1565             :         {
    1566     1824004 :             st->hHQ_core->hq_generic_speech_class = 1;
    1567             :         }
    1568             :         else
    1569             :         {
    1570     2140152 :             st->hHQ_core->hq_generic_speech_class = 0;
    1571             :         }
    1572             :     }
    1573             : 
    1574             :     /*------------------------------------------------------------------*
    1575             :      * Decision without hangover
    1576             :      * Weighted decision
    1577             :      *------------------------------------------------------------------*/
    1578             : 
    1579             :     /* decision without hangover (0 - speech/noise, 1 - music) */
    1580    14464481 :     if ( !localVAD_HE_SAD || Etot < 10 || ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && ( relE_attack_flag || flag_odv ) ) )
    1581             :     {
    1582     3273697 :         dlp = 0;
    1583             :     }
    1584             :     else
    1585             :     {
    1586    11190784 :         dlp = lpm - lps + DLP_BIAS;
    1587             : 
    1588    11190784 :         if ( dlp > 30.0f )
    1589             :         {
    1590      854735 :             dlp = 30.0f;
    1591             :         }
    1592    10336049 :         else if ( dlp < -30.0f )
    1593             :         {
    1594           0 :             dlp = -30.0f;
    1595             :         }
    1596             :     }
    1597             : 
    1598    14464481 :     dec = dlp > 0;
    1599             : 
    1600             :     /* calculate weight based on relE (higher relE -> lower weight, lower relE -> higher weight) */
    1601    14464481 :     wrelE = lin_interp( relE, 15.0f, 0.9f, -15.0f, 0.99f, 1 );
    1602             : 
    1603             :     /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */
    1604    14464481 :     hSpMusClas->dlp_mean_ST = 0.8f * hSpMusClas->dlp_mean_ST + 0.2f * dlp;
    1605    14464481 :     hSpMusClas->lt_dec_thres = hSpMusClas->dlp_mean_ST;
    1606             : 
    1607    14464481 :     if ( dlp < 0 && dlp < hSpMusClas->dlp_mean_ST )
    1608             :     {
    1609     3146622 :         if ( hSpMusClas->dlp_mean_ST > 0 )
    1610             :         {
    1611     1048358 :             hSpMusClas->wdrop = -dlp;
    1612             :         }
    1613     2098264 :         else if ( hSpMusClas->wdrop > 0 )
    1614             :         {
    1615      566969 :             hSpMusClas->wdrop += hSpMusClas->dlp_mean_ST - dlp;
    1616             :         }
    1617             :     }
    1618             :     else
    1619             :     {
    1620    11317859 :         hSpMusClas->wdrop = 0;
    1621             :     }
    1622             : 
    1623    14464481 :     wdrop = lin_interp( hSpMusClas->wdrop, 15.0f, 0.7f, 0.0f, 1.0f, 1 );
    1624             : 
    1625             :     /* calculate weight based on rises of dlp (close to 1 during sudden rise of dlp, close to 0 otherwise) */
    1626    14464481 :     if ( hSpMusClas->sp_mus_state == HANG_LEN && hSpMusClas->dlp_mean_ST > 0 && hSpMusClas->dlp_mean_ST > hSpMusClas->past_dlp_mean_ST[0] )
    1627             :     {
    1628     3384848 :         if ( hSpMusClas->past_dlp_mean_ST[0] < 0 )
    1629             :         {
    1630      201472 :             hSpMusClas->wrise = hSpMusClas->dlp_mean_ST;
    1631             :         }
    1632     3183376 :         else if ( hSpMusClas->wrise > 0 )
    1633             :         {
    1634      472886 :             hSpMusClas->wrise += hSpMusClas->dlp_mean_ST - hSpMusClas->past_dlp_mean_ST[0];
    1635             :         }
    1636             :     }
    1637             :     else
    1638             :     {
    1639    11079633 :         hSpMusClas->wrise = 0;
    1640             :     }
    1641             : 
    1642    14464481 :     wrise = lin_interp( hSpMusClas->wrise, 5.0f, 0.95f, 0.0f, 1.0f, 1 );
    1643             : 
    1644             :     /* combine weights into one */
    1645    14464481 :     wght = wrelE * wdrop * wrise;
    1646             : 
    1647             :     /* ratio of delta means vs. delta variances */
    1648    14464481 :     if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1649             :     {
    1650      743917 :         hSpMusClas->dlp_mean_LT = dlp;
    1651      743917 :         hSpMusClas->dlp_var_LT = 0;
    1652             :     }
    1653             : 
    1654    14464481 :     hSpMusClas->dlp_mean_LT = 0.9f * hSpMusClas->dlp_mean_LT + 0.1f * dlp;
    1655    14464481 :     ftmp = dlp - hSpMusClas->dlp_mean_LT;
    1656    14464481 :     hSpMusClas->dlp_var_LT = 0.9f * hSpMusClas->dlp_var_LT + 0.1f * ( ftmp * ftmp );
    1657             : 
    1658    14464481 :     if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1659             :     {
    1660      743917 :         dlp_mean2var = 0;
    1661             :     }
    1662             :     else
    1663             :     {
    1664    13720564 :         dlp_mean2var = fabsf( hSpMusClas->dlp_mean_LT ) / ( sqrtf( fabsf( hSpMusClas->dlp_var_LT ) ) + 1.0f );
    1665             :     }
    1666             : 
    1667    14464481 :     if ( dlp_mean2var > 15.0f )
    1668             :     {
    1669             :         /* decrease the weight little bit when the classifier indicates "strong speech" or "strong music" */
    1670      206323 :         wght *= 0.9f;
    1671             :     }
    1672             : 
    1673    14464481 :     if ( wght > 1.0f )
    1674             :     {
    1675           0 :         wght = 1.0f;
    1676             :     }
    1677    14464481 :     else if ( wght < 0.01f )
    1678             :     {
    1679           0 :         wght = 0.01f;
    1680             :     }
    1681             : 
    1682    14464481 :     if ( Etot < 10 )
    1683             :     {
    1684             :         /* silence */
    1685     2476523 :         wght = 0.92f;
    1686             :     }
    1687             : 
    1688             :     /* calculate weighted decision */
    1689    14464481 :     hSpMusClas->wdlp_0_95_sp = wght * hSpMusClas->wdlp_0_95_sp + ( 1 - wght ) * dlp;
    1690             : 
    1691             :     /* xtalk classifier: apply long hysteresis to prevent LRTD on music */
    1692    14464481 :     hSpMusClas->wdlp_xtalk = 0.995f * hSpMusClas->wdlp_xtalk + 0.005f * dlp;
    1693             : 
    1694             :     /*------------------------------------------------------------------*
    1695             :      * Final speech/music decision
    1696             :      *------------------------------------------------------------------*/
    1697             : 
    1698    14464481 :     if ( flag_spitch )
    1699             :     {
    1700      905129 :         hSpMusClas->flag_spitch_cnt = 5;
    1701             :     }
    1702    13559352 :     else if ( hSpMusClas->flag_spitch_cnt > 0 )
    1703             :     {
    1704      111914 :         hSpMusClas->flag_spitch_cnt--;
    1705             :     }
    1706             : 
    1707    14464481 :     if ( Etot < 10 )
    1708             :     {
    1709             :         /* silence */
    1710     2476523 :         dec = 0;
    1711             :     }
    1712    11987958 :     else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1713             :     {
    1714             :         /* entry state -> final decision is calculated based on weighted average of past non-binary decisions */
    1715      742766 :         ftmp = w_spmus[hSpMusClas->sp_mus_state - 1][0] * dlp;
    1716      742766 :         ftmp += dotp( &w_spmus[hSpMusClas->sp_mus_state - 1][1], hSpMusClas->past_dlp, HANG_LEN - 1 );
    1717      742766 :         if ( ftmp > 2.0f )
    1718             :         {
    1719      352403 :             if ( dlp > 2.0f )
    1720             :             {
    1721      224251 :                 dec = 2;
    1722             :             }
    1723             :             else
    1724             :             {
    1725      128152 :                 dec = 1;
    1726             :             }
    1727             :         }
    1728             :         else
    1729             :         {
    1730      390363 :             dec = 0;
    1731             :         }
    1732             :     }
    1733             :     else
    1734             :     {
    1735             :         /* stable active state */
    1736    11245192 :         if ( hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 &&
    1737     5034150 :              ( ( hSpMusClas->flag_spitch_cnt > 0 && hSpMusClas->wdlp_0_95_sp > 3.4f ) || ( hSpMusClas->flag_spitch_cnt == 0 && hSpMusClas->wdlp_0_95_sp > 2.1f ) ) )
    1738             :         {
    1739             :             /* switching from speech to unclear */
    1740       20516 :             dec = 1;
    1741             :         }
    1742    11224676 :         else if ( hSpMusClas->past_dec[0] == 0 && hSpMusClas->vad_0_1_cnt < 50 && hSpMusClas->relE_attack_sum == 0.0f && hSpMusClas->wdlp_0_95_sp > 1.0f )
    1743             :         {
    1744             :             /* switch from speech to unclear also during slowly rising weak music onsets */
    1745       31482 :             dec = 1;
    1746             :         }
    1747    11193194 :         else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp > 2.5f )
    1748             :         {
    1749             :             /* switching from unclear to music */
    1750       43693 :             dec = 2;
    1751             :         }
    1752    11149501 :         else if ( hSpMusClas->past_dec[0] == 2 && hSpMusClas->past_dec[1] == 2 && hSpMusClas->past_dec[2] == 2 && hSpMusClas->wdlp_0_95_sp < -1.0f )
    1753             :         {
    1754             :             /* switching from music to unclear */
    1755       31271 :             dec = 1;
    1756             :         }
    1757    11118230 :         else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp < -2.5f )
    1758             :         {
    1759             :             /* switching from unclear to speech */
    1760       31497 :             dec = 0;
    1761             :         }
    1762             :         else
    1763             :         {
    1764    11086733 :             dec = hSpMusClas->past_dec[0];
    1765             :         }
    1766             :     }
    1767             : 
    1768             :     /*------------------------------------------------------------------*
    1769             :      * raw S/M decision based on smoothed GMM score
    1770             :      *------------------------------------------------------------------*/
    1771             : 
    1772    14464481 :     if ( dec == 0 || st->hSpMusClas->wdlp_0_95_sp <= 0 )
    1773             :     {
    1774     8712956 :         st->sp_aud_decision0 = 0;
    1775     8712956 :         st->sp_aud_decision1 = 0;
    1776             :     }
    1777             :     else
    1778             :     {
    1779     5751525 :         st->sp_aud_decision0 = 1;
    1780     5751525 :         st->sp_aud_decision1 = 1;
    1781             :     }
    1782             : 
    1783             :     /*------------------------------------------------------------------*
    1784             :      * Updates
    1785             :      *------------------------------------------------------------------*/
    1786             : 
    1787             :     /* update buffer of past non-binary decisions */
    1788    14464481 :     mvr2r( &hSpMusClas->past_dlp[0], &hSpMusClas->past_dlp[1], HANG_LEN - 2 );
    1789    14464481 :     hSpMusClas->past_dlp[0] = dlp;
    1790             : 
    1791    14464481 :     mvr2r( &hSpMusClas->past_dlp_mean_ST[0], &hSpMusClas->past_dlp_mean_ST[1], HANG_LEN - 2 );
    1792    14464481 :     hSpMusClas->past_dlp_mean_ST[0] = hSpMusClas->dlp_mean_ST;
    1793             : 
    1794             :     /* update buffer of past binary decisions */
    1795    14464481 :     mvs2s( &hSpMusClas->past_dec[0], &hSpMusClas->past_dec[1], HANG_LEN - 2 );
    1796    14464481 :     hSpMusClas->past_dec[0] = dec;
    1797             : 
    1798             : #ifdef DEBUG_MODE_INFO
    1799             :     dbgwrite( &st->hSpMusClas->wdlp_0_95_sp, sizeof( float ), 1, 1, "res/wdlp_0_95_sp.x" );
    1800             : #endif
    1801             : 
    1802    14464481 :     return dec;
    1803             : }
    1804             : 
    1805             : /*---------------------------------------------------------------------*
    1806             :  * ivas_smc_mode_selection()
    1807             :  *
    1808             :  * 2nd stage speech/music classifier: selects the coding mode (ACELP, GSC or TCX) from the raw 1st stage S/M decision and then applies peakiness/tonality, pitch, onset and attack overrides
    1809             :  * output (sp_aud_decision1 - sp_aud_decision2 -> coding mode); st->coder_type, st->GSC_noisy_speech, st->GSC_IVAS_mode and *attack_flag may be updated as well:
    1810             :  * 0 - 0 -> ACELP
    1811             :  * 1 - 0 -> GSC
    1812             :  * 1 - 1 -> TCX
    1813             :  *---------------------------------------------------------------------*/
    1814             : 
    1815     3856787 : void ivas_smc_mode_selection(
    1816             :     Encoder_State *st,           /* i/o: encoder state structure                 */
    1817             :     const int32_t element_brate, /* i  : element bitrate                         */
    1818             :     int16_t smc_dec,             /* i  : raw decision of the 1st stage classifier*/
    1819             :     const float relE,            /* i  : relative frame energy                   */
    1820             :     const float Etot,            /* i  : total frame energy                      */
    1821             :     int16_t *attack_flag,        /* i/o: attack flag (GSC or TC), reset to 0 on entry before any read */
    1822             :     const float *inp,            /* i  : input signal                            */
    1823             :     const float S_map[],         /* i  : short-term correlation map              */
    1824             :     const int16_t flag_spitch    /* i  : flag to indicate very short stable pitch*/
    1825             : )
    1826             : {
    1827             :     int16_t attack;
    1828             :     float ton;
    1829             :     int16_t i;
    1830             :     float S_p2a, S_max, S_ave;
    1831             :     float thr_sp2a;
    1832             : 
    1833     3856787 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas; /* local alias; the st->hSpMusClas accesses further below go through the same handle */
    1834             : 
    1835             :     /* initialization: "no attack" and sp_aud_decision2 = 0 are the defaults */
    1836     3856787 :     *attack_flag = 0;
    1837     3856787 :     st->sp_aud_decision2 = 0;
    1838             : 
    1839             :     /* signal stability estimation */
    1840     3856787 :     stab_est( Etot, hSpMusClas->gsc_lt_diff_etot, &hSpMusClas->gsc_mem_etot, &hSpMusClas->gsc_nb_thr_3, &hSpMusClas->gsc_nb_thr_1, hSpMusClas->gsc_thres, &hSpMusClas->gsc_last_music_flag, st->vad_flag );
    1841             : 
    1842             :     /* calculate variance of correlation */
    1843     3856787 :     var_cor_calc( st->old_corr, &hSpMusClas->mold_corr, hSpMusClas->var_cor_t, &hSpMusClas->high_stable_cor );
    1844             : 
    1845             :     /* attack detection; NOTE(review): st->clas is passed for both the 2nd and the 7th argument - confirm against the attack_det() prototype */
    1846     3856787 :     attack = attack_det( inp, st->clas, st->localVAD, st->coder_type, 0, st->element_mode, st->clas, hSpMusClas->finc_prev, &hSpMusClas->lt_finc, &hSpMusClas->last_strong_attack );
    1847             : 
    1848             :     /* tonal detector */
    1849     3856787 :     ton = tonal_det( S_map, st->vad_flag, hSpMusClas->tod_S_map_lt, &hSpMusClas->tod_thr_lt, &hSpMusClas->tod_weight, &hSpMusClas->tod_S_mass_prev, &hSpMusClas->tod_S_mass_lt );
    1850             : 
    1851             : 
    1852             :     /* calculate spectral peak-to-average ratio: smooth st->Bin_E over time with P2A_FACT, then take S_max - S_ave of the smoothed spectrum (a difference of the stored values) */
    1853   312399747 :     for ( i = 0; i < TOD_NSPEC; i++ )
    1854             :     {
    1855   308542960 :         st->hSpMusClas->tod_lt_Bin_E[i] = P2A_FACT * st->hSpMusClas->tod_lt_Bin_E[i] + ( 1 - P2A_FACT ) * st->Bin_E[i];
    1856             :     }
    1857             : 
    1858     3856787 :     maximum( st->hSpMusClas->tod_lt_Bin_E, TOD_NSPEC, &S_max );
    1859     3856787 :     S_ave = sum_f( st->hSpMusClas->tod_lt_Bin_E, TOD_NSPEC ) / TOD_NSPEC;
    1860     3856787 :     S_p2a = S_max - S_ave;
    1861             : 
    1862     3856787 :     if ( element_brate <= IVAS_16k4 ) /* the peakiness threshold depends on the element bitrate */
    1863             :     {
    1864     1149554 :         thr_sp2a = THR_P2A_HIGH;
    1865             :     }
    1866             :     else
    1867             :     {
    1868     2707233 :         thr_sp2a = THR_P2A;
    1869             :     }
    1870             : 
    1871             :     /* initial 3-way selection of coding modes (ACELP/GSC/TCX); the peakiness/tonality test is evaluated first and takes precedence over smc_dec */
    1872     3856787 :     if ( relE > -10.0f && ( S_p2a > thr_sp2a || ton > hSpMusClas->tod_thr_lt ) )
    1873             :     {
    1874             :         /* select TCX to encode extremely peaky signals or strongly tonal signals */
    1875      206213 :         st->sp_aud_decision1 = 1;
    1876      206213 :         st->sp_aud_decision2 = 1;
    1877             :     }
    1878     3650574 :     else if ( smc_dec == SPEECH )
    1879             :     {
    1880             :         /* select ACELP to encode speech */
    1881     1173404 :         st->sp_aud_decision1 = 0;
    1882     1173404 :         st->sp_aud_decision2 = 0;
    1883             :     }
    1884     2477170 :     else if ( smc_dec == SPEECH_OR_MUSIC )
    1885             :     {
    1886             :         /* select GSC to encode "unclear" segments (classifier's score on the borderline) */
    1887       61184 :         st->sp_aud_decision1 = 1;
    1888       61184 :         st->sp_aud_decision2 = 0;
    1889             :     }
    1890             :     else
    1891             :     {
    1892             :         /* select TCX to encode music (any smc_dec value other than SPEECH or SPEECH_OR_MUSIC ends up here) */
    1893     2415986 :         st->sp_aud_decision1 = 1;
    1894     2415986 :         st->sp_aud_decision2 = 1;
    1895             :     }
    1896             : 
    1897             :     /* change decision from GSC to ACELP/TCX in some special cases (the three tests are mutually exclusive, first match wins) */
    1898     3856787 :     if ( st->sp_aud_decision1 == 1 && st->sp_aud_decision2 == 0 )
    1899             :     {
    1900       61184 :         if ( hSpMusClas->ener_RAT < 0.18f && hSpMusClas->lt_dec_thres > 15.0f )
    1901             :         {
    1902             :             /* prevent GSC on strong music with almost no content below 1kHz */
    1903         107 :             st->sp_aud_decision2 = 1;
    1904             :         }
    1905       61077 :         else if ( flag_spitch )
    1906             :         {
    1907             :             /* prevent GSC on signals with very short and stable high pitch period */
    1908        2459 :             if ( hSpMusClas->wdlp_0_95_sp < 2.5f )
    1909             :             {
    1910             :                 /* select ACELP instead */
    1911        2334 :                 st->sp_aud_decision1 = 0;
    1912             :             }
    1913             :             else
    1914             :             {
    1915             :                 /* select TCX instead */
    1916         125 :                 st->sp_aud_decision2 = 1;
    1917             :             }
    1918             :         }
    1919       58618 :         else if ( hSpMusClas->high_stable_cor && st->pitch[0] >= 130 )
    1920             :         {
    1921             :             /* prevent GSC in highly correlated signal with low energy variation */
    1922             :             /* this is basically a patch against bassoon-type of music */
    1923           2 :             st->sp_aud_decision2 = 1;
    1924             :         }
    1925             :     }
    1926             : 
    1927             :     /* change decision from GSC to ACELP TC during attacks/onsets (GSC is re-tested because the block above may have changed the selection) */
    1928     3856787 :     if ( st->sp_aud_decision1 == 1 && st->sp_aud_decision2 == 0 )
    1929             :     {
    1930       58616 :         if ( ( hSpMusClas->gsc_lt_diff_etot[MAX_LT - 1] > 4.5f ) &&
    1931        3865 :              ( hSpMusClas->gsc_lt_diff_etot[MAX_LT - 1] - hSpMusClas->gsc_lt_diff_etot[MAX_LT - 2] > 10.0f ) )
    1932             :         {
    1933        1055 :             if ( st->tc_cnt == 1 )
    1934             :             {
    1935             :                 /* do ACELP TC coding instead of GC/VC if onset has been already declared before */
    1936         283 :                 st->sp_aud_decision1 = 0;
    1937         283 :                 st->coder_type = TRANSITION;
    1938             :             }
    1939             :             else
    1940             :             {
    1941         772 :                 if ( attack >= ATT_3LSUB_POS )
    1942             :                 {
    1943             :                     /* do ACELP TC coding also if attack is located in the last subframe */
    1944         223 :                     st->sp_aud_decision1 = 0;
    1945         223 :                     *attack_flag = attack + 1;
    1946         223 :                     st->coder_type = TRANSITION;
    1947             :                 }
    1948         549 :                 else if ( attack >= ATT_SEG_LEN / 2 )
    1949             :                 {
    1950             :                     /* do GSC coding if attack is located after the first quarter of the first subframe */
    1951             :                     /* (pre-echo will be treated at the decoder side) */
    1952          69 :                     *attack_flag = 31; /* NOTE(review): dead store - unconditionally overwritten by the next statement */
    1953          69 :                     *attack_flag = attack + 1;
    1954             :                 }
    1955             :             }
    1956             :         }
    1957             :     }
    1958             : 
    1959     3856787 :     if ( st->localVAD == 1 && st->coder_type == GENERIC && attack > 0 /*&& *attack_flag < 32*/ /*&& st->tc_cnt != 2*/ && !( st->sp_aud_decision2 == 1 && ton > 0.65f ) )
    1960             :     {
    1961             :         /* change ACELP coder_type to TC if attack has been detected; both decisions are cleared whatever mode was selected above, only frames with sp_aud_decision2 == 1 and ton > 0.65f are exempt */
    1962       66036 :         st->sp_aud_decision1 = 0;
    1963       66036 :         st->sp_aud_decision2 = 0;
    1964             : 
    1965       66036 :         st->coder_type = TRANSITION;
    1966       66036 :         *attack_flag = attack + 1;
    1967             :     }
    1968             : 
    1969             : #ifdef DEBUGGING
    1970             :     if ( st->idchan == 0 && st->coder_type != INACTIVE )
    1971             :     {
    1972             :         if ( st->force == FORCE_GSC && element_brate < IVAS_24k4 )
    1973             :         {
    1974             :             /* enforce GSC */
    1975             :             st->sp_aud_decision1 = 1;
    1976             :             st->sp_aud_decision2 = 0;
    1977             :         }
    1978             :         else if ( st->force == FORCE_SPEECH && ( st->sp_aud_decision1 == 1 || st->sp_aud_decision2 == 1 ) )
    1979             :         {
    1980             :             if ( element_brate < IVAS_24k4 )
    1981             :             {
    1982             :                 /* convert TCX to GSC */
    1983             :                 st->sp_aud_decision1 = 1;
    1984             :                 st->sp_aud_decision2 = 0;
    1985             :             }
    1986             :             else
    1987             :             {
    1988             :                 /* convert TCX to ACELP */
    1989             :                 st->sp_aud_decision1 = 0;
    1990             :                 st->sp_aud_decision2 = 0;
    1991             :             }
    1992             :         }
    1993             :         else if ( st->force == FORCE_MUSIC )
    1994             :         {
    1995             :             /* enforce TCX */
    1996             :             st->sp_aud_decision1 = 1;
    1997             :             st->sp_aud_decision2 = 1;
    1998             :         }
    1999             :     }
    2000             : #endif
    2001             : 
    2002             :     /* set GSC noisy speech flag on unvoiced SWB segments (the GSC submode block below re-assigns the flag whenever it is entered) */
    2003     3856787 :     st->GSC_noisy_speech = 0;
    2004     3856787 :     if ( st->vad_flag == 1 && element_brate <= IVAS_16k4 && st->lp_noise > 30.0f && st->sp_aud_decision1 == 0 && st->bwidth >= SWB && st->coder_type_raw == UNVOICED )
    2005             :     {
    2006        1561 :         st->GSC_noisy_speech = 1;
    2007             :     }
    2008             : 
    2009             :     /* set GSC submode (only for element modes above EVS_MONO with GSC selected): 1 by default, 3 when wdlp_0_95_sp > 0, otherwise 2 when tc_cnt > 0 */
    2010     3856787 :     if ( st->element_mode > EVS_MONO && ( st->sp_aud_decision1 == 1 && st->sp_aud_decision2 == 0 ) && st->total_brate > STEREO_GSC_BIT_RATE_ALLOC ) /* below STEREO_GSC_BIT_RATE_ALLOC, fall back on normal GSC */
    2011             :     {
    2012       54515 :         st->GSC_IVAS_mode = 1;
    2013       54515 :         if ( st->hSpMusClas->wdlp_0_95_sp > 0.0f )
    2014             :         {
    2015             :             /* music-like content */
    2016       35830 :             st->GSC_IVAS_mode = 3;
    2017             :         }
    2018       18685 :         else if ( st->tc_cnt > 0 )
    2019             :         {
    2020             :             /* likely presence of an onset, GSC bit allocation will be more focused on LF */
    2021        1705 :             st->GSC_IVAS_mode = 2;
    2022             :         }
    2023             : 
    2024       54515 :         if ( st->coder_type_raw == UNVOICED && st->sp_aud_decision0 == 0 /*&& st->GSC_IVAS_mode < 3*/ )
    2025             :         {
    2026        4048 :             st->GSC_noisy_speech = 1;
    2027             :         }
    2028             :         else
    2029             :         {
    2030       50467 :             st->GSC_noisy_speech = 0;
    2031             :         }
    2032             :     }
    2033             : 
    2034             :     /* set coder_type to AUDIO when GSC is selected (st->core will be set later in the decision matrix) */
    2035     3856787 :     if ( ( st->sp_aud_decision1 == 1 && st->sp_aud_decision2 == 0 ) || st->GSC_noisy_speech )
    2036             :     {
    2037       59034 :         st->coder_type = AUDIO;
    2038       59034 :         if ( st->hGSCEnc != NULL && st->GSC_noisy_speech == 0 ) /* In case of GSC_noisy_speech, NOISE_LEVEL should remain at NOISE_LEVEL_SP3 */
    2039             :         {
    2040       53425 :             st->hGSCEnc->noise_lev = NOISE_LEVEL_SP0;
    2041             :         }
    2042             :     }
    2043             : 
    2044     3856787 :     return;
    2045             : }
    2046             : 
    2047             : 
    2048             : /*------------------------------------------------------------------------*
    2049             :  * music_mixed_classif_improv()
    2050             :  *
    2051             :  * Improve 1st stage speech/music decision for mixed&music signals: updates the feature buffers in st->hSpMusClas and may set st->sp_aud_decision1 to 1 (the flag is never cleared in this function)
    2052             :  *------------------------------------------------------------------------*/
    2053             : 
    2054       52755 : static void music_mixed_classif_improv(
    2055             :     Encoder_State *st,      /* i/o: Encoder state structure                         */
    2056             :     const float *new_inp,   /* i  : new input signal (L_FRAME samples are read)     */
    2057             :     const float *epsP,      /* i  : LP prediction error (elements 1..16 are read)   */
    2058             :     const float etot,       /* i  : total frame energy                              */
    2059             :     const float old_cor,    /* i  : normalized correlation                          */
    2060             :     const float cor_map_sum /* i  : correlation map sum                             */
    2061             : )
    2062             : {
    2063             :     int16_t i, dec, len, percus_flag;
    2064             :     float p2v_map[128], ftmp, ftmp1, lt_diff, log_max_spl, epsP_tilt, max_spl;
    2065             : 
    2066       52755 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    2067             : 
    2068             :     /* find sample with maximum absolute amplitude (only used by the percussive music detection further below); NOTE(review): the comparison uses double-precision fabs() while the assignment uses fabsf() */
    2069       52755 :     max_spl = 0;
    2070    13558035 :     for ( i = 0; i < L_FRAME; i++ )
    2071             :     {
    2072    13505280 :         if ( fabs( new_inp[i] ) > max_spl )
    2073             :         {
    2074      525845 :             max_spl = fabsf( new_inp[i] );
    2075             :         }
    2076             :     }
    2077             : 
    2078             :     /* music is considered only appearing in high SNR condition and active signal; otherwise both moving averages are reset to 0.5 and the function returns without touching the buffers */
    2079       52755 :     if ( st->vad_flag == 0 || st->lp_speech - st->lp_noise < 25 )
    2080             :     {
    2081       15316 :         hSpMusClas->dec_mov = 0.5f;
    2082       15316 :         hSpMusClas->dec_mov1 = 0.5f;
    2083             : 
    2084       15316 :         if ( st->vad_flag == 0 ) /* the onset counter restarts only on inactive frames, not on low-SNR active ones */
    2085             :         {
    2086        7353 :             hSpMusClas->onset_cnt = 0;
    2087             :         }
    2088             : 
    2089       15316 :         return;
    2090             :     }
    2091             : 
    2092       37439 :     hSpMusClas->onset_cnt++; /* counts frames that passed the gate above, saturated at 9 just below */
    2093             : 
    2094       37439 :     if ( hSpMusClas->onset_cnt > 9 )
    2095             :     {
    2096       36499 :         hSpMusClas->onset_cnt = 9;
    2097             :     }
    2098             : 
    2099       37439 :     if ( hSpMusClas->onset_cnt == 1 ) /* first frame after the counter was reset: fill buf_flux with -100 (negative entries terminate the length scan below) */
    2100             :     {
    2101         109 :         set_f( hSpMusClas->buf_flux, -100, BUF_LEN );
    2102             :     }
    2103             : 
    2104             :     /* spectral analysis */
    2105       37439 :     spec_analysis( st->Bin_E, p2v_map );
    2106             : 
    2107             :     /* percussive music detection */
    2108       37439 :     log_max_spl = 20 * logf( max_spl + 0.0001f ); /* logf() is the natural logarithm; the 0.0001f offset keeps the argument positive for an all-zero frame */
    2109       37439 :     lt_diff = log_max_spl - hSpMusClas->mov_log_max_spl;
    2110             : 
    2111      149756 :     for ( i = 0; i < 3; i++ )
    2112             :     {
    2113      112317 :         hSpMusClas->buf_etot[i] = hSpMusClas->buf_etot[i + 1];
    2114             :     }
    2115       37439 :     hSpMusClas->buf_etot[i] = etot; /* i == 3 here: the current frame energy goes into the last of the four slots */
    2116             : 
    2117       37439 :     percus_flag = 0;
    2118       37439 :     if ( hSpMusClas->buf_etot[1] - hSpMusClas->buf_etot[0] > 6 && hSpMusClas->buf_etot[2] < hSpMusClas->buf_etot[1] && hSpMusClas->buf_etot[1] - st->lp_speech > 3 )
    2119             :     {
    2120         223 :         if ( hSpMusClas->buf_etot[1] - hSpMusClas->buf_etot[3] > 3 && hSpMusClas->buf_etot[3] < hSpMusClas->buf_etot[2] && 0.5f * ( 0.5f * ( st->voicing[0] + st->voicing[1] ) + old_cor ) < 0.75f )
    2121             :         {
    2122          33 :             if ( hSpMusClas->dec_mov > 0.8f )
    2123             :             {
    2124           2 :                 percus_flag = 1;
    2125             :             }
    2126          31 :             else if ( old_cor < 0.75f && st->voicing[0] < 0.75f && st->voicing[1] < 0.75f && hSpMusClas->old_lt_diff[0] > 10 )
    2127             :             {
    2128           0 :                 percus_flag = 1;
    2129             :             }
    2130             :         }
    2131             :     }
    2132             : 
    2133             :     /* sound attack detection: arms a hangover of 3 that is passed to flux() and counted down below */
    2134       37439 :     if ( hSpMusClas->buf_etot[3] - hSpMusClas->buf_etot[2] > 6 && hSpMusClas->dec_mov > 0.9f && etot - st->lp_speech > 5 && hSpMusClas->old_lt_diff[0] > 5 )
    2135             :     {
    2136           0 :         hSpMusClas->attack_hangover = 3;
    2137             :     }
    2138             : 
    2139       37439 :     if ( st->voicing[0] > 0.9f && st->voicing[1] > 0.9f ) /* mov_log_max_spl is tracked on strongly voiced frames only: factor 0.75 when the level rises, 0.995 otherwise */
    2140             :     {
    2141        9187 :         if ( log_max_spl > hSpMusClas->mov_log_max_spl )
    2142             :         {
    2143         411 :             hSpMusClas->mov_log_max_spl = 0.75f * hSpMusClas->mov_log_max_spl + ( 1 - 0.75f ) * log_max_spl;
    2144             :         }
    2145             :         else
    2146             :         {
    2147        8776 :             hSpMusClas->mov_log_max_spl = 0.995f * hSpMusClas->mov_log_max_spl + ( 1 - 0.995f ) * log_max_spl;
    2148             :         }
    2149             :     }
    2150             : 
    2151       37439 :     hSpMusClas->old_lt_diff[0] = hSpMusClas->old_lt_diff[1]; /* two-entry history, [0] holds the older value; lt_diff was computed before the mov_log_max_spl update above */
    2152       37439 :     hSpMusClas->old_lt_diff[1] = lt_diff;
    2153             : 
    2154             :     /* calculate and buffer spectral energy fluctuation */
    2155       37439 :     flux( st->Bin_E, p2v_map, hSpMusClas->old_Bin_E, hSpMusClas->buf_flux, hSpMusClas->attack_hangover, hSpMusClas->dec_mov );
    2156             : 
    2157       37439 :     hSpMusClas->attack_hangover--; /* count down, clamped at 0 just below */
    2158       37439 :     if ( hSpMusClas->attack_hangover < 0 )
    2159             :     {
    2160       37439 :         hSpMusClas->attack_hangover = 0;
    2161             :     }
    2162             : 
    2163             :     /* identify flux buffer status: len = number of consecutive non-negative entries counted back from the end of buf_flux */
    2164       37439 :     len = 0;
    2165     2125319 :     for ( i = BUF_LEN - 1; i >= 0 && hSpMusClas->buf_flux[i] >= 0; i-- )
    2166             :     {
    2167     2087880 :         len++;
    2168             :     }
    2169             : 
    2170             :     /* reset flux buffer if percussive music is detected (the len valid entries at the end are overwritten with 5) */
    2171       37439 :     if ( percus_flag == 1 )
    2172             :     {
    2173           2 :         set_f( &hSpMusClas->buf_flux[BUF_LEN - len], 5, len );
    2174             :     }
    2175             : 
    2176             :     /* calculate and buffer the tilt of residual LP analysis energies: sum of epsP[i] * epsP[i + 1] over sum of epsP[i]^2 for i = 1..15; the 0.00001f start value keeps the divisor non-zero */
    2177       37439 :     ftmp = 0.00001f;
    2178       37439 :     ftmp1 = 0;
    2179      599024 :     for ( i = 1; i < 16; i++ )
    2180             :     {
    2181      561585 :         ftmp += epsP[i] * epsP[i];
    2182      561585 :         ftmp1 += epsP[i] * epsP[i + 1];
    2183             :     }
    2184             : 
    2185       37439 :     epsP_tilt = ftmp1 / ftmp;
    2186             : 
    2187     2246340 :     for ( i = 0; i < BUF_LEN - 1; i++ )
    2188             :     {
    2189     2208901 :         hSpMusClas->buf_epsP_tilt[i] = hSpMusClas->buf_epsP_tilt[i + 1];
    2190             :     }
    2191       37439 :     hSpMusClas->buf_epsP_tilt[i] = epsP_tilt; /* i == BUF_LEN - 1 after the shift loop: newest value is stored last */
    2192             : 
    2193             :     /* calculate and buffer highband spectral peakiness */
    2194       37439 :     tonal_dist( p2v_map, hSpMusClas->buf_pkh, hSpMusClas->buf_Ntonal, hSpMusClas->buf_Ntonal2, hSpMusClas->buf_Ntonal_lf );
    2195             : 
    2196             :     /* buffer sum of correlation map (same shift-by-one scheme, newest value at index BUF_LEN - 1) */
    2197     2246340 :     for ( i = 0; i < BUF_LEN - 1; i++ )
    2198             :     {
    2199     2208901 :         hSpMusClas->buf_cor_map_sum[i] = hSpMusClas->buf_cor_map_sum[i + 1];
    2200             :     }
    2201       37439 :     hSpMusClas->buf_cor_map_sum[i] = cor_map_sum;
    2202             : 
    2203             :     /* buffer voicing metric: the stored value is lps - lpm, kept in a history whose newest entry is at index 9 */
    2204      374390 :     for ( i = 0; i < 9; i++ )
    2205             :     {
    2206      336951 :         hSpMusClas->buf_dlp[i] = hSpMusClas->buf_dlp[i + 1];
    2207             :     }
    2208       37439 :     hSpMusClas->buf_dlp[i] = hSpMusClas->lps - hSpMusClas->lpm;
    2209             : 
    2210             :     /* classification */
    2211       37439 :     dec = mode_decision( st, len, &hSpMusClas->dec_mov, hSpMusClas->buf_flux, hSpMusClas->buf_epsP_tilt, hSpMusClas->buf_pkh, hSpMusClas->buf_cor_map_sum, hSpMusClas->buf_Ntonal, hSpMusClas->buf_Ntonal2, hSpMusClas->buf_Ntonal_lf, hSpMusClas->buf_dlp );
    2212             : 
    2213             :     /* update long term moving average of the classification decisions (only once more than 30 valid flux entries are available) */
    2214       37439 :     if ( len > 30 )
    2215             :     {
    2216       34680 :         hSpMusClas->dec_mov = 0.97f * hSpMusClas->dec_mov + ( 1 - 0.97f ) * dec;
    2217       34680 :         hSpMusClas->dec_mov1 = 0.97f * hSpMusClas->dec_mov1 + ( 1 - 0.97f ) * dec;
    2218             :     }
    2219             : 
    2220             :     /* update long-term unvoiced counter: -8 on UNVOICED/INACTIVE frames with etot > 1.5 and buf_Ntonal2[59] < 2, +1 otherwise, then clamped to [0, 300]; NOTE(review): index 59 is hard-coded - presumably BUF_LEN - 1, confirm */
    2221       37439 :     if ( ( st->coder_type_raw == UNVOICED || st->coder_type_raw == INACTIVE ) && etot > 1.5f && hSpMusClas->buf_Ntonal2[59] < 2 )
    2222             :     {
    2223        1607 :         hSpMusClas->UV_cnt1 -= 8;
    2224             :     }
    2225             :     else
    2226             :     {
    2227       35832 :         hSpMusClas->UV_cnt1++;
    2228             :     }
    2229             : 
    2230       37439 :     if ( hSpMusClas->UV_cnt1 > 300 )
    2231             :     {
    2232       23019 :         hSpMusClas->UV_cnt1 = 300;
    2233             :     }
    2234       14420 :     else if ( hSpMusClas->UV_cnt1 < 0 )
    2235             :     {
    2236           4 :         hSpMusClas->UV_cnt1 = 0;
    2237             :     }
    2238             : 
    2239       37439 :     hSpMusClas->LT_UV_cnt1 = 0.9f * hSpMusClas->LT_UV_cnt1 + 0.1f * hSpMusClas->UV_cnt1; /* first-order smoothing of the clamped counter */
    2240             : 
    2241             :     /* revert classification decision due to long-term unvoiced counter */
    2242       37439 :     if ( dec == 1 && hSpMusClas->dec_mov1 < 0.2f && hSpMusClas->LT_UV_cnt1 < 200 )
    2243             :     {
    2244          40 :         dec = 0;
    2245             :     }
    2246             : 
    2247             :     /* overwrite 1st stage speech/music decision to music (dec == 0 leaves the existing st->sp_aud_decision1 untouched) */
    2248       37439 :     if ( dec == 1 )
    2249             :     {
    2250       10852 :         st->sp_aud_decision1 = 1;
    2251             :     }
    2252             : 
    2253       37439 :     return;
    2254             : }
    2255             : 
    2256             : 
    2257             : /*---------------------------------------------------------------------*
    2258             :  * spec_analysis()
    2259             :  *
    2260             :  * Spectral analysis for mixed/music classification improvement
    2261             :  *---------------------------------------------------------------------*/
    2262             : 
    2263       37439 : static void spec_analysis(
    2264             :     float *Bin_E,  /* i  : log energy spectrum of the current frame        */
    2265             :     float *p2v_map /* o  : spectral peakiness map                          */
    2266             : )
    2267             : {
    2268             :     int16_t i, k, m;
    2269             :     float peak[L_FFT / 4 + 1];
    2270             :     float valley[L_FFT / 4 + 1];
    2271             :     int16_t peak_idx[L_FFT / 4 + 1];
    2272             :     int16_t valey_idx[L_FFT / 4 + 1];
    2273             :     float p2v[L_FFT / 4 + 1];
    2274             : 
    2275             :     /* find spectral peaks */
    2276       37439 :     k = 0;
    2277     4717314 :     for ( i = 1; i < L_FFT / 2 - 2; i++ )
    2278             :     {
    2279     4679875 :         if ( Bin_E[i] > Bin_E[i - 1] && Bin_E[i] > Bin_E[i + 1] )
    2280             :         {
    2281     1256698 :             peak[k] = Bin_E[i];
    2282     1256698 :             peak_idx[k] = i;
    2283     1256698 :             k++;
    2284             :         }
    2285             :     }
    2286       37439 :     assert( k + 1 < L_FFT / 4 + 1 );
    2287       37439 :     peak_idx[k] = -1;
    2288       37439 :     peak_idx[k + 1] = -1;
    2289             : 
    2290       37439 :     if ( k == 0 )
    2291             :     {
    2292         768 :         for ( i = 0; i < L_FFT / 2 - 1; i++ )
    2293             :         {
    2294         762 :             p2v_map[i] = 0;
    2295             :         }
    2296             : 
    2297           6 :         return;
    2298             :     }
    2299             : 
    2300             :     /* find spectral valleys */
    2301       37433 :     m = 0;
    2302       37433 :     if ( Bin_E[0] < Bin_E[1] )
    2303             :     {
    2304       19380 :         valley[0] = Bin_E[0];
    2305       19380 :         valey_idx[0] = 0;
    2306       19380 :         m++;
    2307             :     }
    2308             : 
    2309       37433 :     k = L_FFT / 2 - 2;
    2310       64363 :     for ( i = L_FFT / 2 - 3; i >= 0 && Bin_E[i + 1] > Bin_E[i]; i-- )
    2311             :     {
    2312       26930 :         k = i;
    2313             :     }
    2314             : 
    2315     4689628 :     for ( i = 1; i < k; i++ )
    2316             :     {
    2317     4652195 :         if ( Bin_E[i] < Bin_E[i - 1] && Bin_E[i] < Bin_E[i + 1] )
    2318             :         {
    2319     1237318 :             valley[m] = Bin_E[i];
    2320     1237318 :             valey_idx[m] = i;
    2321     1237318 :             m++;
    2322             :         }
    2323             :     }
    2324             : 
    2325       37433 :     valley[m] = Bin_E[k];
    2326       37433 :     valey_idx[m] = k;
    2327             : 
    2328             :     /* find spectral peak to valley distances */
    2329       37433 :     k = 0;
    2330     1294131 :     for ( i = 0; i < m; i++ )
    2331             :     {
    2332     1256698 :         if ( peak_idx[k] > valey_idx[i] && peak_idx[k] < valey_idx[i + 1] )
    2333             :         {
    2334     1256698 :             p2v[k] = 2 * peak[k] - valley[i] - valley[i + 1];
    2335     1256698 :             k++;
    2336             :         }
    2337             :     }
    2338             : 
    2339     4791424 :     for ( i = 0; i < L_FFT / 2 - 1; i++ )
    2340             :     {
    2341     4753991 :         p2v_map[i] = 0;
    2342             :     }
    2343             : 
    2344     1294131 :     for ( i = 0; i < k; i++ )
    2345             :     {
    2346     1256698 :         p2v_map[peak_idx[i]] = p2v[i];
    2347             :     }
    2348             : 
    2349       37433 :     return;
    2350             : }
    2351             : 
    2352             : /*---------------------------------------------------------------------*
    2353             :  * flux()
    2354             :  *
    2355             :  * Calculation of spectral flux
    2356             :  *---------------------------------------------------------------------*/
    2357             : 
    2358       37439 : static void flux(
    2359             :     float *Bin_E,            /* i  : log energy spectrum of the current frame        */
    2360             :     float *p2v_map,          /* i  : spectral peakiness map                          */
    2361             :     float *old_Bin_E,        /* i/o: log energy spectrum of the frame 60ms ago       */
    2362             :     float *buf_flux,         /* i/o: buffer storing spectral energy fluctuation      */
    2363             :     int16_t attack_hangover, /* i  : hangover preventing flux buffering              */
    2364             :     float dec_mov            /* i  : moving average of classifier decision           */
    2365             : )
    2366             : {
    2367             :     int16_t i;
    2368             :     float *pt1, *pt2, *pt3, *pt4, *pt5, *pt6;
    2369             :     float flux;
    2370             :     int16_t cnt;
    2371             : 
    2372             :     /* calculate flux */
    2373       37439 :     flux = 0;
    2374       37439 :     cnt = 0;
    2375     1609877 :     for ( i = 0; i < N_OLD_BIN_E; i++ )
    2376             :     {
    2377     1572438 :         if ( p2v_map[i] != 0 )
    2378             :         {
    2379      389399 :             flux += fabsf( Bin_E[i] - old_Bin_E[i] );
    2380      389399 :             cnt++;
    2381             :         }
    2382             :     }
    2383             : 
    2384       37439 :     if ( cnt == 0 )
    2385             :     {
    2386           6 :         flux = 5;
    2387             :     }
    2388             :     else
    2389             :     {
    2390       37433 :         flux = flux / (float) cnt;
    2391             :     }
    2392             : 
    2393       37439 :     if ( flux > 20 && dec_mov > 0.8f )
    2394             :     {
    2395         940 :         flux = 20;
    2396             :     }
    2397             : 
    2398             :     /* update old Bin_E buffer */
    2399       37439 :     pt1 = old_Bin_E;
    2400       37439 :     pt2 = old_Bin_E + N_OLD_BIN_E;
    2401       37439 :     pt3 = Bin_E;
    2402       37439 :     pt4 = old_Bin_E + N_OLD_BIN_E;
    2403       37439 :     pt5 = old_Bin_E + 2 * N_OLD_BIN_E;
    2404       37439 :     pt6 = old_Bin_E + 2 * N_OLD_BIN_E;
    2405             : 
    2406     1609877 :     for ( i = 0; i < N_OLD_BIN_E; i++ )
    2407             :     {
    2408     1572438 :         *pt1++ = *pt2++;
    2409     1572438 :         *pt4++ = *pt5++;
    2410     1572438 :         *pt6++ = *pt3++;
    2411             :     }
    2412             : 
    2413             :     /* update flux buffer */
    2414       37439 :     if ( attack_hangover <= 0 )
    2415             :     {
    2416     2246340 :         for ( i = 0; i < BUF_LEN - 1; i++ )
    2417             :         {
    2418     2208901 :             buf_flux[i] = buf_flux[i + 1];
    2419             :         }
    2420             : 
    2421       37439 :         buf_flux[i] = flux;
    2422             :     }
    2423             : 
    2424       37439 :     return;
    2425             : }
    2426             : 
    2427             : 
    2428             : /*---------------------------------------------------------------------*
    2429             :  * tonal_dist()
    2430             :  *
    2431             :  * Calculation of spectral distance
    2432             :  *---------------------------------------------------------------------*/
    2433             : 
    2434       37439 : static void tonal_dist(
    2435             :     float *p2v_map,      /* i  : spectral peakiness map                          */
    2436             :     float *buf_pkh,      /* i/o: buffer storing highband spectral peakiness      */
    2437             :     float *buf_Ntonal,   /* i/o: buffer storing No.of 1st spectral tone          */
    2438             :     float *buf_Ntonal2,  /* i/o: buffer storing No.of 2nd spectral tone          */
    2439             :     float *buf_Ntonal_lf /* i/o: buffer storing No.of low-band spectral tones    */
    2440             : )
    2441             : {
    2442             :     int16_t i;
    2443             :     float pk;
    2444             :     int16_t Ntonal;
    2445             :     int16_t Ntonal2;
    2446             :     int16_t Ntonal_lf;
    2447             : 
    2448             :     /* find number of tonals, number of tonals at low-band,
    2449             :     spectral peakiness at high-band */
    2450       37439 :     pk = 0;
    2451       37439 :     Ntonal = 0;
    2452       37439 :     Ntonal2 = 0;
    2453       37439 :     Ntonal_lf = 0;
    2454     2433535 :     for ( i = 0; i < 64; i++ )
    2455             :     {
    2456     2396096 :         if ( p2v_map[i] > 55 )
    2457             :         {
    2458      175285 :             Ntonal++;
    2459             :         }
    2460             : 
    2461     2396096 :         if ( p2v_map[i] > 80 )
    2462             :         {
    2463       96794 :             Ntonal2++;
    2464       96794 :             Ntonal_lf++;
    2465             :         }
    2466             :     }
    2467             : 
    2468     2396096 :     for ( i = 64; i < 127; i++ )
    2469             :     {
    2470     2358657 :         if ( p2v_map[i] != 0 )
    2471             :         {
    2472      651045 :             pk += p2v_map[i];
    2473             :         }
    2474             : 
    2475     2358657 :         if ( p2v_map[i] > 55 )
    2476             :         {
    2477       83834 :             Ntonal++;
    2478             :         }
    2479             : 
    2480     2358657 :         if ( p2v_map[i] > 80 )
    2481             :         {
    2482       33248 :             Ntonal2++;
    2483             :         }
    2484             :     }
    2485             : 
    2486             :     /* update buffers */
    2487     2246340 :     for ( i = 0; i < BUF_LEN - 1; i++ )
    2488             :     {
    2489     2208901 :         buf_pkh[i] = buf_pkh[i + 1];
    2490     2208901 :         buf_Ntonal[i] = buf_Ntonal[i + 1];
    2491     2208901 :         buf_Ntonal2[i] = buf_Ntonal2[i + 1];
    2492     2208901 :         buf_Ntonal_lf[i] = buf_Ntonal_lf[i + 1];
    2493             :     }
    2494             : 
    2495       37439 :     buf_pkh[i] = pk;
    2496       37439 :     buf_Ntonal[i] = (float) Ntonal;
    2497       37439 :     buf_Ntonal2[i] = (float) Ntonal2;
    2498       37439 :     buf_Ntonal_lf[i] = (float) Ntonal_lf;
    2499             : 
    2500       37439 :     return;
    2501             : }
    2502             : 
    2503             : 
    2504             : /*---------------------------------------------------------------------*
    2505             :  * mode_decision()
    2506             :  *
    2507             :  * Decision about internal mode of the mixed/music classifier improvement
    2508             :  *---------------------------------------------------------------------*/
    2509             : 
    2510       37439 : static int16_t mode_decision(
    2511             :     Encoder_State *st,
    2512             :     int16_t len,            /* i  : buffering status                                */
    2513             :     float *dec_mov,         /* i/o: moving average of classifier decision           */
    2514             :     float *buf_flux,        /* i  : buffer storing spectral energy fluctuation      */
    2515             :     float *buf_epsP_tilt,   /* i  : buffer storing LP prediction error tilt         */
    2516             :     float *buf_pkh,         /* i  : buffer storing highband spectral peakiness      */
    2517             :     float *buf_cor_map_sum, /* i  : buffer storing correlation map sum              */
    2518             :     float *buf_Ntonal,      /* i  : buffer storing No.of 1st spectral tone          */
    2519             :     float *buf_Ntonal2,     /* i  : buffer storing No.of 2nd spectral tone          */
    2520             :     float *buf_Ntonal_lf,   /* i  : buffer storing No.of low-band spectral tones    */
    2521             :     float *buf_dlp          /* i  : buffer storing voicing estimate                 */
    2522             : )
    2523             : {
    2524             :     int16_t mode;
    2525             :     int16_t i;
    2526             :     int16_t voiced_cnt;
    2527             :     float M_pkh;
    2528             :     float M_cor_map_sum;
    2529             :     float M_Ntonal;
    2530             :     float M_flux;
    2531             :     float V_epsP_tilt;
    2532             :     float lf_Ntonal_ratio;
    2533             : 
    2534       37439 :     mode = *dec_mov > 0.5f;
    2535             : 
    2536       37439 :     if ( len <= 5 )
    2537             :     {
    2538         529 :         return ( mode );
    2539             :     }
    2540       36910 :     else if ( len < 10 )
    2541             :     {
    2542         411 :         M_pkh = mean( buf_pkh + BUF_LEN - len, len );
    2543         411 :         M_cor_map_sum = mean( buf_cor_map_sum + BUF_LEN - len, len );
    2544         411 :         M_Ntonal = mean( buf_Ntonal + BUF_LEN - len, len );
    2545         411 :         V_epsP_tilt = var( buf_epsP_tilt + BUF_LEN - len, len );
    2546             : 
    2547         411 :         voiced_cnt = 0;
    2548        2877 :         for ( i = 9; i > 3; i-- )
    2549             :         {
    2550        2466 :             if ( buf_dlp[i] > 0.0f )
    2551             :             {
    2552         866 :                 voiced_cnt++;
    2553             :             }
    2554             :         }
    2555             : 
    2556         411 :         if ( ( M_pkh > 1100 || V_epsP_tilt < 0.00008f || M_cor_map_sum > 100 ) && voiced_cnt < 4 )
    2557             :         {
    2558          68 :             mode = 1;
    2559             :         }
    2560         343 :         else if ( M_Ntonal > 27 && voiced_cnt < 4 )
    2561             :         {
    2562           0 :             mode = 1;
    2563             :         }
    2564             :     }
    2565             :     else
    2566             :     {
    2567       36499 :         voiced_cnt = 0;
    2568      401489 :         for ( i = 0; i < 10; i++ )
    2569             :         {
    2570      364990 :             if ( buf_dlp[i] > 0.0f )
    2571             :             {
    2572      163863 :                 voiced_cnt++;
    2573             :             }
    2574             :         }
    2575             : 
    2576       36499 :         M_flux = mean( &buf_flux[BUF_LEN - 10], 10 );
    2577       36499 :         M_pkh = mean( buf_pkh + BUF_LEN - 10, 10 );
    2578       36499 :         M_cor_map_sum = mean( buf_cor_map_sum + BUF_LEN - 10, 10 );
    2579       36499 :         V_epsP_tilt = var( buf_epsP_tilt + BUF_LEN - 10, 10 );
    2580             : 
    2581       36499 :         if ( ( M_flux < 8.5f || ( V_epsP_tilt < 0.001f && M_flux < 12.0f ) || M_pkh > 1050 || M_cor_map_sum > 100 ) && voiced_cnt < 3 && mean( &buf_flux[55], 5 ) < 15 )
    2582             :         {
    2583        6923 :             mode = 1;
    2584        6923 :             *dec_mov = 1;
    2585        6923 :             return ( mode );
    2586             :         }
    2587             : 
    2588       29576 :         if ( M_flux > 16.0f || ( M_flux > 15 && voiced_cnt > 2 ) || mean( &buf_flux[55], 5 ) > 19.0f || ( buf_flux[59] >= 20 && st->hSpMusClas->lps - st->hSpMusClas->lpm > 0 ) )
    2589             :         {
    2590       23779 :             *dec_mov = 0;
    2591       23779 :             mode = 0;
    2592       23779 :             return ( mode );
    2593             :         }
    2594             : 
    2595      129567 :         for ( i = 10; i < len; i++ )
    2596             :         {
    2597      126891 :             M_flux = mean( &buf_flux[BUF_LEN - i], i );
    2598      126891 :             M_pkh = mean( buf_pkh + BUF_LEN - i, i );
    2599      126891 :             M_cor_map_sum = mean( buf_cor_map_sum + BUF_LEN - i, i );
    2600      126891 :             V_epsP_tilt = var( buf_epsP_tilt + BUF_LEN - i, i );
    2601             : 
    2602      126891 :             if ( ( ( M_flux < 12 + 0.05f * ( len - 10 ) && mean( &buf_flux[BUF_LEN - 10], 10 ) < 15 ) || V_epsP_tilt < 0.0001f + 0.000018f * ( len - 10 ) || M_pkh > 1050 - 5.0f * ( len - 10 ) || M_cor_map_sum > 95 - 0.3f * ( len - 10 ) ) && voiced_cnt < 3 )
    2603             :             {
    2604        3121 :                 mode = 1;
    2605        3121 :                 return ( mode );
    2606             :             }
    2607             :         }
    2608             : 
    2609        2676 :         if ( len == BUF_LEN )
    2610             :         {
    2611        2331 :             M_Ntonal = mean( buf_Ntonal, BUF_LEN );
    2612        2331 :             lf_Ntonal_ratio = sum_f( buf_Ntonal_lf, BUF_LEN ) / ( sum_f( buf_Ntonal2, BUF_LEN ) + 0.0001f );
    2613             : 
    2614        2331 :             if ( M_Ntonal > 18 || lf_Ntonal_ratio < 0.2f )
    2615             :             {
    2616          20 :                 mode = 1;
    2617             :             }
    2618        2311 :             else if ( M_Ntonal < 1 )
    2619             :             {
    2620           0 :                 mode = 0;
    2621             :             }
    2622             :         }
    2623             :     }
    2624             : 
    2625        3087 :     return ( mode );
    2626             : }
    2627             : 
    2628             : 
    2629             : /*----------------------------------------------------------------------------------*
    2630             :  * tonal_context_improv()
    2631             :  *
    2632             :  * Context-based improvement of 1st/2nd stage speech/music decision on stable tonal signals
    2633             :  *----------------------------------------------------------------------------------*/
    2634             : 
    2635       52755 : static void tonal_context_improv(
    2636             :     Encoder_State *st,          /* i/o: encoder state structure                       */
    2637             :     const float PS[],           /* i  : energy spectrum                               */
    2638             :     const float voi_fv,         /* i  : scaled voicing feature                        */
    2639             :     const float cor_map_sum_fv, /* i  : scaled correlation map feature                */
    2640             :     const float LPCErr          /* i  : scaled LP prediction error feature            */
    2641             : )
    2642             : {
    2643             :     int16_t lt_pitch_diff;
    2644             :     float sort_max, sort_avg, sort_val[80];
    2645             :     float tonality, tonality1, tonality2, tonality3, t2, t3, tL, err, cor, dft;
    2646             : 
    2647       52755 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    2648             : 
    2649             :     /* reset in case of codec mode switching */
    2650       52755 :     if ( st->last_codec_mode == MODE2 )
    2651             :     {
    2652       10114 :         set_f( hSpMusClas->tonality2_buf, 0, HANG_LEN_INIT );
    2653       10114 :         set_f( hSpMusClas->tonality3_buf, 0, HANG_LEN_INIT );
    2654       10114 :         set_f( hSpMusClas->LPCErr_buf, 0, HANG_LEN_INIT );
    2655       10114 :         hSpMusClas->lt_music_hangover = 0;
    2656       10114 :         hSpMusClas->lt_music_state = 0;
    2657       10114 :         hSpMusClas->lt_speech_state = 0;
    2658       10114 :         hSpMusClas->lt_speech_hangover = 0;
    2659             :     }
    2660             : 
    2661             :     /* estimate maximum tonality in bands [0-1 kHz], [1-2kHz] and [2-4kHz] */
    2662       52755 :     mvr2r( PS, sort_val, 80 );
    2663             : 
    2664             :     /* tonality in band 0-1 kHz */
    2665       52755 :     v_sort( sort_val, 0, 19 );
    2666       52755 :     sort_max = sort_val[19];
    2667       52755 :     sort_avg = sum_f( &sort_val[0], 10 );
    2668       52755 :     tonality1 = sort_max / sort_avg;
    2669             : 
    2670             :     /* tonality in band 1-2 kHz */
    2671       52755 :     v_sort( sort_val, 20, 39 );
    2672       52755 :     sort_max = sort_val[39];
    2673       52755 :     sort_avg = sum_f( &sort_val[20], 10 );
    2674       52755 :     tonality2 = sort_max / sort_avg;
    2675             : 
    2676             :     /* tonality in band 2-4 kHz */
    2677       52755 :     v_sort( sort_val, 40, 79 );
    2678       52755 :     sort_max = sort_val[79];
    2679       52755 :     sort_avg = sum_f( &sort_val[40], 20 );
    2680       52755 :     tonality3 = sort_max / sort_avg;
    2681             : 
    2682       52755 :     tonality = max( max( tonality1, tonality2 ), tonality3 );
    2683             : 
    2684       52755 :     if ( st->hVAD->hangover_cnt == 10 && st->vad_flag == 1 )
    2685             :     {
    2686             :         /* long-term voicing parameter */
    2687         657 :         hSpMusClas->lt_voicing = 0.1f * hSpMusClas->lt_voicing + 0.9f * *st->voicing;
    2688             : 
    2689             :         /* long-term correlation value */
    2690         657 :         hSpMusClas->lt_corr = 0.1f * hSpMusClas->lt_corr + 0.9f * st->old_corr;
    2691             : 
    2692             :         /* long-term tonality measure */
    2693         657 :         hSpMusClas->lt_tonality = 0.1f * hSpMusClas->lt_tonality + 0.9f * tonality;
    2694             :     }
    2695             :     else
    2696             :     {
    2697             :         /* long-term voicing parameter */
    2698       52098 :         hSpMusClas->lt_voicing = 0.7f * hSpMusClas->lt_voicing + 0.3f * *st->voicing;
    2699             : 
    2700             :         /* long-term correlation value */
    2701       52098 :         hSpMusClas->lt_corr = 0.7f * hSpMusClas->lt_corr + 0.3f * st->old_corr;
    2702             : 
    2703             :         /* long-term tonality measure */
    2704       52098 :         hSpMusClas->lt_tonality = 0.5f * hSpMusClas->lt_tonality + 0.5f * tonality;
    2705             :     }
    2706             : 
    2707             :     /* pitch difference w.r.t. past 3 frames */
    2708       52755 :     lt_pitch_diff = (int16_t) abs( hSpMusClas->lt_corr_pitch[0] - st->pitch[0] );
    2709       52755 :     lt_pitch_diff += (int16_t) abs( hSpMusClas->lt_corr_pitch[1] - st->pitch[0] );
    2710       52755 :     lt_pitch_diff += (int16_t) abs( hSpMusClas->lt_corr_pitch[2] - st->pitch[0] );
    2711             : 
    2712       52755 :     hSpMusClas->lt_corr_pitch[0] = hSpMusClas->lt_corr_pitch[1];
    2713       52755 :     hSpMusClas->lt_corr_pitch[1] = hSpMusClas->lt_corr_pitch[2];
    2714       52755 :     hSpMusClas->lt_corr_pitch[2] = st->pitch[0];
    2715             : 
    2716       52755 :     hSpMusClas->lt_old_mode[0] = hSpMusClas->lt_old_mode[1];
    2717       52755 :     hSpMusClas->lt_old_mode[1] = hSpMusClas->lt_old_mode[2];
    2718             : 
    2719       71952 :     if ( st->sp_aud_decision1 == 1 &&
    2720       35333 :          ( min( min( tonality1, tonality2 ), tonality3 ) > 50.0f ) &&
    2721        1263 :          ( tonality1 + tonality2 > 200.0f && tonality2 + tonality3 > 200.0f && tonality1 + tonality3 > 200.0f ) &&
    2722         952 :          ( hSpMusClas->lt_tonality < 20000.0f ) &&
    2723         952 :          ( ( hSpMusClas->lt_tonality > 1000 && max( hSpMusClas->lt_voicing, *st->voicing ) > 0.99f ) ||
    2724         907 :            ( hSpMusClas->lt_tonality > 1500 && hSpMusClas->lt_corr > 0.99f ) ||
    2725         905 :            ( hSpMusClas->lt_tonality > 3000 && hSpMusClas->lowrate_pitchGain > 0.96f ) ||
    2726         487 :            ( lt_pitch_diff == 0 && hSpMusClas->lowrate_pitchGain > 0.89f ) ) )
    2727             :     {
    2728          98 :         if ( sum_s( hSpMusClas->lt_old_mode, 2 ) < 2 )
    2729             :         {
    2730             :             /* probably speech - change the decision to speech */
    2731          26 :             st->sp_aud_decision1 = 0;
    2732          26 :             st->sp_aud_decision2 = 0;
    2733             : 
    2734          26 :             if ( hSpMusClas->lt_hangover == 0 )
    2735             :             {
    2736           6 :                 hSpMusClas->lt_hangover = 6;
    2737             :             }
    2738             :         }
    2739             :     }
    2740             :     else
    2741             :     {
    2742             :         /* not speech, but still in the hangover period - change the decision to speech */
    2743       52657 :         if ( hSpMusClas->lt_hangover > 0 )
    2744             :         {
    2745          36 :             st->sp_aud_decision1 = 0;
    2746          36 :             st->sp_aud_decision2 = 0;
    2747          36 :             hSpMusClas->lt_hangover--;
    2748             :         }
    2749             :     }
    2750             : 
    2751             :     /* calculate standard deviation of log-tonality */
    2752       52755 :     mvr2r( hSpMusClas->tonality2_buf + 1, hSpMusClas->tonality2_buf, HANG_LEN_INIT - 1 );
    2753       52755 :     hSpMusClas->tonality2_buf[HANG_LEN_INIT - 1] = 0.2f * log10f( tonality2 );
    2754       52755 :     t2 = std_dev( hSpMusClas->tonality2_buf, HANG_LEN_INIT );
    2755             : 
    2756       52755 :     mvr2r( hSpMusClas->tonality3_buf + 1, hSpMusClas->tonality3_buf, HANG_LEN_INIT - 1 );
    2757       52755 :     hSpMusClas->tonality3_buf[HANG_LEN_INIT - 1] = 0.2f * log10f( tonality3 );
    2758       52755 :     t3 = std_dev( hSpMusClas->tonality3_buf, HANG_LEN_INIT );
    2759             : 
    2760       52755 :     tL = 0.2f * log10f( hSpMusClas->lt_tonality );
    2761             : 
    2762             :     /* calculate standard deviation of residual LP energy */
    2763       52755 :     mvr2r( hSpMusClas->LPCErr_buf + 1, hSpMusClas->LPCErr_buf, HANG_LEN_INIT - 1 );
    2764       52755 :     hSpMusClas->LPCErr_buf[HANG_LEN_INIT - 1] = LPCErr;
    2765       52755 :     err = std_dev( hSpMusClas->LPCErr_buf, HANG_LEN_INIT );
    2766             : 
    2767       52755 :     cor = max( voi_fv - cor_map_sum_fv, 0.0f );
    2768       52755 :     dft = 0.2f * fabsf( log10f( tonality2 ) - log10f( tonality3 ) );
    2769             : 
    2770             :     /* state machine for strong music */
    2771       52755 :     if ( st->sp_aud_decision1 == 1 && hSpMusClas->lt_music_state == 0 && hSpMusClas->lt_music_hangover == 0 &&
    2772       16492 :          t2 < 0.54f && t2 > 0.26f && t3 > 0.22f && tL < 0.54f && tL > 0.26f && err > 0.5f )
    2773             :     {
    2774         104 :         hSpMusClas->lt_music_state = 1;
    2775         104 :         hSpMusClas->lt_music_hangover = 6;
    2776             :     }
    2777       52651 :     else if ( hSpMusClas->lt_music_state == 1 && hSpMusClas->lt_music_hangover == 0 && t2 < 0.34 && t3 < 0.26f && tL < 0.45f )
    2778             :     {
    2779          95 :         hSpMusClas->lt_music_state = 0;
    2780          95 :         hSpMusClas->lt_music_hangover = 6;
    2781             :     }
    2782             : 
    2783       52755 :     if ( hSpMusClas->lt_music_hangover > 0 )
    2784             :     {
    2785        1162 :         hSpMusClas->lt_music_hangover--;
    2786             :     }
    2787             : 
    2788             :     /* state machine for strong speech */
    2789       52755 :     if ( st->sp_aud_decision1 == 1 && hSpMusClas->lt_speech_state == 0 && hSpMusClas->lt_speech_hangover == 0 &&
    2790        1741 :          cor > 0.40f && dft < 0.1f && voi_fv > 2 * cor_map_sum_fv + 0.12f &&
    2791         287 :          t2 < cor && t3 < cor && tL < cor && cor_map_sum_fv < cor && voi_fv > cor && voi_fv > 0.76f )
    2792             :     {
    2793          77 :         hSpMusClas->lt_speech_state = 1;
    2794          77 :         hSpMusClas->lt_speech_hangover = 6;
    2795             :     }
    2796       52678 :     else if ( hSpMusClas->lt_speech_state == 1 && hSpMusClas->lt_speech_hangover == 0 && cor < 0.40f )
    2797             :     {
    2798          69 :         hSpMusClas->lt_speech_state = 0;
    2799          69 :         hSpMusClas->lt_speech_hangover = 6;
    2800             :     }
    2801             : 
    2802       52755 :     if ( hSpMusClas->lt_speech_hangover > 0 )
    2803             :     {
    2804         779 :         hSpMusClas->lt_speech_hangover--;
    2805             :     }
    2806             : 
    2807             :     /* final decision */
    2808       52755 :     if ( st->sp_aud_decision1 == 1 && hSpMusClas->lt_speech_state == 1 )
    2809             :     {
    2810             :         /* strong speech - probably error in speech/music classification */
    2811         403 :         st->sp_aud_decision1 = 0;
    2812         403 :         st->sp_aud_decision2 = 0;
    2813             :     }
    2814       52352 :     else if ( st->sp_aud_decision1 == 0 && hSpMusClas->lt_music_state == 1 )
    2815             :     {
    2816             :         /* strong music - probably error in speech/music classification */
    2817         111 :         st->sp_aud_decision1 = 1;
    2818         111 :         st->sp_aud_decision2 = 1;
    2819             :     }
    2820             : 
    2821             :     /* update the buffer of past decisions */
    2822       52755 :     hSpMusClas->lt_old_mode[2] = st->sp_aud_decision1;
    2823             : 
    2824       52755 :     return;
    2825             : }
    2826             : 
    2827             : /*---------------------------------------------------------------------*
    2828             :  * detect_sparseness()
    2829             :  * Derives a sparseness value from st->Bin_E (index at which the sorted bins exceed 75% of their sum)
    2830             :  * and, when st->sp_aud_decision1 == 1, may reset it to 0 and set st->sp_aud_decision2 = 1.
    2831             :  *---------------------------------------------------------------------*/
    2832             : 
    2833       16594 : static void detect_sparseness(
    2834             :     Encoder_State *st,             /* i/o: encoder state structure                */
    2835             :     const int16_t localVAD_HE_SAD, /* i  : HE-SAD flag without hangover           */
    2836             :     const float voi_fv             /* i  : scaled voicing feature                 */
    2837             : )
    2838             : {
    2839             :     float sum;
    2840             :     float ftmp;
    2841             :     float ftmp1;
    2842             :     float S1[128]; /* working buffer, filled from st->Bin_E below */
    2843             :     int16_t i, j;
    2844       16594 :     int16_t hb_sp_high_flag = 0;
    2845       16594 :     int16_t lb_sp_high_flag = 0;
    2846             :     float sumh;
    2847             :     float sparse;
    2848             :     float tmp_buf[4];
    2849       16594 :     float Mlpe = 0.0f; /* Mlpe and Mv are only updated when st->bwidth == WB */
    2850       16594 :     float Mv = 0.0f;
    2851             :     float Msp;
    2852             : 
    2853       16594 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    2854             : 
    2855       16594 :     mvr2r( st->Bin_E, S1, 128 );
    2856             : 
    2857       16594 :     sum = 0; /* sum of S1[0..79] after clipping negative bins to 0 */
    2858     1344114 :     for ( i = 0; i < 80; i++ )
    2859             :     {
    2860     1327520 :         if ( S1[i] < 0 )
    2861             :         {
    2862      222182 :             S1[i] = 0;
    2863             :         }
    2864     1327520 :         sum += S1[i];
    2865             :     }
    2866             : 
    2867       16594 :     sumh = 0; /* sum of S1[80..127] after clipping negative bins to 0 */
    2868      813106 :     for ( i = 80; i < 128; i++ )
    2869             :     {
    2870      796512 :         if ( S1[i] < 0 )
    2871             :         {
    2872      224127 :             S1[i] = 0;
    2873             :         }
    2874      796512 :         sumh += S1[i];
    2875             :     }
    2876             : 
    2877       16594 :     sum += sumh; /* sum now covers all 128 bins */
    2878             : 
    2879             :     /* order the spectrum from max to min */
    2880       16594 :     order_spectrum( S1, 128 );
    2881             : 
    2882             :     /* calculate spectral sparseness in the range 0 - 6.4 kHz: index of the first sorted bin at which the running sum exceeds 75% of the total */
    2883       16594 :     j = 0; /* stays 0 when the running sum never exceeds the threshold */
    2884       16594 :     ftmp = 0.0f;
    2885       16594 :     ftmp1 = 0.75f * sum;
    2886      916137 :     for ( i = 0; i < 128; i++ )
    2887             :     {
    2888      916094 :         ftmp += S1[i];
    2889      916094 :         if ( ftmp > ftmp1 )
    2890             :         {
    2891       16551 :             j = i;
    2892       16551 :             break;
    2893             :         }
    2894             :     }
    2895             : 
    2896      132752 :     for ( i = 0; i < HANG_LEN_INIT - 1; i++ ) /* shift sparse_buf down by one element */
    2897             :     {
    2898      116158 :         hSpMusClas->sparse_buf[i] = hSpMusClas->sparse_buf[i + 1];
    2899             :     }
    2900             : 
    2901       16594 :     sparse = (float) j;
    2902       16594 :     hSpMusClas->sparse_buf[i] = sparse; /* i == HANG_LEN_INIT - 1 here: newest value goes last */
    2903             : 
    2904       16594 :     if ( st->bwidth == WB )
    2905             :     {
    2906        1596 :         Msp = mean( hSpMusClas->sparse_buf, 8 ); /* NOTE(review): literal 8 here, HANG_LEN_INIT elsewhere - confirm they match */
    2907             : 
    2908             :         /* find long-term smoothed sparseness */
    2909        1596 :         if ( hSpMusClas->last_vad_spa == 0 ) /* last_vad_spa holds localVAD_HE_SAD of the previous call */
    2910             :         {
    2911         169 :             set_f( &hSpMusClas->sparse_buf[0], sparse, HANG_LEN_INIT - 1 );
    2912         169 :             hSpMusClas->LT_sparse = sparse;
    2913             :         }
    2914             :         else
    2915             :         {
    2916        1427 :             set_f( tmp_buf, 0.0f, 4 ); /* tmp_buf collects the 4 largest sparse_buf values, largest first (presumes mvr2r copes with the overlapping shift) */
    2917             : 
    2918       12843 :             for ( i = 0; i < HANG_LEN_INIT; i++ )
    2919             :             {
    2920       32527 :                 for ( j = 0; j < 4; j++ )
    2921             :                 {
    2922       29620 :                     if ( hSpMusClas->sparse_buf[i] > tmp_buf[j] )
    2923             :                     {
    2924        8509 :                         mvr2r( &tmp_buf[j], &tmp_buf[j + 1], 3 - j );
    2925        8509 :                         tmp_buf[j] = hSpMusClas->sparse_buf[i];
    2926        8509 :                         break;
    2927             :                     }
    2928             :                 }
    2929             :             }
    2930             : 
    2931        1427 :             ftmp = 0.25f * ( HANG_LEN_INIT * Msp - sum_f( tmp_buf, 4 ) ) - hSpMusClas->LT_sparse;
    2932             : 
    2933        1427 :             hSpMusClas->LT_sparse = hSpMusClas->LT_sparse + 0.25f * ftmp; /* move LT_sparse a quarter of the way towards the new target */
    2934             :         }
    2935             : 
    2936             :         /* find high-band sparseness */
    2937        1596 :         mvr2r( st->Bin_E + 80, S1, 48 );
    2938        1596 :         order_spectrum( S1, 48 );
    2939             : 
    2940       12768 :         for ( i = 0; i < HANG_LEN_INIT - 1; i++ )
    2941             :         {
    2942       11172 :             hSpMusClas->hf_spar_buf[i] = hSpMusClas->hf_spar_buf[i + 1];
    2943             :         }
    2944        1596 :         hSpMusClas->hf_spar_buf[i] = sum_f( S1, 5 ) / ( sumh + 0.1f ); /* sumh >= 0 after clipping, so the divisor is never zero */
    2945        1596 :         if ( mean( hSpMusClas->hf_spar_buf, 8 ) > 0.2f )
    2946             :         {
    2947         490 :             hb_sp_high_flag = 1;
    2948             :         }
    2949             : 
    2950             :         /* find low-band sparseness */
    2951        1596 :         mvr2r( st->Bin_E, S1, 60 );
    2952        1596 :         order_spectrum( S1, 60 );
    2953             : 
    2954        1596 :         if ( sum_f( S1, 5 ) / sum_f( S1, 60 ) > 0.18f ) /* NOTE(review): divisor is not guarded here, unlike the high-band ratio above */
    2955             :         {
    2956        1015 :             lb_sp_high_flag = 1;
    2957             :         }
    2958             : 
    2959             :         /* find smoothed linear prediction efficiency */
    2960       12768 :         for ( i = 0; i < 7; i++ )
    2961             :         {
    2962       11172 :             hSpMusClas->lpe_buf[i] = hSpMusClas->lpe_buf[i + 1];
    2963             :         }
    2964             : 
    2965        1596 :         hSpMusClas->lpe_buf[i] = hSpMusClas->past_epsP2;
    2966        1596 :         Mlpe = mean( hSpMusClas->lpe_buf, 8 );
    2967             : 
    2968             :         /* find smoothed voicing */
    2969       12768 :         for ( i = 0; i < HANG_LEN_INIT - 1; i++ )
    2970             :         {
    2971       11172 :             hSpMusClas->voicing_buf[i] = hSpMusClas->voicing_buf[i + 1];
    2972             :         }
    2973             : 
    2974        1596 :         hSpMusClas->voicing_buf[i] = voi_fv;
    2975        1596 :         Mv = mean( hSpMusClas->voicing_buf, 8 );
    2976             :     }
    2977             : 
    2978             :     /* avoid using LR-MDCT on sparse spectra */
    2979       16594 :     if ( st->sp_aud_decision1 == 1 )
    2980             :     {
    2981        6896 :         if ( st->bwidth == WB )
    2982             :         {
    2983         717 :             ftmp = 90;
    2984             :         }
    2985             :         else
    2986             :         {
    2987        6179 :             ftmp = 91;
    2988             :         }
    2989        6896 :         if ( sparse > ftmp ) /* ftmp is reused here as the sparseness threshold */
    2990             :         {
    2991           0 :             st->sp_aud_decision1 = 0;
    2992           0 :             st->sp_aud_decision2 = 1;
    2993           0 :             hSpMusClas->gsc_hangover = 1;
    2994             :         }
    2995        6896 :         else if ( hSpMusClas->gsc_hangover == 1 )
    2996             :         {
    2997          53 :             if ( sparse > 85 )
    2998             :             {
    2999           0 :                 st->sp_aud_decision1 = 0;
    3000           0 :                 st->sp_aud_decision2 = 1;
    3001             :             }
    3002          53 :             else if ( fabs( sparse - mean( &hSpMusClas->sparse_buf[HANG_LEN_INIT - 1 - hSpMusClas->gsc_cnt], hSpMusClas->gsc_cnt ) ) < 7.0f )
    3003             :             {
    3004          52 :                 st->sp_aud_decision1 = 0;
    3005          52 :                 st->sp_aud_decision2 = 1;
    3006             :             }
    3007             :         }
    3008             : 
    3009        6896 :         if ( st->bwidth == WB )
    3010             :         {
    3011         717 :             if ( hSpMusClas->LT_sparse > 60 && sparse > 50 && Mlpe < -1.3f && Mv > 0.85f &&
    3012          58 :                  lb_sp_high_flag == 0 && ( ( hb_sp_high_flag == 0 && sumh > 0.15f * sum ) || sumh <= 0.15f * sum ) )
    3013             :             {
    3014           9 :                 st->sp_aud_decision1 = 0;
    3015           9 :                 st->sp_aud_decision2 = 1;
    3016           9 :                 hSpMusClas->gsc_hangover = 1;
    3017             :             }
    3018         708 :             else if ( hSpMusClas->gsc_hangover == 1 && !( st->sp_aud_decision1 == 0 && st->sp_aud_decision2 == 1 ) )
    3019             :             {
    3020           1 :                 if ( fabs( sparse - mean( &hSpMusClas->sparse_buf[HANG_LEN_INIT - 1 - hSpMusClas->gsc_cnt], hSpMusClas->gsc_cnt ) ) < 7.0f )
    3021             :                 {
    3022           0 :                     st->sp_aud_decision1 = 0;
    3023           0 :                     st->sp_aud_decision2 = 1;
    3024             :                 }
    3025             :             }
    3026             :         }
    3027             :     }
    3028             : 
    3029             :     /* update the counter of consecutive GSC frames with sparse spectrum */
    3030       16594 :     if ( st->sp_aud_decision1 == 0 && st->sp_aud_decision2 == 1 )
    3031             :     {
    3032          54 :         ( hSpMusClas->gsc_cnt )++;
    3033          54 :         if ( hSpMusClas->gsc_cnt > 7 )
    3034             :         {
    3035          46 :             hSpMusClas->gsc_cnt = 7; /* cap keeps HANG_LEN_INIT - 1 - gsc_cnt >= 0 if HANG_LEN_INIT is 8 - TODO confirm */
    3036             :         }
    3037             :     }
    3038             :     else
    3039             :     {
    3040       16540 :         hSpMusClas->gsc_cnt = 0;
    3041       16540 :         hSpMusClas->gsc_hangover = 0;
    3042             :     }
    3043             : 
    3044       16594 :     hSpMusClas->last_vad_spa = localVAD_HE_SAD; /* remembered for the last_vad_spa == 0 test in the next call */
    3045             : 
    3046       16594 :     return;
    3047             : }
    3048             : 
    3049             : 
    3050             : /*---------------------------------------------------------------------*
    3051             :  * order_spectrum()
    3052             :  * Reorders vec[0..len-1] in place from max to min; each pass puts the largest remaining value at the
    3053             :  * front and the smallest at the back. vec: i/o vector to reorder, len: i number of elements in vec.
    3054             :  *---------------------------------------------------------------------*/
    3055             : 
    3056       19786 : static void order_spectrum(
    3057             :     float *vec,
    3058             :     const int16_t len )
    3059             : {
    3060             :     int16_t i, j, imax, imin;
    3061             :     float temp;
    3062             : 
    3063     1167986 :     for ( i = 0; i < len / 2; i++ ) /* one pass fixes positions i and len - i - 1 */
    3064             :     {
    3065     1148200 :         imax = i;
    3066     1148200 :         imin = i;
    3067    72621120 :         for ( j = i; j < len - i; j++ ) /* scan the not yet placed range [i, len - i - 1] */
    3068             :         {
    3069    71472920 :             if ( vec[j] > vec[imax] )
    3070             :             {
    3071     3114189 :                 imax = j;
    3072             :             }
    3073             :             else
    3074             :             {
    3075    68358731 :                 if ( vec[j] < vec[imin] )
    3076             :                 {
    3077     4518666 :                     imin = j;
    3078             :                 }
    3079             :             }
    3080             :         }
    3081             : 
    3082     1148200 :         temp = vec[i]; /* swap the maximum into position i */
    3083     1148200 :         vec[i] = vec[imax];
    3084     1148200 :         vec[imax] = temp;
    3085             : 
    3086     1148200 :         if ( imin == i ) /* the minimum sat at i and has just been swapped to imax */
    3087             :         {
    3088      163891 :             imin = imax;
    3089             :         }
    3090             : 
    3091     1148200 :         temp = vec[len - i - 1]; /* swap the minimum into position len - i - 1 */
    3092     1148200 :         vec[len - i - 1] = vec[imin];
    3093     1148200 :         vec[imin] = temp;
    3094             :     }
    3095             : 
    3096       19786 :     return;
    3097             : }

Generated by: LCOV version 1.14