LCOV - code coverage report
Current view: top level - lib_enc - speech_music_classif.c (source / functions) Hit Total Coverage
Test: Coverage on main @ 6baab0c613aa6c7100498ed7b93676aa8198a493 Lines: 1134 1151 98.5 %
Date: 2025-05-28 04:28:20 Functions: 17 17 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : #include <assert.h>
      38             : #include <stdint.h>
      39             : #include "options.h"
      40             : #ifdef DEBUGGING
      41             : #include "debug.h"
      42             : #endif
      43             : #include <math.h>
      44             : #include "cnst.h"
      45             : #include "prot.h"
      46             : #include "ivas_prot.h"
      47             : #include "rom_enc.h"
      48             : #include "rom_com.h" /* Common static table prototypes         */
      49             : #include "wmc_auto.h"
      50             : 
      51             : 
      52             : /*---------------------------------------------------------------------*
      53             :  * Local constants
      54             :  *---------------------------------------------------------------------*/
      55             : 
      56             : #define ATT_SEG_LEN       ( L_FRAME / ATT_NSEG ) /* L_FRAME split into ATT_NSEG equal parts (integer division) */
      57             : #define ATT_3LSUB_POS     ( 3 * ATT_NSEG / NB_SUBFR ) /* integer division; presumably the segment index at the start of the last subframe - confirm in attack_det() */
      58             : #define ATT_3LSUB_POS_16k ( int16_t )( ( 4.0f * ATT_NSEG / (float) NB_SUBFR16k ) + 0.5f ) /* float ratio rounded to nearest: +0.5f, then truncated by the int16_t cast */
      59             : 
      60             : #define THR_CORR_PEAK 0.95f
      61             : #define TON_FACT      0.95f
      62             : #define TON_ALPHA     0.95f
      63             : 
      64             : #define DLP_BIAS 0.138121f
      65             : 
      66             : #define THR_MASS_MAX     0.85f
      67             : #define THR_MASS_MIN     0.75f
      68             : #define THR_MASS_STEP_UP 0.01f
      69             : #define THR_MASS_STEP_DN 0.02f
      70             : 
      71             : 
      72             : /*---------------------------------------------------------------------*
      73             :  * Local function prototypes
      74             :  *---------------------------------------------------------------------*/
      75             : 
      76             : static void spec_analysis( float *Bin_E, float *p2v_map );
      77             : 
      78             : static void flux( float *Bin_E, float *p2v_map, float *old_Bin_E, float *buf_flux, int16_t attack_hangover, float dec_mov );
      79             : 
      80             : static void tonal_dist( float *p2v_map, float *buf_pkh, float *buf_Ntonal, float *buf_Ntonal2, float *buf_Ntonal_lf );
      81             : 
      82             : static int16_t mode_decision( Encoder_State *st, int16_t len, float *dec_mov, float *buf_flux, float *buf_epsP_tilt, float *buf_pkh, float *buf_cor_map_sum, float *buf_Ntonal, float *buf_Ntonal2, float *buf_Ntonal_lf, float *buf_dlp );
      83             : 
      84             : static void var_cor_calc( const float old_corr, float *mold_corr, float var_cor_t[], int16_t *high_stable_cor );
      85             : 
      86             : static int16_t attack_det( const float *inp, const int16_t last_clas, const int16_t localVAD, const int16_t coder_type, const int32_t total_brate, const int16_t element_mode, const int16_t clas, float finc_prev[], float *lt_finc, int16_t *last_strong_attack );
      87             : 
      88             : static float tonal_det( const float S[], int16_t vad_flag, float tod_S_map_lt[], float *tod_thr_lt, float *tod_weight, float *tod_S_mass_prev, float *tod_S_mass_lt );
      89             : 
      90             : static void tonal_context_improv( Encoder_State *st, const float PS[], const float voi_fv, const float cor_map_sum_fv, const float LPCErr );
      91             : 
      92             : static void order_spectrum( float *vec, const int16_t len );
      93             : 
      94             : static void detect_sparseness( Encoder_State *st, const int16_t localVAD_HE_SAD, const float voi_fv );
      95             : 
      96             : static int16_t sp_mus_classif_1st( Encoder_State *st, const int16_t localVAD_HE_SAD, const float lsp_new[M], const float cor_map_sum, const float epsP[M + 1], const float PS[], float non_sta, float relE, float *voi_fv, float *cor_map_sum_fv, float *LPCErr, int16_t *high_lpn_flag );
      97             : 
      98             : static void sp_mus_classif_2nd( Encoder_State *st, const float Etot, int16_t *attack_flag, const float *inp );
      99             : 
     100             : static void music_mixed_classif_improv( Encoder_State *st, const float *new_inp, const float *epsP, const float etot, const float old_cor, const float cor_map_sum );
     101             : 
     102             : 
     103             : /*---------------------------------------------------------------------*
     104             :  * speech_music_clas_init()
     105             :  *
     106             :  * Initialization of the speech/music classifier: writes start-up values into the counters, buffers and long-term estimates of hSpMusClas
     107             :  *---------------------------------------------------------------------*/
     108             : 
     109      148039 : void speech_music_clas_init(
     110             :     SP_MUS_CLAS_HANDLE hSpMusClas /* i/o: speech/music classifier handle   */
     111             : )
     112             : {
     113             :     int16_t i;
     114             : 
     115      148039 :     set_f( hSpMusClas->FV_st, 0.0f, N_SMC_FEATURES );
     116             : 
     117      148039 :     hSpMusClas->inact_cnt = 0;
     118      148039 :     set_s( hSpMusClas->past_dec, 0, HANG_LEN - 1 ); /* the three past_* histories are set over HANG_LEN - 1 entries */
     119      148039 :     set_f( hSpMusClas->past_dlp, 0, HANG_LEN - 1 );
     120      148039 :     set_f( hSpMusClas->past_dlp_mean_ST, 0, HANG_LEN - 1 );
     121      148039 :     hSpMusClas->dlp_mean_ST = 0.0f;
     122      148039 :     hSpMusClas->dlp_mean_LT = 0.0f;
     123      148039 :     hSpMusClas->dlp_var_LT = 0.0f;
     124             : 
     125     2368624 :     for ( i = 0; i < N_SMC_FEATURES; i++ )
     126             :     {
     127     2220585 :         hSpMusClas->prev_FV[i] = 0.5f * hout_intervals[2 * i] + 0.5f * hout_intervals[2 * i + 1]; /* mean of entries 2i and 2i+1 of hout_intervals[] (presumably the lower/upper bound of feature i - confirm) */
     128             :     }
     129             : 
     130     2368624 :     for ( i = 0; i < NB_BANDS_SPMUS; i++ )
     131             :     {
     132     2220585 :         hSpMusClas->past_log_enr[i] = logf( E_MIN ); /* log of E_MIN (presumably the minimum energy floor - confirm) */
     133             :     }
     134             : 
     135      148039 :     hSpMusClas->sp_mus_state = -8;
     136      148039 :     hSpMusClas->wdrop = 0.0f;
     137      148039 :     hSpMusClas->wrise = 0.0f;
     138      148039 :     hSpMusClas->wdlp_0_95_sp = 0.0f;
     139      148039 :     hSpMusClas->wdlp_xtalk = 0.0f;
     140      148039 :     set_f( hSpMusClas->last_lsp, 0.0f, M_LSP_SPMUS );
     141      148039 :     hSpMusClas->last_cor_map_sum = 0.0f;
     142      148039 :     hSpMusClas->last_non_sta = 0.0f;
     143      148039 :     set_f( hSpMusClas->past_PS, 0.0f, HIGHEST_FBIN - LOWEST_FBIN );
     144      148039 :     hSpMusClas->past_ps_diff = 0;
     145      148039 :     hSpMusClas->past_epsP2 = 01; /* NOTE(review): octal literal 01, i.e. the value 1, while the neighbouring past_* fields start at 0 - confirm this is intended before changing (may affect bit-exactness) */
     146      148039 :     hSpMusClas->past_epsP = 0;
     147      148039 :     hSpMusClas->flag_spitch_cnt = 0;
     148             : 
     149      148039 :     hSpMusClas->gsc_thres[0] = TH_0_MIN;
     150      148039 :     hSpMusClas->gsc_thres[1] = TH_1_MIN;
     151      148039 :     hSpMusClas->gsc_thres[2] = TH_2_MIN;
     152      148039 :     hSpMusClas->gsc_thres[3] = TH_3_MIN;
     153      148039 :     set_f( hSpMusClas->gsc_lt_diff_etot, 0.0f, MAX_LT );
     154      148039 :     hSpMusClas->gsc_mem_etot = 0.0f;
     155      148039 :     hSpMusClas->gsc_last_music_flag = 0;
     156      148039 :     hSpMusClas->gsc_nb_thr_1 = 0;
     157      148039 :     hSpMusClas->gsc_nb_thr_3 = 0;
     158      148039 :     hSpMusClas->mold_corr = 0.9f;
     159      148039 :     hSpMusClas->mean_avr_dyn = 0.5f;
     160      148039 :     hSpMusClas->last_sw_dyn = 10.0f;
     161             : 
     162      148039 :     hSpMusClas->relE_attack_cnt = 0;
     163      148039 :     hSpMusClas->prev_relE = 0.0f;
     164      148039 :     hSpMusClas->prev_Etot = 0.0f;
     165      148039 :     hSpMusClas->prev_vad = 0;
     166      148039 :     hSpMusClas->vad_0_1_cnt = 0;
     167      148039 :     hSpMusClas->relE_attack_sum = 0;
     168             : 
     169             :     /* speech/music classifier improvement: all feature history buffers are filled over BUF_LEN entries */
     170     9030379 :     for ( i = 0; i < BUF_LEN; i++ )
     171             :     {
     172     8882340 :         hSpMusClas->buf_flux[i] = -100; /* NOTE(review): unlike the zeroed buffers below, -100 looks like a "not yet filled" marker - confirm against flux() */
     173     8882340 :         hSpMusClas->buf_pkh[i] = 0;
     174     8882340 :         hSpMusClas->buf_epsP_tilt[i] = 0;
     175     8882340 :         hSpMusClas->buf_cor_map_sum[i] = 0;
     176     8882340 :         hSpMusClas->buf_Ntonal[i] = 0;
     177     8882340 :         hSpMusClas->buf_Ntonal2[i] = 0;
     178     8882340 :         hSpMusClas->buf_Ntonal_lf[i] = 0;
     179             :     }
     180             : 
     181      148039 :     set_f( hSpMusClas->lpe_buf, 0, HANG_LEN_INIT );
     182      148039 :     set_f( hSpMusClas->voicing_buf, 0, HANG_LEN_INIT );
     183      148039 :     hSpMusClas->gsc_hangover = 0;
     184      148039 :     set_f( hSpMusClas->sparse_buf, 0, HANG_LEN_INIT );
     185      148039 :     set_f( hSpMusClas->hf_spar_buf, 0, HANG_LEN_INIT );
     186      148039 :     hSpMusClas->LT_sparse = 0.0f;
     187      148039 :     hSpMusClas->gsc_cnt = 0;
     188      148039 :     hSpMusClas->last_vad_spa = 0;
     189             : 
     190      148039 :     set_f( hSpMusClas->old_Bin_E, 0.0f, 3 * N_OLD_BIN_E );
     191      148039 :     set_f( hSpMusClas->buf_etot, 0, 4 ); /* NOTE(review): literal length 4 - confirm it matches the declared size of buf_etot */
     192      148039 :     set_f( hSpMusClas->buf_dlp, 0, 10 ); /* NOTE(review): literal length 10 - confirm it matches the declared size of buf_dlp */
     193             : 
     194      148039 :     hSpMusClas->UV_cnt1 = 300;
     195      148039 :     hSpMusClas->LT_UV_cnt1 = 250.0f;
     196      148039 :     hSpMusClas->onset_cnt = 0;
     197      148039 :     hSpMusClas->attack_hangover = 0;
     198      148039 :     hSpMusClas->dec_mov = 0.0f;
     199      148039 :     hSpMusClas->dec_mov1 = 0.0f;
     200      148039 :     hSpMusClas->mov_log_max_spl = 200.0f;
     201      148039 :     hSpMusClas->old_lt_diff[0] = 0.0f;
     202      148039 :     hSpMusClas->old_lt_diff[1] = 0.0f;
     203             : 
     204      148039 :     set_f( hSpMusClas->finc_prev, 0.0f, ATT_NSEG );
     205      148039 :     hSpMusClas->lt_finc = 0.0f;
     206      148039 :     hSpMusClas->last_strong_attack = 0;
     207      148039 :     hSpMusClas->tdm_lt_Etot = 0.01f;
     208      148039 :     set_f( hSpMusClas->tod_lt_Bin_E, 0.0f, TOD_NSPEC );
     209      148039 :     set_f( hSpMusClas->tod_S_map_lt, 0.0f, TOD_NSPEC );
     210      148039 :     hSpMusClas->tod_thr_lt = TOD_THR_MASS;
     211      148039 :     hSpMusClas->tod_weight = 0.0f;
     212      148039 :     hSpMusClas->tod_S_mass_prev = 0.0f;
     213      148039 :     hSpMusClas->tod_S_mass_lt = 0.0f;
     214             : 
     215             :     /* speech/music classification */
     216      148039 :     set_s( hSpMusClas->lt_old_mode, 1, 3 ); /* first 3 entries set to 1 */
     217      148039 :     hSpMusClas->lt_voicing = 0.5f;
     218      148039 :     hSpMusClas->lt_corr = 0.5f;
     219      148039 :     hSpMusClas->lt_tonality = 0;
     220      148039 :     set_s( hSpMusClas->lt_corr_pitch, 0, 3 );
     221      148039 :     hSpMusClas->lt_hangover = 0;
     222      148039 :     hSpMusClas->lowrate_pitchGain = 0;
     223             : 
     224      148039 :     hSpMusClas->lt_music_hangover = 0;
     225      148039 :     set_f( hSpMusClas->tonality2_buf, 0, HANG_LEN_INIT );
     226      148039 :     set_f( hSpMusClas->tonality3_buf, 0, HANG_LEN_INIT );
     227      148039 :     set_f( hSpMusClas->LPCErr_buf, 0, HANG_LEN_INIT );
     228      148039 :     hSpMusClas->lt_music_state = 0;
     229      148039 :     hSpMusClas->lt_speech_state = 0;
     230      148039 :     hSpMusClas->lt_speech_hangover = 0;
     231             : 
     232      148039 :     hSpMusClas->lt_dec_thres = 10.0f;
     233      148039 :     hSpMusClas->ener_RAT = 0.0f;
     234             : 
     235      148039 :     hSpMusClas->high_stable_cor = 0;
     236      148039 :     set_f( hSpMusClas->var_cor_t, 0.0f, VAR_COR_LEN );
     237             : 
     238      148039 :     hSpMusClas->lps = 0.0f;
     239      148039 :     hSpMusClas->lpm = 0.0f;
     240      148039 :     hSpMusClas->lpn = 0.0f;
     241             : 
     242      148039 :     return;
     243             : }
     244             : 
     245             : 
     246             : /*---------------------------------------------------------------------*
     247             :  * speech_music_classif()
     248             :  *
     249             :  * Speech/music classification
     250             :  *
     251             :  * The following technologies are used based on the outcome of the sp/mus classifier
     252             :  * sp_aud_decision1  sp_aud_decision2
     253             :  *       0                 0             use ACELP (+TD BWE)
     254             :  *       1                 0             use ACELP (+FD BWE) or HQ/LR-MDCT depending on bitrate
     255             :  *       1                 1             use GSC (+FD BWE) or HQ/LR-MDCT depending on bitrate
     256             :  *
     257             :  *       0                 1             exceptionally use GSC (+FD BWE) instead of LR-MDCT at 13.2 kbps (WB/SWB) for sparse spectra
     258             :  *---------------------------------------------------------------------*/
     259             : 
     260             : /* no return value: the 1st stage decision (1-music, 0-speech or noise) is written to st->sp_aud_decision1 */
     261       96918 : void speech_music_classif(
     262             :     Encoder_State *st,             /* i/o: state structure                                 */
     263             :     const float *new_inp,          /* i  : new input signal                                */
     264             :     const float *inp,              /* i  : input signal to locate attack position          */
     265             :     const int16_t localVAD_HE_SAD, /* i  : HE-SAD flag without hangover                    */
     266             :     const float lsp_new[M],        /* i  : LSPs in current frame                           */
     267             :     const float cor_map_sum,       /* i  : correlation map sum (from multi-harmonic anal.) */
     268             :     const float epsP[M + 1],       /* i  : LP prediction error                             */
     269             :     const float PS[],              /* i  : energy spectrum                                 */
     270             :     const float Etot,              /* i  : total frame energy                              */
     271             :     const float old_cor,           /* i  : max correlation from previous frame             */
     272             :     int16_t *attack_flag,          /* o  : attack flag (GSC or TC)                         */
     273             :     const float non_sta,           /* i  : unbound non-stationarity for sp/mus classifier  */
     274             :     const float relE,              /* i  : relative frame energy                           */
     275             :     int16_t *high_lpn_flag,        /* o  : sp/mus LPN flag                                 */
     276             :     const int16_t flag_spitch      /* i  : flag to indicate very short stable pitch        */
     277             : )
     278             : {
     279             :     float voi_fv, cor_map_sum_fv, LPCErr;
     280             : 
     281             :     /* 1st stage speech/music classification based on the GMM model */
     282       96918 :     st->sp_aud_decision1 = sp_mus_classif_1st( st, localVAD_HE_SAD, lsp_new, cor_map_sum, epsP, PS, non_sta, relE, &voi_fv, &cor_map_sum_fv, &LPCErr, high_lpn_flag );
     283             : 
     284       96918 :     if ( st->codec_mode == MODE1 || st->sr_core == INT_FS_12k8 )
     285             :     {
     286             : 
     287             :         /* Improvement of the 1st stage decision for mixed/music content */
     288       63355 :         if ( !st->Opt_SC_VBR && ( st->total_brate != ACELP_24k40 ) )
     289             :         {
     290       60545 :             music_mixed_classif_improv( st, new_inp, epsP, Etot, old_cor, cor_map_sum );
     291             :         }
     292             : 
     293       63355 :         st->sp_aud_decision0 = st->sp_aud_decision1; /* copy taken before the noise-level override further down can clear sp_aud_decision1 */
     294             : 
     295             :         /* 2nd stage speech/music classification (rewrite music to speech in onsets) */
     296       63355 :         st->sp_aud_decision2 = st->sp_aud_decision1;
     297             : 
     298       63355 :         if ( st->bwidth > NB )
     299             :         {
     300       55005 :             sp_mus_classif_2nd( st, Etot, attack_flag, inp );
     301             : 
     302       55005 :             if ( flag_spitch && st->bwidth == WB && st->total_brate < ACELP_13k20 )
     303             :             {
     304             :                 /* avoid switch to AUDIO/MUSIC class for very short stable high pitch
     305             :                    and/or stable pitch with high correlation at low bitrates*/
     306          56 :                 st->sp_aud_decision2 = 0;
     307             :             }
     308             :         }
     309             : 
     310             :         /* Context-based improvement of 1st and 2nd stage decision on stable tonal signals */
     311       63355 :         if ( !st->Opt_SC_VBR && st->total_brate != ACELP_24k40 )
     312             :         {
     313       60545 :             tonal_context_improv( st, PS, voi_fv, cor_map_sum_fv, LPCErr );
     314             :         }
     315             : 
     316             :         /* Avoid using LR-MDCT on sparse spectra, use GSC instead at 13.2 kbps (WB/SWB) */
     317       63355 :         if ( !st->Opt_SC_VBR && st->total_brate == ACELP_13k20 && st->vad_flag == 1 && ( st->bwidth == WB || st->bwidth == SWB ) )
     318             :         {
     319       19856 :             detect_sparseness( st, localVAD_HE_SAD, voi_fv );
     320             :         }
     321             : 
     322             :         /* override speech/music classification to ACELP when background noise level reaches certain level */
     323             :         /* this is a patch against mis-classifications during active noisy speech segments */
     324       63355 :         if ( st->lp_noise > 12.0f )
     325             :         {
     326       16504 :             st->sp_aud_decision1 = 0;
     327       16504 :             st->sp_aud_decision2 = 0;
     328             :         }
     329             : 
     330             :         /* set GSC noisy speech flag on unvoiced SWB segments */
     331       63355 :         st->GSC_noisy_speech = 0;
     332       63355 :         if ( st->vad_flag == 1 && st->total_brate >= ACELP_13k20 && st->total_brate < ACELP_24k40 &&
     333       22379 :              st->lp_noise > 12.0f && st->sp_aud_decision1 == 0 && st->bwidth >= SWB &&
     334        1896 :              st->coder_type_raw == UNVOICED )
     335             :         {
     336         236 :             st->GSC_noisy_speech = 1;
     337             :         }
     338             : 
     339             :         /* Select AUDIO frames (DEBUGGING builds additionally honour st->force: 1 forces AUDIO, -1 keeps the classifier decision) */
     340             : #ifdef DEBUGGING
     341             :         if ( st->codec_mode == MODE1 && ( st->force == 1 || ( st->force == -1 && ( st->sp_aud_decision2 || st->GSC_noisy_speech ) ) ) )
     342             : #else
     343       63355 :         if ( st->codec_mode == MODE1 && ( st->sp_aud_decision2 || st->GSC_noisy_speech ) )
     344             : #endif
     345             :         {
     346       16226 :             st->coder_type = AUDIO;
     347       16226 :             st->hGSCEnc->noise_lev = NOISE_LEVEL_SP0;
     348             :         }
     349             :     }
     350             :     else
     351             :     {
     352       33563 :         st->sp_aud_decision0 = st->sp_aud_decision1; /* other configurations: only the 1st stage result is copied; sp_aud_decision2, GSC_noisy_speech and coder_type are not written on this path */
     353             :     }
     354             : 
     355             : 
     356       96918 :     return;
     357             : }
     358             : 
     359             : 
     360             : /*---------------------------------------------------------------------*
     361             :  * sp_mus_classif_1st()
     362             :  *
     363             :  * 1st stage speech/music classification (based on the GMM model)
     364             :  *---------------------------------------------------------------------*/
     365             : 
     366             : /*! r: decision flag (1-music, 0-speech or noise) */
     367       96918 : static int16_t sp_mus_classif_1st(
     368             :     Encoder_State *st,             /* i/o: state structure                                 */
     369             :     const int16_t localVAD_HE_SAD, /* i  : local VAD HE flag                               */
     370             :     const float lsp_new[M],        /* i  : LSPs in current frame                           */
     371             :     const float cor_map_sum,       /* i  : correlation map sum (from multi-harmonic anal.) */
     372             :     const float epsP[M + 1],       /* i  : LP prediciton error                             */
     373             :     const float PS[],              /* i  : energy spectrum                                 */
     374             :     float non_sta,                 /* i  : unbound non-stationarity                        */
     375             :     float relE,                    /* i  : relative frame energy                           */
     376             :     float *voi_fv,                 /* o  : scaled voicing feature                          */
     377             :     float *cor_map_sum_fv,         /* o  : scaled correlation map feature                  */
     378             :     float *LPCErr,                 /* o  : scaled LP prediction error feature              */
     379             :     int16_t *high_lpn_flag         /* o  : sp/mus LPN flag                                 */
     380             : )
     381             : {
     382             :     int16_t i, k, p, dec, vad;
     383             :     float dlp, ftmp, lepsP1, sum_PS, ps_diff, ps_sta, wrelE, wdrop, wght, mx;
     384       96918 :     float FV[N_FEATURES], *pFV = FV, PS_norm[128], dPS[128], lsp[M];
     385       96918 :     float pys, pym, xm[N_FEATURES], py, lps = 0, lpm = 0;
     386             :     const float *pSF;
     387       96918 :     float pyn, lpn = 0;
     388             : 
     389       96918 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
     390             : 
     391             :     /*------------------------------------------------------------------*
     392             :      * Initialization
     393             :      *------------------------------------------------------------------*/
     394             : 
     395       96918 :     vad = localVAD_HE_SAD;
     396             : 
     397             :     /*------------------------------------------------------------------*
     398             :      * Preparation of the feature vector
     399             :      *------------------------------------------------------------------*/
     400             : 
     401             :     /* [0] OL pitch */
     402       96918 :     if ( st->tc_cnt == 1 || st->tc_cnt == 2 )
     403             :     {
     404        8698 :         *pFV++ = (float) st->pitch[2];
     405             :     }
     406             :     else
     407             :     {
     408       88220 :         *pFV++ = (float) ( st->pitch[0] + st->pitch[1] + st->pitch[2] ) / 3.0f;
     409             :     }
     410             : 
     411             :     /* [1] voicing */
     412       96918 :     if ( st->tc_cnt == 1 || st->tc_cnt == 2 )
     413             :     {
     414        8698 :         *pFV++ = st->voicing[2];
     415             :     }
     416             :     else
     417             :     {
     418       88220 :         *pFV++ = (float) ( st->voicing[0] + st->voicing[1] + st->voicing[2] ) / 3.0f;
     419             :     }
     420             : 
     421             :     /* [2,3,4,5,6] LSFs */
     422       96918 :     mvr2r( lsp_new, lsp, M );
     423             : 
     424       96918 :     ftmp = (float) acos( lsp[1] );
     425       96918 :     *pFV++ = ftmp + hSpMusClas->last_lsp[1];
     426       96918 :     hSpMusClas->last_lsp[1] = ftmp;
     427             : 
     428       96918 :     ftmp = (float) acos( lsp[2] );
     429       96918 :     *pFV++ = ftmp + hSpMusClas->last_lsp[2];
     430       96918 :     hSpMusClas->last_lsp[2] = ftmp;
     431             : 
     432       96918 :     ftmp = (float) acos( lsp[3] );
     433       96918 :     *pFV++ = ftmp + hSpMusClas->last_lsp[3];
     434       96918 :     hSpMusClas->last_lsp[3] = ftmp;
     435             : 
     436       96918 :     ftmp = (float) acos( lsp[4] );
     437       96918 :     *pFV++ = ftmp + hSpMusClas->last_lsp[4];
     438       96918 :     hSpMusClas->last_lsp[4] = ftmp;
     439             : 
     440       96918 :     ftmp = (float) acos( lsp[5] );
     441       96918 :     *pFV++ = ftmp + hSpMusClas->last_lsp[5];
     442       96918 :     hSpMusClas->last_lsp[5] = ftmp;
     443             : 
     444             :     /* [7] cor_map_sum */
     445       96918 :     *pFV++ = cor_map_sum + hSpMusClas->last_cor_map_sum;
     446       96918 :     hSpMusClas->last_cor_map_sum = cor_map_sum;
     447             : 
     448             :     /* [8] non_sta */
     449       96918 :     *pFV++ = non_sta + hSpMusClas->last_non_sta;
     450       96918 :     hSpMusClas->last_non_sta = non_sta;
     451             : 
     452             :     /* [9] epsP */
     453       96918 :     if ( st->bwidth == NB )
     454             :     {
     455             :         /* do not take into account (statistics are too different) */
     456        8350 :         *pFV++ = -1.647f;
     457             :     }
     458             :     else
     459             :     {
     460       88568 :         lepsP1 = logf( epsP[1] + 1e-5f );
     461       88568 :         ftmp = logf( epsP[13] ) - lepsP1;
     462       88568 :         *pFV++ = ftmp + hSpMusClas->past_epsP2;
     463       88568 :         hSpMusClas->past_epsP2 = ftmp;
     464             :     }
     465             : 
     466             :     /* calculation of differential normalized power spectrum */
     467       96918 :     sum_PS = 1e-5f;
     468     6590424 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
     469             :     {
     470     6493506 :         sum_PS += PS[i];
     471             :     }
     472             : 
     473     6590424 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
     474             :     {
     475     6493506 :         PS_norm[i] = PS[i] / sum_PS;
     476     6493506 :         dPS[i] = fabsf( PS_norm[i] - hSpMusClas->past_PS[i - LOWEST_FBIN] );
     477             :     }
     478             : 
     479             :     /* [10] ps_diff (spectral difference) */
     480       96918 :     ps_diff = 0;
     481     6590424 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
     482             :     {
     483     6493506 :         ps_diff += dPS[i];
     484             :     }
     485             : 
     486       96918 :     ps_diff = logf( ps_diff + 1e-5f );
     487       96918 :     *pFV++ = ps_diff + hSpMusClas->past_ps_diff;
     488       96918 :     hSpMusClas->past_ps_diff = ps_diff;
     489             : 
     490             :     /* [11] ps_sta (spectral stationarity) */
     491       96918 :     ps_sta = 0;
     492     6590424 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
     493             :     {
     494     6493506 :         mx = PS_norm[i] > hSpMusClas->past_PS[i - LOWEST_FBIN] ? PS_norm[i] : hSpMusClas->past_PS[i - LOWEST_FBIN];
     495     6493506 :         ps_sta += mx / ( dPS[i] + 1e-5f );
     496             :     }
     497             : 
     498       96918 :     *pFV++ = logf( ps_sta + 1e-5f );
     499       96918 :     mvr2r( &PS_norm[LOWEST_FBIN], hSpMusClas->past_PS, HIGHEST_FBIN - LOWEST_FBIN );
     500             : 
     501             :     /*------------------------------------------------------------------*
     502             :      * Scaling of the feature vector
     503             :      *------------------------------------------------------------------*/
     504             : 
     505       96918 :     pFV = FV;
     506       96918 :     if ( st->bwidth == NB )
     507             :     {
     508        8350 :         pSF = SF_8k;
     509             :     }
     510             :     else
     511             :     {
     512       88568 :         pSF = SF;
     513             :     }
     514             : 
     515     1259934 :     for ( i = 0; i < N_FEATURES; i++, pFV++, pSF += 2 )
     516             :     {
     517     1163016 :         *pFV = pSF[0] * *pFV + pSF[1];
     518             :     }
     519             : 
     520             :     /* store some scaled parameters for later correction of the 1st stage speech/music classification */
     521       96918 :     *voi_fv = FV[1];
     522       96918 :     *cor_map_sum_fv = FV[7];
     523       96918 :     *LPCErr = FV[9];
     524             : 
     525             :     /*------------------------------------------------------------------*
     526             :      * Calculation of posterior probability
     527             :      * Log-probability
     528             :      *------------------------------------------------------------------*/
     529             : 
     530       96918 :     pys = pym = pyn = 1e-5f;
     531             : 
     532             :     /* run loop for all mixtures (for each mixture, calculate the probability of speech or noise and the probability of music) */
     533      678426 :     for ( k = 0; k < N_MIXTURES; k++ )
     534             :     {
     535             :         /* active frames - calculate the probability of speech */
     536     7559604 :         for ( p = 0; p < N_FEATURES; p++ )
     537             :         {
     538     6978096 :             xm[p] = FV[p] - m_speech[k * N_FEATURES + p];
     539             :         }
     540             : 
     541      581508 :         py = lvm_speech[k] + dot_product_mat( xm, &invV_speech[k * N_FEATURES * N_FEATURES], N_FEATURES );
     542      581508 :         pys += expf( py );
     543             :         /* inactive frames - calculate the probability of noise */
     544     7559604 :         for ( p = 0; p < N_FEATURES; p++ )
     545             :         {
     546     6978096 :             xm[p] = FV[p] - m_noise[k * N_FEATURES + p];
     547             :         }
     548             : 
     549      581508 :         py = lvm_noise[k] + dot_product_mat( xm, &invV_noise[k * N_FEATURES * N_FEATURES], N_FEATURES );
     550      581508 :         pyn += expf( py );
     551             : 
     552             :         /* either active or inactive frames - calculate the probability of music */
     553     7559604 :         for ( p = 0; p < N_FEATURES; p++ )
     554             :         {
     555     6978096 :             xm[p] = FV[p] - m_music[k * N_FEATURES + p];
     556             :         }
     557             : 
     558      581508 :         py = lvm_music[k] + dot_product_mat( xm, &invV_music[k * N_FEATURES * N_FEATURES], N_FEATURES );
     559      581508 :         pym += expf( py );
     560             :     }
     561             : 
     562             :     /* calculate log-probability */
     563       96918 :     lps = logf( pys ) - 0.5f * N_FEATURES * logf( PI2 );
     564       96918 :     lpm = logf( pym ) - 0.5f * N_FEATURES * logf( PI2 );
     565       96918 :     lpn = logf( pyn ) - 0.5f * N_FEATURES * logf( PI2 );
     566             : 
     567       96918 :     *high_lpn_flag = 0;
     568       96918 :     if ( lpn > lps && lpn > lpm )
     569             :     {
     570       14498 :         *high_lpn_flag = 1;
     571             :     }
     572             : 
     573       96918 :     if ( !vad )
     574             :     {
     575             :         /* artificially increase log-probability of noise */
     576       10848 :         lps = lpn * 1.2f;
     577             :     }
     578             : 
     579       96918 :     hSpMusClas->lpm = lpm;
     580       96918 :     hSpMusClas->lps = lps;
     581             : 
     582             :     /* determine HQ Generic speech class */
     583       96918 :     if ( st->hHQ_core != NULL )
     584             :     {
     585       96918 :         if ( lps > lpm + 0.5f )
     586             :         {
     587       46420 :             st->hHQ_core->hq_generic_speech_class = 1;
     588             :         }
     589             :         else
     590             :         {
     591       50498 :             st->hHQ_core->hq_generic_speech_class = 0;
     592             :         }
     593             :     }
     594             : 
     595             :     /*------------------------------------------------------------------*
     596             :      * State machine (sp_mus_state < 0 .. inactive, > 0 .. entry, = 0 .. active )
     597             :      *------------------------------------------------------------------*/
     598             : 
     599       96918 :     if ( vad )
     600             :     {
     601       86070 :         if ( relE < -20 || ( lps <= -5 && lpm <= -5 ) )
     602             :         {
     603        8236 :             if ( hSpMusClas->sp_mus_state > 0 )
     604             :             {
     605        1567 :                 if ( hSpMusClas->sp_mus_state < HANG_LEN )
     606             :                 {
     607             :                     /* energy is too low but we are in entry period -> reset the inactive counter to allow new entry later */
     608         132 :                     hSpMusClas->inact_cnt = 0;
     609             :                 }
     610             : 
     611             :                 /* energy is too low -> we are going to instable state */
     612        1567 :                 hSpMusClas->sp_mus_state = 0;
     613             :             }
     614        6669 :             else if ( hSpMusClas->sp_mus_state > -HANG_LEN )
     615             :             {
     616             :                 /* energy is still too low -> we are still in instable state */
     617        3180 :                 hSpMusClas->sp_mus_state--;
     618             :             }
     619             :         }
     620       77834 :         else if ( hSpMusClas->sp_mus_state <= 0 )
     621             :         {
     622        2084 :             if ( hSpMusClas->inact_cnt == 0 )
     623             :             {
     624             : 
     625         741 :                 hSpMusClas->sp_mus_state = 1;
     626             :             }
     627             :             else
     628             :             {
     629             : 
     630        1343 :                 hSpMusClas->sp_mus_state = HANG_LEN;
     631             :             }
     632             : 
     633        2084 :             hSpMusClas->inact_cnt = 12;
     634             :         }
     635       75750 :         else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
     636             :         {
     637             :             /* we are inside an entry period -> increment the counter of entry frames */
     638        4180 :             hSpMusClas->sp_mus_state++;
     639             :         }
     640             : 
     641       86070 :         if ( hSpMusClas->sp_mus_state < 0 && hSpMusClas->inact_cnt > 0 )
     642             :         {
     643        3776 :             hSpMusClas->inact_cnt--;
     644             :         }
     645             :     }
     646             :     else
     647             :     {
     648       10848 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
     649             :         {
     650          50 :             hSpMusClas->inact_cnt = 0;
     651             :         }
     652       10798 :         else if ( hSpMusClas->inact_cnt > 0 )
     653             :         {
     654        1967 :             hSpMusClas->inact_cnt--;
     655             :         }
     656             : 
     657       10848 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
     658             :         {
     659          50 :             hSpMusClas->sp_mus_state = -HANG_LEN;
     660             :         }
     661       10798 :         else if ( hSpMusClas->sp_mus_state > 0 )
     662             :         {
     663         264 :             hSpMusClas->sp_mus_state = -1;
     664             :         }
     665       10534 :         else if ( hSpMusClas->sp_mus_state > -HANG_LEN )
     666             :         {
     667             :             /* we are in inactive state */
     668        1289 :             hSpMusClas->sp_mus_state--;
     669             :         }
     670             :     }
     671             : 
     672             :     /*------------------------------------------------------------------*
     673             :      * Decision without hangover
     674             :      * Weighted decision
     675             :      *------------------------------------------------------------------*/
     676             : 
     677             :     /* decision without hangover (0 - speech/noise, 1 - music) */
     678       96918 :     dec = lpm > lps;
     679       96918 :     dlp = lpm - lps;
     680             : 
     681       96918 :     if ( !vad )
     682             :     {
     683       10848 :         dec = 0;
     684       10848 :         dlp = 0;
     685             :     }
     686             : 
     687             :     /* calculate weight based on relE (close to 0.01 in low-E regions, close to 1 in high-E regions) */
     688       96918 :     wrelE = 1.0f + relE / 15;
     689             : 
     690       96918 :     if ( wrelE > 1.0f )
     691             :     {
     692       36086 :         wrelE = 1.0f;
     693             :     }
     694       60832 :     else if ( wrelE < 0.01f )
     695             :     {
     696       18229 :         wrelE = 0.01f;
     697             :     }
     698             : 
     699             :     /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */
     700       96918 :     if ( dlp < 0 && dlp < hSpMusClas->past_dlp[0] )
     701             :     {
     702       24346 :         if ( hSpMusClas->past_dlp[0] > 0 )
     703             :         {
     704        8737 :             hSpMusClas->wdrop = -dlp;
     705             :         }
     706             :         else
     707             :         {
     708       15609 :             hSpMusClas->wdrop += hSpMusClas->past_dlp[0] - dlp;
     709             :         }
     710             :     }
     711             :     else
     712             :     {
     713       72572 :         hSpMusClas->wdrop = 0;
     714             :     }
     715             : 
     716       96918 :     wdrop = hSpMusClas->wdrop / 20;
     717             : 
     718       96918 :     if ( wdrop > 1.0f )
     719             :     {
     720           0 :         wdrop = 1.0f;
     721             :     }
     722       96918 :     else if ( wdrop < 0.1f )
     723             :     {
     724       83026 :         wdrop = 0.1f;
     725             :     }
     726             : 
     727             :     /* combine weights into one */
     728       96918 :     wght = wrelE * wdrop;
     729       96918 :     if ( wght < 0.01f )
     730             :     {
     731       20050 :         wght = 0.01f;
     732             :     }
     733             : 
     734             :     /* calculate weighted decision */
     735       96918 :     hSpMusClas->wdlp_0_95_sp = wght * dlp + ( 1 - wght ) * hSpMusClas->wdlp_0_95_sp;
     736             : 
     737       96918 :     if ( hSpMusClas->sp_mus_state == -HANG_LEN )
     738             :     {
     739       13184 :         hSpMusClas->wdlp_0_95_sp = 0;
     740             :     }
     741             : 
     742             :     /*------------------------------------------------------------------*
     743             :      * Final speech/music decision
     744             :      *------------------------------------------------------------------*/
     745             : 
     746       96918 :     if ( !vad && hSpMusClas->sp_mus_state == -HANG_LEN )
     747             :     {
     748             :         /* inactive state */
     749        9445 :         dec = 0;
     750             :     }
     751       87473 :     else if ( hSpMusClas->sp_mus_state <= 0 )
     752             :     {
     753             :         /* transition from active to inactive state or instable state */
     754        9639 :         dec = hSpMusClas->past_dec[0];
     755             :     }
     756       77834 :     else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
     757             :     {
     758             :         /* entry state -> final decision is calculated based on weighted average of past non-binary decisions */
     759        4362 :         ftmp = w_spmus[hSpMusClas->sp_mus_state - 1][0] * dlp;
     760        4362 :         ftmp += dotp( &w_spmus[hSpMusClas->sp_mus_state - 1][1], hSpMusClas->past_dlp, HANG_LEN - 1 );
     761        4362 :         dec = ftmp > 2.0f;
     762             :     }
     763             :     else
     764             :     {
     765             :         /* stable active state */
     766       73472 :         if ( hSpMusClas->wdlp_0_95_sp > 0 && hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 )
     767             :         {
     768             :             /* switching from speech to music */
     769         509 :             dec = 1;
     770             :         }
     771       72963 :         else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp < 0 )
     772             :         {
     773             :             /* switching from music to speech */
     774         473 :             dec = 0;
     775             :         }
     776             :         else
     777             :         {
     778       72490 :             dec = hSpMusClas->past_dec[0];
     779             :         }
     780             :     }
     781             : 
     782             :     /*------------------------------------------------------------------*
     783             :      * Updates
     784             :      *------------------------------------------------------------------*/
     785             : 
     786             :     /* update buffer of past non-binary decisions */
     787       96918 :     mvr2r( &hSpMusClas->past_dlp[0], &hSpMusClas->past_dlp[1], HANG_LEN - 2 );
     788       96918 :     hSpMusClas->past_dlp[0] = dlp;
     789             : 
     790             :     /* update buffer of past binary decisions */
     791       96918 :     mvs2s( &hSpMusClas->past_dec[0], &hSpMusClas->past_dec[1], HANG_LEN - 2 );
     792       96918 :     hSpMusClas->past_dec[0] = dec;
     793             : 
     794       96918 :     return dec;
     795             : }
     796             : 
     797             : 
     798             : /*---------------------------------------------------------------------*
     799             :  * sp_mus_classif_2nd()
     800             :  *
     801             :  * 2nd stage speech/music classifier (convert music to speech for onsets)
                     :  *
                     :  * Runs stab_est(), var_cor_calc() and attack_det() on the state kept in
                     :  * st->hSpMusClas, then revises the decision held in st:
                     :  *  - when st->sp_aud_decision1 == 1, st->sp_aud_decision2 may be forced
                     :  *    to 0 (or to 1 in the GSC attack case) and, for codec_mode == MODE1,
                     :  *    st->coder_type may be changed to GENERIC or TRANSITION;
                     :  *  - otherwise, for localVAD == 1 and coder_type == GENERIC, a late attack
                     :  *    sets *attack_flag and (MODE1 only) coder_type = TRANSITION.
                     :  * *attack_flag is always reset to 0 first, so its incoming value is unused.
     802             :  *---------------------------------------------------------------------*/
     803             : 
     804       55005 : static void sp_mus_classif_2nd(
     805             :     Encoder_State *st,    /* i/o: encoder state structure     */
     806             :     const float Etot,     /* i  : total frame energy          */
     807             :     int16_t *attack_flag, /* o  : attack flag (GSC or TC), reset to 0 on entry */
     808             :     const float *inp      /* i  : input signal                */
     809             : )
     810             : {
     811             :     int16_t attack;
     812       55005 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
     813             : 
     814             :     /* initialization */
     815       55005 :     *attack_flag = 0;
     816             : 
     817             :     /* signal stability estimation (updates the gsc_* members of hSpMusClas) */
     818       55005 :     stab_est( Etot, hSpMusClas->gsc_lt_diff_etot, &hSpMusClas->gsc_mem_etot, &hSpMusClas->gsc_nb_thr_3, &hSpMusClas->gsc_nb_thr_1, hSpMusClas->gsc_thres, &hSpMusClas->gsc_last_music_flag, st->vad_flag );
     819             : 
     820             :     /* calculate variance of correlation (sets hSpMusClas->high_stable_cor used below) */
     821       55005 :     var_cor_calc( st->old_corr, &hSpMusClas->mold_corr, hSpMusClas->var_cor_t, &hSpMusClas->high_stable_cor );
     822             : 
     823             :     /* attack detection; element_mode is fixed to EVS_MONO here. NOTE(review): st->clas is passed for both last_clas and clas - confirm this is intended */
     824       55005 :     attack = attack_det( inp, st->clas, st->localVAD, st->coder_type, st->total_brate, EVS_MONO, st->clas, hSpMusClas->finc_prev, &hSpMusClas->lt_finc, &hSpMusClas->last_strong_attack );
     825             : 
     826             :     /* change decision from music to speech in certain special cases */
     827       55005 :     if ( st->sp_aud_decision1 == 1 )
     828             :     {
     829       18301 :         if ( hSpMusClas->ener_RAT < 0.18f && hSpMusClas->lt_dec_thres > 15.0f )
     830             :         {
     831             :             /* strong music decision but almost no content below 1kHz */
     832           0 :             st->sp_aud_decision2 = 0;
     833             :         }
     834       18301 :         else if ( hSpMusClas->high_stable_cor && st->pitch[0] >= 130 )
     835             :         {
     836             :             /* prevent GSC in highly correlated signal with low energy variation */
     837             :             /* this is basically a patch against bassoon-type of music */
     838          53 :             st->sp_aud_decision2 = 0;
     839             : 
     840          53 :             if ( st->codec_mode == MODE1 && st->coder_type == TRANSITION )
     841             :             {
     842           0 :                 st->coder_type = GENERIC;
     843             :             }
     844             :         }
     845       18248 :         else if ( hSpMusClas->gsc_lt_diff_etot[MAX_LT - 1] > 4.5f && ( hSpMusClas->gsc_lt_diff_etot[MAX_LT - 1] - hSpMusClas->gsc_lt_diff_etot[MAX_LT - 2] > 10.0f ) ) /* newest entry is large and exceeds the previous entry by more than 10 */
     846             :         {
     847         328 :             if ( st->tc_cnt == 1 )
     848             :             {
     849             :                 /* do TC coding instead of GC/VC if onset has been already declared before */
     850          14 :                 st->sp_aud_decision2 = 0;
     851             : 
     852          14 :                 if ( st->codec_mode == MODE1 )
     853             :                 {
     854          14 :                     st->coder_type = TRANSITION;
     855             :                 }
     856             :             }
     857             :             else
     858             :             {
     859         314 :                 if ( attack >= ATT_3LSUB_POS )
     860             :                 {
     861             :                     /* do TC coding if attack is located in the last subframe */
     862          91 :                     st->sp_aud_decision2 = 0;
     863          91 :                     *attack_flag = attack + 1;
     864             : 
     865          91 :                     if ( st->codec_mode == MODE1 )
     866             :                     {
     867          90 :                         st->coder_type = TRANSITION;
     868             :                     }
     869             :                 }
     870         223 :                 else if ( attack >= ATT_SEG_LEN / 2 )
     871             :                 {
     872             :                     /* do GSC coding if attack is located after the first quarter of the first subframe */
     873             :                     /* (pre-echo will be treated at the decoder side) */
     874           1 :                     st->sp_aud_decision2 = 1;
     875           1 :                     *attack_flag = 31; /* NOTE(review): fixed value for this GSC case; its meaning is defined by the consumers of attack_flag - confirm */
     876             :                 }
     877             :             }
     878             :         }
     879             :     }
     880       36704 :     else if ( st->localVAD == 1 && st->coder_type == GENERIC && ( ( attack >= ATT_3LSUB_POS && st->total_brate < ACELP_24k40 ) || ( attack >= ATT_3LSUB_POS_16k && st->total_brate >= ACELP_24k40 && st->total_brate < ACELP_48k ) ) )
     881             :     {
     882             :         /* do TC coding if attack is located in the last subframe (position limit depends on the bitrate range) */
     883         644 :         *attack_flag = attack + 1;
     884             : 
     885         644 :         if ( st->codec_mode == MODE1 )
     886             :         {
     887         524 :             st->coder_type = TRANSITION;
     888             :         }
     889             :     }
     890             : 
     891       55005 :     return;
     892             : }
     893             : 
     894             : 
     895             : /*---------------------------------------------------------------------*
     896             :  * tonal_det()
     897             :  *
     898             :  * Tonal detector based on spectral stability and harmonicity
                     :  *
                     :  * Smooths S[] into the long-term map tod_S_map_lt[] with an adaptive
                     :  * weight driven by vad_flag, averages the map over TOD_NSPEC entries,
                     :  * smooths that average over time and adapts the threshold *tod_thr_lt.
                     :  * Returns the updated value of *tod_S_mass_lt.
     899             :  *---------------------------------------------------------------------*/
     900             : 
     901     4454425 : static float tonal_det(
     902             :     const float S[],        /* i  : input values, entries 0..TOD_NSPEC-1 are read                          */
     903             :     int16_t vad_flag,       /* i  : VAD flag, target value of the adaptive weight                          */
     904             :     float tod_S_map_lt[],   /* i/o: long-term smoothed map of S[], TOD_NSPEC entries updated in place       */
     905             :     float *tod_thr_lt,      /* i/o: adaptive threshold, kept within THR_MASS_MIN..THR_MASS_MAX             */
     906             :     float *tod_weight,      /* i/o: adaptive smoothing weight, limited by TON_ALPHA and 1 - TON_ALPHA      */
     907             :     float *tod_S_mass_prev, /* i/o: mean of the map from the previous call, replaced by the current mean   */
     908             :     float *tod_S_mass_lt )  /* i/o: time-smoothed mean of the map (this is also the return value)          */
     909             : {
     910             :     int16_t i;
     911             :     float S_mass, alpha;
     912             : 
     913             :     /* update the adaptive weight (first-order recursion towards vad_flag, then limited) */
     914     4454425 :     *tod_weight = TON_ALPHA * *tod_weight + ( 1 - TON_ALPHA ) * vad_flag;
     915     4454425 :     if ( *tod_weight > TON_ALPHA )
     916             :     {
     917     2538071 :         *tod_weight = TON_ALPHA;
     918             :     }
     919     1916354 :     else if ( *tod_weight < ( 1 - TON_ALPHA ) )
     920             :     {
     921      666807 :         *tod_weight = 1 - TON_ALPHA;
     922             :     }
     923             : 
     924             :     /* calculate LT spectral correlation in each band up to 4 kHz (recursive smoothing of S into tod_S_map_lt, summed into S_mass) */
     925     4454425 :     S_mass = 0.0f;
     926   360808425 :     for ( i = 0; i < TOD_NSPEC; i++ )
     927             :     {
     928   356354000 :         tod_S_map_lt[i] = *tod_weight * tod_S_map_lt[i] + ( 1 - *tod_weight ) * S[i];
     929             : 
     930   356354000 :         S_mass += tod_S_map_lt[i];
     931             :     }
     932     4454425 :     S_mass /= TOD_NSPEC; /* mean over the TOD_NSPEC map entries */
     933             : 
     934     4454425 :     if ( S_mass > *tod_S_mass_prev ) /* the smoothing factor depends on whether the mean rose since the previous call */
     935             :     {
     936     2111620 :         alpha = 0.7f;
     937             :     }
     938             :     else
     939             :     {
     940     2342805 :         alpha = 0.3f;
     941             :     }
     942     4454425 :     *tod_S_mass_prev = S_mass;
     943     4454425 :     *tod_S_mass_lt = alpha * *tod_S_mass_lt + ( 1 - alpha ) * S_mass;
     944     4454425 :     S_mass = *tod_S_mass_lt; /* from here on S_mass holds the time-smoothed value */
     945             : 
     946             :     /* updating adaptive decision threshold (lowered when the smoothed mass exceeds it, raised otherwise) */
     947     4454425 :     if ( S_mass > *tod_thr_lt )
     948             :     {
     949       73464 :         *tod_thr_lt -= THR_MASS_STEP_DN;
     950             :     }
     951             :     else
     952             :     {
     953     4380961 :         *tod_thr_lt += THR_MASS_STEP_UP;
     954             :     }
     955             : 
     956     4454425 :     if ( *tod_thr_lt > THR_MASS_MAX ) /* upper limit of the threshold */
     957             :     {
     958     4369211 :         *tod_thr_lt = THR_MASS_MAX;
     959             :     }
     960             : 
     961     4454425 :     if ( *tod_thr_lt < THR_MASS_MIN ) /* lower limit of the threshold */
     962             :     {
     963       66593 :         *tod_thr_lt = THR_MASS_MIN;
     964             :     }
     965             : 
     966     4454425 :     return S_mass;
     967             : }
     968             : 
     969             : /*---------------------------------------------------------------------*
     970             :  * var_cor_calc()
     971             :  *
     972             :  * Calculate variance of correlation
                     :  *
                     :  * Pushes old_corr into the history buffer var_cor_t[], evaluates var()
                     :  * over its VAR_COR_LEN entries, sets *high_stable_cor accordingly and
                     :  * finally updates the running average *mold_corr.
     973             :  *---------------------------------------------------------------------*/
     974             : 
     975     4509430 : static void var_cor_calc(
     976             :     const float old_corr,      /* i  : newest correlation value                                   */
     977             :     float *mold_corr,          /* i/o: running average of the correlation (0.9 / 0.1 recursion)   */
     978             :     float var_cor_t[],         /* i/o: history of VAR_COR_LEN correlation values, newest at [0]   */
     979             :     int16_t *high_stable_cor ) /* o  : 1 when *mold_corr > 0.8 and the variance < 5e-4, else 0    */
     980             : {
     981             :     int16_t i;
     982             :     float var_cor;
     983             : 
     984             :     /* update buffer of old correlation values (shift by one position towards the end) */
     985    45094300 :     for ( i = VAR_COR_LEN - 1; i > 0; i-- )
     986             :     {
     987    40584870 :         var_cor_t[i] = var_cor_t[i - 1];
     988             :     }
     989     4509430 :     var_cor_t[i] = old_corr; /* i is 0 after the loop, so the newest value lands at index 0 */
     990             : 
     991             :     /* calculate variance of correlation */
     992     4509430 :     var_cor = var( var_cor_t, VAR_COR_LEN );
     993             : 
     994             :     /* set flag in case of highly-correlated stable signal (uses the average from before this frame's update) */
     995     4509430 :     if ( *mold_corr > 0.8f && var_cor < 5e-4f )
     996             :     {
     997      319083 :         *high_stable_cor = 1;
     998             :     }
     999             :     else
    1000             :     {
    1001     4190347 :         *high_stable_cor = 0;
    1002             :     }
    1003             : 
    1004             :     /* update average correlation */
    1005     4509430 :     *mold_corr = 0.1f * old_corr + 0.9f * *mold_corr;
    1006             : 
    1007     4509430 :     return;
    1008             : }
    1009             : 
    1010             : /*---------------------------------------------------------------------*
    1011             :  * attack_det()
    1012             :  *
    1013             :  * Attack detection
    1014             :  *---------------------------------------------------------------------*/
    1015             : 
    1016     4509430 : static int16_t attack_det(
    1017             :     const float *inp,           /* i  : input signal                           */
    1018             :     const int16_t last_clas,    /* i  : last signal clas                       */
    1019             :     const int16_t localVAD,     /* i  : local VAD flag                         */
    1020             :     const int16_t coder_type,   /* i  : coder type                             */
    1021             :     const int32_t total_brate,  /* i  : total bitrate                          */
    1022             :     const int16_t element_mode, /* i  : IVAS element mode                      */
    1023             :     const int16_t clas,         /* i  : signal class                           */
    1024             :     float finc_prev[],          /* i/o: previous finc                          */
    1025             :     float *lt_finc,             /* i/o: long-term mean finc                    */
    1026             :     int16_t *last_strong_attack /* i/o: last strong attack flag                */
    1027             : )
    1028             : {
    1029             :     int16_t i, attack;
    1030             :     float etmp, etmp2, finc[ATT_NSEG];
    1031             :     int16_t att_3lsub_pos;
    1032             :     int16_t attack1;
    1033             : 
    1034     4509430 :     att_3lsub_pos = ATT_3LSUB_POS;
    1035     4509430 :     if ( total_brate >= ACELP_24k40 )
    1036             :     {
    1037       22364 :         att_3lsub_pos = ATT_3LSUB_POS_16k;
    1038             :     }
    1039             : 
    1040             :     /* compute energy per section */
    1041   148811190 :     for ( i = 0; i < ATT_NSEG; i++ )
    1042             :     {
    1043   144301760 :         finc[i] = sum2_f( inp + i * ATT_SEG_LEN, ATT_SEG_LEN );
    1044             :     }
    1045             : 
    1046     4509430 :     attack = maximum( finc, ATT_NSEG, &etmp );
    1047     4509430 :     attack1 = attack;
    1048             : 
    1049     4509430 :     if ( localVAD == 1 && coder_type == GENERIC )
    1050             :     {
    1051             :         /* compute mean energy in the first three subframes */
    1052     1852547 :         etmp = mean( finc, att_3lsub_pos );
    1053             : 
    1054             :         /* compute mean energy after the attack */
    1055     1852547 :         etmp2 = mean( finc + attack, ATT_NSEG - attack );
    1056             : 
    1057             :         /* and compare them */
    1058     1852547 :         if ( etmp * 8 > etmp2 )
    1059             :         {
    1060             :             /* stop, if the attack is not sufficiently strong */
    1061     1790164 :             attack = 0;
    1062             :         }
    1063             : 
    1064     1852547 :         if ( last_clas == VOICED_CLAS && etmp * 20 > etmp2 )
    1065             :         {
    1066             :             /* stop, if the signal was voiced and the attack is not sufficiently strong */
    1067      437968 :             attack = 0;
    1068             :         }
    1069             : 
    1070             :         /* compare wrt. other sections (reduces miss-classification) */
    1071     1852547 :         if ( attack > 0 )
    1072             :         {
    1073       54845 :             etmp2 = finc[attack];
    1074             : 
    1075     1135562 :             for ( i = 2; i < att_3lsub_pos - 2; i++ )
    1076             :             {
    1077     1083303 :                 if ( finc[i] * 2.0f > etmp2 )
    1078             :                 {
    1079             :                     /* stop, if the attack is not sufficiently strong */
    1080        2586 :                     attack = 0;
    1081        2586 :                     break;
    1082             :                 }
    1083             :             }
    1084             :         }
    1085             : 
    1086     1852547 :         if ( attack == 0 && element_mode > EVS_MONO && ( clas < VOICED_TRANSITION || clas == ONSET ) )
    1087             :         {
    1088     1210945 :             mvr2r( finc, finc_prev, attack1 );
    1089             : 
    1090             :             /* compute mean energy before the attack */
    1091     1210945 :             etmp = mean( finc_prev, ATT_NSEG );
    1092             : 
    1093     1210945 :             etmp2 = finc[attack1];
    1094             : 
    1095     1210945 :             if ( ( etmp * 16 < etmp2 ) || ( etmp * 12 < etmp2 && last_clas == UNVOICED_CLAS ) )
    1096             :             {
    1097       57645 :                 attack = attack1;
    1098             :             }
    1099             : 
    1100     1210945 :             if ( 20 * *lt_finc > etmp2 || *last_strong_attack )
    1101             :             {
    1102     1127071 :                 attack = 0;
    1103             :             }
    1104             :         }
    1105             : 
    1106     1852547 :         *last_strong_attack = attack;
    1107             :     }
    1108             : 
    1109             :     /* compare wrt. other sections (reduces misclassification) */
    1110     2656883 :     else if ( attack > 0 )
    1111             :     {
    1112    30391398 :         for ( i = 2; i < att_3lsub_pos - 2; i++ )
    1113             :         {
    1114    29466946 :             if ( i != attack && finc[i] * 1.3f > finc[attack] )
    1115             :             {
    1116             :                 /* stop, if the attack is not sufficiently strong */
    1117     1483878 :                 attack = 0;
    1118     1483878 :                 break;
    1119             :             }
    1120             :         }
    1121     2408330 :         *last_strong_attack = 0;
    1122             :     }
    1123             : 
    1124             :     /* updates */
    1125     4509430 :     mvr2r( finc, finc_prev, ATT_NSEG );
    1126     4509430 :     *lt_finc = 0.95f * *lt_finc + 0.05f * mean( finc, ATT_NSEG );
    1127             : 
    1128     4509430 :     return attack;
    1129             : }
    1130             : 
    1131             : /*---------------------------------------------------------------------*
    1132             :  * ivas_smc_gmm()
    1133             :  *
    1134             :  * 1st stage of the speech/music classification (based on the GMM model)
    1135             :  *---------------------------------------------------------------------*/
    1136             : 
    1137             : /*! r: S/M decision (0=speech or noise,1=unclear,2=music) */
    1138    16112682 : int16_t ivas_smc_gmm(
    1139             :     Encoder_State *st,                    /* i/o: state structure                                     */
    1140             :     STEREO_CLASSIF_HANDLE hStereoClassif, /* i/o: stereo classifier structure                         */
    1141             :     const int16_t localVAD_HE_SAD,        /* i  : HE-SAD flag without hangover                        */
    1142             :     const float Etot,                     /* i  : total frame energy                                  */
    1143             :     const float lsp_new[M],               /* i  : LSPs in current frame                               */
    1144             :     const float cor_map_sum,              /* i  : correlation map sum (from multi-harmonic anal.)     */
    1145             :     const float epsP[M + 1],              /* i  : LP prediciton error                                 */
    1146             :     const float PS[],                     /* i  : energy spectrum                                     */
    1147             :     const float non_sta,                  /* i  : unbound non-stationarity                            */
    1148             :     const float relE,                     /* i  : relative frame energy                               */
    1149             :     int16_t *high_lpn_flag,               /* i/o: sp/mus LPN flag                                     */
    1150             :     const int16_t flag_spitch             /* i  : flag to indicate very short stable pitch            */
    1151             : )
    1152             : {
    1153             :     int16_t i, m, dec;
    1154             :     int16_t flag_odv;
    1155             :     float lps, lpm, lpn;
    1156             :     float ps[N_SMC_MIXTURES], pm[N_SMC_MIXTURES], pn[N_SMC_MIXTURES];
    1157             :     float fvm[N_PCA_COEF], lprob;
    1158             :     float dlp, ftmp, sum_PS, ps_diff, ps_sta, wrelE, wdrop, wght;
    1159             :     float wrise;
    1160             :     float dlp_mean2var;
    1161             :     float FV[N_SMC_FEATURES], *pFV, PS_norm[128], dPS[128];
    1162             :     const float *pODV;
    1163             :     float *pFV_st, smc_st_mean_fact;
    1164             :     int16_t relE_attack_flag;
    1165             :     int16_t j, len;
    1166             :     const float *pt_mel_fb;
    1167             :     float melS[NB_MEL_BANDS], mfcc[NB_MEL_BANDS];
    1168             :     int16_t odv_cnt;
    1169             :     int16_t i_out[N_SMC_FEATURES], *p_out;
    1170             : 
    1171             :     /*------------------------------------------------------------------*
    1172             :      * Initialization
    1173             :      *------------------------------------------------------------------*/
    1174             : 
    1175    16112682 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    1176             : 
    1177             :     /*------------------------------------------------------------------*
    1178             :      * State machine (sp_mus_state: -8 = INACTIVE, -7:-1 = UNSTABLE, 0:7 = ENTRY, 8 = STABLE )
    1179             :      *------------------------------------------------------------------*/
    1180             : 
    1181    16112682 :     if ( localVAD_HE_SAD )
    1182             :     {
    1183    12897285 :         if ( relE < -20 )
    1184             :         {
    1185      749785 :             if ( hSpMusClas->sp_mus_state > 0 )
    1186             :             {
    1187      131355 :                 if ( hSpMusClas->sp_mus_state < HANG_LEN )
    1188             :                 {
    1189             :                     /* energy is too low but we are in entry period -> reset the inactive counter to allow new entry later */
    1190       31121 :                     hSpMusClas->inact_cnt = 0;
    1191             :                 }
    1192             : 
    1193             :                 /* energy is too low -> we are going to unstable state */
    1194      131355 :                 hSpMusClas->sp_mus_state = 0;
    1195             :             }
    1196      618430 :             else if ( hSpMusClas->sp_mus_state > -HANG_LEN )
    1197             :             {
    1198             :                 /* energy is still too low -> we are still in unstable state */
    1199      269743 :                 hSpMusClas->sp_mus_state--;
    1200             :             }
    1201             :         }
    1202    12147500 :         else if ( hSpMusClas->sp_mus_state <= 0 )
    1203             :         {
    1204      303231 :             if ( hSpMusClas->inact_cnt == 0 )
    1205             :             {
    1206             : 
    1207      182328 :                 hSpMusClas->sp_mus_state = 1;
    1208             :             }
    1209             :             else
    1210             :             {
    1211             : 
    1212      120903 :                 hSpMusClas->sp_mus_state = HANG_LEN;
    1213             :             }
    1214             : 
    1215      303231 :             hSpMusClas->inact_cnt = 12;
    1216             :         }
    1217    11844269 :         else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1218             :         {
    1219             :             /* we are inside an entry period -> increment the counter of entry frames */
    1220      775328 :             hSpMusClas->sp_mus_state++;
    1221             :         }
    1222             : 
    1223    12897285 :         if ( hSpMusClas->sp_mus_state < 0 && hSpMusClas->inact_cnt > 0 )
    1224             :         {
    1225      251694 :             hSpMusClas->inact_cnt--;
    1226             :         }
    1227             :     }
    1228             :     else
    1229             :     {
    1230     3215397 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1231             :         {
    1232       16662 :             hSpMusClas->inact_cnt = 0;
    1233             :         }
    1234     3198735 :         else if ( hSpMusClas->inact_cnt > 0 )
    1235             :         {
    1236      384664 :             hSpMusClas->inact_cnt--;
    1237             :         }
    1238             : 
    1239     3215397 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1240             :         {
    1241       16662 :             hSpMusClas->sp_mus_state = -HANG_LEN;
    1242             :         }
    1243     3198735 :         else if ( hSpMusClas->sp_mus_state > 0 )
    1244             :         {
    1245       52236 :             hSpMusClas->sp_mus_state = -1;
    1246             :         }
    1247     3146499 :         else if ( hSpMusClas->sp_mus_state > -HANG_LEN )
    1248             :         {
    1249             :             /* we are in inactive state */
    1250      263410 :             hSpMusClas->sp_mus_state--;
    1251             :         }
    1252             :     }
    1253             : 
    1254             :     /* detect attacks based on relE */
    1255    16112682 :     if ( relE > hSpMusClas->prev_relE )
    1256             :     {
    1257     6351905 :         hSpMusClas->relE_attack_sum += relE - hSpMusClas->prev_relE;
    1258             :     }
    1259             :     else
    1260             :     {
    1261     9760777 :         hSpMusClas->relE_attack_sum = 0;
    1262             :     }
    1263    16112682 :     hSpMusClas->prev_relE = relE;
    1264             : 
    1265             :     /* update counter from last VAD 0->1 change */
    1266    16112682 :     if ( hSpMusClas->prev_vad == 0 && localVAD_HE_SAD == 1 )
    1267             :     {
    1268      229314 :         hSpMusClas->vad_0_1_cnt = 1;
    1269             :     }
    1270    15883368 :     else if ( localVAD_HE_SAD == 1 && hSpMusClas->vad_0_1_cnt > 0 && hSpMusClas->vad_0_1_cnt < 50 )
    1271             :     {
    1272     3417131 :         hSpMusClas->vad_0_1_cnt++;
    1273             :     }
    1274             :     else
    1275             :     {
    1276    12466237 :         hSpMusClas->vad_0_1_cnt = 0;
    1277             :     }
    1278    16112682 :     hSpMusClas->prev_vad = localVAD_HE_SAD;
    1279             : 
    1280    16112682 :     if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && hSpMusClas->relE_attack_sum > 5.0f )
    1281             :     {
    1282      263534 :         hSpMusClas->relE_attack_cnt++;
    1283             : 
    1284             :         /* set flag only in the first X frames in a series */
    1285      263534 :         if ( hSpMusClas->relE_attack_cnt > 0 && hSpMusClas->relE_attack_cnt < 3 )
    1286             :         {
    1287      203948 :             relE_attack_flag = 1;
    1288             :         }
    1289             :         else
    1290             :         {
    1291       59586 :             relE_attack_flag = 0;
    1292             :         }
    1293             :     }
    1294             :     else
    1295             :     {
    1296    15849148 :         hSpMusClas->relE_attack_cnt = 0;
    1297    15849148 :         relE_attack_flag = 0;
    1298             :     }
    1299             : 
    1300    16112682 :     hSpMusClas->prev_Etot = Etot;
    1301             : 
    1302             :     /*------------------------------------------------------------------*
    1303             :      * Preparation of the feature vector
    1304             :      *------------------------------------------------------------------*/
    1305             : 
    1306    16112682 :     pFV = FV;
    1307             : 
    1308             :     /* [0] OL pitch */
    1309    16112682 :     if ( relE_attack_flag || st->tc_cnt == 1 || st->tc_cnt == 2 )
    1310             :     {
    1311     1181703 :         *pFV++ = (float) st->pitch[2];
    1312             :     }
    1313             :     else
    1314             :     {
    1315    14930979 :         *pFV++ = (float) ( st->pitch[0] + st->pitch[1] + st->pitch[2] ) / 3.0f;
    1316             :     }
    1317             : 
    1318             :     /* [1] voicing */
    1319    16112682 :     if ( relE_attack_flag || st->tc_cnt == 1 || st->tc_cnt == 2 )
    1320             :     {
    1321     1181703 :         *pFV++ = st->voicing[2];
    1322             :     }
    1323             :     else
    1324             :     {
    1325    14930979 :         *pFV++ = ( st->voicing[0] + st->voicing[1] + st->voicing[2] ) / 3.0f;
    1326             :     }
    1327             : 
    1328             :     /* [2,3,4,5,6] LSFs */
    1329    16112682 :     *pFV++ = acosf( lsp_new[2] );
    1330    16112682 :     *pFV++ = acosf( lsp_new[3] );
    1331    16112682 :     *pFV++ = acosf( lsp_new[4] );
    1332    16112682 :     *pFV++ = acosf( lsp_new[5] );
    1333    16112682 :     *pFV++ = acosf( lsp_new[6] );
    1334             : 
    1335             :     /* [7] cor_map_sum */
    1336    16112682 :     *pFV++ = cor_map_sum;
    1337             : 
    1338             :     /* [8] non_sta */
    1339    16112682 :     *pFV++ = non_sta;
    1340             : 
    1341             :     /* [9] epsP */
    1342    16112682 :     *pFV++ = logf( epsP[14] + 1e-5f ) - logf( epsP[0] + 1e-5f );
    1343             : 
    1344             :     /* [10,11,12] MFCCs */
    1345    16112682 :     set_zero( melS, NB_MEL_BANDS );
    1346    16112682 :     pt_mel_fb = mel_fb;
    1347   660619962 :     for ( i = 0; i < NB_MEL_BANDS; i++ )
    1348             :     {
    1349   644507280 :         j = mel_fb_start[i];
    1350   644507280 :         len = mel_fb_len[i];
    1351   644507280 :         melS[i] = logf( dotp( &PS[j], pt_mel_fb, len ) + 1e-5f );
    1352   644507280 :         pt_mel_fb += len;
    1353             :     }
    1354             : 
    1355    16112682 :     v_mult_mat( mfcc, melS, dct_mtx, NB_MEL_BANDS, NB_MEL_COEF );
    1356             : 
    1357    16112682 :     *pFV++ = mfcc[2];
    1358    16112682 :     *pFV++ = mfcc[6];
    1359    16112682 :     *pFV++ = mfcc[12];
    1360             : 
    1361             :     /* calculation of differential normalized power spectrum */
    1362    16112682 :     sum_PS = 1e-5f;
    1363  1095662376 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
    1364             :     {
    1365  1079549694 :         sum_PS += PS[i];
    1366             :     }
    1367             : 
    1368  1095662376 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
    1369             :     {
    1370  1079549694 :         PS_norm[i] = PS[i] / sum_PS;
    1371  1079549694 :         dPS[i] = fabsf( PS_norm[i] - hSpMusClas->past_PS[i - LOWEST_FBIN] );
    1372             :     }
    1373             : 
    1374             :     /* [13] ps_diff (spectral difference) */
    1375    16112682 :     ps_diff = 0;
    1376  1095662376 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
    1377             :     {
    1378  1079549694 :         ps_diff += dPS[i];
    1379             :     }
    1380             : 
    1381    16112682 :     *pFV++ = ps_diff;
    1382             : 
    1383             :     /* [14] ps_sta (spectral stationarity) */
    1384    16112682 :     ps_sta = 0;
    1385  1095662376 :     for ( i = LOWEST_FBIN; i < HIGHEST_FBIN; i++ )
    1386             :     {
    1387  1079549694 :         if ( PS_norm[i] > hSpMusClas->past_PS[i - LOWEST_FBIN] )
    1388             :         {
    1389   505460463 :             ps_sta += PS_norm[i] / ( dPS[i] + 1e-5f );
    1390             :         }
    1391             :         else
    1392             :         {
    1393   574089231 :             ps_sta += hSpMusClas->past_PS[i - LOWEST_FBIN] / ( dPS[i] + 1e-5f );
    1394             :         }
    1395             :     }
    1396             : 
    1397    16112682 :     *pFV++ = logf( ps_sta + 1e-5f );
    1398    16112682 :     mvr2r( &PS_norm[LOWEST_FBIN], hSpMusClas->past_PS, HIGHEST_FBIN - LOWEST_FBIN );
    1399             : 
    1400             :     /* save ps_diff and ps_sta features for XTALK and UNCLR classifier */
    1401    16112682 :     if ( hStereoClassif != NULL )
    1402             :     {
    1403    11910154 :         if ( st->idchan == 0 )
    1404             :         {
    1405     6355819 :             hStereoClassif->ps_diff_ch1 = ps_diff;
    1406     6355819 :             hStereoClassif->ps_sta_ch1 = logf( ps_sta + 1e-5f );
    1407             :         }
    1408             :         else
    1409             :         {
    1410     5554335 :             hStereoClassif->ps_diff_ch2 = ps_diff;
    1411     5554335 :             hStereoClassif->ps_sta_ch2 = logf( ps_sta + 1e-5f );
    1412             :         }
    1413             :     }
    1414             : 
    1415             :     /*------------------------------------------------------------------*
    1416             :      * Outlier detection based on feature histograms
    1417             :      *------------------------------------------------------------------*/
    1418             : 
    1419    16112682 :     flag_odv = 0;
    1420    16112682 :     if ( localVAD_HE_SAD )
    1421             :     {
    1422    12897285 :         pFV = FV;
    1423    12897285 :         pODV = hout_intervals;
    1424    12897285 :         p_out = i_out;
    1425    12897285 :         odv_cnt = 0;
    1426   206356560 :         for ( i = 0; i < N_SMC_FEATURES; i++ )
    1427             :         {
    1428   193459275 :             if ( *pFV < pODV[0] || *pFV > pODV[1] )
    1429             :             {
    1430      160343 :                 *p_out++ = i;
    1431      160343 :                 odv_cnt++;
    1432             :             }
    1433             : 
    1434   193459275 :             pFV++;
    1435   193459275 :             pODV += 2;
    1436             :         }
    1437             : 
    1438             :         /* set outlier flag */
    1439    12897285 :         if ( odv_cnt >= 2 )
    1440             :         {
    1441       43865 :             flag_odv = 1;
    1442             : 
    1443             :             /* replace outlying features with values from the previous frame */
    1444      141920 :             for ( i = 0; i < odv_cnt; i++ )
    1445             :             {
    1446       98055 :                 FV[i_out[i]] = hSpMusClas->prev_FV[i_out[i]];
    1447             :             }
    1448             :         }
    1449             :     }
    1450             : 
    1451             :     /*------------------------------------------------------------------*
    1452             :      * Adaptive short-term mean filter on feature vector
    1453             :      *------------------------------------------------------------------*/
    1454             : 
    1455    16112682 :     pFV = FV;
    1456    16112682 :     pFV_st = hSpMusClas->FV_st;
    1457    16112682 :     smc_st_mean_fact = SMC_ST_MEAN_FACT;
    1458   257802912 :     for ( i = 0; i < N_SMC_FEATURES; i++ )
    1459             :     {
    1460   241690230 :         *pFV_st = smc_st_mean_fact * ( *pFV_st ) + ( 1 - smc_st_mean_fact ) * ( *pFV );
    1461             : 
    1462   241690230 :         if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && ( relE_attack_flag || flag_odv ) )
    1463             :         {
    1464             :             /* strong attack or outlier frame during entry state -> features cannot be trusted but there is also no useful past info -> */
    1465             :             /* -> do whatever you want because dlp will be reset to 0 anyway */
    1466     3079455 :             pFV++;
    1467     3079455 :             pFV_st++;
    1468             :         }
    1469   238610775 :         else if ( hSpMusClas->sp_mus_state == HANG_LEN && ( st->tc_cnt == 1 || st->tc_cnt == 2 ) )
    1470             :         {
    1471             :             /* energy attack in stable state -> use current features instead of the long-term average */
    1472    13532160 :             pFV++;
    1473    13532160 :             pFV_st++;
    1474             :         }
    1475             :         else
    1476             :         {
    1477   225078615 :             *pFV++ = *pFV_st++;
    1478             :         }
    1479             :     }
    1480             : 
    1481             :     /* update */
    1482    16112682 :     mvr2r( FV, hSpMusClas->prev_FV, N_SMC_FEATURES );
    1483             : 
    1484             :     /*------------------------------------------------------------------*
    1485             :      * Non-linear power transformation (boxcox) on certain features
    1486             :      *------------------------------------------------------------------*/
    1487             : 
    1488    16112682 :     pFV = FV;
    1489   257802912 :     for ( i = 0; i < N_SMC_FEATURES; i++ )
    1490             :     {
    1491   241690230 :         if ( bcox_lmbd[i] != 0 )
    1492             :         {
    1493    48338046 :             *pFV -= bcox_add_cnst[i];
    1494    48338046 :             if ( *pFV < 1 )
    1495             :             {
    1496     2873226 :                 *pFV = 1;
    1497             :             }
    1498    48338046 :             *pFV = ( powf( *pFV, bcox_lmbd[i] ) - 1 ) / bcox_lmbd[i];
    1499             :         }
    1500             : 
    1501   241690230 :         pFV++;
    1502             :     }
    1503             : 
    1504             :     /*------------------------------------------------------------------*
    1505             :      * Scaling of the feature vector
    1506             :      * PCA
    1507             :      *------------------------------------------------------------------*/
    1508             : 
    1509    16112682 :     pFV = FV;
    1510   257802912 :     for ( i = 0; i < N_SMC_FEATURES; i++ )
    1511             :     {
    1512             :         /* Standard scaler - mean and variance normalization */
    1513   241690230 :         *pFV = ( *pFV - sm_means[i] ) / sm_scale[i];
    1514   241690230 :         pFV++;
    1515             : 
    1516             :         /* MinMax scaler - mean and variance normalization */
    1517             :         /**pFV = *pFV * sm_scale[i] + sm_min[i];*/
    1518             :         /*pFV++;*/
    1519             :     }
    1520             : 
    1521             :     /* PCA */
    1522    16112682 :     v_sub( FV, pca_mean_, FV, N_SMC_FEATURES );
    1523    16112682 :     v_mult_mat( FV, FV, pca_components_, N_SMC_FEATURES, N_PCA_COEF );
    1524             : 
    1525             :     /*------------------------------------------------------------------*
    1526             :      * Calculation of posterior probability
    1527             :      * Log-probability
    1528             :      *------------------------------------------------------------------*/
    1529             : 
    1530             :     /* run loop for all mixtures (for each mixture, calculate the probability of speech, music and noise) */
    1531    16112682 :     lps = lpm = lpn = 0;
    1532   112788774 :     for ( m = 0; m < N_SMC_MIXTURES; m++ )
    1533             :     {
    1534    96676092 :         v_sub( FV, &means_speech[m * N_PCA_COEF], fvm, N_PCA_COEF );
    1535    96676092 :         lprob = dot_product_cholesky( fvm, &prec_chol_speech[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
    1536    96676092 :         ps[m] = logf( weights_speech[m] ) + log_det_chol_speech[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob;
    1537             : 
    1538    96676092 :         v_sub( FV, &means_music[m * N_PCA_COEF], fvm, N_PCA_COEF );
    1539    96676092 :         lprob = dot_product_cholesky( fvm, &prec_chol_music[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
    1540    96676092 :         pm[m] = logf( weights_music[m] ) + log_det_chol_music[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob;
    1541             : 
    1542    96676092 :         v_sub( FV, &means_noise[m * N_PCA_COEF], fvm, N_PCA_COEF );
    1543    96676092 :         lprob = dot_product_cholesky( fvm, &prec_chol_noise[m * ( N_PCA_COEF * N_PCA_COEF + N_PCA_COEF ) / 2], N_PCA_COEF );
    1544    96676092 :         pn[m] = logf( weights_noise[m] ) + log_det_chol_noise[m] - 0.5f * N_PCA_COEF * logf( PI2 ) - 0.5f * lprob;
    1545             :     }
    1546             : 
    1547    16112682 :     lps = logsumexp( ps, N_SMC_MIXTURES );
    1548    16112682 :     lpm = logsumexp( pm, N_SMC_MIXTURES );
    1549    16112682 :     lpn = logsumexp( pn, N_SMC_MIXTURES );
    1550             : 
    1551    16112682 :     *high_lpn_flag = 0;
    1552    16112682 :     if ( lpn > lps && lpn > lpm )
    1553             :     {
    1554     1944511 :         *high_lpn_flag = 1;
    1555             :     }
    1556             : 
    1557    16112682 :     hSpMusClas->lpm = lpm;
    1558    16112682 :     hSpMusClas->lps = lps;
    1559    16112682 :     hSpMusClas->lpn = lpn;
    1560             : 
    1561             :     /* determine HQ Generic speech class */
    1562    16112682 :     if ( st->hHQ_core != NULL )
    1563             :     {
    1564     4570166 :         if ( lps > lpm + 0.5f )
    1565             :         {
    1566     2057970 :             st->hHQ_core->hq_generic_speech_class = 1;
    1567             :         }
    1568             :         else
    1569             :         {
    1570     2512196 :             st->hHQ_core->hq_generic_speech_class = 0;
    1571             :         }
    1572             :     }
    1573             : 
    1574             :     /*------------------------------------------------------------------*
    1575             :      * Decision without hangover
    1576             :      * Weighted decision
    1577             :      *------------------------------------------------------------------*/
    1578             : 
    1579             :     /* decision without hangover (0 - speech/noise, 1 - music) */
    1580    16112682 :     if ( !localVAD_HE_SAD || Etot < 10 || ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN && ( relE_attack_flag || flag_odv ) ) )
    1581             :     {
    1582     3557034 :         dlp = 0;
    1583             :     }
    1584             :     else
    1585             :     {
    1586    12555648 :         dlp = lpm - lps + DLP_BIAS;
    1587             : 
    1588    12555648 :         if ( dlp > 30.0f )
    1589             :         {
    1590      896633 :             dlp = 30.0f;
    1591             :         }
    1592    11659015 :         else if ( dlp < -30.0f )
    1593             :         {
    1594           0 :             dlp = -30.0f;
    1595             :         }
    1596             :     }
    1597             : 
    1598    16112682 :     dec = dlp > 0;
    1599             : 
    1600             :     /* calculate weight based on relE (higher relE -> lower weight, lower relE -> higher weight) */
    1601    16112682 :     wrelE = lin_interp( relE, 15.0f, 0.9f, -15.0f, 0.99f, 1 );
    1602             : 
    1603             :     /* calculate weight based on drops of dlp (close to 1 during sudden drops of dlp, close to 0 otherwise) */
    1604    16112682 :     hSpMusClas->dlp_mean_ST = 0.8f * hSpMusClas->dlp_mean_ST + 0.2f * dlp;
    1605    16112682 :     hSpMusClas->lt_dec_thres = hSpMusClas->dlp_mean_ST;
    1606             : 
    1607    16112682 :     if ( dlp < 0 && dlp < hSpMusClas->dlp_mean_ST )
    1608             :     {
    1609     3571657 :         if ( hSpMusClas->dlp_mean_ST > 0 )
    1610             :         {
    1611     1168711 :             hSpMusClas->wdrop = -dlp;
    1612             :         }
    1613     2402946 :         else if ( hSpMusClas->wdrop > 0 )
    1614             :         {
    1615      651477 :             hSpMusClas->wdrop += hSpMusClas->dlp_mean_ST - dlp;
    1616             :         }
    1617             :     }
    1618             :     else
    1619             :     {
    1620    12541025 :         hSpMusClas->wdrop = 0;
    1621             :     }
    1622             : 
    1623    16112682 :     wdrop = lin_interp( hSpMusClas->wdrop, 15.0f, 0.7f, 0.0f, 1.0f, 1 );
    1624             : 
    1625             :     /* calculate weight based on rises of dlp (close to 1 during sudden rise of dlp, close to 0 otherwise) */
    1626    16112682 :     if ( hSpMusClas->sp_mus_state == HANG_LEN && hSpMusClas->dlp_mean_ST > 0 && hSpMusClas->dlp_mean_ST > hSpMusClas->past_dlp_mean_ST[0] )
    1627             :     {
    1628     3729747 :         if ( hSpMusClas->past_dlp_mean_ST[0] < 0 )
    1629             :         {
    1630      226011 :             hSpMusClas->wrise = hSpMusClas->dlp_mean_ST;
    1631             :         }
    1632     3503736 :         else if ( hSpMusClas->wrise > 0 )
    1633             :         {
    1634      525825 :             hSpMusClas->wrise += hSpMusClas->dlp_mean_ST - hSpMusClas->past_dlp_mean_ST[0];
    1635             :         }
    1636             :     }
    1637             :     else
    1638             :     {
    1639    12382935 :         hSpMusClas->wrise = 0;
    1640             :     }
    1641             : 
    1642    16112682 :     wrise = lin_interp( hSpMusClas->wrise, 5.0f, 0.95f, 0.0f, 1.0f, 1 );
    1643             : 
    1644             :     /* combine weights into one */
    1645    16112682 :     wght = wrelE * wdrop * wrise;
    1646             : 
    1647             :     /* ratio of delta means vs. delta variances */
    1648    16112682 :     if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1649             :     {
    1650      873219 :         hSpMusClas->dlp_mean_LT = dlp;
    1651      873219 :         hSpMusClas->dlp_var_LT = 0;
    1652             :     }
    1653             : 
    1654    16112682 :     hSpMusClas->dlp_mean_LT = 0.9f * hSpMusClas->dlp_mean_LT + 0.1f * dlp;
    1655    16112682 :     ftmp = dlp - hSpMusClas->dlp_mean_LT;
    1656    16112682 :     hSpMusClas->dlp_var_LT = 0.9f * hSpMusClas->dlp_var_LT + 0.1f * ( ftmp * ftmp );
    1657             : 
    1658    16112682 :     if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1659             :     {
    1660      873219 :         dlp_mean2var = 0;
    1661             :     }
    1662             :     else
    1663             :     {
    1664    15239463 :         dlp_mean2var = fabsf( hSpMusClas->dlp_mean_LT ) / ( sqrtf( fabsf( hSpMusClas->dlp_var_LT ) ) + 1.0f );
    1665             :     }
    1666             : 
    1667    16112682 :     if ( dlp_mean2var > 15.0f )
    1668             :     {
    1669             :         /* decrease the weight little bit when the classifier indicates "strong speech" or "strong music" */
    1670      209884 :         wght *= 0.9f;
    1671             :     }
    1672             : 
    1673    16112682 :     if ( wght > 1.0f )
    1674             :     {
    1675           0 :         wght = 1.0f;
    1676             :     }
    1677    16112682 :     else if ( wght < 0.01f )
    1678             :     {
    1679           0 :         wght = 0.01f;
    1680             :     }
    1681             : 
    1682    16112682 :     if ( Etot < 10 )
    1683             :     {
    1684             :         /* silence */
    1685     2657007 :         wght = 0.92f;
    1686             :     }
    1687             : 
    1688             :     /* calculate weighted decision */
    1689    16112682 :     hSpMusClas->wdlp_0_95_sp = wght * hSpMusClas->wdlp_0_95_sp + ( 1 - wght ) * dlp;
    1690             : 
    1691             :     /* xtalk classifier: apply long hysteresis to prevent LRTD on music */
    1692    16112682 :     hSpMusClas->wdlp_xtalk = 0.995f * hSpMusClas->wdlp_xtalk + 0.005f * dlp;
    1693             : 
    1694             :     /*------------------------------------------------------------------*
    1695             :      * Final speech/music decision
    1696             :      *------------------------------------------------------------------*/
    1697             : 
    1698    16112682 :     if ( flag_spitch )
    1699             :     {
    1700      945319 :         hSpMusClas->flag_spitch_cnt = 5;
    1701             :     }
    1702    15167363 :     else if ( hSpMusClas->flag_spitch_cnt > 0 )
    1703             :     {
    1704      118082 :         hSpMusClas->flag_spitch_cnt--;
    1705             :     }
    1706             : 
    1707    16112682 :     if ( Etot < 10 )
    1708             :     {
    1709             :         /* silence */
    1710     2657007 :         dec = 0;
    1711             :     }
    1712    13455675 :     else if ( hSpMusClas->sp_mus_state > 0 && hSpMusClas->sp_mus_state < HANG_LEN )
    1713             :     {
    1714             :         /* entry state -> final decision is calculated based on weighted average of past non-binary decisions */
    1715      872068 :         ftmp = w_spmus[hSpMusClas->sp_mus_state - 1][0] * dlp;
    1716      872068 :         ftmp += dotp( &w_spmus[hSpMusClas->sp_mus_state - 1][1], hSpMusClas->past_dlp, HANG_LEN - 1 );
    1717      872068 :         if ( ftmp > 2.0f )
    1718             :         {
    1719      418870 :             if ( dlp > 2.0f )
    1720             :             {
    1721      268204 :                 dec = 2;
    1722             :             }
    1723             :             else
    1724             :             {
    1725      150666 :                 dec = 1;
    1726             :             }
    1727             :         }
    1728             :         else
    1729             :         {
    1730      453198 :             dec = 0;
    1731             :         }
    1732             :     }
    1733             :     else
    1734             :     {
    1735             :         /* stable active state */
    1736    12583607 :         if ( hSpMusClas->past_dec[0] == 0 && hSpMusClas->past_dec[1] == 0 && hSpMusClas->past_dec[2] == 0 &&
    1737     5733882 :              ( ( hSpMusClas->flag_spitch_cnt > 0 && hSpMusClas->wdlp_0_95_sp > 3.4f ) || ( hSpMusClas->flag_spitch_cnt == 0 && hSpMusClas->wdlp_0_95_sp > 2.1f ) ) )
    1738             :         {
    1739             :             /* switching from speech to unclear */
    1740       22667 :             dec = 1;
    1741             :         }
    1742    12560940 :         else if ( hSpMusClas->past_dec[0] == 0 && hSpMusClas->vad_0_1_cnt < 50 && hSpMusClas->relE_attack_sum == 0.0f && hSpMusClas->wdlp_0_95_sp > 1.0f )
    1743             :         {
    1744             :             /* switch from speech to unclear also during slowly rising weak music onsets */
    1745       36248 :             dec = 1;
    1746             :         }
    1747    12524692 :         else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp > 2.5f )
    1748             :         {
    1749             :             /* switching from unclear to music */
    1750       48828 :             dec = 2;
    1751             :         }
    1752    12475864 :         else if ( hSpMusClas->past_dec[0] == 2 && hSpMusClas->past_dec[1] == 2 && hSpMusClas->past_dec[2] == 2 && hSpMusClas->wdlp_0_95_sp < -1.0f )
    1753             :         {
    1754             :             /* switching from music to unclear */
    1755       34934 :             dec = 1;
    1756             :         }
    1757    12440930 :         else if ( hSpMusClas->past_dec[0] == 1 && hSpMusClas->wdlp_0_95_sp < -2.5f )
    1758             :         {
    1759             :             /* switching from unclear to speech */
    1760       36259 :             dec = 0;
    1761             :         }
    1762             :         else
    1763             :         {
    1764    12404671 :             dec = hSpMusClas->past_dec[0];
    1765             :         }
    1766             :     }
    1767             : 
    1768             :     /*------------------------------------------------------------------*
    1769             :      * raw S/M decision based on smoothed GMM score
    1770             :      *------------------------------------------------------------------*/
    1771             : 
    1772    16112682 :     if ( dec == 0 || st->hSpMusClas->wdlp_0_95_sp <= 0 )
    1773             :     {
    1774     9730840 :         st->sp_aud_decision0 = 0;
    1775     9730840 :         st->sp_aud_decision1 = 0;
    1776             :     }
    1777             :     else
    1778             :     {
    1779     6381842 :         st->sp_aud_decision0 = 1;
    1780     6381842 :         st->sp_aud_decision1 = 1;
    1781             :     }
    1782             : 
    1783             :     /*------------------------------------------------------------------*
    1784             :      * Updates
    1785             :      *------------------------------------------------------------------*/
    1786             : 
    1787             :     /* update buffer of past non-binary decisions */
    1788    16112682 :     mvr2r( &hSpMusClas->past_dlp[0], &hSpMusClas->past_dlp[1], HANG_LEN - 2 );
    1789    16112682 :     hSpMusClas->past_dlp[0] = dlp;
    1790             : 
    1791    16112682 :     mvr2r( &hSpMusClas->past_dlp_mean_ST[0], &hSpMusClas->past_dlp_mean_ST[1], HANG_LEN - 2 );
    1792    16112682 :     hSpMusClas->past_dlp_mean_ST[0] = hSpMusClas->dlp_mean_ST;
    1793             : 
    1794             :     /* update buffer of past binary decisions */
    1795    16112682 :     mvs2s( &hSpMusClas->past_dec[0], &hSpMusClas->past_dec[1], HANG_LEN - 2 );
    1796    16112682 :     hSpMusClas->past_dec[0] = dec;
    1797             : 
    1798             : #ifdef DEBUG_MODE_INFO
    1799             :     dbgwrite( &st->hSpMusClas->wdlp_0_95_sp, sizeof( float ), 1, 1, "res/wdlp_0_95_sp.x" );
    1800             : #endif
    1801             : 
    1802    16112682 :     return dec;
    1803             : }
    1804             : 
    1805             : /*---------------------------------------------------------------------*
    1806             :  * ivas_smc_mode_selection()
    1807             :  *
    1808             :  * 2nd stage speech/music classifier (select coding mode (ACELP, GSC and TCX) based on S/M classification)
    1809             :  * output (sp_aud_decision1 - sp_aud_decision2 -> coding mode):
    1810             :  * 0 - 0 -> ACELP
    1811             :  * 1 - 0 -> GSC
    1812             :  * 1 - 1 -> TCX
    1813             :  *---------------------------------------------------------------------*/
    1814             : 
    1815     4454425 : void ivas_smc_mode_selection(
    1816             :     Encoder_State *st,           /* i/o: encoder state structure                 */
    1817             :     const int32_t element_brate, /* i  : element bitrate                         */
    1818             :     int16_t smc_dec,             /* i  : raw decision of the 1st stage classifier*/
    1819             :     const float relE,            /* i  : relative frame energy                   */
    1820             :     const float Etot,            /* i  : total frame energy                      */
    1821             :     int16_t *attack_flag,        /* i/o: attack flag (GSC or TC)                 */
    1822             :     const float *inp,            /* i  : input signal                            */
    1823             :     const float S_map[],         /* i  : short-term correlation map              */
    1824             :     const int16_t flag_spitch    /* i  : flag to indicate very short stable pitch*/
    1825             : )
    1826             : {
    1827             :     int16_t attack;
    1828             :     float ton;
    1829             :     int16_t i;
    1830             :     float S_p2a, S_max, S_ave;
    1831             :     float thr_sp2a;
    1832             : 
    1833     4454425 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    1834             : 
    1835             :     /* initialization */
    1836     4454425 :     *attack_flag = 0;
    1837     4454425 :     st->sp_aud_decision2 = 0;
    1838             : 
    1839             :     /* signal stability estimation */
    1840     4454425 :     stab_est( Etot, hSpMusClas->gsc_lt_diff_etot, &hSpMusClas->gsc_mem_etot, &hSpMusClas->gsc_nb_thr_3, &hSpMusClas->gsc_nb_thr_1, hSpMusClas->gsc_thres, &hSpMusClas->gsc_last_music_flag, st->vad_flag );
    1841             : 
    1842             :     /* calculate variance of correlation */
    1843     4454425 :     var_cor_calc( st->old_corr, &hSpMusClas->mold_corr, hSpMusClas->var_cor_t, &hSpMusClas->high_stable_cor );
    1844             : 
    1845             :     /* attack detection */
    1846     4454425 :     attack = attack_det( inp, st->clas, st->localVAD, st->coder_type, 0, st->element_mode, st->clas, hSpMusClas->finc_prev, &hSpMusClas->lt_finc, &hSpMusClas->last_strong_attack );
    1847             : 
    1848             :     /* tonal detector */
    1849     4454425 :     ton = tonal_det( S_map, st->vad_flag, hSpMusClas->tod_S_map_lt, &hSpMusClas->tod_thr_lt, &hSpMusClas->tod_weight, &hSpMusClas->tod_S_mass_prev, &hSpMusClas->tod_S_mass_lt );
    1850             : 
    1851             : 
    1852             :     /* calculate spectral peak-to-average ratio */
    1853   360808425 :     for ( i = 0; i < TOD_NSPEC; i++ )
    1854             :     {
    1855   356354000 :         st->hSpMusClas->tod_lt_Bin_E[i] = P2A_FACT * st->hSpMusClas->tod_lt_Bin_E[i] + ( 1 - P2A_FACT ) * st->Bin_E[i];
    1856             :     }
    1857             : 
    1858     4454425 :     maximum( st->hSpMusClas->tod_lt_Bin_E, TOD_NSPEC, &S_max );
    1859     4454425 :     S_ave = sum_f( st->hSpMusClas->tod_lt_Bin_E, TOD_NSPEC ) / TOD_NSPEC;
    1860     4454425 :     S_p2a = S_max - S_ave;
    1861             : 
    1862     4454425 :     if ( element_brate <= IVAS_16k4 )
    1863             :     {
    1864     1347658 :         thr_sp2a = THR_P2A_HIGH;
    1865             :     }
    1866             :     else
    1867             :     {
    1868     3106767 :         thr_sp2a = THR_P2A;
    1869             :     }
    1870             : 
    1871             :     /* initial 3-way selection of coding modes (ACELP/GSC/TCX) */
    1872     4454425 :     if ( relE > -10.0f && ( S_p2a > thr_sp2a || ton > hSpMusClas->tod_thr_lt ) )
    1873             :     {
    1874             :         /* select TCX to encode extremely peaky signals or strongly tonal signals */
    1875      229487 :         st->sp_aud_decision1 = 1;
    1876      229487 :         st->sp_aud_decision2 = 1;
    1877             :     }
    1878     4224938 :     else if ( smc_dec == SPEECH )
    1879             :     {
    1880             :         /* select ACELP to encode speech */
    1881     1380915 :         st->sp_aud_decision1 = 0;
    1882     1380915 :         st->sp_aud_decision2 = 0;
    1883             :     }
    1884     2844023 :     else if ( smc_dec == SPEECH_OR_MUSIC )
    1885             :     {
    1886             :         /* select GSC to encode "unclear" segments (classifier's score on the borderline) */
    1887       70569 :         st->sp_aud_decision1 = 1;
    1888       70569 :         st->sp_aud_decision2 = 0;
    1889             :     }
    1890             :     else
    1891             :     {
    1892             :         /* select TCX to encode music */
    1893     2773454 :         st->sp_aud_decision1 = 1;
    1894     2773454 :         st->sp_aud_decision2 = 1;
    1895             :     }
    1896             : 
    1897             :     /* change decision from GSC to ACELP/TCX in some special cases */
    1898     4454425 :     if ( st->sp_aud_decision1 == 1 && st->sp_aud_decision2 == 0 )
    1899             :     {
    1900       70569 :         if ( hSpMusClas->ener_RAT < 0.18f && hSpMusClas->lt_dec_thres > 15.0f )
    1901             :         {
    1902             :             /* prevent GSC on strong music with almost no content below 1kHz */
    1903         111 :             st->sp_aud_decision2 = 1;
    1904             :         }
    1905       70458 :         else if ( flag_spitch )
    1906             :         {
    1907             :             /* prevent GSC on signals with very short and stable high pitch period */
    1908        2582 :             if ( hSpMusClas->wdlp_0_95_sp < 2.5f )
    1909             :             {
    1910             :                 /* select ACELP instead */
    1911        2451 :                 st->sp_aud_decision1 = 0;
    1912             :             }
    1913             :             else
    1914             :             {
    1915             :                 /* select TCX instead */
    1916         131 :                 st->sp_aud_decision2 = 1;
    1917             :             }
    1918             :         }
    1919       67876 :         else if ( hSpMusClas->high_stable_cor && st->pitch[0] >= 130 )
    1920             :         {
    1921             :             /* prevent GSC in highly correlated signal with low energy variation */
    1922             :             /* this is basically a patch against bassoon-type of music */
    1923           2 :             st->sp_aud_decision2 = 1;
    1924             :         }
    1925             :     }
    1926             : 
    1927             :     /* change decision from GSC to ACELP TC during attacks/onsets */
    1928     4454425 :     if ( st->sp_aud_decision1 == 1 && st->sp_aud_decision2 == 0 )
    1929             :     {
    1930       67874 :         if ( ( hSpMusClas->gsc_lt_diff_etot[MAX_LT - 1] > 4.5f ) &&
    1931        4601 :              ( hSpMusClas->gsc_lt_diff_etot[MAX_LT - 1] - hSpMusClas->gsc_lt_diff_etot[MAX_LT - 2] > 10.0f ) )
    1932             :         {
    1933        1197 :             if ( st->tc_cnt == 1 )
    1934             :             {
    1935             :                 /* do ACELP TC coding instead of GC/VC if onset has been already declared before */
    1936         333 :                 st->sp_aud_decision1 = 0;
    1937         333 :                 st->coder_type = TRANSITION;
    1938             :             }
    1939             :             else
    1940             :             {
    1941         864 :                 if ( attack >= ATT_3LSUB_POS )
    1942             :                 {
    1943             :                     /* do ACELP TC coding also if attack is located in the last subframe */
    1944         244 :                     st->sp_aud_decision1 = 0;
    1945         244 :                     *attack_flag = attack + 1;
    1946         244 :                     st->coder_type = TRANSITION;
    1947             :                 }
    1948         620 :                 else if ( attack >= ATT_SEG_LEN / 2 )
    1949             :                 {
    1950             :                     /* do GSC coding if attack is located after the first quarter of the first subframe */
    1951             :                     /* (pre-echo will be treated at the decoder side) */
    1952          68 :                     *attack_flag = 31;
    1953          68 :                     *attack_flag = attack + 1;
    1954             :                 }
    1955             :             }
    1956             :         }
    1957             :     }
    1958             : 
    1959     4454425 :     if ( st->localVAD == 1 && st->coder_type == GENERIC && attack > 0 /*&& *attack_flag < 32*/ /*&& st->tc_cnt != 2*/ && !( st->sp_aud_decision2 == 1 && ton > 0.65f ) )
    1960             :     {
    1961             :         /* change ACELP coder_type to TC if attack has been detected */
    1962       75279 :         st->sp_aud_decision1 = 0;
    1963       75279 :         st->sp_aud_decision2 = 0;
    1964             : 
    1965       75279 :         st->coder_type = TRANSITION;
    1966       75279 :         *attack_flag = attack + 1;
    1967             :     }
    1968             : 
    1969             : #ifdef DEBUGGING
    1970             :     if ( st->idchan == 0 && st->coder_type != INACTIVE )
    1971             :     {
    1972             :         if ( st->force == FORCE_GSC && element_brate < IVAS_24k4 )
    1973             :         {
    1974             :             /* enforce GSC */
    1975             :             st->sp_aud_decision1 = 1;
    1976             :             st->sp_aud_decision2 = 0;
    1977             :         }
    1978             :         else if ( st->force == FORCE_SPEECH && ( st->sp_aud_decision1 == 1 || st->sp_aud_decision2 == 1 ) )
    1979             :         {
    1980             :             if ( element_brate < IVAS_24k4 )
    1981             :             {
    1982             :                 /* convert TCX to GSC */
    1983             :                 st->sp_aud_decision1 = 1;
    1984             :                 st->sp_aud_decision2 = 0;
    1985             :             }
    1986             :             else
    1987             :             {
    1988             :                 /* convert TCX to ACELP */
    1989             :                 st->sp_aud_decision1 = 0;
    1990             :                 st->sp_aud_decision2 = 0;
    1991             :             }
    1992             :         }
    1993             :         else if ( st->force == FORCE_MUSIC )
    1994             :         {
    1995             :             /* enforce TCX */
    1996             :             st->sp_aud_decision1 = 1;
    1997             :             st->sp_aud_decision2 = 1;
    1998             :         }
    1999             :     }
    2000             : #endif
    2001             : 
    2002             :     /* set GSC noisy speech flag on unvoiced SWB segments */
    2003     4454425 :     st->GSC_noisy_speech = 0;
    2004     4454425 :     if ( st->vad_flag == 1 && element_brate <= IVAS_16k4 && st->lp_noise > 30.0f && st->sp_aud_decision1 == 0 && st->bwidth >= SWB && st->coder_type_raw == UNVOICED )
    2005             :     {
    2006        3245 :         st->GSC_noisy_speech = 1;
    2007             :     }
    2008             : 
    2009             :     /* set GSC submode */
    2010     4454425 :     if ( st->element_mode > EVS_MONO && ( st->sp_aud_decision1 == 1 && st->sp_aud_decision2 == 0 ) && st->total_brate > STEREO_GSC_BIT_RATE_ALLOC ) /* below STEREO_GSC_BIT_RATE_ALLOC, fall back on normal GSC */
    2011             :     {
    2012       62048 :         st->GSC_IVAS_mode = 1;
    2013       62048 :         if ( st->hSpMusClas->wdlp_0_95_sp > 0.0f )
    2014             :         {
    2015             :             /* music-like content */
    2016       40740 :             st->GSC_IVAS_mode = 3;
    2017             :         }
    2018       21308 :         else if ( st->tc_cnt > 0 )
    2019             :         {
    2020             :             /* likely presence of an onset, GSC bit allocation will be more focused on LF */
    2021        2062 :             st->GSC_IVAS_mode = 2;
    2022             :         }
    2023             : 
    2024       62048 :         if ( st->coder_type_raw == UNVOICED && st->sp_aud_decision0 == 0 /*&& st->GSC_IVAS_mode < 3*/ )
    2025             :         {
    2026        4186 :             st->GSC_noisy_speech = 1;
    2027             :         }
    2028             :         else
    2029             :         {
    2030       57862 :             st->GSC_noisy_speech = 0;
    2031             :         }
    2032             :     }
    2033             : 
    2034             :     /* set coder_type to AUDIO when GSC is selected (st->core will be set later in the decision matrix) */
    2035     4454425 :     if ( ( st->sp_aud_decision1 == 1 && st->sp_aud_decision2 == 0 ) || st->GSC_noisy_speech )
    2036             :     {
    2037       69784 :         st->coder_type = AUDIO;
    2038       69784 :         if ( st->hGSCEnc != NULL && st->GSC_noisy_speech == 0 ) /* In case of GSC_noisy_speech, NOISE_LEVEL should remain at NOISE_LEVEL_SP3 */
    2039             :         {
    2040       62353 :             st->hGSCEnc->noise_lev = NOISE_LEVEL_SP0;
    2041             :         }
    2042             :     }
    2043             : 
    2044     4454425 :     return;
    2045             : }
    2046             : 
    2047             : 
    2048             : /*------------------------------------------------------------------------*
    2049             :  * music_mixed_classif_improv()
    2050             :  *
    2051             :  * Improve 1st stage speech/music decision for mixed&music signals
    2052             :  *------------------------------------------------------------------------*/
    2053             : 
    2054       60545 : static void music_mixed_classif_improv(
    2055             :     Encoder_State *st,      /* i/o: Encoder state structure                         */
    2056             :     const float *new_inp,   /* i  : new input signal                                */
    2057             :     const float *epsP,      /* i  : LP prediction error                             */
    2058             :     const float etot,       /* i  : total frame energy                              */
    2059             :     const float old_cor,    /* i  : normalized correlation                          */
    2060             :     const float cor_map_sum /* i  : correlation map sum                             */
    2061             : )
    2062             : {
    2063             :     int16_t i, dec, len, percus_flag;
    2064             :     float p2v_map[128], ftmp, ftmp1, lt_diff, log_max_spl, epsP_tilt, max_spl;
    2065             : 
    2066       60545 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    2067             : 
    2068             :     /* find sample with maximum absolute amplitude */
    2069       60545 :     max_spl = 0;
    2070    15560065 :     for ( i = 0; i < L_FRAME; i++ )
    2071             :     {
    2072    15499520 :         if ( fabs( new_inp[i] ) > max_spl )
    2073             :         {
    2074      590384 :             max_spl = fabsf( new_inp[i] );
    2075             :         }
    2076             :     }
    2077             : 
    2078             :     /* music is considered only appearing in high SNR condition and active signal */
    2079       60545 :     if ( st->vad_flag == 0 || st->lp_speech - st->lp_noise < 25 )
    2080             :     {
    2081       15324 :         hSpMusClas->dec_mov = 0.5f;
    2082       15324 :         hSpMusClas->dec_mov1 = 0.5f;
    2083             : 
    2084       15324 :         if ( st->vad_flag == 0 )
    2085             :         {
    2086        7361 :             hSpMusClas->onset_cnt = 0;
    2087             :         }
    2088             : 
    2089       15324 :         return;
    2090             :     }
    2091             : 
    2092       45221 :     hSpMusClas->onset_cnt++;
    2093             : 
    2094       45221 :     if ( hSpMusClas->onset_cnt > 9 )
    2095             :     {
    2096       43048 :         hSpMusClas->onset_cnt = 9;
    2097             :     }
    2098             : 
    2099       45221 :     if ( hSpMusClas->onset_cnt == 1 )
    2100             :     {
    2101         246 :         set_f( hSpMusClas->buf_flux, -100, BUF_LEN );
    2102             :     }
    2103             : 
    2104             :     /* spectral analysis */
    2105       45221 :     spec_analysis( st->Bin_E, p2v_map );
    2106             : 
    2107             :     /* percussive music detection */
    2108       45221 :     log_max_spl = 20 * logf( max_spl + 0.0001f );
    2109       45221 :     lt_diff = log_max_spl - hSpMusClas->mov_log_max_spl;
    2110             : 
    2111      180884 :     for ( i = 0; i < 3; i++ )
    2112             :     {
    2113      135663 :         hSpMusClas->buf_etot[i] = hSpMusClas->buf_etot[i + 1];
    2114             :     }
    2115       45221 :     hSpMusClas->buf_etot[i] = etot;
    2116             : 
    2117       45221 :     percus_flag = 0;
    2118       45221 :     if ( hSpMusClas->buf_etot[1] - hSpMusClas->buf_etot[0] > 6 && hSpMusClas->buf_etot[2] < hSpMusClas->buf_etot[1] && hSpMusClas->buf_etot[1] - st->lp_speech > 3 )
    2119             :     {
    2120         257 :         if ( hSpMusClas->buf_etot[1] - hSpMusClas->buf_etot[3] > 3 && hSpMusClas->buf_etot[3] < hSpMusClas->buf_etot[2] && 0.5f * ( 0.5f * ( st->voicing[0] + st->voicing[1] ) + old_cor ) < 0.75f )
    2121             :         {
    2122          44 :             if ( hSpMusClas->dec_mov > 0.8f )
    2123             :             {
    2124           2 :                 percus_flag = 1;
    2125             :             }
    2126          42 :             else if ( old_cor < 0.75f && st->voicing[0] < 0.75f && st->voicing[1] < 0.75f && hSpMusClas->old_lt_diff[0] > 10 )
    2127             :             {
    2128           0 :                 percus_flag = 1;
    2129             :             }
    2130             :         }
    2131             :     }
    2132             : 
    2133             :     /* sound attack detection */
    2134       45221 :     if ( hSpMusClas->buf_etot[3] - hSpMusClas->buf_etot[2] > 6 && hSpMusClas->dec_mov > 0.9f && etot - st->lp_speech > 5 && hSpMusClas->old_lt_diff[0] > 5 )
    2135             :     {
    2136           0 :         hSpMusClas->attack_hangover = 3;
    2137             :     }
    2138             : 
    2139       45221 :     if ( st->voicing[0] > 0.9f && st->voicing[1] > 0.9f )
    2140             :     {
    2141       11967 :         if ( log_max_spl > hSpMusClas->mov_log_max_spl )
    2142             :         {
    2143         419 :             hSpMusClas->mov_log_max_spl = 0.75f * hSpMusClas->mov_log_max_spl + ( 1 - 0.75f ) * log_max_spl;
    2144             :         }
    2145             :         else
    2146             :         {
    2147       11548 :             hSpMusClas->mov_log_max_spl = 0.995f * hSpMusClas->mov_log_max_spl + ( 1 - 0.995f ) * log_max_spl;
    2148             :         }
    2149             :     }
    2150             : 
    2151       45221 :     hSpMusClas->old_lt_diff[0] = hSpMusClas->old_lt_diff[1];
    2152       45221 :     hSpMusClas->old_lt_diff[1] = lt_diff;
    2153             : 
    2154             :     /* calculate and buffer spectral energy fluctuation */
    2155       45221 :     flux( st->Bin_E, p2v_map, hSpMusClas->old_Bin_E, hSpMusClas->buf_flux, hSpMusClas->attack_hangover, hSpMusClas->dec_mov );
    2156             : 
    2157       45221 :     hSpMusClas->attack_hangover--;
    2158       45221 :     if ( hSpMusClas->attack_hangover < 0 )
    2159             :     {
    2160       45221 :         hSpMusClas->attack_hangover = 0;
    2161             :     }
    2162             : 
    2163             :     /* identify flux buffer status */
    2164       45221 :     len = 0;
    2165     2385282 :     for ( i = BUF_LEN - 1; i >= 0 && hSpMusClas->buf_flux[i] >= 0; i-- )
    2166             :     {
    2167     2340061 :         len++;
    2168             :     }
    2169             : 
    2170             :     /* reset flux buffer if percussive music is detected */
    2171       45221 :     if ( percus_flag == 1 )
    2172             :     {
    2173           2 :         set_f( &hSpMusClas->buf_flux[BUF_LEN - len], 5, len );
    2174             :     }
    2175             : 
    2176             :     /* calculate and buffer the tilt of residual LP analysis energies */
    2177       45221 :     ftmp = 0.00001f;
    2178       45221 :     ftmp1 = 0;
    2179      723536 :     for ( i = 1; i < 16; i++ )
    2180             :     {
    2181      678315 :         ftmp += epsP[i] * epsP[i];
    2182      678315 :         ftmp1 += epsP[i] * epsP[i + 1];
    2183             :     }
    2184             : 
    2185       45221 :     epsP_tilt = ftmp1 / ftmp;
    2186             : 
    2187     2713260 :     for ( i = 0; i < BUF_LEN - 1; i++ )
    2188             :     {
    2189     2668039 :         hSpMusClas->buf_epsP_tilt[i] = hSpMusClas->buf_epsP_tilt[i + 1];
    2190             :     }
    2191       45221 :     hSpMusClas->buf_epsP_tilt[i] = epsP_tilt;
    2192             : 
    2193             :     /* calculate and buffer highband spectral peakness */
    2194       45221 :     tonal_dist( p2v_map, hSpMusClas->buf_pkh, hSpMusClas->buf_Ntonal, hSpMusClas->buf_Ntonal2, hSpMusClas->buf_Ntonal_lf );
    2195             : 
    2196             :     /* buffer sum of correlation map */
    2197     2713260 :     for ( i = 0; i < BUF_LEN - 1; i++ )
    2198             :     {
    2199     2668039 :         hSpMusClas->buf_cor_map_sum[i] = hSpMusClas->buf_cor_map_sum[i + 1];
    2200             :     }
    2201       45221 :     hSpMusClas->buf_cor_map_sum[i] = cor_map_sum;
    2202             : 
    2203             :     /* buffer voicing metric */
    2204      452210 :     for ( i = 0; i < 9; i++ )
    2205             :     {
    2206      406989 :         hSpMusClas->buf_dlp[i] = hSpMusClas->buf_dlp[i + 1];
    2207             :     }
    2208       45221 :     hSpMusClas->buf_dlp[i] = hSpMusClas->lps - hSpMusClas->lpm;
    2209             : 
    2210             :     /* classification */
    2211       45221 :     dec = mode_decision( st, len, &hSpMusClas->dec_mov, hSpMusClas->buf_flux, hSpMusClas->buf_epsP_tilt, hSpMusClas->buf_pkh, hSpMusClas->buf_cor_map_sum, hSpMusClas->buf_Ntonal, hSpMusClas->buf_Ntonal2, hSpMusClas->buf_Ntonal_lf, hSpMusClas->buf_dlp );
    2212             : 
    2213             :     /* update long term moving average of the classification decisions */
    2214       45221 :     if ( len > 30 )
    2215             :     {
    2216       38592 :         hSpMusClas->dec_mov = 0.97f * hSpMusClas->dec_mov + ( 1 - 0.97f ) * dec;
    2217       38592 :         hSpMusClas->dec_mov1 = 0.97f * hSpMusClas->dec_mov1 + ( 1 - 0.97f ) * dec;
    2218             :     }
    2219             : 
    2220             :     /* update long-term unvoiced counter */
    2221       45221 :     if ( ( st->coder_type_raw == UNVOICED || st->coder_type_raw == INACTIVE ) && etot > 1.5f && hSpMusClas->buf_Ntonal2[59] < 2 )
    2222             :     {
    2223        1889 :         hSpMusClas->UV_cnt1 -= 8;
    2224             :     }
    2225             :     else
    2226             :     {
    2227       43332 :         hSpMusClas->UV_cnt1++;
    2228             :     }
    2229             : 
    2230       45221 :     if ( hSpMusClas->UV_cnt1 > 300 )
    2231             :     {
    2232       28269 :         hSpMusClas->UV_cnt1 = 300;
    2233             :     }
    2234       16952 :     else if ( hSpMusClas->UV_cnt1 < 0 )
    2235             :     {
    2236           4 :         hSpMusClas->UV_cnt1 = 0;
    2237             :     }
    2238             : 
    2239       45221 :     hSpMusClas->LT_UV_cnt1 = 0.9f * hSpMusClas->LT_UV_cnt1 + 0.1f * hSpMusClas->UV_cnt1;
    2240             : 
    2241             :     /* revert classification decision due to long-term unvoiced counter */
    2242       45221 :     if ( dec == 1 && hSpMusClas->dec_mov1 < 0.2f && hSpMusClas->LT_UV_cnt1 < 200 )
    2243             :     {
    2244          40 :         dec = 0;
    2245             :     }
    2246             : 
    2247             :     /* overwrite 1st stage speech/music decision to music */
    2248       45221 :     if ( dec == 1 )
    2249             :     {
    2250       11239 :         st->sp_aud_decision1 = 1;
    2251             :     }
    2252             : 
    2253       45221 :     return;
    2254             : }
    2255             : 
    2256             : 
    2257             : /*---------------------------------------------------------------------*
    2258             :  * spec_analysis()
    2259             :  *
    2260             :  * Spectral analysis for mixed/music classification improvement
                     :  *
                     :  * Locates the local maxima (peaks) and local minima (valleys) of Bin_E
                     :  * and, for every peak enclosed by two consecutive valleys, stores
                     :  *     2 * peak - left_valley - right_valley
                     :  * in p2v_map[] at the bin index of that peak. All other entries of
                     :  * p2v_map[0 .. L_FFT / 2 - 2] are set to 0. When no peak is found the
                     :  * map is cleared and the function returns early.
    2261             :  *---------------------------------------------------------------------*/
    2262             : 
    2263       45221 : static void spec_analysis(
    2264             :     float *Bin_E,  /* i  : log energy spectrum of the current frame        */
    2265             :     float *p2v_map /* o  : spectral peakiness map                          */
    2266             : )
    2267             : {
    2268             :     int16_t i, k, m;
    2269             :     float peak[L_FFT / 4 + 1];         /* values of the detected peaks                     */
    2270             :     float valley[L_FFT / 4 + 1];       /* values of the detected valleys                   */
    2271             :     int16_t peak_idx[L_FFT / 4 + 1];   /* bin indices of the peaks, closed by -1 sentinels */
    2272             :     int16_t valey_idx[L_FFT / 4 + 1];  /* bin indices of the valleys                       */
    2273             :     float p2v[L_FFT / 4 + 1];          /* peak-to-valley distance of each accepted peak    */
    2274             : 
    2275             :     /* find spectral peaks: bins 1 .. L_FFT / 2 - 3 that are strictly larger
                     :        than both of their neighbours; k counts the peaks found */
    2276       45221 :     k = 0;
    2277     5697846 :     for ( i = 1; i < L_FFT / 2 - 2; i++ )
    2278             :     {
    2279     5652625 :         if ( Bin_E[i] > Bin_E[i - 1] && Bin_E[i] > Bin_E[i + 1] )
    2280             :         {
    2281     1508898 :             peak[k] = Bin_E[i];
    2282     1508898 :             peak_idx[k] = i;
    2283     1508898 :             k++;
    2284             :         }
    2285             :     }
    2286       45221 :     assert( k + 1 < L_FFT / 4 + 1 ); /* the two sentinel writes below must stay inside peak_idx[] */
    2287       45221 :     peak_idx[k] = -1;     /* sentinel: valley indices are >= 0, so the peak/valley test below fails here */
    2288       45221 :     peak_idx[k + 1] = -1;
    2289             : 
    2290       45221 :     if ( k == 0 )
    2291             :     {
                     :         /* no peak at all: output an all-zero map */
    2292         768 :         for ( i = 0; i < L_FFT / 2 - 1; i++ )
    2293             :         {
    2294         762 :             p2v_map[i] = 0;
    2295             :         }
    2296             : 
    2297           6 :         return;
    2298             :     }
    2299             : 
    2300             :     /* find spectral valleys; bin 0 is taken as the first valley only when
                     :        the spectrum rises right after it */
    2301       45215 :     m = 0;
    2302       45215 :     if ( Bin_E[0] < Bin_E[1] )
    2303             :     {
    2304       21150 :         valley[0] = Bin_E[0];
    2305       21150 :         valey_idx[0] = 0;
    2306       21150 :         m++;
    2307             :     }
    2308             : 
    2309       45215 :     k = L_FFT / 2 - 2; /* k is re-used: lowest bin of the strictly rising run that ends at bin L_FFT / 2 - 2 */
    2310       76940 :     for ( i = L_FFT / 2 - 3; i >= 0 && Bin_E[i + 1] > Bin_E[i]; i-- )
    2311             :     {
    2312       31725 :         k = i;
    2313             :     }
    2314             : 
    2315     5665365 :     for ( i = 1; i < k; i++ ) /* interior valleys: strictly smaller than both neighbours */
    2316             :     {
    2317     5620150 :         if ( Bin_E[i] < Bin_E[i - 1] && Bin_E[i] < Bin_E[i + 1] )
    2318             :         {
    2319     1487748 :             valley[m] = Bin_E[i];
    2320     1487748 :             valey_idx[m] = i;
    2321     1487748 :             m++;
    2322             :         }
    2323             :     }
    2324             : 
    2325       45215 :     valley[m] = Bin_E[k]; /* closing valley at bin k; m is not incremented, so entry m is the right-hand bound of the last interval */
    2326       45215 :     valey_idx[m] = k;
    2327             : 
    2328             :     /* find spectral peak to valley distances: for each interval between
                     :        valley i and valley i + 1, accept the next pending peak if it lies
                     :        strictly inside the interval; k now counts the accepted peaks */
    2329       45215 :     k = 0;
    2330     1554113 :     for ( i = 0; i < m; i++ )
    2331             :     {
    2332     1508898 :         if ( peak_idx[k] > valey_idx[i] && peak_idx[k] < valey_idx[i + 1] )
    2333             :         {
    2334     1508898 :             p2v[k] = 2 * peak[k] - valley[i] - valley[i + 1];
    2335     1508898 :             k++;
    2336             :         }
    2337             :     }
    2338             : 
    2339     5787520 :     for ( i = 0; i < L_FFT / 2 - 1; i++ ) /* clear the map before scattering the results */
    2340             :     {
    2341     5742305 :         p2v_map[i] = 0;
    2342             :     }
    2343             : 
    2344     1554113 :     for ( i = 0; i < k; i++ ) /* write each distance at the bin index of its peak */
    2345             :     {
    2346     1508898 :         p2v_map[peak_idx[i]] = p2v[i];
    2347             :     }
    2348             : 
    2349       45215 :     return;
    2350             : }
    2351             : 
    2352             : /*---------------------------------------------------------------------*
    2353             :  * flux()
    2354             :  *
    2355             :  * Calculation of spectral flux
                     :  *
                     :  * The flux is the mean absolute difference between Bin_E[] and the first
                     :  * N_OLD_BIN_E entries of old_Bin_E[], taken only over bins whose
                     :  * p2v_map[] entry is non-zero. It is set to 5 when no such bin exists
                     :  * and limited to 20 when dec_mov exceeds 0.8. Afterwards old_Bin_E[]
                     :  * (three consecutive segments of N_OLD_BIN_E values) is shifted by one
                     :  * segment and the current spectrum is stored in the last segment.
                     :  * The flux value is appended to buf_flux[] only when attack_hangover
                     :  * is not positive.
    2356             :  *---------------------------------------------------------------------*/
    2357             : 
    2358       45221 : static void flux(
    2359             :     float *Bin_E,            /* i  : log energy spectrum of the current frame        */
    2360             :     float *p2v_map,          /* i  : spectral peakiness map                          */
    2361             :     float *old_Bin_E,        /* i/o: log energy spectrum of the frame 60ms ago       */
    2362             :     float *buf_flux,         /* i/o: buffer storing spectral energy fluctuation      */
    2363             :     int16_t attack_hangover, /* i  : hangover preventing flux buffering              */
    2364             :     float dec_mov            /* i  : moving average of classifier decision           */
    2365             : )
    2366             : {
    2367             :     int16_t i;
    2368             :     float *pt1, *pt2, *pt3, *pt4, *pt5, *pt6;
    2369             :     float flux; /* NOTE(review): this local has the same name as the enclosing function */
    2370             :     int16_t cnt; /* number of bins that contributed to the flux sum */
    2371             : 
    2372             :     /* calculate flux: accumulate |Bin_E - old_Bin_E| over the bins that carry
                     :        a non-zero entry in the peakiness map */
    2373       45221 :     flux = 0;
    2374       45221 :     cnt = 0;
    2375     1944503 :     for ( i = 0; i < N_OLD_BIN_E; i++ )
    2376             :     {
    2377     1899282 :         if ( p2v_map[i] != 0 )
    2378             :         {
    2379      463558 :             flux += fabsf( Bin_E[i] - old_Bin_E[i] );
    2380      463558 :             cnt++;
    2381             :         }
    2382             :     }
    2383             : 
    2384       45221 :     if ( cnt == 0 )
    2385             :     {
    2386           6 :         flux = 5; /* fixed value used when no bin qualified (also avoids dividing by zero) */
    2387             :     }
    2388             :     else
    2389             :     {
    2390       45215 :         flux = flux / (float) cnt;
    2391             :     }
    2392             : 
    2393       45221 :     if ( flux > 20 && dec_mov > 0.8f ) /* cap the flux at 20 while the decision average is above 0.8 */
    2394             :     {
    2395         988 :         flux = 20;
    2396             :     }
    2397             : 
    2398             :     /* update old Bin_E buffer: segment 0 <- segment 1, segment 1 <- segment 2,
                     :        segment 2 <- current Bin_E (each segment has N_OLD_BIN_E values);
                     :        pt1/pt4/pt6 are the write pointers, pt2/pt5/pt3 the read pointers */
    2399       45221 :     pt1 = old_Bin_E;
    2400       45221 :     pt2 = old_Bin_E + N_OLD_BIN_E;
    2401       45221 :     pt3 = Bin_E;
    2402       45221 :     pt4 = old_Bin_E + N_OLD_BIN_E;
    2403       45221 :     pt5 = old_Bin_E + 2 * N_OLD_BIN_E;
    2404       45221 :     pt6 = old_Bin_E + 2 * N_OLD_BIN_E;
    2405             : 
    2406     1944503 :     for ( i = 0; i < N_OLD_BIN_E; i++ )
    2407             :     {
    2408     1899282 :         *pt1++ = *pt2++; /* each source element is read before the same iteration overwrites it */
    2409     1899282 :         *pt4++ = *pt5++;
    2410     1899282 :         *pt6++ = *pt3++;
    2411             :     }
    2412             : 
    2413             :     /* update flux buffer: shift left by one entry and append the new value,
                     :        skipped entirely while attack_hangover is positive */
    2414       45221 :     if ( attack_hangover <= 0 )
    2415             :     {
    2416     2713260 :         for ( i = 0; i < BUF_LEN - 1; i++ )
    2417             :         {
    2418     2668039 :             buf_flux[i] = buf_flux[i + 1];
    2419             :         }
    2420             : 
    2421       45221 :         buf_flux[i] = flux; /* i == BUF_LEN - 1 after the loop */
    2422             :     }
    2423             : 
    2424       45221 :     return;
    2425             : }
    2426             : 
    2427             : 
    2428             : /*---------------------------------------------------------------------*
    2429             :  * tonal_dist()
    2430             :  *
    2431             :  * Calculation of spectral distance
                     :  *
                     :  * Derives four per-frame measures from the peakiness map and appends
                     :  * each of them to its history buffer (oldest entry dropped):
                     :  *   - pk        : sum of p2v_map[64 .. 126]
                     :  *   - Ntonal    : number of bins in 0 .. 126 with p2v_map > 55
                     :  *   - Ntonal2   : number of bins in 0 .. 126 with p2v_map > 80
                     :  *   - Ntonal_lf : number of bins in 0 .. 63  with p2v_map > 80
    2432             :  *---------------------------------------------------------------------*/
    2433             : 
    2434       45221 : static void tonal_dist(
    2435             :     float *p2v_map,      /* i  : spectral peakiness map                          */
    2436             :     float *buf_pkh,      /* i/o: buffer storing highband spectral peakiness      */
    2437             :     float *buf_Ntonal,   /* i/o: buffer storing No.of 1st spectral tone          */
    2438             :     float *buf_Ntonal2,  /* i/o: buffer storing No.of 2nd spectral tone          */
    2439             :     float *buf_Ntonal_lf /* i/o: buffer storing No.of 2nd spectral tone, low band */
    2440             : )
    2441             : {
    2442             :     int16_t i;
    2443             :     float pk;          /* accumulated peakiness of bins 64 .. 126 */
    2444             :     int16_t Ntonal;    /* bins above the 1st threshold (55)       */
    2445             :     int16_t Ntonal2;   /* bins above the 2nd threshold (80)       */
    2446             :     int16_t Ntonal_lf; /* bins above 80 within bins 0 .. 63 only  */
    2447             : 
    2448             :     /* find number of tonals, number of tonals at low-band,
    2449             :     spectral peakiness at high-band */
    2450       45221 :     pk = 0;
    2451       45221 :     Ntonal = 0;
    2452       45221 :     Ntonal2 = 0;
    2453       45221 :     Ntonal_lf = 0;
    2454     2939365 :     for ( i = 0; i < 64; i++ ) /* lower part of the map: bins 0 .. 63 */
    2455             :     {
    2456     2894144 :         if ( p2v_map[i] > 55 )
    2457             :         {
    2458      218371 :             Ntonal++;
    2459             :         }
    2460             : 
    2461     2894144 :         if ( p2v_map[i] > 80 )
    2462             :         {
    2463      123506 :             Ntonal2++;
    2464      123506 :             Ntonal_lf++;
    2465             :         }
    2466             :     }
    2467             : 
    2468     2894144 :     for ( i = 64; i < 127; i++ ) /* upper part of the map: bins 64 .. 126 */
    2469             :     {
    2470     2848923 :         if ( p2v_map[i] != 0 )
    2471             :         {
    2472      785835 :             pk += p2v_map[i];
    2473             :         }
    2474             : 
    2475     2848923 :         if ( p2v_map[i] > 55 )
    2476             :         {
    2477      101641 :             Ntonal++;
    2478             :         }
    2479             : 
    2480     2848923 :         if ( p2v_map[i] > 80 )
    2481             :         {
    2482       39210 :             Ntonal2++;
    2483             :         }
    2484             :     }
    2485             : 
    2486             :     /* update buffers: shift every history left by one entry */
    2487     2713260 :     for ( i = 0; i < BUF_LEN - 1; i++ )
    2488             :     {
    2489     2668039 :         buf_pkh[i] = buf_pkh[i + 1];
    2490     2668039 :         buf_Ntonal[i] = buf_Ntonal[i + 1];
    2491     2668039 :         buf_Ntonal2[i] = buf_Ntonal2[i + 1];
    2492     2668039 :         buf_Ntonal_lf[i] = buf_Ntonal_lf[i + 1];
    2493             :     }
    2494             : 
    2495       45221 :     buf_pkh[i] = pk; /* i == BUF_LEN - 1 here: the newest slot of each buffer */
    2496       45221 :     buf_Ntonal[i] = (float) Ntonal;
    2497       45221 :     buf_Ntonal2[i] = (float) Ntonal2;
    2498       45221 :     buf_Ntonal_lf[i] = (float) Ntonal_lf;
    2499             : 
    2500       45221 :     return;
    2501             : }
    2502             : 
    2503             : 
    2504             : /*---------------------------------------------------------------------*
    2505             :  * mode_decision()
    2506             :  *
    2507             :  * Decision about internal mode of the mixed/music classifier improvement
                     :  *
                     :  * Returns 0 or 1. The starting value is ( *dec_mov > 0.5 ) and is then
                     :  * refined depending on len, which is used as the number of entries at
                     :  * the end of the history buffers that are taken into account:
                     :  *   - len <= 5       : the starting value is returned unchanged
                     :  *   - 5 < len < 10   : statistics over the last len entries may set 1
                     :  *   - len >= 10      : statistics over the last 10 entries may force 1 or
                     :  *                      0 (also overwriting *dec_mov), then growing windows
                     :  *                      of 10 .. len - 1 entries may set 1, and finally,
                     :  *                      for a full buffer, the tonal counters are checked
                     :  * st is only read, to obtain hSpMusClas->lps and hSpMusClas->lpm.
    2508             :  *---------------------------------------------------------------------*/
    2509             : 
    2510       45221 : static int16_t mode_decision(
    2511             :     Encoder_State *st,
    2512             :     int16_t len,            /* i  : buffering status                                */
    2513             :     float *dec_mov,         /* i/o: moving average of classifier decision           */
    2514             :     float *buf_flux,        /* i  : buffer storing spectral energy fluctuation      */
    2515             :     float *buf_epsP_tilt,   /* i  : buffer storing LP prediction error tilt         */
    2516             :     float *buf_pkh,         /* i  : buffer storing highband spectral peakiness      */
    2517             :     float *buf_cor_map_sum, /* i  : buffer storing correlation map sum              */
    2518             :     float *buf_Ntonal,      /* i  : buffer storing No.of 1st spectral tone          */
    2519             :     float *buf_Ntonal2,     /* i  : buffer storing No.of 2nd spectral tone          */
    2520             :     float *buf_Ntonal_lf,   /* i  : buffer storing low band spectral tone ratio     */
    2521             :     float *buf_dlp          /* i  : buffer storing voicing estimate                 */
    2522             : )
    2523             : {
    2524             :     int16_t mode;
    2525             :     int16_t i;
    2526             :     int16_t voiced_cnt; /* number of inspected buf_dlp[] entries that are positive */
    2527             :     float M_pkh;
    2528             :     float M_cor_map_sum;
    2529             :     float M_Ntonal;
    2530             :     float M_flux;
    2531             :     float V_epsP_tilt;
    2532             :     float lf_Ntonal_ratio;
    2533             : 
    2534       45221 :     mode = *dec_mov > 0.5f; /* default: follow the moving average of past decisions */
    2535             : 
    2536       45221 :     if ( len <= 5 )
    2537             :     {
    2538        1214 :         return ( mode ); /* too few buffered entries: keep the default */
    2539             :     }
    2540       44007 :     else if ( len < 10 )
    2541             :     {
                     :         /* statistics over the last len entries of each buffer */
    2542         959 :         M_pkh = mean( buf_pkh + BUF_LEN - len, len );
    2543         959 :         M_cor_map_sum = mean( buf_cor_map_sum + BUF_LEN - len, len );
    2544         959 :         M_Ntonal = mean( buf_Ntonal + BUF_LEN - len, len );
    2545         959 :         V_epsP_tilt = var( buf_epsP_tilt + BUF_LEN - len, len );
    2546             : 
    2547         959 :         voiced_cnt = 0;
    2548        6713 :         for ( i = 9; i > 3; i-- ) /* only buf_dlp[4 .. 9] are inspected in this branch */
    2549             :         {
    2550        5754 :             if ( buf_dlp[i] > 0.0f )
    2551             :             {
    2552        2962 :                 voiced_cnt++;
    2553             :             }
    2554             :         }
    2555             : 
    2556         959 :         if ( ( M_pkh > 1100 || V_epsP_tilt < 0.00008f || M_cor_map_sum > 100 ) && voiced_cnt < 4 )
    2557             :         {
    2558          69 :             mode = 1;
    2559             :         }
    2560         890 :         else if ( M_Ntonal > 27 && voiced_cnt < 4 )
    2561             :         {
    2562           0 :             mode = 1;
    2563             :         }
    2564             :     }
    2565             :     else
    2566             :     {
    2567       43048 :         voiced_cnt = 0;
    2568      473528 :         for ( i = 0; i < 10; i++ ) /* here buf_dlp[0 .. 9] are inspected */
    2569             :         {
    2570      430480 :             if ( buf_dlp[i] > 0.0f )
    2571             :             {
    2572      202301 :                 voiced_cnt++;
    2573             :             }
    2574             :         }
    2575             : 
                     :         /* statistics over the last 10 entries of each buffer */
    2576       43048 :         M_flux = mean( &buf_flux[BUF_LEN - 10], 10 );
    2577       43048 :         M_pkh = mean( buf_pkh + BUF_LEN - 10, 10 );
    2578       43048 :         M_cor_map_sum = mean( buf_cor_map_sum + BUF_LEN - 10, 10 );
    2579       43048 :         V_epsP_tilt = var( buf_epsP_tilt + BUF_LEN - 10, 10 );
    2580             : 
                     :         /* early exit with 1; *dec_mov is forced to 1 as well.
                     :            NOTE(review): &buf_flux[55] presumably means the last 5 entries,
                     :            i.e. BUF_LEN - 5 with BUF_LEN == 60 - confirm */
    2581       43048 :         if ( ( M_flux < 8.5f || ( V_epsP_tilt < 0.001f && M_flux < 12.0f ) || M_pkh > 1050 || M_cor_map_sum > 100 ) && voiced_cnt < 3 && mean( &buf_flux[55], 5 ) < 15 )
    2582             :         {
    2583        7163 :             mode = 1;
    2584        7163 :             *dec_mov = 1;
    2585        7163 :             return ( mode );
    2586             :         }
    2587             : 
                     :         /* early exit with 0; *dec_mov is forced to 0 as well.
                     :            NOTE(review): buf_flux[59] presumably is the newest entry
                     :            (BUF_LEN - 1) - confirm */
    2588       35885 :         if ( M_flux > 16.0f || ( M_flux > 15 && voiced_cnt > 2 ) || mean( &buf_flux[55], 5 ) > 19.0f || ( buf_flux[59] >= 20 && st->hSpMusClas->lps - st->hSpMusClas->lpm > 0 ) )
    2589             :         {
    2590       29867 :             *dec_mov = 0;
    2591       29867 :             mode = 0;
    2592       29867 :             return ( mode );
    2593             :         }
    2594             : 
                     :         /* growing analysis window of i = 10 .. len - 1 entries; the
                     :            thresholds are scaled with len (not with i), so they are the
                     :            same for every window length; *dec_mov is left untouched here */
    2595      134147 :         for ( i = 10; i < len; i++ )
    2596             :         {
    2597      131383 :             M_flux = mean( &buf_flux[BUF_LEN - i], i );
    2598      131383 :             M_pkh = mean( buf_pkh + BUF_LEN - i, i );
    2599      131383 :             M_cor_map_sum = mean( buf_cor_map_sum + BUF_LEN - i, i );
    2600      131383 :             V_epsP_tilt = var( buf_epsP_tilt + BUF_LEN - i, i );
    2601             : 
    2602      131383 :             if ( ( ( M_flux < 12 + 0.05f * ( len - 10 ) && mean( &buf_flux[BUF_LEN - 10], 10 ) < 15 ) || V_epsP_tilt < 0.0001f + 0.000018f * ( len - 10 ) || M_pkh > 1050 - 5.0f * ( len - 10 ) || M_cor_map_sum > 95 - 0.3f * ( len - 10 ) ) && voiced_cnt < 3 )
    2603             :             {
    2604        3254 :                 mode = 1;
    2605        3254 :                 return ( mode );
    2606             :             }
    2607             :         }
    2608             : 
    2609        2764 :         if ( len == BUF_LEN ) /* full buffer: consult the tonal counters over the whole history */
    2610             :         {
    2611        2418 :             M_Ntonal = mean( buf_Ntonal, BUF_LEN );
    2612        2418 :             lf_Ntonal_ratio = sum_f( buf_Ntonal_lf, BUF_LEN ) / ( sum_f( buf_Ntonal2, BUF_LEN ) + 0.0001f ); /* the small offset keeps the denominator non-zero */
    2613             : 
    2614        2418 :             if ( M_Ntonal > 18 || lf_Ntonal_ratio < 0.2f )
    2615             :             {
    2616          20 :                 mode = 1;
    2617             :             }
    2618        2398 :             else if ( M_Ntonal < 1 )
    2619             :             {
    2620           0 :                 mode = 0;
    2621             :             }
    2622             :         }
    2623             :     }
    2624             : 
    2625        3723 :     return ( mode );
    2626             : }
    2627             : 
    2628             : 
    2629             : /*----------------------------------------------------------------------------------*
    2630             :  * tonal_context_improv()
    2631             :  *
    2632             :  * Context-based improvement of 1st/2nd stage speech/music decision on stable tonal signals
                     :  *
                     :  * Processing steps, in the order they appear below:
                     :  *   1. reset of the long-term buffers/states after a MODE2 frame
                     :  *   2. tonality measures from three sub-ranges of PS[0 .. 79]
                     :  *   3. update of the long-term voicing, correlation and tonality values
                     :  *   4. pitch-stability measure and shift of the past-decision buffer
                     :  *   5. possible reversal of a music decision to speech, with a hangover
                     :  *   6. "strong music" and "strong speech" state machines
                     :  *   7. final correction of st->sp_aud_decision1 / st->sp_aud_decision2
                     :  * The function writes st->sp_aud_decision1, st->sp_aud_decision2 and
                     :  * several members of st->hSpMusClas.
    2633             :  *----------------------------------------------------------------------------------*/
    2634             : 
    2635       60545 : static void tonal_context_improv(
    2636             :     Encoder_State *st,          /* i/o: encoder state structure                       */
    2637             :     const float PS[],           /* i  : energy spectrum                               */
    2638             :     const float voi_fv,         /* i  : scaled voicing feature                        */
    2639             :     const float cor_map_sum_fv, /* i  : scaled correlation map feature                */
    2640             :     const float LPCErr          /* i  : scaled LP prediction error feature            */
    2641             : )
    2642             : {
    2643             :     int16_t lt_pitch_diff;
    2644             :     float sort_max, sort_avg, sort_val[80];
    2645             :     float tonality, tonality1, tonality2, tonality3, t2, t3, tL, err, cor, dft;
    2646             : 
    2647       60545 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    2648             : 
    2649             :     /* reset in case of codec mode switching */
    2650       60545 :     if ( st->last_codec_mode == MODE2 )
    2651             :     {
    2652       12782 :         set_f( hSpMusClas->tonality2_buf, 0, HANG_LEN_INIT );
    2653       12782 :         set_f( hSpMusClas->tonality3_buf, 0, HANG_LEN_INIT );
    2654       12782 :         set_f( hSpMusClas->LPCErr_buf, 0, HANG_LEN_INIT );
    2655       12782 :         hSpMusClas->lt_music_hangover = 0;
    2656       12782 :         hSpMusClas->lt_music_state = 0;
    2657       12782 :         hSpMusClas->lt_speech_hangover = 0;
    2658       12782 :         hSpMusClas->lt_speech_hangover = 0;
    2659             :     }
    2660             : 
    2661             :     /* estimate maximum tonality in bands [0-1 kHz], [1-2kHz] and [2-4kHz] */
    2662       60545 :     mvr2r( PS, sort_val, 80 ); /* work on a local copy; PS itself is const */
    2663             : 
    2664             :     /* tonality in band 0-1 kHz: largest value divided by the sum of the first
                     :        10 values of the sorted range (presumably v_sort orders ascending,
                     :        since the maximum is read from the last index - confirm).
                     :        NOTE(review): no guard against sort_avg == 0 is visible here */
    2665       60545 :     v_sort( sort_val, 0, 19 );
    2666       60545 :     sort_max = sort_val[19];
    2667       60545 :     sort_avg = sum_f( &sort_val[0], 10 );
    2668       60545 :     tonality1 = sort_max / sort_avg;
    2669             : 
    2670             :     /* tonality in band 1-2 kHz */
    2671       60545 :     v_sort( sort_val, 20, 39 );
    2672       60545 :     sort_max = sort_val[39];
    2673       60545 :     sort_avg = sum_f( &sort_val[20], 10 );
    2674       60545 :     tonality2 = sort_max / sort_avg;
    2675             : 
    2676             :     /* tonality in band 2-4 kHz */
    2677       60545 :     v_sort( sort_val, 40, 79 );
    2678       60545 :     sort_max = sort_val[79];
    2679       60545 :     sort_avg = sum_f( &sort_val[40], 20 );
    2680       60545 :     tonality3 = sort_max / sort_avg;
    2681             : 
    2682       60545 :     tonality = max( max( tonality1, tonality2 ), tonality3 );
    2683             : 
                     :     /* long-term smoothing; the new frame gets a much larger weight when the
                     :        VAD hangover counter equals 10 while the VAD flag is set */
    2684       60545 :     if ( st->hVAD->hangover_cnt == 10 && st->vad_flag == 1 )
    2685             :     {
    2686             :         /* long-term voicing parameter */
    2687         667 :         hSpMusClas->lt_voicing = 0.1f * hSpMusClas->lt_voicing + 0.9f * *st->voicing;
    2688             : 
    2689             :         /* long-term correlation value */
    2690         667 :         hSpMusClas->lt_corr = 0.1f * hSpMusClas->lt_corr + 0.9f * st->old_corr;
    2691             : 
    2692             :         /* long-term tonality measure */
    2693         667 :         hSpMusClas->lt_tonality = 0.1f * hSpMusClas->lt_tonality + 0.9f * tonality;
    2694             :     }
    2695             :     else
    2696             :     {
    2697             :         /* long-term voicing parameter */
    2698       59878 :         hSpMusClas->lt_voicing = 0.7f * hSpMusClas->lt_voicing + 0.3f * *st->voicing;
    2699             : 
    2700             :         /* long-term correlation value */
    2701       59878 :         hSpMusClas->lt_corr = 0.7f * hSpMusClas->lt_corr + 0.3f * st->old_corr;
    2702             : 
    2703             :         /* long-term tonality measure */
    2704       59878 :         hSpMusClas->lt_tonality = 0.5f * hSpMusClas->lt_tonality + 0.5f * tonality;
    2705             :     }
    2706             : 
    2707             :     /* pitch difference w.r.t to past 3 frames: sum of the absolute differences
                     :        between st->pitch[0] and each of the three stored pitch values */
    2708       60545 :     lt_pitch_diff = (int16_t) abs( hSpMusClas->lt_corr_pitch[0] - st->pitch[0] );
    2709       60545 :     lt_pitch_diff += (int16_t) abs( hSpMusClas->lt_corr_pitch[1] - st->pitch[0] );
    2710       60545 :     lt_pitch_diff += (int16_t) abs( hSpMusClas->lt_corr_pitch[2] - st->pitch[0] );
    2711             : 
                     :     /* shift the pitch history and store the current pitch as the newest entry */
    2712       60545 :     hSpMusClas->lt_corr_pitch[0] = hSpMusClas->lt_corr_pitch[1];
    2713       60545 :     hSpMusClas->lt_corr_pitch[1] = hSpMusClas->lt_corr_pitch[2];
    2714       60545 :     hSpMusClas->lt_corr_pitch[2] = st->pitch[0];
    2715             : 
                     :     /* shift the past-decision buffer; entry [2] is written at the very end of
                     :        this function, so [0] and [1] now hold the two previous decisions */
    2716       60545 :     hSpMusClas->lt_old_mode[0] = hSpMusClas->lt_old_mode[1];
    2717       60545 :     hSpMusClas->lt_old_mode[1] = hSpMusClas->lt_old_mode[2];
    2718             : 
                     :     /* music decision on a frame that is tonal in all three bands, with a
                     :        bounded long-term tonality and strong voicing/correlation/pitch-gain
                     :        or a perfectly stable pitch */
    2719       80431 :     if ( st->sp_aud_decision1 == 1 &&
    2720       36581 :          ( min( min( tonality1, tonality2 ), tonality3 ) > 50.0f ) &&
    2721        1314 :          ( tonality1 + tonality2 > 200.0f && tonality2 + tonality3 > 200.0f && tonality1 + tonality3 > 200.0f ) &&
    2722         985 :          ( hSpMusClas->lt_tonality < 20000.0f ) &&
    2723         985 :          ( ( hSpMusClas->lt_tonality > 1000 && max( hSpMusClas->lt_voicing, *st->voicing ) > 0.99f ) ||
    2724         940 :            ( hSpMusClas->lt_tonality > 1500 && hSpMusClas->lt_corr > 0.99f ) ||
    2725         938 :            ( hSpMusClas->lt_tonality > 3000 && hSpMusClas->lowrate_pitchGain > 0.96f ) ||
    2726         506 :            ( lt_pitch_diff == 0 && hSpMusClas->lowrate_pitchGain > 0.89f ) ) )
    2727             :     {
    2728          98 :         if ( sum_s( hSpMusClas->lt_old_mode, 2 ) < 2 ) /* presumably: not both of the two previous decisions were 1 */
    2729             :         {
    2730             :             /* probably speech - change the decision to speech */
    2731          26 :             st->sp_aud_decision1 = 0;
    2732          26 :             st->sp_aud_decision2 = 0;
    2733             : 
    2734          26 :             if ( hSpMusClas->lt_hangover == 0 )
    2735             :             {
    2736           6 :                 hSpMusClas->lt_hangover = 6; /* start a hangover; it is counted down in the else branch below */
    2737             :             }
    2738             :         }
    2739             :     }
    2740             :     else
    2741             :     {
    2742             :         /* not speech, but still in the hangover period - change the decision to speech */
    2743       60447 :         if ( hSpMusClas->lt_hangover > 0 )
    2744             :         {
    2745          36 :             st->sp_aud_decision1 = 0;
    2746          36 :             st->sp_aud_decision2 = 0;
    2747          36 :             hSpMusClas->lt_hangover--;
    2748             :         }
    2749             :     }
    2750             : 
    2751             :     /* calculate standard deviation of log-tonality: each history buffer is
                     :        shifted by one entry and 0.2 * log10 of the new value is appended */
    2752       60545 :     mvr2r( hSpMusClas->tonality2_buf + 1, hSpMusClas->tonality2_buf, HANG_LEN_INIT - 1 );
    2753       60545 :     hSpMusClas->tonality2_buf[HANG_LEN_INIT - 1] = 0.2f * log10f( tonality2 );
    2754       60545 :     t2 = std_dev( hSpMusClas->tonality2_buf, HANG_LEN_INIT );
    2755             : 
    2756       60545 :     mvr2r( hSpMusClas->tonality3_buf + 1, hSpMusClas->tonality3_buf, HANG_LEN_INIT - 1 );
    2757       60545 :     hSpMusClas->tonality3_buf[HANG_LEN_INIT - 1] = 0.2f * log10f( tonality3 );
    2758       60545 :     t3 = std_dev( hSpMusClas->tonality3_buf, HANG_LEN_INIT );
    2759             : 
    2760       60545 :     tL = 0.2f * log10f( hSpMusClas->lt_tonality ); /* scaled log of the long-term tonality (no buffering) */
    2761             : 
    2762             :     /* calculate standard deviation of residual LP energy */
    2763       60545 :     mvr2r( hSpMusClas->LPCErr_buf + 1, hSpMusClas->LPCErr_buf, HANG_LEN_INIT - 1 );
    2764       60545 :     hSpMusClas->LPCErr_buf[HANG_LEN_INIT - 1] = LPCErr;
    2765       60545 :     err = std_dev( hSpMusClas->LPCErr_buf, HANG_LEN_INIT );
    2766             : 
    2767       60545 :     cor = max( voi_fv - cor_map_sum_fv, 0.0f ); /* voicing feature minus correlation-map feature, floored at 0 */
    2768       60545 :     dft = 0.2f * fabsf( log10f( tonality2 ) - log10f( tonality3 ) ); /* scaled log-distance between the tonalities of bands 2 and 3 */
    2769             : 
    2770             :     /* state machine for strong music: a state change is only allowed when the
                     :        hangover counter is 0, and every change re-arms the counter with 6.
                     :        NOTE(review): 0.34 below has no f suffix, so that comparison is
                     :        evaluated in double */
    2771       60545 :     if ( st->sp_aud_decision1 == 1 && hSpMusClas->lt_music_state == 0 && hSpMusClas->lt_music_hangover == 0 &&
    2772       17044 :          t2 < 0.54f && t2 > 0.26f && t3 > 0.22f && tL < 0.54f && tL > 0.26f && err > 0.5f )
    2773             :     {
    2774         111 :         hSpMusClas->lt_music_state = 1;
    2775         111 :         hSpMusClas->lt_music_hangover = 6;
    2776             :     }
    2777       60434 :     else if ( hSpMusClas->lt_music_state == 1 && hSpMusClas->lt_music_hangover == 0 && t2 < 0.34 && t3 < 0.26f && tL < 0.45f )
    2778             :     {
    2779         101 :         hSpMusClas->lt_music_state = 0;
    2780         101 :         hSpMusClas->lt_music_hangover = 6;
    2781             :     }
    2782             : 
    2783       60545 :     if ( hSpMusClas->lt_music_hangover > 0 )
    2784             :     {
    2785        1236 :         hSpMusClas->lt_music_hangover--;
    2786             :     }
    2787             : 
    2788             :     /* state machine for strong speech: same hangover scheme as for music */
    2789       60545 :     if ( st->sp_aud_decision1 == 1 && hSpMusClas->lt_speech_state == 0 && hSpMusClas->lt_speech_hangover == 0 &&
    2790        1870 :          cor > 0.40f && dft < 0.1f && voi_fv > 2 * cor_map_sum_fv + 0.12f &&
    2791         304 :          t2 < cor && t3 < cor && tL < cor && cor_map_sum_fv < cor && voi_fv > cor && voi_fv > 0.76f )
    2792             :     {
    2793          84 :         hSpMusClas->lt_speech_state = 1;
    2794          84 :         hSpMusClas->lt_speech_hangover = 6;
    2795             :     }
    2796       60461 :     else if ( hSpMusClas->lt_speech_state == 1 && hSpMusClas->lt_speech_hangover == 0 && cor < 0.40f )
    2797             :     {
    2798          75 :         hSpMusClas->lt_speech_state = 0;
    2799          75 :         hSpMusClas->lt_speech_hangover = 6;
    2800             :     }
    2801             : 
    2802       60545 :     if ( hSpMusClas->lt_speech_hangover > 0 )
    2803             :     {
    2804         845 :         hSpMusClas->lt_speech_hangover--;
    2805             :     }
    2806             : 
    2807             :     /* final decision: the speech state is tested first, so it takes
                     :        precedence when the current decision is music */
    2808       60545 :     if ( st->sp_aud_decision1 == 1 && hSpMusClas->lt_speech_state == 1 )
    2809             :     {
    2810             :         /* strong speech - probably error in speech/music classification */
    2811         440 :         st->sp_aud_decision1 = 0;
    2812         440 :         st->sp_aud_decision2 = 0;
    2813             :     }
    2814       60105 :     else if ( st->sp_aud_decision1 == 0 && hSpMusClas->lt_music_state == 1 )
    2815             :     {
    2816             :         /* strong music - probably error in speech/music classification */
    2817         111 :         st->sp_aud_decision1 = 1;
    2818         111 :         st->sp_aud_decision2 = 1;
    2819             :     }
    2820             : 
    2821             :     /* update the buffer of past decisions */
    2822       60545 :     hSpMusClas->lt_old_mode[2] = st->sp_aud_decision1;
    2823             : 
    2824       60545 :     return;
    2825             : }
    2826             : 
    2827             : /*---------------------------------------------------------------------*
    2828             :  * detect_sparseness()
    2829             :  *
    2830             :  *
    2831             :  *---------------------------------------------------------------------*/
    2832             : 
    2833       19856 : static void detect_sparseness(
    2834             :     Encoder_State *st,             /* i/o: encoder state structure                */
    2835             :     const int16_t localVAD_HE_SAD, /* i  : HE-SAD flag without hangover           */
    2836             :     const float voi_fv             /* i  : scaled voicing feature                 */
    2837             : )
    2838             : {
    2839             :     float sum;
    2840             :     float ftmp;
    2841             :     float ftmp1;
    2842             :     float S1[128];
    2843             :     int16_t i, j;
    2844       19856 :     int16_t hb_sp_high_flag = 0;
    2845       19856 :     int16_t lb_sp_high_flag = 0;
    2846             :     float sumh;
    2847             :     float sparse;
    2848             :     float tmp_buf[4];
    2849       19856 :     float Mlpe = 0.0f;
    2850       19856 :     float Mv = 0.0f;
    2851             :     float Msp;
    2852             : 
    2853       19856 :     SP_MUS_CLAS_HANDLE hSpMusClas = st->hSpMusClas;
    2854             : 
    2855       19856 :     mvr2r( st->Bin_E, S1, 128 );
    2856             : 
    2857       19856 :     sum = 0;
    2858     1608336 :     for ( i = 0; i < 80; i++ )
    2859             :     {
    2860     1588480 :         if ( S1[i] < 0 )
    2861             :         {
    2862      248810 :             S1[i] = 0;
    2863             :         }
    2864     1588480 :         sum += S1[i];
    2865             :     }
    2866             : 
    2867       19856 :     sumh = 0;
    2868      972944 :     for ( i = 80; i < 128; i++ )
    2869             :     {
    2870      953088 :         if ( S1[i] < 0 )
    2871             :         {
    2872      249209 :             S1[i] = 0;
    2873             :         }
    2874      953088 :         sumh += S1[i];
    2875             :     }
    2876             : 
    2877       19856 :     sum += sumh;
    2878             : 
    2879             :     /* order spectral from max to min */
    2880       19856 :     order_spectrum( S1, 128 );
    2881             : 
    2882             :     /* calculate spectral sparseness in the range 0 - 6.4 kHz */
    2883       19856 :     j = 0;
    2884       19856 :     ftmp = 0.0f;
    2885       19856 :     ftmp1 = 0.75f * sum;
    2886     1121218 :     for ( i = 0; i < 128; i++ )
    2887             :     {
    2888     1121169 :         ftmp += S1[i];
    2889     1121169 :         if ( ftmp > ftmp1 )
    2890             :         {
    2891       19807 :             j = i;
    2892       19807 :             break;
    2893             :         }
    2894             :     }
    2895             : 
    2896      158848 :     for ( i = 0; i < HANG_LEN_INIT - 1; i++ )
    2897             :     {
    2898      138992 :         hSpMusClas->sparse_buf[i] = hSpMusClas->sparse_buf[i + 1];
    2899             :     }
    2900             : 
    2901       19856 :     sparse = (float) j;
    2902       19856 :     hSpMusClas->sparse_buf[i] = sparse;
    2903             : 
    2904       19856 :     if ( st->bwidth == WB )
    2905             :     {
    2906        2736 :         Msp = mean( hSpMusClas->sparse_buf, 8 );
    2907             : 
    2908             :         /* find long-term smoothed sparseness */
    2909        2736 :         if ( hSpMusClas->last_vad_spa == 0 )
    2910             :         {
    2911         203 :             set_f( &hSpMusClas->sparse_buf[0], sparse, HANG_LEN_INIT - 1 );
    2912         203 :             hSpMusClas->LT_sparse = sparse;
    2913             :         }
    2914             :         else
    2915             :         {
    2916        2533 :             set_f( tmp_buf, 0.0f, 4 );
    2917             : 
    2918       22797 :             for ( i = 0; i < HANG_LEN_INIT; i++ )
    2919             :             {
    2920       57896 :                 for ( j = 0; j < 4; j++ )
    2921             :                 {
    2922       52576 :                     if ( hSpMusClas->sparse_buf[i] > tmp_buf[j] )
    2923             :                     {
    2924       14944 :                         mvr2r( &tmp_buf[j], &tmp_buf[j + 1], 3 - j );
    2925       14944 :                         tmp_buf[j] = hSpMusClas->sparse_buf[i];
    2926       14944 :                         break;
    2927             :                     }
    2928             :                 }
    2929             :             }
    2930             : 
    2931        2533 :             ftmp = 0.25f * ( HANG_LEN_INIT * Msp - sum_f( tmp_buf, 4 ) ) - hSpMusClas->LT_sparse;
    2932             : 
    2933        2533 :             hSpMusClas->LT_sparse = hSpMusClas->LT_sparse + 0.25f * ftmp;
    2934             :         }
    2935             : 
    2936             :         /* find high-band sparseness */
    2937        2736 :         mvr2r( st->Bin_E + 80, S1, 48 );
    2938        2736 :         order_spectrum( S1, 48 );
    2939             : 
    2940       21888 :         for ( i = 0; i < HANG_LEN_INIT - 1; i++ )
    2941             :         {
    2942       19152 :             hSpMusClas->hf_spar_buf[i] = hSpMusClas->hf_spar_buf[i + 1];
    2943             :         }
    2944        2736 :         hSpMusClas->hf_spar_buf[i] = sum_f( S1, 5 ) / ( sumh + 0.1f );
    2945        2736 :         if ( mean( hSpMusClas->hf_spar_buf, 8 ) > 0.2f )
    2946             :         {
    2947         850 :             hb_sp_high_flag = 1;
    2948             :         }
    2949             : 
    2950             :         /* find low-band sparseness */
    2951        2736 :         mvr2r( st->Bin_E, S1, 60 );
    2952        2736 :         order_spectrum( S1, 60 );
    2953             : 
    2954        2736 :         if ( sum_f( S1, 5 ) / sum_f( S1, 60 ) > 0.18f )
    2955             :         {
    2956        1377 :             lb_sp_high_flag = 1;
    2957             :         }
    2958             : 
    2959             :         /* find smoothed linear prediction efficiency */
    2960       21888 :         for ( i = 0; i < 7; i++ )
    2961             :         {
    2962       19152 :             hSpMusClas->lpe_buf[i] = hSpMusClas->lpe_buf[i + 1];
    2963             :         }
    2964             : 
    2965        2736 :         hSpMusClas->lpe_buf[i] = hSpMusClas->past_epsP2;
    2966        2736 :         Mlpe = mean( hSpMusClas->lpe_buf, 8 );
    2967             : 
    2968             :         /* find smoothed voicing */
    2969       21888 :         for ( i = 0; i < HANG_LEN_INIT - 1; i++ )
    2970             :         {
    2971       19152 :             hSpMusClas->voicing_buf[i] = hSpMusClas->voicing_buf[i + 1];
    2972             :         }
    2973             : 
    2974        2736 :         hSpMusClas->voicing_buf[i] = voi_fv;
    2975        2736 :         Mv = mean( hSpMusClas->voicing_buf, 8 );
    2976             :     }
    2977             : 
    2978             :     /* avoid using LR-MDCT on sparse spectra */
    2979       19856 :     if ( st->sp_aud_decision1 == 1 )
    2980             :     {
    2981        7204 :         if ( st->bwidth == WB )
    2982             :         {
    2983         717 :             ftmp = 90;
    2984             :         }
    2985             :         else
    2986             :         {
    2987        6487 :             ftmp = 91;
    2988             :         }
    2989        7204 :         if ( sparse > ftmp )
    2990             :         {
    2991           0 :             st->sp_aud_decision1 = 0;
    2992           0 :             st->sp_aud_decision2 = 1;
    2993           0 :             hSpMusClas->gsc_hangover = 1;
    2994             :         }
    2995        7204 :         else if ( hSpMusClas->gsc_hangover == 1 )
    2996             :         {
    2997          53 :             if ( sparse > 85 )
    2998             :             {
    2999           0 :                 st->sp_aud_decision1 = 0;
    3000           0 :                 st->sp_aud_decision2 = 1;
    3001             :             }
    3002          53 :             else if ( fabs( sparse - mean( &hSpMusClas->sparse_buf[HANG_LEN_INIT - 1 - hSpMusClas->gsc_cnt], hSpMusClas->gsc_cnt ) ) < 7.0f )
    3003             :             {
    3004          52 :                 st->sp_aud_decision1 = 0;
    3005          52 :                 st->sp_aud_decision2 = 1;
    3006             :             }
    3007             :         }
    3008             : 
    3009        7204 :         if ( st->bwidth == WB )
    3010             :         {
    3011         717 :             if ( hSpMusClas->LT_sparse > 60 && sparse > 50 && Mlpe < -1.3f && Mv > 0.85f &&
    3012          58 :                  lb_sp_high_flag == 0 && ( ( hb_sp_high_flag == 0 && sumh > 0.15f * sum ) || sumh <= 0.15f * sum ) )
    3013             :             {
    3014           9 :                 st->sp_aud_decision1 = 0;
    3015           9 :                 st->sp_aud_decision2 = 1;
    3016           9 :                 hSpMusClas->gsc_hangover = 1;
    3017             :             }
    3018         708 :             else if ( hSpMusClas->gsc_hangover == 1 && !( st->sp_aud_decision1 == 0 && st->sp_aud_decision2 == 1 ) )
    3019             :             {
    3020           1 :                 if ( fabs( sparse - mean( &hSpMusClas->sparse_buf[HANG_LEN_INIT - 1 - hSpMusClas->gsc_cnt], hSpMusClas->gsc_cnt ) ) < 7.0f )
    3021             :                 {
    3022           0 :                     st->sp_aud_decision1 = 0;
    3023           0 :                     st->sp_aud_decision2 = 1;
    3024             :                 }
    3025             :             }
    3026             :         }
    3027             :     }
    3028             : 
    3029             :     /* update the counter of consecutive GSC frames with sparse spectrum */
    3030       19856 :     if ( st->sp_aud_decision1 == 0 && st->sp_aud_decision2 == 1 )
    3031             :     {
    3032          54 :         ( hSpMusClas->gsc_cnt )++;
    3033          54 :         if ( hSpMusClas->gsc_cnt > 7 )
    3034             :         {
    3035          46 :             hSpMusClas->gsc_cnt = 7;
    3036             :         }
    3037             :     }
    3038             :     else
    3039             :     {
    3040       19802 :         hSpMusClas->gsc_cnt = 0;
    3041       19802 :         hSpMusClas->gsc_hangover = 0;
    3042             :     }
    3043             : 
    3044       19856 :     hSpMusClas->last_vad_spa = localVAD_HE_SAD;
    3045             : 
    3046       19856 :     return;
    3047             : }
    3048             : 
    3049             : 
    3050             : /*---------------------------------------------------------------------*
    3051             :  * order_spectrum()
    3052             :  *
    3053             :  *
    3054             :  *---------------------------------------------------------------------*/
    3055             : 
    3056       25328 : static void order_spectrum(
    3057             :     float *vec,
    3058             :     const int16_t len )
    3059             : {
    3060             :     int16_t i, j, imax, imin;
    3061             :     float temp;
    3062             : 
    3063     1443856 :     for ( i = 0; i < len / 2; i++ )
    3064             :     {
    3065     1418528 :         imax = i;
    3066     1418528 :         imin = i;
    3067    88205568 :         for ( j = i; j < len - i; j++ )
    3068             :         {
    3069    86787040 :             if ( vec[j] > vec[imax] )
    3070             :             {
    3071     3895998 :                 imax = j;
    3072             :             }
    3073             :             else
    3074             :             {
    3075    82891042 :                 if ( vec[j] < vec[imin] )
    3076             :                 {
    3077     5551876 :                     imin = j;
    3078             :                 }
    3079             :             }
    3080             :         }
    3081             : 
    3082     1418528 :         temp = vec[i];
    3083     1418528 :         vec[i] = vec[imax];
    3084     1418528 :         vec[imax] = temp;
    3085             : 
    3086     1418528 :         if ( imin == i )
    3087             :         {
    3088      186851 :             imin = imax;
    3089             :         }
    3090             : 
    3091     1418528 :         temp = vec[len - i - 1];
    3092     1418528 :         vec[len - i - 1] = vec[imin];
    3093     1418528 :         vec[imin] = temp;
    3094             :     }
    3095             : 
    3096       25328 :     return;
    3097             : }

Generated by: LCOV version 1.14