LCOV - code coverage report
Current view: top level - lib_enc - mdct_selector.c (source / functions) Hit Total Coverage
Test: Coverage on main -- short test vectors @ 6c9ddc4024a9c0e1ecb8f643f114a84a0e26ec6b Lines: 68 88 77.3 %
Date: 2025-05-23 08:37:30 Functions: 3 3 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : #include <stdint.h>
      38             : #include "options.h"
      39             : #ifdef DEBUGGING
      40             : #include "debug.h"
      41             : #endif
      42             : #include <math.h>
      43             : #include "cnst.h"
      44             : #include "rom_enc.h"
      45             : #include "rom_com.h"
      46             : #include "prot.h"
      47             : #include "wmc_auto.h"
      48             : 
      49             : /*-----------------------------------------------------------------*
      50             :  * Local constants
      51             :  *-----------------------------------------------------------------*/
      52             : 
      53             : #define MDCT_SW_SIG_LINE_THR       2.85f /* Significant spectral line threshold above Etot (dB)                               */
      54             : #define MDCT_SW_SIG_PEAK_THR       36.0f /* Significant peak threshold below Etot (dB)                                        */
      55             : #define MDCT_SW_HI_SPARSE_THR      0.25f /* Max. ratio of significant spectral lines for the spectrum to be considered sparse */
      56             : #define MDCT_SW_HI_ENER_LO_THR     7.5f  /* Hi band low energy threshold; compared with hi_ener = log10( mean energy ), no 10x factor */
      57             : #define MDCT_SW_1_VOICING_THR      0.9f  /* Voicing threshold                                                                 */
      58             : #define MDCT_SW_1_HI_ENER_LO_THR   12.5f /* Hi band low energy threshold; compared with hi_ener = log10( mean energy ), no 10x factor */
      59             : #define MDCT_SW_1_SIG_HI_LEVEL_THR 28.0f /* High signal level threshold above noise floor (dB)                                */
      60             : #define MDCT_SW_1_SIG_LO_LEVEL_THR 22.5f /* Low signal level threshold above noise floor (dB)                                 */
      61             : #define MDCT_SW_1_COR_THR          80.0f /* Threshold on cor_map_sum to indicate strongly tonal signal                        */
      62             : #define MDCT_SW_1_SPARSENESS_THR   0.65f /* Threshold on spectrum sparseness                                                  */
      63             : 
      64             : #define MDCT_SW_2_VOICING_THR      0.6f  /* Voicing threshold                                                                 */
      65             : #define MDCT_SW_2_HI_ENER_LO_THR   9.5f  /* Hi band low energy threshold; compared with hi_ener = log10( mean energy ), no 10x factor */
      66             : #define MDCT_SW_2_SIG_HI_LEVEL_THR 19.0f /* High signal level threshold above noise floor (dB); NOTE(review): below the mode 2 LO threshold, MDCT_selector() picks TCX where both apply */
      67             : #define MDCT_SW_2_SIG_LO_LEVEL_THR 23.5f /* Low signal level threshold above noise floor (dB)                                 */
      68             : #define MDCT_SW_2_COR_THR          62.5f /* Threshold on cor_map_sum to indicate strongly tonal signal                        */
      69             : #define MDCT_SW_2_SPARSENESS_THR   0.4f  /* Threshold on spectrum sparseness                                                  */
      70             : 
      71             : #define MDCT_SW_HYST_FAC 0.8f /* Hysteresis tolerance factor: scales the tonality thresholds down and relaxes the hi band sparseness limit by 1/0.8 */
      72             : 
      73             : 
      74             : /*--------------------------------------------------------------------------*
      75             :  * get_sparseness()
      76             :  *
      77             :  * Counts interior bins Bin_E[1..n-2] that are strict local maxima and exceed the
      78             :  * converted, floored threshold; returns 1 - count / ( ( n - 2 ) / 2 )
      79             :  *--------------------------------------------------------------------------*/
      80         309 : static float get_sparseness(
      81             :     const float Bin_E[], /* i  : per-bin energies; elements 0..n-1 are read                  */
      82             :     int16_t n,           /* i  : number of bins to scan                                      */
      83             :     float thr )          /* i  : peak threshold in the 10*log10() domain (converted below)   */
      84             : {
      85             :     int16_t num_max, i;
      86             : 
      87         309 :     thr = (float) ( thr * log( 10 ) ); /* Convert to 10*log() domain from 10*log10() domain */
      88             : 
      89         309 :     thr = max( thr, 3.0f ); /* Set an absolute minimum for close to silent signals */
      90             : 
      91         309 :     num_max = 0;
      92       39243 :     for ( i = 1; i < n - 1; ++i ) /* first and last bin are only used as neighbours */
      93             :     {
      94       38934 :         if ( Bin_E[i] > max( max( Bin_E[i - 1], Bin_E[i + 1] ), thr ) ) /* strictly above both neighbours and above thr */
      95             :         {
      96        4880 :             ++num_max;
      97             :         }
      98             :     }
      99             : 
     100         309 :     return 1.0f - num_max / (float) ( ( n - 2 ) / 2 ); /* denominator uses integer division; NOTE(review): it is 0 for n = 1..3 - confirm callers never pass such n */
     101             : }
     102             : 
     103             : /*--------------------------------------------------------------------------*
     104             :  * MDCT_selector()
     105             :  *
     106             :  * When st->mdct_sw_enable is MODE1 or MODE2: selects TCX_20_CORE or HQ_CORE for the
     107             :  * frame (written to st->core) and updates the selector memories in st->hTcxEnc
     108             :  *--------------------------------------------------------------------------*/
     109         309 : void MDCT_selector(
     110             :     Encoder_State *st,       /* i/o: Encoder State           */
     111             :     const float sp_floor,    /* i  : Noise floor estimate    */
     112             :     const float Etot,        /* i  : Total energy            */
     113             :     const float cor_map_sum, /* i  : sum of correlation map  */
     114             :     const float enerBuffer[] /* i  : energy buffer           */
     115             : )
     116             : {
     117         309 :     TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc;
     118             : 
     119         309 :     if ( st->mdct_sw_enable == MODE1 || st->mdct_sw_enable == MODE2 ) /* any other value: the function does nothing */
     120             :     {
     121             :         float hi_ener, frame_voicing, sparseness;
     122             :         int16_t peak_count;
     123             :         int16_t prefer_tcx, prefer_hq_core, switching_point, hi_sparse, sparse;
     124             :         int16_t lob_cldfb, hib_cldfb, lob_fft, hib_fft; /* lower/upper hi band limits: *_cldfb index enerBuffer[], *_fft index st->Bin_E[] */
     125             :         int16_t i;
     126             :         float sig_lo_level_thr, sig_hi_level_thr, cor_thr, voicing_thr, sparseness_thr, hi_ener_lo_thr;
     127             :         int16_t last_core;
     128             : 
     129         309 :         if ( st->bwidth == NB )
     130             :         {
     131           0 :             lob_cldfb = 3200 / 400; /* NOTE(review): 400 looks like the width in Hz of one enerBuffer[] band - confirm */
     132           0 :             hib_cldfb = 4000 / 400;
     133           0 :             lob_fft = ( L_FFT / 2 ) / 2;           /* 3.2 KHz */
     134           0 :             hib_fft = ( 40 * ( L_FFT / 2 ) ) / 64; /* 4.0 KHz */
     135             :         }
     136         309 :         else if ( st->bwidth == WB )
     137             :         {
     138           0 :             lob_cldfb = 4800 / 400;
     139           0 :             hib_cldfb = 8000 / 400;
     140           0 :             lob_fft = 3 * L_FFT / 2 / 4; /* 4.8 KHz */
     141           0 :             hib_fft = L_FFT / 2;         /* 6.4 KHz (should be 8 KHz) */
     142             :         }
     143             :         else
     144             :         {
     145         309 :             lob_cldfb = 6400 / 400;
     146         309 :             hib_cldfb = 16000 / 400;
     147         309 :             if ( st->bwidth == FB )
     148             :             {
     149           0 :                 hib_cldfb = 24000 / 400;
     150             :             }
     151         309 :             lob_fft = L_FFT / 2; /* 6.4 KHz */
     152         309 :             hib_fft = L_FFT / 2; /* 6.4 KHz (should be 8 KHz) */
     153             :         }
     154             : 
     155             :         /* st->last_core is reset to TCX_20_CORE in init_acelp() => fix it here */
     156         309 :         last_core = st->last_core;
     157         309 :         if ( st->last_codec_mode == MODE1 && last_core == TCX_20_CORE )
     158             :         {
     159           0 :             last_core = HQ_CORE;
     160             :         }
     161             : 
     162             :         /* Voicing */
     163         309 :         frame_voicing = ( st->voicing[0] + st->voicing[1] ) * 0.5f;
     164             : 
     165             :         /* Spectral sparseness */
     166         309 :         sparseness = get_sparseness( st->Bin_E, lob_fft, Etot - MDCT_SW_SIG_PEAK_THR ); /* scans bins below lob_fft; uses lob_fft before the SWB override further down */
     167             : 
     168             :         /* Hi band energy */
     169         309 :         hi_ener = (float) log10( mean( &enerBuffer[lob_cldfb], hib_cldfb - lob_cldfb ) + 0.0001f ); /* offset keeps the log10() argument above 0 for a zero mean; mean() is defined elsewhere */
     170             : 
     171             :         /* Hi band sparseness */
     172         309 :         if ( st->bwidth >= SWB )
     173             :         {
     174             :             /* For SWB, assume hi band sparseness based on 4.8 KHz-6.4 KHz band */
     175         309 :             lob_fft = 3 * L_FFT / 2 / 4; /* 4.8 KHz */
     176             :         }
     177             : 
     178         309 :         peak_count = 0;
     179       10197 :         for ( i = lob_fft; i < hib_fft; ++i )
     180             :         {
     181        9888 :             if ( st->Bin_E[i] >= Etot + MDCT_SW_SIG_LINE_THR * LOG_10 ) /* significant line: at least MDCT_SW_SIG_LINE_THR (scaled by LOG_10) above Etot */
     182             :             {
     183        1564 :                 ++peak_count;
     184             :             }
     185             :         }
     186         309 :         hi_sparse = peak_count <= anint( ( hib_fft - lob_fft ) * MDCT_SW_HI_SPARSE_THR ); /* strict limit; anint() is defined elsewhere (presumably rounds to nearest) */
     187         309 :         sparse = peak_count <= anint( ( hib_fft - lob_fft ) * MDCT_SW_HI_SPARSE_THR / MDCT_SW_HYST_FAC ); /* limit relaxed by 1/MDCT_SW_HYST_FAC; only used by the hysteresis below */
     188             : 
     189             :         /* Hysteresis */
     190         309 :         if ( hTcxEnc->prev_hi_sparse > 0 && sparse > 0 && min( min( st->voicing[0], st->voicing[1] ), st->voicing[2] ) >= MDCT_SW_1_VOICING_THR )
     191             :         {
     192          98 :             hi_sparse = 1;
     193             :         }
     194             : 
     195             :         /* Allowed switching point? */
     196         278 :         switching_point = ( last_core != HQ_CORE && last_core != TCX_20_CORE ) ||                                                                                             /* previous core was non-MDCT */
     197         288 :                           ( hTcxEnc->prev_hi_ener <= MDCT_SW_HI_ENER_LO_THR || hi_ener <= MDCT_SW_HI_ENER_LO_THR ) ||                                                         /* hi band is close to silent */
     198         652 :                           ( last_core == HQ_CORE && ( st->mdct_sw_enable == MODE1 || ( hi_sparse > 0 && hTcxEnc->prev_hi_sparse >= 0 && hTcxEnc->prev_hi_sparse <= 1 ) ) ) || /* HQ_CORE and hi band became sparse */
     199          34 :                           ( last_core == TCX_20_CORE && ( hi_sparse == 0 && hTcxEnc->prev_hi_sparse > 0 ) );                                                                  /* TCX and hi band became dense */
     200             : 
     201         309 :         if ( st->mdct_sw_enable == MODE1 )
     202             :         {
     203         309 :             sig_lo_level_thr = MDCT_SW_1_SIG_LO_LEVEL_THR;
     204         309 :             sig_hi_level_thr = MDCT_SW_1_SIG_HI_LEVEL_THR;
     205         309 :             cor_thr = MDCT_SW_1_COR_THR;
     206         309 :             voicing_thr = MDCT_SW_1_VOICING_THR;
     207         309 :             sparseness_thr = MDCT_SW_1_SPARSENESS_THR;
     208         309 :             hi_ener_lo_thr = MDCT_SW_1_HI_ENER_LO_THR;
     209             :         }
     210             :         else
     211             :         {
     212             :             /* st->mdct_sw_enable == MODE2 */
     213           0 :             sig_lo_level_thr = MDCT_SW_2_SIG_LO_LEVEL_THR;
     214           0 :             sig_hi_level_thr = MDCT_SW_2_SIG_HI_LEVEL_THR;
     215           0 :             cor_thr = MDCT_SW_2_COR_THR;
     216           0 :             voicing_thr = MDCT_SW_2_VOICING_THR;
     217           0 :             sparseness_thr = MDCT_SW_2_SPARSENESS_THR;
     218           0 :             hi_ener_lo_thr = MDCT_SW_2_HI_ENER_LO_THR;
     219             :         }
     220             : 
     221         258 :         prefer_tcx = ( Etot - sp_floor >= sig_hi_level_thr ) &&                                                    /* noise floor is low */
     222         567 :                      ( cor_map_sum >= cor_thr || frame_voicing >= voicing_thr || sparseness >= sparseness_thr ) && /* strong tonal components */
     223           0 :                      ( hi_ener <= hi_ener_lo_thr || hi_sparse > 0 );                                               /* high freqs have low energy or are sparse */
     224             : 
     225         913 :         prefer_hq_core = ( Etot - sp_floor < sig_lo_level_thr ) ||                                                                                                           /* noise floor is very high */
     226         599 :                          ( cor_map_sum < cor_thr * MDCT_SW_HYST_FAC && frame_voicing < voicing_thr * MDCT_SW_HYST_FAC && sparseness < sparseness_thr * MDCT_SW_HYST_FAC ) || /* too weak tonal components */
     227         290 :                          ( st->mdct_sw_enable == MODE1 && !prefer_tcx && st->hTranDet->transientDetector.bIsAttackPresent ); /* MODE1: attack present and TCX not preferred */
     228             : 
     229             :         /* Prefer HQ_CORE on transients */
     230         309 :         if ( st->mdct_sw_enable == MODE2 && st->hTranDet->transientDetector.bIsAttackPresent )
     231             :         {
     232           0 :             prefer_tcx = 0;
     233           0 :             prefer_hq_core = 1;
     234             :         }
     235             : 
     236         309 :         if ( switching_point && ( prefer_tcx || prefer_hq_core ) )
     237             :         {
     238         243 :             if ( prefer_tcx ) /* TCX takes precedence when both preferences are set */
     239             :             {
     240         229 :                 st->core = TCX_20_CORE;
     241             :             }
     242             :             else /* prefer_hq_core */
     243             :             {
     244          14 :                 st->core = HQ_CORE;
     245             :             }
     246             :         }
     247          66 :         else if ( last_core == HQ_CORE || last_core == TCX_20_CORE ) /* no switch: keep the previous MDCT-based core; otherwise st->core is left as set by the caller */
     248             :         {
     249          62 :             st->core = last_core;
     250             :         }
     251             : 
     252             :         /* Prevent the usage of HQ_CORE on noisy-speech or inactive */
     253         309 :         if ( st->mdct_sw_enable == MODE2 && st->core == HQ_CORE && ( st->flag_noisy_speech_snr == 1 || st->vad_flag == 0 ) )
     254             :         {
     255           0 :             st->core = TCX_20_CORE;
     256             :         }
     257             : 
     258             : 
     259             :         /* Update memories */
     260         309 :         if ( hi_sparse <= 0 ) /* hi_sparse is 0 or 1 at this point, so this branch stores 0 */
     261             :         {
     262          66 :             hTcxEnc->prev_hi_sparse = hi_sparse;
     263             :         }
     264             :         else
     265             :         {
     266         243 :             hTcxEnc->prev_hi_sparse += hi_sparse; /* incremented on every sparse frame (from -1 after a reset) ... */
     267         243 :             if ( hTcxEnc->prev_hi_sparse >= 2 )   /* ... and saturated at 2 */
     268             :             {
     269         196 :                 hTcxEnc->prev_hi_sparse = 2;
     270             :             }
     271             :         }
     272         309 :         hTcxEnc->prev_hi_ener = hi_ener;
     273             :     }
     274             : 
     275         309 :     return;
     276             : }
     277             : 
     278             : /*--------------------------------------------------------------------------*
     279             :  * MDCT_selector_reset()
     280             :  *
     281             :  * reset MDCT selector memories
     282             :  *--------------------------------------------------------------------------*/
     283             : 
     284       10682 : void MDCT_selector_reset(
     285             :     TCX_ENC_HANDLE hTcxEnc /* i/o: TCX Encoder Handle              */
     286             : )
     287             : {
     288       10682 :     hTcxEnc->prev_hi_ener = 0;    /* 0 is <= MDCT_SW_HI_ENER_LO_THR, so the next MDCT_selector() frame counts as an allowed switching point */
     289       10682 :     hTcxEnc->prev_hi_sparse = -1; /* -1 = no sparseness history: fails the "> 0" and ">= 0" tests in MDCT_selector() */
     290             : 
     291       10682 :     return;
     292             : }

Generated by: LCOV version 1.14