LCOV - code coverage report
Current view: top level - lib_dec - FEC_scale_syn.c (source / functions) Hit Total Coverage
Test: Coverage on main -- short test vectors @ 6c9ddc4024a9c0e1ecb8f643f114a84a0e26ec6b Lines: 104 144 72.2 %
Date: 2025-05-23 08:37:30 Functions: 1 1 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : #include <stdint.h>
      38             : #include "options.h"
      39             : #ifdef DEBUGGING
      40             : #include "debug.h"
      41             : #endif
      42             : #include <math.h>
      43             : #include "cnst.h"
      44             : #include "prot.h"
      45             : #include "wmc_auto.h"
      46             : 
      47             : #define SCLSYN_LAMBDA 0.3f
      48             : 
      49             : /*-------------------------------------------------------------------*
      50             :  * FEC_scale_syn()
      51             :  *
      52             :  * Smooth speech energy evolution during erasure(s) and when recovering after them
      53             :  *-------------------------------------------------------------------*/
      54             : 
      55      461775 : void FEC_scale_syn(
      56             :     const int16_t L_frame,                     /* i  : length of the frame                     */
      57             :     int16_t clas,                              /* i/o: frame classification                    */
      58             :     const int16_t last_good,                   /* i  :   last good frame classification        */
      59             :     float *synth,                              /* i/o: synthesized speech at Fs = 12k8 Hz      */
      60             :     const float *pitch,                        /* i  :   pitch values for each subframe        */
      61             :     float enr_old,                             /* i  :   energy at the end of previous frame   */
      62             :     float enr_q,                               /* i  :   transmitted energy for current frame  */
      63             :     const int16_t coder_type,                  /* i  :   coder type                            */
      64             :     const int16_t LSF_Q_prediction,            /* i  : LSF prediction mode                     */
      65             :     int16_t *scaling_flag,                     /* i/o: flag to indicate energy control of syn  */
      66             :     float *lp_ener_FEC_av,                     /* i/o: averaged voiced signal energy           */
      67             :     float *lp_ener_FEC_max,                    /* i/o: smoothed enr2_max energy of voiced frames */
      68             :     const int16_t bfi,                         /* i  :   current  frame BFI                    */
      69             :     const int32_t total_brate,                 /* i  :   total bitrate                         */
      70             :     const int16_t prev_bfi,                    /* i  :   previous frame BFI                    */
      71             :     const int32_t last_core_brate,             /* i  :   previous frame core bitrate           */
      72             :     float *exc,                                /* i/o: excitation signal without enhancement   */
      73             :     float *exc2,                               /* i/o: excitation signal with enhancement      */
      74             :     const float Aq[],                          /* i  :   LP filter coefs                       */
      75             :     float *old_enr_LP,                         /* i/o: LP filter E of last good voiced frame   */
      76             :     const float *mem_tmp,                      /* i  :   temp. initial synthesis filter states */
      77             :     float *mem_syn,                            /* o  :   synthesis filter states; reloaded from mem_tmp before re-synthesis */
      78             :     const int16_t avoid_lpc_burst_on_recovery, /* i  : if true the excitation energy is limited if LP has big gain */
      79             :     const int16_t force_scaling                /* i  : force scaling                           */
      80             : )
      81             : {
      82             :     int16_t i;
      83             :     float enr1, enr2, gain1, gain2;
      84             :     float scaling, ener_max, enr2_av, enr2_max;
      85             :     float enr_LP;
      86             :     float h1[L_FRAME / 2], tilt, rr0, rr1, pitch_dist, mean_pitch;
      87             :     int16_t k;
      88             : 
      89      461775 :     gain2 = 0.0f;
      90      461775 :     gain1 = 0.0f;
      91      461775 :     scaling = 1.0f;
      92      461775 :     enr_LP = 0.0f;
      93             : 
      94             :     /*-----------------------------------------------------------------*
      95             :      * Find the LP synthesis filter energy (enr_LP) on voiced classes
      96             :      *-----------------------------------------------------------------*/
      97             : 
      98      461775 :     if ( clas >= VOICED_TRANSITION && clas < INACTIVE_CLAS )
      99             :     {
     100      265935 :         if ( L_frame == L_FRAME )
     101             :         {
     102      147465 :             enr_LP = enr_1_Az( Aq + ( NB_SUBFR - 1 ) * ( M + 1 ), L_SUBFR );
     103             :         }
     104             :         else /* L_frame == L_FRAME16k */
     105             :         {
     106      118470 :             enr_LP = enr_1_Az( Aq + ( NB_SUBFR16k - 1 ) * ( M + 1 ), L_SUBFR );
     107             :         }
     108             :     }
     109             : 
     110             :     /*-----------------------------------------------------------------*
     111             :      * Define when to scale the synthesis
     112             :      *-----------------------------------------------------------------*/
     113             : 
     114      461775 :     if ( bfi )
     115             :     {
     116           9 :         *scaling_flag = 1; /* Always check synthesis on bad frames */
     117             :     }
     118      461766 :     else if ( prev_bfi )
     119             :     {
     120        7221 :         if ( ( LSF_Q_prediction == AUTO_REGRESSIVE ) || ( LSF_Q_prediction == MOVING_AVERAGE ) )
     121             :         {
     122        6063 :             *scaling_flag = 2; /* Decoded LSFs affected  */
     123             :         }
     124        1158 :         else if ( coder_type != TRANSITION )
     125             :         {
     126         498 :             *scaling_flag = 1; /* SN, but not TC mode - LSF still affected by the interpolation */
     127             :         }
     128             :         else
     129             :         {
     130         660 :             *scaling_flag = 0; /* LSF still possibly affected due to interpolation */
     131             :         }
     132        7221 :         scaling = 1.5f;
     133             :     }
     134             :     else
     135             :     {
     136      454545 :         if ( ( LSF_Q_prediction == AUTO_REGRESSIVE ) && ( *scaling_flag == 2 ) )
     137             :         {
     138        2619 :             *scaling_flag = 2; /* Continue with energy control till the end of AR prediction */
     139             :         }
     140      451926 :         else if ( *scaling_flag > 0 )
     141             :         {
     142        9291 :             ( *scaling_flag )--; /* If scaling flag was equal to 2, add one control frame to account for the LSF interpolation */
     143             :         }
     144      454545 :         scaling = 2.0f;
     145             :     }
     146             : 
     147             :     /*-----------------------------------------------------------------*
     148             :      * Find the energy/gain at the end of the frame
     149             :      *-----------------------------------------------------------------*/
     150             : 
     151      461775 :     fer_energy( L_frame, clas, synth, pitch[( L_frame >> 6 ) - 1], &enr2, L_frame );
     152             : 
     153      461775 :     if ( bfi || ( total_brate == ACELP_7k20 ) || ( total_brate == ACELP_8k00 ) )
     154             :     {
     155             :         /* energy control is active (scaling_flag was set above) */
     156         174 :         if ( *scaling_flag > 0 )
     157             :         {
     158           9 :             enr2 += 0.01f;
     159             : 
     160           9 :             if ( bfi ) /* In all bad frames, limit the gain to 1  */
     161             :             {
     162           9 :                 gain2 = (float) sqrt( enr_old / enr2 );
     163           9 :                 if ( gain2 > 1.0f )
     164             :                 {
     165           9 :                     gain2 = 1.0f;
     166             :                 }
     167             : 
     168             :                 /* find the energy/gain at the beginning of the frame */
     169           9 :                 fer_energy( L_frame, clas, synth, pitch[0], &enr1, 0 );
     170             : 
     171           9 :                 enr1 += 0.1f;
     172           9 :                 gain1 = (float) sqrt( enr_old / enr1 );
     173           9 :                 if ( gain1 > 1.0f )
     174             :                 {
     175           0 :                     gain1 = 1.0f;
     176             :                 }
     177             :             }
     178             :             else /* good frame  */
     179             :             {
     180           0 :                 if ( enr_q == 0 ) /* If E info (FEC protection bits) is not available in the bitstream */
     181             :                 {
     182           0 :                     enr_q = enr2;
     183             : 
     184           0 :                     set_f( h1, 0.0f, L_FRAME / 2 );
     185           0 :                     h1[0] = 1.0f;
     186           0 :                     syn_filt( Aq + ( 3 * ( M + 1 ) ), M, h1, h1, L_FRAME / 2, h1 + ( M + 1 ), 0 ); /* NOTE(review): fixed subframe index 3 regardless of L_frame (cf. the L_frame-dependent index used for enr_LP) - confirm intended */
     187           0 :                     rr0 = dotp( h1, h1, L_FRAME / 2 - 1 ) + 0.001f;
     188           0 :                     rr1 = dotp( h1, h1 + 1, L_FRAME / 2 - 1 );
     189           0 :                     tilt = rr1 / rr0;
     190             : 
     191           0 :                     pitch_dist = 0.0f;
     192           0 :                     mean_pitch = pitch[0];
     193           0 :                     for ( k = 0; k < ( NB_SUBFR - 1 ); k++ )
     194             :                     {
     195           0 :                         pitch_dist += abs( (int16_t) ( pitch[k + 1] + 0.5f ) - (int16_t) ( pitch[k] + 0.5f ) ); /* NOTE(review): abs() is declared in <stdlib.h>, which is not included directly here - presumably pulled in via a project header; confirm */
     196           0 :                         mean_pitch += pitch[k + 1];
     197             :                     }
     198           0 :                     pitch_dist /= (float) ( NB_SUBFR - 1 );
     199           0 :                     mean_pitch /= (float) ( NB_SUBFR );
     200             : 
     201           0 :                     if ( ( tilt > 0.7f ) &&                                       /* HF resonant filter */
     202           0 :                          ( ( pitch_dist > 8.0f ) || ( mean_pitch < PIT_MIN ) ) && /* pitch unstable or very short      */
     203           0 :                          ( ( prev_bfi ) || ( ( coder_type == GENERIC ) && ( LSF_Q_prediction == AUTO_REGRESSIVE ) ) ) )
     204             :                     {
     205           0 :                         if ( enr_q > scaling * enr_old )
     206             :                         {
     207           0 :                             enr_q = scaling * enr_old;
     208             :                         }
     209             :                     }
     210             :                     else
     211             :                     {
     212           0 :                         if ( ( clas <= VOICED_TRANSITION ) || ( clas >= INACTIVE_CLAS ) )
     213             :                         {
     214           0 :                             ener_max = *lp_ener_FEC_av;
     215             :                         }
     216             :                         else
     217             :                         {
     218           0 :                             ener_max = *lp_ener_FEC_max;
     219             :                         }
     220             : 
     221           0 :                         if ( enr_old > ener_max )
     222             :                         {
     223           0 :                             ener_max = enr_old;
     224             :                         }
     225           0 :                         if ( enr_q > scaling * ener_max )
     226             :                         {
     227           0 :                             enr_q = scaling * ener_max;
     228             :                         }
     229             :                     }
     230             :                 }
     231             : 
     232           0 :                 gain2 = (float) sqrt( enr_q / enr2 );
     233             : 
     234             : 
     235             :                 /*-----------------------------------------------------------------*
     236             :                  * Find the energy/gain at the beginning of the frame to ensure smooth transition after erasure(s)
     237             :                  *-----------------------------------------------------------------*/
     238             : 
     239           0 :                 if ( ( ( last_good >= VOICED_TRANSITION && last_good < INACTIVE_CLAS && ( clas == UNVOICED_CLAS || clas == INACTIVE_CLAS ) ) ||
     240           0 :                        last_core_brate == SID_1k75 || last_core_brate == SID_2k40 || last_core_brate == FRAME_NO_DATA ) &&
     241             :                      prev_bfi )
     242             :                 {
     243             :                     /* voiced -> unvoiced signal transition */
     244             :                     /* CNG -> active signal transition */
     245           0 :                     gain1 = gain2;
     246             :                 }
     247             :                 else
     248             :                 {
     249             :                     /* find the energy at the beginning of the frame */
     250           0 :                     fer_energy( L_frame, clas, synth, pitch[0], &enr1, 0 );
     251             : 
     252           0 :                     enr1 += 0.1f;
     253           0 :                     gain1 = (float) sqrt( enr_old / enr1 );
     254           0 :                     if ( gain1 > 1.2f )
     255             :                     {
     256             :                         /* prevent clipping */
     257           0 :                         gain1 = 1.2f;
     258             :                     }
     259             : 
     260             :                     /* prevent amplifying the unvoiced or inactive part of the frame in case an offset is followed by an onset */
     261           0 :                     if ( clas == ONSET && gain1 > gain2 && prev_bfi )
     262             :                     {
     263           0 :                         gain1 = gain2;
     264             :                     }
     265             :                 }
     266             : 
     267           0 :                 enr2 = enr_q; /* Set the end frame energy to the scaled energy, to be used in the lp_ener_FEC  */
     268             :             }
     269             : 
     270             :             /*------------------------------------------------------------------------------*
     271             :              * Smooth the energy evolution by exponentially evolving from gain1 to gain2
     272             :              *------------------------------------------------------------------------------*/
     273             : 
     274           9 :             gain2 *= ( 1.0f - AGC );
     275        2313 :             for ( i = 0; i < L_frame; i++ )
     276             :             {
     277        2304 :                 gain1 = gain1 * AGC + gain2;
     278        2304 :                 exc[i] *= gain1;
     279        2304 :                 exc2[i] *= gain1;
     280             :             }
     281             : 
     282             :             /* smoothing is done in excitation domain, so redo synthesis */
     283           9 :             mvr2r( mem_tmp, mem_syn, M );
     284           9 :             syn_12k8( L_frame, Aq, exc2, synth, mem_syn, 1 );
     285             :         }
     286             :     }
     287             :     else
     288             :     {
     289             :         /* previous frame erased and no TC frame */
     290      461601 :         if ( prev_bfi && coder_type != TRANSITION )
     291             :         {
     292        6561 :             enr2 += 0.01f;
     293        6561 :             if ( enr_q == 0 ) /* If E info (FEC protection bits) is not available in the bitstream */
     294             :             {
     295        2877 :                 enr_q = enr2;
     296             : 
     297        2877 :                 set_f( h1, 0.0f, L_FRAME / 2 );
     298        2877 :                 h1[0] = 1.0f;
     299        2877 :                 syn_filt( Aq + ( 3 * ( M + 1 ) ), M, h1, h1, L_FRAME / 2, h1 + ( M + 1 ), 0 ); /* NOTE(review): fixed subframe index 3 regardless of L_frame (cf. the L_frame-dependent index used for enr_LP) - confirm intended */
     300        2877 :                 rr0 = dotp( h1, h1, L_FRAME / 2 - 1 ) + 0.001f;
     301        2877 :                 rr1 = dotp( h1, h1 + 1, L_FRAME / 2 - 1 );
     302        2877 :                 tilt = rr1 / rr0;
     303             : 
     304        2877 :                 if ( ( ( ( total_brate == ACELP_13k20 ) || ( total_brate == ACELP_12k85 ) || ( total_brate == ACELP_12k15 ) || ( total_brate == ACELP_11k60 ) ||
     305          48 :                          ( total_brate == ACELP_9k60 ) ) &&
     306          42 :                        ( tilt > 0.7f ) &&                                             /* HF resonant filter */
     307          36 :                        ( ( clas == UNVOICED_CLAS ) || ( clas == INACTIVE_CLAS ) ) ) ) /* unvoiced classification */
     308             :                 {
     309           9 :                     if ( enr_q > scaling * enr_old )
     310             :                     {
     311           3 :                         enr_q = scaling * enr_old;
     312             :                     }
     313             :                 }
     314        2868 :                 else if ( last_good >= VOICED_TRANSITION && last_good < INACTIVE_CLAS && clas >= VOICED_TRANSITION && clas < INACTIVE_CLAS )
     315             :                 {
     316             :                     /* voiced -> voiced recovery */
     317        1656 :                     if ( ( *old_enr_LP != 0.0f ) && ( enr_LP > 2 * *old_enr_LP ) )
     318             :                     {
     319         219 :                         enr_q /= enr_LP;
     320         219 :                         enr_q *= 2 * *old_enr_LP;
     321             :                     }
     322        1437 :                     else if ( avoid_lpc_burst_on_recovery && enr_LP > 20.0f )
     323             :                     {
     324           9 :                         enr_q = (float) ( enr_q * sqrt( 20.0f / enr_LP ) );
     325             :                     }
     326             :                 }
     327             : 
     328        2877 :                 if ( ( last_good >= VOICED_TRANSITION && last_good < INACTIVE_CLAS && clas >= VOICED_TRANSITION && clas < INACTIVE_CLAS ) || force_scaling )
     329             :                 {
     330        1656 :                     if ( enr_q > enr_old )
     331             :                     {
     332         669 :                         enr_q = ( 1 - SCLSYN_LAMBDA ) * enr_old + SCLSYN_LAMBDA * enr_q;
     333             :                     }
     334             :                 }
     335             :             }
     336             : 
     337        6561 :             gain2 = (float) sqrt( enr_q / enr2 );
     338             : 
     339             :             /* do not allow E increase if enr_q index == 0 (lower end Q clipping) */
     340        6561 :             if ( enr_q < 1.1f )
     341             :             {
     342         195 :                 if ( gain2 > 1.0f )
     343             :                 {
     344           3 :                     gain2 = 1.0f;
     345             :                 }
     346             :             }
     347             :             else
     348             :             {
     349        6366 :                 if ( gain2 > 1.2f )
     350             :                 {
     351        1311 :                     gain2 = 1.2f;
     352             :                 }
     353             :             }
     354             : 
     355             :             /*-----------------------------------------------------------------*
     356             :              * Find the energy/gain at the beginning of the frame to ensure smooth transition after erasure(s)
     357             :              *-----------------------------------------------------------------*/
     358             : 
     359        6561 :             if ( clas == SIN_ONSET )
     360             :             {
     361             :                 /* allow only slow increase */
     362          69 :                 gain1 = 0.5f * gain2;
     363             :             }
     364        6492 :             else if ( ( last_good >= VOICED_TRANSITION && last_good < INACTIVE_CLAS && ( clas == UNVOICED_CLAS || clas == INACTIVE_CLAS ) ) || last_core_brate == SID_1k75 || last_core_brate == SID_2k40 || last_core_brate == FRAME_NO_DATA )
     365             :             {
     366             :                 /* voiced -> unvoiced signal transition */
     367             :                 /* CNG -> active signal transition */
     368         603 :                 gain1 = gain2;
     369             :             }
     370             :             else
     371             :             {
     372             :                 /* find the energy at the beginning of the frame */
     373        5889 :                 fer_energy( L_frame, clas, synth, pitch[0], &enr1, 0 );
     374             : 
     375        5889 :                 enr1 += 0.1f;
     376        5889 :                 gain1 = (float) sqrt( enr_old / enr1 );
     377        5889 :                 if ( gain1 > 1.2f )
     378             :                 {
     379             :                     /* prevent clipping */
     380        1710 :                     gain1 = 1.2f;
     381             :                 }
     382        5889 :                 if ( avoid_lpc_burst_on_recovery && ( enr_LP > 20.0f ) && ( enr_LP <= 2 * *old_enr_LP ) && ( gain1 > 1.0f ) )
     383             :                 {
     384           9 :                     gain1 = 1.0f;
     385             :                 }
     386             : 
     387             :                 /* prevent amplifying the unvoiced or inactive part of the frame in case an offset is followed by an onset */
     388        5889 :                 if ( clas == ONSET && gain1 > gain2 )
     389             :                 {
     390          39 :                     gain1 = gain2;
     391             :                 }
     392             :             }
     393             : 
     394             :             /*-----------------------------------------------------------------*
     395             :              * Smooth the energy evolution by exponentially evolving from gain1 to gain2
     396             :              *-----------------------------------------------------------------*/
     397             : 
     398        6561 :             gain2 *= ( 1.0f - AGC );
     399     1933665 :             for ( i = 0; i < L_frame; i++ )
     400             :             {
     401     1927104 :                 gain1 = gain1 * AGC + gain2;
     402     1927104 :                 exc[i] *= gain1;
     403     1927104 :                 exc2[i] *= gain1;
     404             :             }
     405             : 
     406             :             /* smoothing is done in excitation domain, so redo synthesis */
     407        6561 :             mvr2r( mem_tmp, mem_syn, M );
     408        6561 :             syn_12k8( L_frame, Aq, exc2, synth, mem_syn, 1 );
     409             :         }
     410             :     }
     411             : 
     412             :     /*-----------------------------------------------------------------*
     413             :      * Update low-pass filtered energy for voiced frames
     414             :      *-----------------------------------------------------------------*/
     415             : 
     416      461775 :     if ( !bfi && ( clas >= VOICED_TRANSITION && clas < INACTIVE_CLAS ) )
     417             :     {
     418      265926 :         if ( clas == VOICED_TRANSITION )
     419             :         {
     420       19173 :             enr2_av = enr2;
     421       19173 :             fer_energy( L_frame, VOICED_CLAS, synth, pitch[( L_frame >> 6 ) - 1], &enr2_max, L_frame );
     422             :         }
     423             :         else
     424             :         {
     425      246753 :             enr2_max = enr2;
     426      246753 :             fer_energy( L_frame, UNVOICED_CLAS, synth, pitch[( L_frame >> 6 ) - 1], &enr2_av, L_frame );
     427             :         }
     428             : 
     429      265926 :         *lp_ener_FEC_av = 0.05f * enr2_av + 0.95f * *lp_ener_FEC_av;
     430      265926 :         *lp_ener_FEC_max = 0.05f * enr2_max + 0.95f * *lp_ener_FEC_max;
     431             :     }
     432             : 
     433             :     /*-----------------------------------------------------------------*
     434             :      * Update the LP filter energy for voiced frames
     435             :      *-----------------------------------------------------------------*/
     436             : 
     437      461775 :     if ( clas >= VOICED_TRANSITION && clas < INACTIVE_CLAS )
     438             :     {
     439      265935 :         *old_enr_LP = enr_LP;
     440             :     }
     441             : 
     442             : 
     443      461775 :     return;
     444             : }

Generated by: LCOV version 1.14