LCOV - code coverage report
Current view: top level - lib_enc - pitch_ol2.c (source / functions) Hit Total Coverage
Test: Coverage on main @ 6baab0c613aa6c7100498ed7b93676aa8198a493 Lines: 121 121 100.0 %
Date: 2025-05-28 04:28:20 Functions: 3 3 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : #include <stdint.h>
      38             : #include "options.h"
      39             : #ifdef DEBUGGING
      40             : #include "debug.h"
      41             : #endif
      42             : #include "cnst.h"
      43             : #include "rom_enc.h"
      44             : #include "prot.h"
      45             : #include "wmc_auto.h"
      46             : 
      47             : /*-------------------------------------------------------------------*
      48             :  * Local constants
      49             :  *-------------------------------------------------------------------*/
      50             : 
      51             : #define MAX_DELTA   16 /* half-length of the delta search      */
      52             : #define COR_BUF_LEN ( L_INTERPOL1 * 2 + MAX_DELTA * 2 + 1 )
      53             : 
      54             : /*-------------------------------------------------------------------*
      55             :  * pitch_ol2()
      56             :  *
      57             :  * Open-loop pitch precision improvement with 1/4 resolution
      58             :  * The pitch is searched in the half-open interval [pitch_ol-delta, pitch_ol+delta),
      59             :  * i.e. the value pitch_ol + delta is not a part of the interval
      60             :  *-------------------------------------------------------------------*/
      61             : 
      62    14049996 : void pitch_ol2(
      63             :     const int16_t pit_min,  /* i  : pit_min value                                    */
      64             :     const int16_t pitch_ol, /* i  : pitch to be improved                             */
      65             :     float *pitch_fr,        /* o  : adjusted 1/4 fractional pitch                    */
      66             :     float *voicing_fr,      /* o  : adjusted 1/4 fractional voicing                  */
      67             :     const int16_t pos,      /* i  : position in frame where to calculate the improv. */
      68             :     const float *wsp,       /* i  : weighted speech for current frame and look-ahead */
      69             :     const int16_t delta     /* i  : delta for pitch search                           */
      70             : )
      71             : {
      72             :     int16_t i, t, t0, t1, step, fraction, t0_min, t0_max, t_min, t_max;
      73             :     float temp, cor_max, enr_wsp, enr_old, cor[COR_BUF_LEN], *pt_cor, wsp_fr[L_SUBFR];
      74             :     const float *pt_wsp;
      75             : 
      76    14049996 :     t0_min = pitch_ol - delta;
      77    14049996 :     t0_max = pitch_ol + delta - 1;
      78             : 
      79    14049996 :     if ( t0_min < pit_min )
      80             :     {
      81     1867212 :         t0_min = pit_min;
      82             :     }
      83    14049996 :     t_min = t0_min - L_INTERPOL1;
      84             : 
      85    14049996 :     if ( t0_max > PIT_MAX )
      86             :     {
      87      308338 :         t0_max = PIT_MAX;
      88             :     }
      89    14049996 :     t_max = t0_max + L_INTERPOL1;
      90             : 
      91    14049996 :     pt_wsp = wsp + pos;
      92    14049996 :     pt_cor = cor;
      93   312754314 :     for ( t = t_min; t <= t_max; t++ )
      94             :     {
      95   298704318 :         *pt_cor++ = dotp( pt_wsp, pt_wsp - t, L_SUBFR );
      96             :     }
      97             : 
      98    14049996 :     pt_cor = cor + L_INTERPOL1;
      99    14049996 :     cor_max = *pt_cor++;
     100    14049996 :     t1 = t0_min;
     101   186304350 :     for ( t = t0_min + 1; t <= t0_max; t++ )
     102             :     {
     103   172254354 :         if ( *pt_cor > cor_max )
     104             :         {
     105    62908184 :             cor_max = *pt_cor;
     106    62908184 :             t1 = t;
     107             :         }
     108   172254354 :         pt_cor++;
     109             :     }
     110             : 
     111             :     /*------------------------------------------------------------------*
     112             :      * Search fractional pitch with 1/4 subsample resolution.
     113             :      * Search the fractions around t0 and choose the one that maximizes
     114             :      * the interpolated normalized correlation.
     115             :      *-----------------------------------------------------------------*/
     116             : 
     117    14049996 :     pt_cor = cor + L_INTERPOL1 - t0_min;
     118    14049996 :     t0 = t1;
     119             : 
     120    14049996 :     step = 1; /* 1/4 subsample resolution */
     121    14049996 :     fraction = 1;
     122             : 
     123    14049996 :     if ( t0 == t0_min ) /* Limit case */
     124             :     {
     125     1422077 :         fraction = 0;
     126     1422077 :         cor_max = interpolation( &pt_cor[t0], E_ROM_inter4_1, fraction, PIT_UP_SAMP, 4 );
     127             :     }
     128             :     else /* Process negative fractions */
     129             :     {
     130    12627919 :         t0--;
     131    12627919 :         cor_max = interpolation( &pt_cor[t0], E_ROM_inter4_1, fraction, PIT_UP_SAMP, 4 );
     132    37883757 :         for ( i = ( fraction + step ); i <= 3; i = i + step )
     133             :         {
     134    25255838 :             temp = interpolation( &pt_cor[t0], E_ROM_inter4_1, i, PIT_UP_SAMP, 4 );
     135    25255838 :             if ( temp > cor_max )
     136             :             {
     137    23734824 :                 cor_max = temp;
     138    23734824 :                 fraction = i;
     139             :             }
     140             :         }
     141             :     }
     142             : 
     143    70249980 :     for ( i = 0; i <= 3; i = i + step ) /* Process positive fractions */
     144             :     {
     145    56199984 :         temp = interpolation( &pt_cor[t1], E_ROM_inter4_1, i, PIT_UP_SAMP, 4 );
     146    56199984 :         if ( temp > cor_max )
     147             :         {
     148    15287878 :             cor_max = temp;
     149    15287878 :             fraction = i;
     150    15287878 :             t0 = t1;
     151             :         }
     152             :     }
     153             : 
     154    14049996 :     *pitch_fr = t0 + (float) fraction / 4.0f;
     155    14049996 :     pred_lt4( pt_wsp, wsp_fr, t0, fraction, L_SUBFR, E_ROM_inter4_1, 4, PIT_UP_SAMP );
     156             : 
     157    14049996 :     enr_wsp = dotp( pt_wsp, pt_wsp, L_SUBFR ) + 0.01f;
     158    14049996 :     enr_old = dotp( wsp_fr, wsp_fr, L_SUBFR ) + 0.01f;
     159    14049996 :     *voicing_fr = cor_max * inv_sqrt( enr_wsp * enr_old );
     160             : 
     161    14049996 :     return;
     162             : }
     163             : 
     164             : 
     165             : /*-------------------------------------------------------------------*
     166             :  * StableHighPitchDetect()
     167             :  *
     168             :  * Very short stable pitch detection
     169             :  *-------------------------------------------------------------------*/
     170             : 
     171    16209600 : void StableHighPitchDetect(
     172             :     int16_t *flag_spitch,      /* o  : flag to indicate very short stable pitch*/
     173             :     int16_t pitch[],           /* i/o: OL pitch buffer                         */
     174             :     const float voicing[],     /* i  : OL pitch gains                          */
     175             :     const float Bin_E[],       /* i  : per bin log energy spectrum             */
     176             :     const float wsp[],         /* i  : weighted speech                         */
     177             :     const int16_t localVAD,    /* i  : local VAD flag                          */
     178             :     float *voicing_sm,         /* i/o: smoothed open-loop pitch gains          */
     179             :     float *voicing0_sm,        /* i/o: smoothed high pitch gains               */
     180             :     float *LF_EnergyRatio_sm,  /* i/o: smoothed [0, 300Hz] relative peak energy*/
     181             :     int16_t *predecision_flag, /* i/o: predecision flag                        */
     182             :     float *diff_sm,            /* i/o: smoothed pitch frequency difference     */
     183             :     float *energy_sm           /* i/o: smoothed energy around pitch frequency  */
     184             : )
     185             : {
     186             :     int16_t i, pitch_freq_point, pit_min_up;
     187             :     int16_t T, Tp, pit_min;
     188             : 
     189             :     float voicing_m;
     190             :     float energy0, energy1, ratio, cor_max, diff, sum_energy;
     191             :     const float *pt_wsp;
     192             : 
     193    16209600 :     voicing_m = mean( voicing, 3 );
     194    16209600 :     *voicing_sm = 0.75f * ( *voicing_sm ) + 0.25f * voicing_m;
     195             : 
     196             : 
     197             :     /* initial short pitch possibility pre-decision */
     198    16209600 :     pitch_freq_point = (int16_t) ( L_FFT / pitch[1] + 0.5f );
     199    16209600 :     diff = 0.0f;
     200    16209600 :     sum_energy = 0.0f;
     201             : 
     202   148717146 :     for ( i = 1; i < 2 * pitch_freq_point; i++ )
     203             :     {
     204   132507546 :         diff += ( Bin_E[pitch_freq_point] - Bin_E[i] );
     205   132507546 :         sum_energy += Bin_E[i];
     206             :     }
     207    16209600 :     sum_energy /= ( 2 * pitch_freq_point - 1 );
     208             : 
     209    16209600 :     *diff_sm = 0.2f * diff + 0.8f * *diff_sm;
     210    16209600 :     *energy_sm = 0.2f * sum_energy + 0.8f * *energy_sm;
     211    16209600 :     diff /= sum_energy;
     212             : 
     213    16209600 :     if ( *diff_sm < -10 && *energy_sm < 38.5 && diff < -0.8 )
     214             :     {
     215      295471 :         *predecision_flag = 1;
     216             :     }
     217             : 
     218    16209600 :     if ( *diff_sm > 10 && *energy_sm > 83 && diff > 0.5 )
     219             :     {
     220      691019 :         *predecision_flag = 0;
     221             :     }
     222             : 
     223             :     /* short pitch possibility pre-decision */
     224    16209600 :     maximum( Bin_E, 7, &energy0 );
     225    16209600 :     maximum( Bin_E + 8, 7, &energy1 );
     226    16209600 :     ratio = max( energy1 - energy0, 0 );
     227    16209600 :     ratio *= max( voicing_m, 0 );
     228             : 
     229    16209600 :     *LF_EnergyRatio_sm = ( 15 * ( *LF_EnergyRatio_sm ) + ratio ) / 16;
     230             : 
     231    16209600 :     if ( *LF_EnergyRatio_sm > 35 || ratio > 50 )
     232             :     {
     233      648786 :         *predecision_flag = 1;
     234             :     }
     235             : 
     236    16209600 :     if ( *LF_EnergyRatio_sm < 16 )
     237             :     {
     238    15222056 :         *predecision_flag = 0;
     239             :     }
     240             : 
     241             :     /* short pitch candidate detection */
     242    16209600 :     Tp = pitch[1];
     243    16209600 :     cor_max = 0;
     244             : 
     245    16209600 :     pt_wsp = wsp + 3 * L_SUBFR;
     246    16209600 :     pit_min = PIT_MIN_DOUBLEEXTEND;
     247    16209600 :     pit_min_up = PIT_MIN;
     248             : 
     249   307982400 :     for ( T = pit_min; T <= pit_min_up; T++ )
     250             :     {
     251   291772800 :         energy1 = dotp( pt_wsp, pt_wsp - T, L_SUBFR );
     252             : 
     253   291772800 :         if ( energy1 > cor_max || T == pit_min )
     254             :         {
     255    80184261 :             cor_max = energy1;
     256    80184261 :             Tp = T;
     257             :         }
     258             :     }
     259             : 
     260    16209600 :     energy0 = dotp( pt_wsp, pt_wsp, L_SUBFR ) + 0.01f;
     261    16209600 :     energy1 = dotp( pt_wsp - Tp, pt_wsp - Tp, L_SUBFR ) + 0.01f;
     262    16209600 :     cor_max *= inv_sqrt( energy0 * energy1 );
     263    16209600 :     *voicing0_sm = 0.75f * ( *voicing0_sm ) + 0.25f * cor_max;
     264             : 
     265             :     /* final short pitch correction */
     266    16209600 :     *flag_spitch = 0;
     267    16209600 :     if ( localVAD && *predecision_flag && *voicing0_sm > 0.65f && *voicing0_sm > 0.7f * ( *voicing_sm ) )
     268             :     {
     269      815365 :         *flag_spitch = 1;
     270             : 
     271      815365 :         pitch[0] = Tp;
     272      815365 :         pitch[1] = Tp;
     273      815365 :         pitch[2] = Tp;
     274             :     }
     275             : 
     276    16209600 :     return;
     277             : }
     278             : 
     279             : /*-------------------------------------------------------------------*
     280             :  * pitchDoubling_det()
     281             :  * Multiple pitch doubling detector (tests each open-loop pitch divided by 2, 3 and 4)
     282             :  *
     283             :  *-------------------------------------------------------------------*/
     284             : 
     285        2371 : void pitchDoubling_det(
     286             :     const float *wspeech,
     287             :     int16_t *pitch_ol,
     288             :     float *pitch_fr,
     289             :     float *voicing_fr )
     290             : {
     291             :     float new_op_fr[2];
     292             :     float new_voicing[2];
     293             :     int16_t new_Top[2];
     294             :     int16_t m, T;
     295             : 
     296             :     /*save initial values*/
     297        2371 :     new_Top[0] = pitch_ol[0];
     298        2371 :     new_Top[1] = pitch_ol[1];
     299        9484 :     for ( m = 2; m < 5; m++ )
     300             :     {
     301        7113 :         T = pitch_ol[0] / m;
     302        7113 :         if ( T >= PIT_MIN_12k8 )
     303             :         {
     304        2784 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[0], &new_voicing[0], 0, wspeech, 2 );
     305        2784 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[1], &new_voicing[1], L_SUBFR, wspeech, 2 );
     306             : 
     307        2784 :             if ( ( new_voicing[0] + new_voicing[1] ) > ( voicing_fr[0] + voicing_fr[1] ) )
     308             :             {
     309          71 :                 new_Top[0] = T;
     310          71 :                 pitch_fr[0] = new_op_fr[0];
     311          71 :                 pitch_fr[1] = new_op_fr[1];
     312          71 :                 voicing_fr[0] = new_voicing[0];
     313          71 :                 voicing_fr[1] = new_voicing[1];
     314             :             }
     315             :         }
     316             : 
     317        7113 :         T = pitch_ol[1] / m;
     318        7113 :         if ( T >= PIT_MIN_12k8 )
     319             :         {
     320        2840 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[0], &new_voicing[0], 2 * L_SUBFR, wspeech, 2 );
     321        2840 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[1], &new_voicing[1], 3 * L_SUBFR, wspeech, 2 );
     322             : 
     323        2840 :             if ( ( new_voicing[0] + new_voicing[1] ) > ( voicing_fr[2] + voicing_fr[3] ) )
     324             :             {
     325          74 :                 new_Top[1] = T;
     326          74 :                 pitch_fr[2] = new_op_fr[0];
     327          74 :                 pitch_fr[3] = new_op_fr[1];
     328          74 :                 voicing_fr[2] = new_voicing[0];
     329          74 :                 voicing_fr[3] = new_voicing[1];
     330             :             }
     331             :         }
     332             :     }
     333        2371 :     pitch_ol[0] = new_Top[0];
     334        2371 :     pitch_ol[1] = new_Top[1];
     335             : 
     336        2371 :     return;
     337             : }

Generated by: LCOV version 1.14