LCOV - code coverage report
Current view: top level - lib_enc - pitch_ol2.c (source / functions) Hit Total Coverage
Test: Coverage on main -- short test vectors @ a53d7b5498aebe2d66400d10e953da7c3789f796 Lines: 111 121 91.7 %
Date: 2026-01-26 05:19:21 Functions: 3 3 100.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2026 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : #include <stdint.h>
      38             : #include "options.h"
      39             : #ifdef DEBUGGING
      40             : #include "debug.h"
      41             : #endif
      42             : #include "cnst.h"
      43             : #include "rom_enc.h"
      44             : #include "prot.h"
      45             : #include "wmc_auto.h"
      46             : 
      47             : /*-------------------------------------------------------------------*
      48             :  * Local constants
      49             :  *-------------------------------------------------------------------*/
      50             : 
      51             : #define MAX_DELTA   16 /* half-length of the delta search (largest delta that cor[] in pitch_ol2() is sized for) */
      52             : #define COR_BUF_LEN ( L_INTERPOL1 * 2 + MAX_DELTA * 2 + 1 ) /* size of cor[]: L_INTERPOL1 extra lags on each side of a 2 * MAX_DELTA wide search */
      53             : 
      54             : /*-------------------------------------------------------------------*
      55             :  * pitch_ol2()
      56             :  *
      57             :  * Open-loop pitch precision improvement with 1/4 resolution; writes the refined lag to *pitch_fr and its voicing to *voicing_fr.
      58             :  * The pitch is searched in the interval <pitch_ol-delta, pitch_ol+delta) after clamping it to <pit_min, PIT_MAX>,
      59             :  * i.e. the value pitch_ol + delta is not a part of the interval. delta > MAX_DELTA could overrun cor[]; this is not checked here.
      60             :  *-------------------------------------------------------------------*/
      61             : 
      62     2559444 : void pitch_ol2(
      63             :     const int16_t pit_min,  /* i  : pit_min value (lower clamp of the integer search range) */
      64             :     const int16_t pitch_ol, /* i  : pitch to be improved                             */
      65             :     float *pitch_fr,        /* o  : adjusted 1/4 fractional pitch (t0 + fraction / 4) */
      66             :     float *voicing_fr,      /* o  : adjusted 1/4 fractional voicing                  */
      67             :     const int16_t pos,      /* i  : position in frame where to calculate the improv. (offset added to wsp) */
      68             :     const float *wsp,       /* i  : weighted speech for current frame and look-ahead */
      69             :     const int16_t delta     /* i  : delta for pitch search                           */
      70             : )
      71             : {
      72             :     int16_t i, t, t0, t1, step, fraction, t0_min, t0_max, t_min, t_max;
      73             :     float temp, cor_max, enr_wsp, enr_old, cor[COR_BUF_LEN], *pt_cor, wsp_fr[L_SUBFR];
      74             :     const float *pt_wsp;
      75             :     int16_t base_idx; /* offset that maps an integer lag t to its entry cor[t + base_idx] */
      76     2559444 :     t0_min = pitch_ol - delta;
      77     2559444 :     t0_max = pitch_ol + delta - 1; /* pitch_ol + delta itself is excluded */
      78             : 
      79     2559444 :     if ( t0_min < pit_min )
      80             :     {
      81      263484 :         t0_min = pit_min;
      82             :     }
      83     2559444 :     t_min = t0_min - L_INTERPOL1; /* L_INTERPOL1 extra lags below the search range (presumably the margin interpolation() reads) */
      84             : 
      85     2559444 :     if ( t0_max > PIT_MAX )
      86             :     {
      87       52724 :         t0_max = PIT_MAX;
      88             :     }
      89     2559444 :     t_max = t0_max + L_INTERPOL1; /* L_INTERPOL1 extra lags above the search range */
      90             : 
      91     2559444 :     pt_wsp = wsp + pos;
      92     2559444 :     pt_cor = cor;
      93    57459692 :     for ( t = t_min; t <= t_max; t++ ) /* fill cor[0..t_max - t_min], one entry per lag t */
      94             :     {
      95    54900248 :         *pt_cor++ = dotp( pt_wsp, pt_wsp - t, L_SUBFR ); /* L_SUBFR samples at pos against the samples t earlier */
      96             :     }
      97             : 
      98     2559444 :     pt_cor = cor + L_INTERPOL1; /* cor[L_INTERPOL1] is the entry for lag t0_min */
      99     2559444 :     cor_max = *pt_cor++;
     100     2559444 :     t1 = t0_min;
     101    34424696 :     for ( t = t0_min + 1; t <= t0_max; t++ ) /* integer lag search; strict '>' keeps the lowest lag on ties */
     102             :     {
     103    31865252 :         if ( *pt_cor > cor_max )
     104             :         {
     105    12611401 :             cor_max = *pt_cor;
     106    12611401 :             t1 = t;
     107             :         }
     108    31865252 :         pt_cor++;
     109             :     }
     110             : 
     111             :     /*------------------------------------------------------------------*
     112             :      * Search fractional pitch with 1/4 subsample resolution.
     113             :      * search the fractions around t0 and choose the one which maximizes
     114             :      * the interpolated normalized correlation. t1 keeps the best integer lag found above.
     115             :      *-----------------------------------------------------------------*/
     116             : 
     117     2559444 :     t0 = t1;
     118     2559444 :     base_idx = L_INTERPOL1 - t0_min; /* cor[0] corresponds to lag t_min = t0_min - L_INTERPOL1 */
     119     2559444 :     step = 1; /* 1/4 subsample resolution */
     120     2559444 :     fraction = 1;
     121             : 
     122     2559444 :     if ( t0 == t0_min ) /* Limit case: fractions below t0_min are not searched */
     123             :     {
     124      220527 :         fraction = 0;
     125      220527 :         cor_max = interpolation( &cor[t0 + base_idx], E_ROM_inter4_1, fraction, PIT_UP_SAMP, 4 );
     126             :     }
     127             :     else /* Process negative fractions: candidates ( t1 - 1 ) + i / 4 for i = 1..3 */
     128             :     {
     129     2338917 :         t0--;
     130     2338917 :         cor_max = interpolation( &cor[t0 + base_idx], E_ROM_inter4_1, fraction, PIT_UP_SAMP, 4 );
     131     7016751 :         for ( i = ( fraction + step ); i <= 3; i = i + step )
     132             :         {
     133     4677834 :             temp = interpolation( &cor[t0 + base_idx], E_ROM_inter4_1, i, PIT_UP_SAMP, 4 );
     134     4677834 :             if ( temp > cor_max )
     135             :             {
     136     4381161 :                 cor_max = temp;
     137     4381161 :                 fraction = i;
     138             :             }
     139             :         }
     140             :     }
     141             : 
     142    12797220 :     for ( i = 0; i <= 3; i = i + step ) /* Process positive fractions: candidates t1 + i / 4 for i = 0..3 */
     143             :     {
     144    10237776 :         temp = interpolation( &cor[t1 + base_idx], E_ROM_inter4_1, i, PIT_UP_SAMP, 4 );
     145    10237776 :         if ( temp > cor_max )
     146             :         {
     147     2967914 :             cor_max = temp;
     148     2967914 :             fraction = i;
     149     2967914 :             t0 = t1; /* the winner lies at or above t1, so undo the t0-- of the negative branch */
     150             :         }
     151             :     }
     152             : 
     153     2559444 :     *pitch_fr = t0 + (float) fraction / 4.0f;
     154     2559444 :     pred_lt4( pt_wsp, wsp_fr, t0, fraction, L_SUBFR, E_ROM_inter4_1, 4, PIT_UP_SAMP ); /* fills wsp_fr for lag t0 + fraction / 4 (presumably the delayed, interpolated signal) */
     155             : 
     156     2559444 :     enr_wsp = dotp( pt_wsp, pt_wsp, L_SUBFR ) + 0.01f; /* 0.01f offset keeps the term above zero, assuming dotp() is a plain dot product */
     157     2559444 :     enr_old = dotp( wsp_fr, wsp_fr, L_SUBFR ) + 0.01f;
     158     2559444 :     *voicing_fr = cor_max * inv_sqrt( enr_wsp * enr_old ); /* best interpolated correlation scaled by both energy terms */
     159             : 
     160     2559444 :     return;
     161             : }
     162             : 
     163             : 
     164             : /*-------------------------------------------------------------------*
     165             :  * StableHighPitchDetect()
     166             :  *
     167             :  * Very short stable pitch detection: updates the smoothed statistics, searches lags PIT_MIN_DOUBLEEXTEND..PIT_MIN and, if the final test passes, sets *flag_spitch and overwrites pitch[0..2]
     168             :  *-------------------------------------------------------------------*/
     169             : 
     170     3144290 : void StableHighPitchDetect(
     171             :     int16_t *flag_spitch,      /* o  : flag to indicate very short stable pitch*/
     172             :     int16_t pitch[],           /* i/o: OL pitch buffer (elements 0..2 are overwritten on detection) */
     173             :     const float voicing[],     /* i  : OL pitch gains (the first 3 values are averaged) */
     174             :     const float Bin_E[],       /* i  : per bin log energy spectrum             */
     175             :     const float wsp[],         /* i  : weighted speech                         */
     176             :     const int16_t localVAD,    /* i  : local VAD flag                          */
     177             :     float *voicing_sm,         /* i/o: smoothed open-loop pitch gains          */
     178             :     float *voicing0_sm,        /* i/o: smoothed high pitch gains               */
     179             :     float *LF_EnergyRatio_sm,  /* i/o: smoothed [0, 300Hz] relative peak energy*/
     180             :     int16_t *predecision_flag, /* i/o: predecision flag                        */
     181             :     float *diff_sm,            /* i/o: smoothed pitch frequency difference     */
     182             :     float *energy_sm           /* i/o: smoothed energy around pitch frequency  */
     183             : )
     184             : {
     185             :     int16_t i, pitch_freq_point, pit_min_up;
     186             :     int16_t T, Tp, pit_min;
     187             : 
     188             :     float voicing_m;
     189             :     float energy0, energy1, ratio, cor_max, diff, sum_energy;
     190             :     const float *pt_wsp;
     191             : 
     192     3144290 :     voicing_m = mean( voicing, 3 );
     193     3144290 :     *voicing_sm = 0.75f * ( *voicing_sm ) + 0.25f * voicing_m; /* first-order smoothing, weights 0.75 / 0.25 */
     194             : 
     195             : 
     196             :     /* initial short pitch possibility pre-decision */
     197     3144290 :     pitch_freq_point = (int16_t) ( L_FFT / pitch[1] + 0.5f ); /* NOTE(review): if L_FFT is an integer constant the division truncates before 0.5f is added - confirm this is intended */
     198     3144290 :     diff = 0.0f;
     199     3144290 :     sum_energy = 0.0f;
     200             : 
     201    28120926 :     for ( i = 1; i < 2 * pitch_freq_point; i++ ) /* scans bins 1 .. 2 * pitch_freq_point - 1 */
     202             :     {
     203    24976636 :         diff += ( Bin_E[pitch_freq_point] - Bin_E[i] ); /* accumulated difference between the pitch bin and every scanned bin */
     204    24976636 :         sum_energy += Bin_E[i];
     205             :     }
     206     3144290 :     sum_energy /= ( 2 * pitch_freq_point - 1 ); /* mean of Bin_E over the scanned bins */
     207             : 
     208     3144290 :     *diff_sm = 0.2f * diff + 0.8f * *diff_sm;
     209     3144290 :     *energy_sm = 0.2f * sum_energy + 0.8f * *energy_sm;
     210     3144290 :     diff /= sum_energy; /* from here on diff is relative to the mean bin value; a zero mean is not guarded against */
     211             : 
     212     3144290 :     if ( *diff_sm < -10 && *energy_sm < 38.5 && diff < -0.8 )
     213             :     {
     214       44372 :         *predecision_flag = 1;
     215             :     }
     216             : 
     217     3144290 :     if ( *diff_sm > 10 && *energy_sm > 83 && diff > 0.5 )
     218             :     {
     219      297670 :         *predecision_flag = 0;
     220             :     }
     221             : 
     222             :     /* short pitch possibility pre-decision */
     223     3144290 :     maximum( Bin_E, 7, &energy0 );     /* presumably the peak of Bin_E[0..6] - confirm against maximum() */
     224     3144290 :     maximum( Bin_E + 8, 7, &energy1 ); /* presumably the peak of Bin_E[8..14] */
     225     3144290 :     ratio = max( energy1 - energy0, 0 ); /* negative differences are clipped to 0 */
     226     3144290 :     ratio *= max( voicing_m, 0 );
     227             : 
     228     3144290 :     *LF_EnergyRatio_sm = ( 15 * ( *LF_EnergyRatio_sm ) + ratio ) / 16; /* first-order smoothing, weights 15/16 and 1/16 */
     229             : 
     230     3144290 :     if ( *LF_EnergyRatio_sm > 35 || ratio > 50 )
     231             :     {
     232       32817 :         *predecision_flag = 1;
     233             :     }
     234             : 
     235     3144290 :     if ( *LF_EnergyRatio_sm < 16 ) /* checked last, so it overrides every earlier setting of the flag */
     236             :     {
     237     3079041 :         *predecision_flag = 0;
     238             :     }
     239             : 
     240             :     /* short pitch candidate detection */
     241     3144290 :     Tp = pitch[1];
     242     3144290 :     cor_max = 0;
     243             : 
     244     3144290 :     pt_wsp = wsp + 3 * L_SUBFR;
     245     3144290 :     pit_min = PIT_MIN_DOUBLEEXTEND;
     246     3144290 :     pit_min_up = PIT_MIN;
     247             : 
     248    59741510 :     for ( T = pit_min; T <= pit_min_up; T++ ) /* lag search over PIT_MIN_DOUBLEEXTEND..PIT_MIN */
     249             :     {
     250    56597220 :         energy1 = dotp( pt_wsp, pt_wsp - T, L_SUBFR ); /* energy1 is reused here to hold the correlation at lag T */
     251             : 
     252    56597220 :         if ( energy1 > cor_max || T == pit_min ) /* the first lag is always taken, even when its correlation is <= 0 */
     253             :         {
     254    17248627 :             cor_max = energy1;
     255    17248627 :             Tp = T;
     256             :         }
     257             :     }
     258             : 
     259     3144290 :     energy0 = dotp( pt_wsp, pt_wsp, L_SUBFR ) + 0.01f;
     260     3144290 :     energy1 = dotp( pt_wsp - Tp, pt_wsp - Tp, L_SUBFR ) + 0.01f;
     261     3144290 :     cor_max *= inv_sqrt( energy0 * energy1 ); /* scale the best correlation by both energy terms */
     262     3144290 :     *voicing0_sm = 0.75f * ( *voicing0_sm ) + 0.25f * cor_max;
     263             : 
     264             :     /* final short pitch correction */
     265     3144290 :     *flag_spitch = 0;
     266     3144290 :     if ( localVAD && *predecision_flag && *voicing0_sm > 0.65f && *voicing0_sm > 0.7f * ( *voicing_sm ) )
     267             :     {
     268       42654 :         *flag_spitch = 1;
     269             : 
     270       42654 :         pitch[0] = Tp; /* all three pitch values are replaced by the short-lag candidate */
     271       42654 :         pitch[1] = Tp;
     272       42654 :         pitch[2] = Tp;
     273             :     }
     274             : 
     275     3144290 :     return;
     276             : }
     277             : 
     278             : /*-------------------------------------------------------------------*
     279             :  * pitchDoubling_det()
     280             :  * Multiple pitch doubling detector
     281             :  * For m = 2..4, re-runs pitch_ol2() around pitch_ol[k] / m and keeps the candidate when the summed voicing of its two subframes exceeds the current one
     282             :  *-------------------------------------------------------------------*/
     283             : 
     284         360 : void pitchDoubling_det(
     285             :     const float *wspeech, /* i  : signal handed to pitch_ol2() as its wsp argument */
     286             :     int16_t *pitch_ol,    /* i/o: two pitch values; each is replaced by an accepted pitch_ol[k] / m candidate */
     287             :     float *pitch_fr,      /* i/o: four fractional pitch values; pairs [0..1] and [2..3] are overwritten on acceptance */
     288             :     float *voicing_fr )   /* i/o: four voicing values, compared pairwise and overwritten on acceptance */
     289             : {
     290             :     float new_op_fr[2];
     291             :     float new_voicing[2];
     292             :     int16_t new_Top[2];
     293             :     int16_t m, T;
     294             : 
     295             :     /*save initial values*/
     296         360 :     new_Top[0] = pitch_ol[0];
     297         360 :     new_Top[1] = pitch_ol[1];
     298        1440 :     for ( m = 2; m < 5; m++ ) /* pitch_ol[] is only written after the loop, so every m divides the original values */
     299             :     {
     300        1080 :         T = pitch_ol[0] / m;
     301        1080 :         if ( T >= PIT_MIN_12k8 ) /* candidates below PIT_MIN_12k8 are skipped */
     302             :         {
     303         102 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[0], &new_voicing[0], 0, wspeech, 2 );
     304         102 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[1], &new_voicing[1], L_SUBFR, wspeech, 2 );
     305             : 
     306         102 :             if ( ( new_voicing[0] + new_voicing[1] ) > ( voicing_fr[0] + voicing_fr[1] ) ) /* voicing_fr is updated on acceptance, so later m must beat the new value */
     307             :             {
     308           0 :                 new_Top[0] = T;
     309           0 :                 pitch_fr[0] = new_op_fr[0];
     310           0 :                 pitch_fr[1] = new_op_fr[1];
     311           0 :                 voicing_fr[0] = new_voicing[0];
     312           0 :                 voicing_fr[1] = new_voicing[1];
     313             :             }
     314             :         }
     315             : 
     316        1080 :         T = pitch_ol[1] / m; /* same test for the second pitch value, at positions 2 * L_SUBFR and 3 * L_SUBFR */
     317        1080 :         if ( T >= PIT_MIN_12k8 )
     318             :         {
     319         120 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[0], &new_voicing[0], 2 * L_SUBFR, wspeech, 2 );
     320         120 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[1], &new_voicing[1], 3 * L_SUBFR, wspeech, 2 );
     321             : 
     322         120 :             if ( ( new_voicing[0] + new_voicing[1] ) > ( voicing_fr[2] + voicing_fr[3] ) )
     323             :             {
     324           0 :                 new_Top[1] = T;
     325           0 :                 pitch_fr[2] = new_op_fr[0];
     326           0 :                 pitch_fr[3] = new_op_fr[1];
     327           0 :                 voicing_fr[2] = new_voicing[0];
     328           0 :                 voicing_fr[3] = new_voicing[1];
     329             :             }
     330             :         }
     331             :     }
     332         360 :     pitch_ol[0] = new_Top[0];
     333         360 :     pitch_ol[1] = new_Top[1];
     334             : 
     335         360 :     return;
     336             : }

Generated by: LCOV version 1.14