LCOV - code coverage report
Current view: top level - lib_enc - pitch_ol2.c (source / functions) Hit Total Coverage
Test: Coverage on main -- short test vectors @ 6c9ddc4024a9c0e1ecb8f643f114a84a0e26ec6b Lines: 94 121 77.7 %
Date: 2025-05-23 08:37:30 Functions: 2 3 66.7 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : #include <stdint.h>
      38             : #include "options.h"
      39             : #ifdef DEBUGGING
      40             : #include "debug.h"
      41             : #endif
      42             : #include "cnst.h"
      43             : #include "rom_enc.h"
      44             : #include "prot.h"
      45             : #include "wmc_auto.h"
      46             : 
      47             : /*-------------------------------------------------------------------*
      48             :  * Local constants (sizing of the correlation buffer used by pitch_ol2())
      49             :  *-------------------------------------------------------------------*/
      50             : 
      51             : #define MAX_DELTA   16 /* half-length of the delta search; largest delta that COR_BUF_LEN is sized for */
      52             : #define COR_BUF_LEN ( L_INTERPOL1 * 2 + MAX_DELTA * 2 + 1 ) /* number of entries of the cor[] buffer in pitch_ol2(): search range plus L_INTERPOL1 extra lags on each side */
      53             : 
      54             : /*-------------------------------------------------------------------*
      55             :  * pitch_ol2()
      56             :  *
      57             :  * Open-loop pitch precision improvement with 1/4 resolution: the integer lag with the largest correlation is selected first, then the best 1/4 fraction around it; the lag is written to *pitch_fr and the energy-normalized correlation to *voicing_fr.
      58             :  * The integer lag is searched in the interval <pitch_ol-delta, pitch_ol+delta), limited to the range pit_min .. PIT_MAX,
      59             :  * i.e. the value pitch_ol + delta is not a part of the interval
      60             :  *-------------------------------------------------------------------*/
      61             : 
      62     1187304 : void pitch_ol2(
      63             :     const int16_t pit_min,  /* i  : pit_min value                                    */
      64             :     const int16_t pitch_ol, /* i  : pitch to be improved                             */
      65             :     float *pitch_fr,        /* o  : adjusted 1/4 fractional pitch                    */
      66             :     float *voicing_fr,      /* o  : adjusted 1/4 fractional voicing                  */
      67             :     const int16_t pos,      /* i  : position in frame where to calculate the improv. */
      68             :     const float *wsp,       /* i  : weighted speech for current frame and look-ahead */
      69             :     const int16_t delta     /* i  : delta for pitch search                           */
      70             : )
      71             : {
      72             :     int16_t i, t, t0, t1, step, fraction, t0_min, t0_max, t_min, t_max;
      73             :     float temp, cor_max, enr_wsp, enr_old, cor[COR_BUF_LEN], *pt_cor, wsp_fr[L_SUBFR]; /* NOTE(review): cor[] holds the t_max - t_min + 1 values written below only if delta <= MAX_DELTA; this is not checked here */
      74             :     const float *pt_wsp;
      75             : 
      76     1187304 :     t0_min = pitch_ol - delta; /* lowest integer lag of the search */
      77     1187304 :     t0_max = pitch_ol + delta - 1; /* highest integer lag of the search (pitch_ol + delta itself is excluded) */
      78             : 
      79     1187304 :     if ( t0_min < pit_min ) /* do not search below the caller supplied minimum */
      80             :     {
      81      104832 :         t0_min = pit_min;
      82             :     }
      83     1187304 :     t_min = t0_min - L_INTERPOL1; /* L_INTERPOL1 extra lags below the search range; presumably required by interpolation() - confirm */
      84             : 
      85     1187304 :     if ( t0_max > PIT_MAX ) /* do not search above PIT_MAX */
      86             :     {
      87       31382 :         t0_max = PIT_MAX;
      88             :     }
      89     1187304 :     t_max = t0_max + L_INTERPOL1; /* L_INTERPOL1 extra lags above the search range */
      90             : 
      91     1187304 :     pt_wsp = wsp + pos; /* first sample of the L_SUBFR long segment that is analysed */
      92     1187304 :     pt_cor = cor; /* cor[0] receives the value for lag t_min */
      93    26733326 :     for ( t = t_min; t <= t_max; t++ )
      94             :     {
      95    25546022 :         *pt_cor++ = dotp( pt_wsp, pt_wsp - t, L_SUBFR ); /* cor[t - t_min]: dotp() of the segment and the same signal t samples earlier */
      96             :     }
      97             : 
      98     1187304 :     pt_cor = cor + L_INTERPOL1; /* entry that belongs to lag t0_min */
      99     1187304 :     cor_max = *pt_cor++;
     100     1187304 :     t1 = t0_min; /* t1: integer lag with the largest correlation found so far */
     101    16047590 :     for ( t = t0_min + 1; t <= t0_max; t++ )
     102             :     {
     103    14860286 :         if ( *pt_cor > cor_max ) /* strict comparison: the lowest lag is kept when values are equal */
     104             :         {
     105     5946549 :             cor_max = *pt_cor;
     106     5946549 :             t1 = t;
     107             :         }
     108    14860286 :         pt_cor++;
     109             :     }
     110             : 
     111             :     /*------------------------------------------------------------------*
     112             :      * Search fractional pitch with 1/4 subsample resolution.
     113             :      * search the fractions around t0 and choose the one which maximizes
     114             :      * the interpolated normalized correlation.
     115             :      *-----------------------------------------------------------------*/
     116             : 
     117     1187304 :     pt_cor = cor + L_INTERPOL1 - t0_min; /* rebased so that pt_cor[t] is the value for lag t; NOTE(review): this base address lies outside cor[] when t0_min > L_INTERPOL1, which strict ISO C pointer arithmetic does not permit */
     118     1187304 :     t0 = t1;
     119             : 
     120     1187304 :     step = 1; /* 1/4 subsample resolution */
     121     1187304 :     fraction = 1; /* first fraction tested in the negative direction */
     122             : 
     123     1187304 :     if ( t0 == t0_min ) /* Limit case: the best integer lag is the lower bound, so nothing below it is tested */
     124             :     {
     125       84554 :         fraction = 0;
     126       84554 :         cor_max = interpolation( &pt_cor[t0], E_ROM_inter4_1, fraction, PIT_UP_SAMP, 4 );
     127             :     }
     128             :     else /* Process negative fractions: candidates t1 - 1 + i/4 for i = 1..3 */
     129             :     {
     130     1102750 :         t0--;
     131     1102750 :         cor_max = interpolation( &pt_cor[t0], E_ROM_inter4_1, fraction, PIT_UP_SAMP, 4 );
     132     3308250 :         for ( i = ( fraction + step ); i <= 3; i = i + step )
     133             :         {
     134     2205500 :             temp = interpolation( &pt_cor[t0], E_ROM_inter4_1, i, PIT_UP_SAMP, 4 );
     135     2205500 :             if ( temp > cor_max )
     136             :             {
     137     2066220 :                 cor_max = temp;
     138     2066220 :                 fraction = i;
     139             :             }
     140             :         }
     141             :     }
     142             : 
     143     5936520 :     for ( i = 0; i <= 3; i = i + step ) /* Process positive fractions: candidates t1 + i/4 for i = 0..3 */
     144             :     {
     145     4749216 :         temp = interpolation( &pt_cor[t1], E_ROM_inter4_1, i, PIT_UP_SAMP, 4 );
     146     4749216 :         if ( temp > cor_max )
     147             :         {
     148     1364397 :             cor_max = temp;
     149     1364397 :             fraction = i;
     150     1364397 :             t0 = t1; /* the winner is at or above t1, so the integer part goes back to t1 */
     151             :         }
     152             :     }
     153             : 
     154     1187304 :     *pitch_fr = t0 + (float) fraction / 4.0f; /* integer part plus fraction counted in quarters of a sample */
     155     1187304 :     pred_lt4( pt_wsp, wsp_fr, t0, fraction, L_SUBFR, E_ROM_inter4_1, 4, PIT_UP_SAMP ); /* fills wsp_fr[] for the selected lag; presumably the segment delayed by t0 + fraction/4 - confirm against pred_lt4() */
     156             : 
     157     1187304 :     enr_wsp = dotp( pt_wsp, pt_wsp, L_SUBFR ) + 0.01f; /* dotp() of the segment with itself; the 0.01f offset presumably guards inv_sqrt() against a zero argument */
     158     1187304 :     enr_old = dotp( wsp_fr, wsp_fr, L_SUBFR ) + 0.01f; /* same for wsp_fr[] */
     159     1187304 :     *voicing_fr = cor_max * inv_sqrt( enr_wsp * enr_old ); /* best interpolated correlation scaled by inv_sqrt() of the product of both terms */
     160             : 
     161     1187304 :     return;
     162             : }
     163             : 
     164             : 
     165             : /*-------------------------------------------------------------------*
     166             :  * StableHighPitchDetect()
     167             :  *
     168             :  * Very short stable pitch detection: updates the smoothed statistics passed by pointer, searches the lags PIT_MIN_DOUBLEEXTEND .. PIT_MIN for the best candidate and, when the final decision is positive, sets *flag_spitch to 1 and overwrites pitch[0..2] with that candidate
     169             :  *-------------------------------------------------------------------*/
     170             : 
     171     1153834 : void StableHighPitchDetect(
     172             :     int16_t *flag_spitch,      /* o  : flag to indicate very short stable pitch*/
     173             :     int16_t pitch[],           /* i/o: OL pitch buffer                         */
     174             :     const float voicing[],     /* i  : OL pitch gains                          */
     175             :     const float Bin_E[],       /* i  : per bin log energy spectrum             */
     176             :     const float wsp[],         /* i  : weighted speech                         */
     177             :     const int16_t localVAD,    /* i  : local VAD flag                          */
     178             :     float *voicing_sm,         /* i/o: smoothed open-loop pitch gains          */
     179             :     float *voicing0_sm,        /* i/o: smoothed high pitch gains               */
     180             :     float *LF_EnergyRatio_sm,  /* i/o: smoothed [0, 300Hz] relative peak energy*/
     181             :     int16_t *predecision_flag, /* i/o: predecision flag                        */
     182             :     float *diff_sm,            /* i/o: smoothed pitch frequency difference     */
     183             :     float *energy_sm           /* i/o: smoothed energy around pitch frequency  */
     184             : )
     185             : {
     186             :     int16_t i, pitch_freq_point, pit_min_up;
     187             :     int16_t T, Tp, pit_min;
     188             : 
     189             :     float voicing_m;
     190             :     float energy0, energy1, ratio, cor_max, diff, sum_energy;
     191             :     const float *pt_wsp;
     192             : 
     193     1153834 :     voicing_m = mean( voicing, 3 ); /* mean() over 3 entries of voicing[] */
     194     1153834 :     *voicing_sm = 0.75f * ( *voicing_sm ) + 0.25f * voicing_m; /* recursive smoothing: 3/4 of the old value plus 1/4 of the new one */
     195             : 
     196             : 
     197             :     /* initial short pitch possibility pre-decision */
     198     1153834 :     pitch_freq_point = (int16_t) ( L_FFT / pitch[1] + 0.5f ); /* NOTE(review): L_FFT / pitch[1] is evaluated before 0.5f is added; if L_FFT is an integer constant the quotient is truncated first and the 0.5f has no rounding effect - confirm intent. Assumes pitch[1] != 0 */
     199     1153834 :     diff = 0.0f;
     200     1153834 :     sum_energy = 0.0f;
     201             : 
     202    10117144 :     for ( i = 1; i < 2 * pitch_freq_point; i++ ) /* bins 1 .. 2 * pitch_freq_point - 1 */
     203             :     {
     204     8963310 :         diff += ( Bin_E[pitch_freq_point] - Bin_E[i] ); /* accumulated distance of the bin at pitch_freq_point to every bin of the range */
     205     8963310 :         sum_energy += Bin_E[i];
     206             :     }
     207     1153834 :     sum_energy /= ( 2 * pitch_freq_point - 1 ); /* average over the number of bins visited by the loop above */
     208             : 
     209     1153834 :     *diff_sm = 0.2f * diff + 0.8f * *diff_sm; /* smoothed with the not yet normalized diff */
     210     1153834 :     *energy_sm = 0.2f * sum_energy + 0.8f * *energy_sm;
     211     1153834 :     diff /= sum_energy; /* normalized only after the smoothed value was updated; assumes sum_energy != 0 */
     212             : 
     213     1153834 :     if ( *diff_sm < -10 && *energy_sm < 38.5 && diff < -0.8 ) /* all three conditions must hold to set the flag */
     214             :     {
     215       21644 :         *predecision_flag = 1;
     216             :     }
     217             : 
     218     1153834 :     if ( *diff_sm > 10 && *energy_sm > 83 && diff > 0.5 ) /* all three conditions must hold to clear the flag */
     219             :     {
     220       88247 :         *predecision_flag = 0;
     221             :     }
     222             : 
     223             :     /* short pitch possibility pre-decision */
     224     1153834 :     maximum( Bin_E, 7, &energy0 ); /* energy0: presumably the largest value of Bin_E[0..6] - confirm against maximum() */
     225     1153834 :     maximum( Bin_E + 8, 7, &energy1 ); /* energy1: presumably the largest value of Bin_E[8..14] */
     226     1153834 :     ratio = max( energy1 - energy0, 0 ); /* a negative difference is replaced by 0 */
     227     1153834 :     ratio *= max( voicing_m, 0 ); /* weighted by the mean voicing, a negative mean counts as 0 */
     228             : 
     229     1153834 :     *LF_EnergyRatio_sm = ( 15 * ( *LF_EnergyRatio_sm ) + ratio ) / 16; /* recursive smoothing: 15/16 of the old value plus 1/16 of the new one */
     230             : 
     231     1153834 :     if ( *LF_EnergyRatio_sm > 35 || ratio > 50 )
     232             :     {
     233       28267 :         *predecision_flag = 1;
     234             :     }
     235             : 
     236     1153834 :     if ( *LF_EnergyRatio_sm < 16 ) /* evaluated last, so it overrides every setting of the flag made above */
     237             :     {
     238     1104103 :         *predecision_flag = 0;
     239             :     }
     240             : 
     241             :     /* short pitch candidate detection */
     242     1153834 :     Tp = pitch[1]; /* overwritten by the first iteration of the loop below (T == pit_min) whenever the loop runs */
     243     1153834 :     cor_max = 0;
     244             : 
     245     1153834 :     pt_wsp = wsp + 3 * L_SUBFR; /* the segment analysed starts 3 * L_SUBFR samples into wsp[] */
     246     1153834 :     pit_min = PIT_MIN_DOUBLEEXTEND; /* lowest candidate lag */
     247     1153834 :     pit_min_up = PIT_MIN; /* highest candidate lag (inclusive) */
     248             : 
     249    21922846 :     for ( T = pit_min; T <= pit_min_up; T++ )
     250             :     {
     251    20769012 :         energy1 = dotp( pt_wsp, pt_wsp - T, L_SUBFR ); /* energy1 is reused here for the dotp() of the segment and the signal T samples earlier */
     252             : 
     253    20769012 :         if ( energy1 > cor_max || T == pit_min ) /* the first candidate is always accepted, also when its value is not above 0 */
     254             :         {
     255     6157981 :             cor_max = energy1;
     256     6157981 :             Tp = T;
     257             :         }
     258             :     }
     259             : 
     260     1153834 :     energy0 = dotp( pt_wsp, pt_wsp, L_SUBFR ) + 0.01f; /* dotp() of the segment with itself; the 0.01f offset presumably guards inv_sqrt() against a zero argument */
     261     1153834 :     energy1 = dotp( pt_wsp - Tp, pt_wsp - Tp, L_SUBFR ) + 0.01f; /* same for the segment Tp samples earlier */
     262     1153834 :     cor_max *= inv_sqrt( energy0 * energy1 ); /* best value scaled by inv_sqrt() of the product of both terms */
     263     1153834 :     *voicing0_sm = 0.75f * ( *voicing0_sm ) + 0.25f * cor_max; /* recursive smoothing: 3/4 of the old value plus 1/4 of the new one */
     264             : 
     265             :     /* final short pitch correction */
     266     1153834 :     *flag_spitch = 0;
     267     1153834 :     if ( localVAD && *predecision_flag && *voicing0_sm > 0.65f && *voicing0_sm > 0.7f * ( *voicing_sm ) ) /* requires local VAD, the pre-decision and a smoothed short-lag gain above both 0.65 and 0.7 times the smoothed open-loop gain */
     268             :     {
     269       37177 :         *flag_spitch = 1;
     270             : 
     271       37177 :         pitch[0] = Tp; /* the first three entries of the pitch buffer are replaced by the short lag */
     272       37177 :         pitch[1] = Tp;
     273       37177 :         pitch[2] = Tp;
     274             :     }
     275             : 
     276     1153834 :     return;
     277             : }
     278             : 
     279             : /*-------------------------------------------------------------------*
     280             :  * pitchDoubling_det()
     281             :  * Multiple pitch doubling detector: for m = 2..4 the lags pitch_ol[0] / m and pitch_ol[1] / m are refined with pitch_ol2() on two positions each;
     282             :  * a candidate replaces the current values when the sum of its two voicing results is larger than the sum currently stored in voicing_fr[]
     283             :  *-------------------------------------------------------------------*/
     284             : 
     285           0 : void pitchDoubling_det(
     286             :     const float *wspeech, /* i  : signal handed to pitch_ol2() as its wsp argument        */
     287             :     int16_t *pitch_ol,    /* i/o: two pitch values, entries [0] and [1]                    */
     288             :     float *pitch_fr,      /* i/o: four fractional pitch values, entries [0..3]             */
     289             :     float *voicing_fr )   /* i/o: four voicing values, entries [0..3]                      */
     290             : {
     291             :     float new_op_fr[2];
     292             :     float new_voicing[2];
     293             :     int16_t new_Top[2];
     294             :     int16_t m, T;
     295             : 
     296             :     /*save initial values*/
     297           0 :     new_Top[0] = pitch_ol[0];
     298           0 :     new_Top[1] = pitch_ol[1];
     299           0 :     for ( m = 2; m < 5; m++ ) /* divisors 2, 3 and 4 */
     300             :     {
     301           0 :         T = pitch_ol[0] / m; /* integer division; pitch_ol[] is only written after the loop, so every candidate derives from the value at entry */
     302           0 :         if ( T >= PIT_MIN_12k8 ) /* candidates below PIT_MIN_12k8 are not tested */
     303             :         {
     304           0 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[0], &new_voicing[0], 0, wspeech, 2 ); /* refinement at position 0 with delta 2 */
     305           0 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[1], &new_voicing[1], L_SUBFR, wspeech, 2 ); /* refinement at position L_SUBFR with delta 2 */
     306             : 
     307           0 :             if ( ( new_voicing[0] + new_voicing[1] ) > ( voicing_fr[0] + voicing_fr[1] ) ) /* voicing_fr[] is updated in place, so a later divisor has to beat the best sum found so far */
     308             :             {
     309           0 :                 new_Top[0] = T;
     310           0 :                 pitch_fr[0] = new_op_fr[0];
     311           0 :                 pitch_fr[1] = new_op_fr[1];
     312           0 :                 voicing_fr[0] = new_voicing[0];
     313           0 :                 voicing_fr[1] = new_voicing[1];
     314             :             }
     315             :         }
     316             : 
     317           0 :         T = pitch_ol[1] / m; /* same procedure for the second pitch value */
     318           0 :         if ( T >= PIT_MIN_12k8 )
     319             :         {
     320           0 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[0], &new_voicing[0], 2 * L_SUBFR, wspeech, 2 ); /* refinement at position 2 * L_SUBFR with delta 2 */
     321           0 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[1], &new_voicing[1], 3 * L_SUBFR, wspeech, 2 ); /* refinement at position 3 * L_SUBFR with delta 2 */
     322             : 
     323           0 :             if ( ( new_voicing[0] + new_voicing[1] ) > ( voicing_fr[2] + voicing_fr[3] ) ) /* compared against entries [2] and [3], which are updated in place as well */
     324             :             {
     325           0 :                 new_Top[1] = T;
     326           0 :                 pitch_fr[2] = new_op_fr[0];
     327           0 :                 pitch_fr[3] = new_op_fr[1];
     328           0 :                 voicing_fr[2] = new_voicing[0];
     329           0 :                 voicing_fr[3] = new_voicing[1];
     330             :             }
     331             :         }
     332             :     }
     333           0 :     pitch_ol[0] = new_Top[0]; /* the selected lags are written back only once all divisors were tried */
     334           0 :     pitch_ol[1] = new_Top[1];
     335             : 
     336           0 :     return;
     337             : }

Generated by: LCOV version 1.14