LCOV - code coverage report
Current view: top level - lib_enc - pitch_ol2.c (source / functions) Hit Total Coverage
Test: Coverage on main -- conformance test test_26252.py @ 0c5691e6405a865cd50088c4936e8acb16f658a1 Lines: 94 121 77.7 %
Date: 2025-12-18 05:24:35 Functions: 2 3 66.7 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : #include <stdint.h>
      38             : #include "options.h"
      39             : #ifdef DEBUGGING
      40             : #include "debug.h"
      41             : #endif
      42             : #include "cnst.h"
      43             : #include "rom_enc.h"
      44             : #include "prot.h"
      45             : #include "wmc_auto.h"
      46             : 
      47             : /*-------------------------------------------------------------------*
      48             :  * Local constants
      49             :  *-------------------------------------------------------------------*/
      50             : 
      51             : #define MAX_DELTA   16 /* half-length of the delta search      */
      52             : #define COR_BUF_LEN ( L_INTERPOL1 * 2 + MAX_DELTA * 2 + 1 ) /* length of cor[] in pitch_ol2(): searched lags plus L_INTERPOL1 extra lags on each side */
      53             : 
      54             : /*-------------------------------------------------------------------*
      55             :  * pitch_ol2()
      56             :  *
      57             :  * Open-loop pitch precision improvement with 1/4 resolution.
      58             :  * The integer lag is searched in the interval <pitch_ol-delta, pitch_ol+delta),
      59             :  * i.e. pitch_ol + delta is excluded; the bounds are limited to pit_min and PIT_MAX.
      60             :  *-------------------------------------------------------------------*/
      61             : 
      62      979340 : void pitch_ol2(
      63             :     const int16_t pit_min,  /* i  : pit_min value                                    */
      64             :     const int16_t pitch_ol, /* i  : pitch to be improved                             */
      65             :     float *pitch_fr,        /* o  : adjusted 1/4 fractional pitch                    */
      66             :     float *voicing_fr,      /* o  : adjusted 1/4 fractional voicing                  */
      67             :     const int16_t pos,      /* i  : position in frame where to calculate the improv. */
      68             :     const float *wsp,       /* i  : weighted speech for current frame and look-ahead */
      69             :     const int16_t delta     /* i  : delta for pitch search                           */
      70             : )
      71             : {
      72             :     int16_t i, t, t0, t1, step, fraction, t0_min, t0_max, t_min, t_max;
      73             :     float temp, cor_max, enr_wsp, enr_old, cor[COR_BUF_LEN], *pt_cor, wsp_fr[L_SUBFR];
      74             :     const float *pt_wsp;
      75             : #ifdef FIX_2271_OOB_INDEXING_IN_PIT_OL2
      76             :     int16_t base_idx; /* offset such that cor[t + base_idx] is the correlation at lag t */
      77             : #endif
      78      979340 :     t0_min = pitch_ol - delta; /* lowest integer lag searched */
      79      979340 :     t0_max = pitch_ol + delta - 1; /* highest integer lag searched (pitch_ol + delta is excluded) */
      80             : 
      81      979340 :     if ( t0_min < pit_min )
      82             :     {
      83      105922 :         t0_min = pit_min;
      84             :     }
      85      979340 :     t_min = t0_min - L_INTERPOL1; /* correlations are also computed L_INTERPOL1 lags below the search range */
      86             : 
      87      979340 :     if ( t0_max > PIT_MAX )
      88             :     {
      89       24484 :         t0_max = PIT_MAX;
      90             :     }
      91      979340 :     t_max = t0_max + L_INTERPOL1; /* ... and L_INTERPOL1 lags above it */
      92             : 
      93      979340 :     pt_wsp = wsp + pos;
      94      979340 :     pt_cor = cor;
      95    21918046 :     for ( t = t_min; t <= t_max; t++ )
      96             :     {
      97    20938706 :         *pt_cor++ = dotp( pt_wsp, pt_wsp - t, L_SUBFR ); /* cor[t - t_min]: dotp() of the segment at pos with the segment t samples earlier */
      98             :     }
      99             : 
     100      979340 :     pt_cor = cor + L_INTERPOL1; /* cor[L_INTERPOL1] belongs to lag t0_min */
     101      979340 :     cor_max = *pt_cor++; /* lag t0_min is the initial best candidate */
     102      979340 :     t1 = t0_min;
     103    13103986 :     for ( t = t0_min + 1; t <= t0_max; t++ )
     104             :     {
     105    12124646 :         if ( *pt_cor > cor_max ) /* strict '>' keeps the smaller lag on ties */
     106             :         {
     107     4751290 :             cor_max = *pt_cor;
     108     4751290 :             t1 = t;
     109             :         }
     110    12124646 :         pt_cor++;
     111             :     }
     112             : 
     113             :     /*------------------------------------------------------------------*
     114             :      * Search fractional pitch with 1/4 subsample resolution.
     115             :      * search the fractions around t0 and choose the one which maximizes
     116             :      * the interpolated normalized correlation.
     117             :      *-----------------------------------------------------------------*/
     118             : 
     119             : #ifndef FIX_2271_OOB_INDEXING_IN_PIT_OL2
     120             :     pt_cor = cor + L_INTERPOL1 - t0_min; /* legacy path: biased pointer so that pt_cor[t] addresses lag t; it points before cor[] when t0_min > L_INTERPOL1 */
     121             : #endif
     122      979340 :     t0 = t1;
     123             : #ifdef FIX_2271_OOB_INDEXING_IN_PIT_OL2
     124      979340 :     base_idx = L_INTERPOL1 - t0_min; /* cor[t + base_idx] addresses lag t without forming a pointer outside cor[] */
     125             : #endif
     126      979340 :     step = 1; /* 1/4 subsample resolution */
     127      979340 :     fraction = 1;
     128             : 
     129      979340 :     if ( t0 == t0_min ) /* Limit case: best lag is the lower bound, no fractions below it are tried */
     130             :     {
     131       94111 :         fraction = 0;
     132             : #ifndef FIX_2271_OOB_INDEXING_IN_PIT_OL2
     133             :         cor_max = interpolation( &pt_cor[t0], E_ROM_inter4_1, fraction, PIT_UP_SAMP, 4 );
     134             : #else
     135       94111 :         cor_max = interpolation( &cor[t0 + base_idx], E_ROM_inter4_1, fraction, PIT_UP_SAMP, 4 );
     136             : #endif
     137             :     }
     138             :     else /* Process negative fractions, expressed as lag t1-1 plus fraction 1/4..3/4 */
     139             :     {
     140      885229 :         t0--;
     141             : #ifndef FIX_2271_OOB_INDEXING_IN_PIT_OL2
     142             :         cor_max = interpolation( &pt_cor[t0], E_ROM_inter4_1, fraction, PIT_UP_SAMP, 4 );
     143             : #else
     144      885229 :         cor_max = interpolation( &cor[t0 + base_idx], E_ROM_inter4_1, fraction, PIT_UP_SAMP, 4 );
     145             : #endif
     146     2655687 :         for ( i = ( fraction + step ); i <= 3; i = i + step ) /* fraction is still 1 here, so i runs over 2 and 3 */
     147             :         {
     148             : #ifndef FIX_2271_OOB_INDEXING_IN_PIT_OL2
     149             :             temp = interpolation( &pt_cor[t0], E_ROM_inter4_1, i, PIT_UP_SAMP, 4 );
     150             : #else
     151     1770458 :             temp = interpolation( &cor[t0 + base_idx], E_ROM_inter4_1, i, PIT_UP_SAMP, 4 );
     152             : #endif
     153     1770458 :             if ( temp > cor_max )
     154             :             {
     155     1658114 :                 cor_max = temp;
     156     1658114 :                 fraction = i;
     157             :             }
     158             :         }
     159             :     }
     160             : 
     161     4896700 :     for ( i = 0; i <= 3; i = i + step ) /* Process positive fractions */
     162             :     {
     163             : #ifndef FIX_2271_OOB_INDEXING_IN_PIT_OL2
     164             :         temp = interpolation( &pt_cor[t1], E_ROM_inter4_1, i, PIT_UP_SAMP, 4 );
     165             : #else
     166     3917360 :         temp = interpolation( &cor[t1 + base_idx], E_ROM_inter4_1, i, PIT_UP_SAMP, 4 );
     167             : #endif
     168     3917360 :         if ( temp > cor_max )
     169             :         {
     170     1104558 :             cor_max = temp;
     171     1104558 :             fraction = i;
     172     1104558 :             t0 = t1; /* the winner is at or above the integer maximum t1 */
     173             :         }
     174             :     }
     175             : 
     176      979340 :     *pitch_fr = t0 + (float) fraction / 4.0f; /* lag in samples with 1/4 resolution */
     177      979340 :     pred_lt4( pt_wsp, wsp_fr, t0, fraction, L_SUBFR, E_ROM_inter4_1, 4, PIT_UP_SAMP ); /* fills wsp_fr; presumably the weighted speech delayed by the fractional lag - TODO confirm against pred_lt4() */
     178             : 
     179      979340 :     enr_wsp = dotp( pt_wsp, pt_wsp, L_SUBFR ) + 0.01f; /* small offset, presumably to avoid a zero energy product below */
     180      979340 :     enr_old = dotp( wsp_fr, wsp_fr, L_SUBFR ) + 0.01f;
     181      979340 :     *voicing_fr = cor_max * inv_sqrt( enr_wsp * enr_old ); /* best interpolated correlation scaled by inv_sqrt() of the energy product */
     182             : 
     183      979340 :     return;
     184             : }
     185             : 
     186             : 
     187             : /*-------------------------------------------------------------------*
     188             :  * StableHighPitchDetect()
     189             :  *
     190             :  * Very short stable pitch detection
     191             :  *-------------------------------------------------------------------*/
     192             : 
     193      851621 : void StableHighPitchDetect(
     194             :     int16_t *flag_spitch,      /* o  : flag to indicate very short stable pitch*/
     195             :     int16_t pitch[],           /* i/o: OL pitch buffer                         */
     196             :     const float voicing[],     /* i  : OL pitch gains                          */
     197             :     const float Bin_E[],       /* i  : per bin log energy spectrum             */
     198             :     const float wsp[],         /* i  : weighted speech                         */
     199             :     const int16_t localVAD,    /* i  : local VAD flag                          */
     200             :     float *voicing_sm,         /* i/o: smoothed open-loop pitch gains          */
     201             :     float *voicing0_sm,        /* i/o: smoothed high pitch gains               */
     202             :     float *LF_EnergyRatio_sm,  /* i/o: smoothed [0, 300Hz] relative peak energy*/
     203             :     int16_t *predecision_flag, /* i/o: predecision flag                        */
     204             :     float *diff_sm,            /* i/o: smoothed pitch frequency difference     */
     205             :     float *energy_sm           /* i/o: smoothed energy around pitch frequency  */
     206             : )
     207             : {
     208             :     int16_t i, pitch_freq_point, pit_min_up;
     209             :     int16_t T, Tp, pit_min;
     210             : 
     211             :     float voicing_m;
     212             :     float energy0, energy1, ratio, cor_max, diff, sum_energy;
     213             :     const float *pt_wsp;
     214             : 
     215      851621 :     voicing_m = mean( voicing, 3 );
     216      851621 :     *voicing_sm = 0.75f * ( *voicing_sm ) + 0.25f * voicing_m;
     217             : 
     218             : 
     219             :     /* initial short pitch possibility pre-decision */
     220      851621 :     pitch_freq_point = (int16_t) ( L_FFT / pitch[1] + 0.5f ); /* NOTE(review): if L_FFT is an integer constant the division truncates before 0.5f is added, so no rounding takes place - confirm intended */
     221      851621 :     diff = 0.0f;
     222      851621 :     sum_energy = 0.0f;
     223             : 
     224     7518314 :     for ( i = 1; i < 2 * pitch_freq_point; i++ ) /* bins 1 .. 2*pitch_freq_point-1 */
     225             :     {
     226     6666693 :         diff += ( Bin_E[pitch_freq_point] - Bin_E[i] );
     227     6666693 :         sum_energy += Bin_E[i];
     228             :     }
     229      851621 :     sum_energy /= ( 2 * pitch_freq_point - 1 ); /* average over the bins visited above */
     230             : 
     231      851621 :     *diff_sm = 0.2f * diff + 0.8f * *diff_sm; /* smoothing uses the un-normalised diff */
     232      851621 :     *energy_sm = 0.2f * sum_energy + 0.8f * *energy_sm;
     233      851621 :     diff /= sum_energy; /* from here on diff is relative to the mean bin energy */
     234             : 
     235      851621 :     if ( *diff_sm < -10 && *energy_sm < 38.5 && diff < -0.8 )
     236             :     {
     237       16290 :         *predecision_flag = 1;
     238             :     }
     239             : 
     240      851621 :     if ( *diff_sm > 10 && *energy_sm > 83 && diff > 0.5 )
     241             :     {
     242       69627 :         *predecision_flag = 0;
     243             :     }
     244             : 
     245             :     /* short pitch possibility pre-decision */
     246      851621 :     maximum( Bin_E, 7, &energy0 ); /* presumably stores the largest of Bin_E[0..6] in energy0 - confirm against maximum() */
     247      851621 :     maximum( Bin_E + 8, 7, &energy1 ); /* same over Bin_E[8..14] */
     248      851621 :     ratio = max( energy1 - energy0, 0 );
     249      851621 :     ratio *= max( voicing_m, 0 );
     250             : 
     251      851621 :     *LF_EnergyRatio_sm = ( 15 * ( *LF_EnergyRatio_sm ) + ratio ) / 16; /* recursive average, weight 1/16 for the new value */
     252             : 
     253      851621 :     if ( *LF_EnergyRatio_sm > 35 || ratio > 50 )
     254             :     {
     255       19427 :         *predecision_flag = 1;
     256             :     }
     257             : 
     258      851621 :     if ( *LF_EnergyRatio_sm < 16 ) /* checked last, so it overrides the settings above */
     259             :     {
     260      815344 :         *predecision_flag = 0;
     261             :     }
     262             : 
     263             :     /* short pitch candidate detection */
     264      851621 :     Tp = pitch[1];
     265      851621 :     cor_max = 0;
     266             : 
     267      851621 :     pt_wsp = wsp + 3 * L_SUBFR;
     268      851621 :     pit_min = PIT_MIN_DOUBLEEXTEND;
     269      851621 :     pit_min_up = PIT_MIN;
     270             : 
     271    16180799 :     for ( T = pit_min; T <= pit_min_up; T++ )
     272             :     {
     273    15329178 :         energy1 = dotp( pt_wsp, pt_wsp - T, L_SUBFR ); /* energy1 is reused here as the correlation at lag T */
     274             : 
     275    15329178 :         if ( energy1 > cor_max || T == pit_min ) /* T == pit_min: the first lag always initialises cor_max and Tp */
     276             :         {
     277     4546884 :             cor_max = energy1;
     278     4546884 :             Tp = T;
     279             :         }
     280             :     }
     281             : 
     282      851621 :     energy0 = dotp( pt_wsp, pt_wsp, L_SUBFR ) + 0.01f;
     283      851621 :     energy1 = dotp( pt_wsp - Tp, pt_wsp - Tp, L_SUBFR ) + 0.01f;
     284      851621 :     cor_max *= inv_sqrt( energy0 * energy1 ); /* scale the best correlation by inv_sqrt() of the energy product */
     285      851621 :     *voicing0_sm = 0.75f * ( *voicing0_sm ) + 0.25f * cor_max;
     286             : 
     287             :     /* final short pitch correction */
     288      851621 :     *flag_spitch = 0;
     289      851621 :     if ( localVAD && *predecision_flag && *voicing0_sm > 0.65f && *voicing0_sm > 0.7f * ( *voicing_sm ) )
     290             :     {
     291       25438 :         *flag_spitch = 1;
     292             : 
     293       25438 :         pitch[0] = Tp; /* all three OL pitch entries are replaced by the short lag */
     294       25438 :         pitch[1] = Tp;
     295       25438 :         pitch[2] = Tp;
     296             :     }
     297             : 
     298      851621 :     return;
     299             : }
     300             : 
     301             : /*-------------------------------------------------------------------*
     302             :  * pitchDoubling_det()
     303             :  * Multiple pitch doubling detector
     304             :  * Re-runs pitch_ol2() at pitch_ol[]/m, m = 2..4, and keeps a candidate when its summed voicing is higher
     305             :  *-------------------------------------------------------------------*/
     306             : 
     307           0 : void pitchDoubling_det(
     308             :     const float *wspeech, /* i  : signal passed to pitch_ol2() as its wsp argument      */
     309             :     int16_t *pitch_ol,    /* i/o: OL pitch, entries [0] and [1] are read and updated    */
     310             :     float *pitch_fr,      /* i/o: fractional pitch, entries [0..3] may be replaced      */
     311             :     float *voicing_fr )   /* i/o: fractional voicing, entries [0..3] compared/replaced  */
     312             : {
     313             :     float new_op_fr[2];
     314             :     float new_voicing[2];
     315             :     int16_t new_Top[2];
     316             :     int16_t m, T;
     317             : 
     318             :     /*save initial values*/
     319           0 :     new_Top[0] = pitch_ol[0];
     320           0 :     new_Top[1] = pitch_ol[1];
     321           0 :     for ( m = 2; m < 5; m++ )
     322             :     {
     323           0 :         T = pitch_ol[0] / m; /* always derived from the input pitch_ol[0]; pitch_ol[] is only written after the loop */
     324           0 :         if ( T >= PIT_MIN_12k8 )
     325             :         {
     326           0 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[0], &new_voicing[0], 0, wspeech, 2 );
     327           0 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[1], &new_voicing[1], L_SUBFR, wspeech, 2 );
     328             : 
     329           0 :             if ( ( new_voicing[0] + new_voicing[1] ) > ( voicing_fr[0] + voicing_fr[1] ) ) /* voicing_fr[] is updated in place, so later m compete against the best so far */
     330             :             {
     331           0 :                 new_Top[0] = T;
     332           0 :                 pitch_fr[0] = new_op_fr[0];
     333           0 :                 pitch_fr[1] = new_op_fr[1];
     334           0 :                 voicing_fr[0] = new_voicing[0];
     335           0 :                 voicing_fr[1] = new_voicing[1];
     336             :             }
     337             :         }
     338             : 
     339           0 :         T = pitch_ol[1] / m; /* same test for the second OL pitch, at positions 2*L_SUBFR and 3*L_SUBFR */
     340           0 :         if ( T >= PIT_MIN_12k8 )
     341             :         {
     342           0 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[0], &new_voicing[0], 2 * L_SUBFR, wspeech, 2 );
     343           0 :             pitch_ol2( PIT_MIN_SHORTER, T, &new_op_fr[1], &new_voicing[1], 3 * L_SUBFR, wspeech, 2 );
     344             : 
     345           0 :             if ( ( new_voicing[0] + new_voicing[1] ) > ( voicing_fr[2] + voicing_fr[3] ) )
     346             :             {
     347           0 :                 new_Top[1] = T;
     348           0 :                 pitch_fr[2] = new_op_fr[0];
     349           0 :                 pitch_fr[3] = new_op_fr[1];
     350           0 :                 voicing_fr[2] = new_voicing[0];
     351           0 :                 voicing_fr[3] = new_voicing[1];
     352             :             }
     353             :         }
     354             :     }
     355           0 :     pitch_ol[0] = new_Top[0];
     356           0 :     pitch_ol[1] = new_Top[1];
     357             : 
     358           0 :     return;
     359             : }

Generated by: LCOV version 1.14