LCOV - code coverage report
Current view: top level - lib_dec - jbm_pcmdsp_apa.c (source / functions) Hit Total Coverage
Test: Coverage on main -- short test vectors @ 6c9ddc4024a9c0e1ecb8f643f114a84a0e26ec6b Lines: 351 415 84.6 %
Date: 2025-05-23 08:37:30 Functions: 19 20 95.0 %

          Line data    Source code
       1             : /******************************************************************************************************
       2             : 
       3             :    (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
       4             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
       5             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
       6             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
       7             :    contributors to this repository. All Rights Reserved.
       8             : 
       9             :    This software is protected by copyright law and by international treaties.
      10             :    The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
      11             :    Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
      12             :    Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
      13             :    Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
      14             :    contributors to this repository retain full ownership rights in their respective contributions in
      15             :    the software. This notice grants no license of any kind, including but not limited to patent
      16             :    license, nor is any license granted by implication, estoppel or otherwise.
      17             : 
      18             :    Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
      19             :    contributions.
      20             : 
      21             :    This software is provided "AS IS", without any express or implied warranties. The software is in the
      22             :    development stage. It is intended exclusively for experts who have experience with such software and
      23             :    solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
      24             :    and fitness for a particular purpose are hereby disclaimed and excluded.
      25             : 
      26             :    Any dispute, controversy or claim arising under or in relation to providing this software shall be
      27             :    submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
      28             :    accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
      29             :    the United Nations Convention on Contracts on the International Sales of Goods.
      30             : 
      31             : *******************************************************************************************************/
      32             : 
      33             : /*====================================================================================
      34             :     EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
      35             :   ====================================================================================*/
      36             : 
      37             : /*! @file jbm_pcmdsp_apa.c Adaptive Playout for Audio (apa). */
      38             : 
      39             : /* system headers */
      40             : #include <assert.h>
      41             : #include <math.h>
      42             : #include <stdlib.h>
      43             : #include <stdio.h>
      44             : #include <stdint.h>
      45             : #include "options.h"
      46             : #include "prot.h"
      47             : #ifdef DEBUGGING
      48             : #include "debug.h"
      49             : #endif
      50             : #include "wmc_auto.h"
      51             : /* local headers */
      52             : #include "jbm_pcmdsp_apa.h"
      53             : #include "jbm_pcmdsp_similarityestimation.h"
      54             : #include "jbm_pcmdsp_window.h"
      55             : #include "cnst.h"
      56             : 
      57             : 
      58             : /*---------------------------------------------------------------------*
      59             :  * Local state structure
      60             :  *---------------------------------------------------------------------*/
      61             : 
      62             : /* maximum number of segments/iterations in extend_frm() */
      63             : #define MAXN 10
      64             : 
      65             : /* definition of state struct */
      66             : struct apa_state_t
      67             : {
      68             :     /* output buffer */
      69             :     bool evs_compat_mode;
      70             :     float *buf_out;
      71             :     uint16_t buf_out_capacity;
      72             :     uint16_t l_buf_out;
      73             : 
      74             :     /* Hann window */
      75             :     float win[APA_BUF_PER_CHANNEL];
      76             :     uint16_t l_halfwin;
      77             : 
      78             :     /* sampling rate [Hz] */
      79             :     uint16_t rate;
      80             : 
      81             :     /* length of a segment [samples] */
      82             :     uint16_t l_seg;
      83             : 
      84             :     /* length of a frame [samples] */
      85             :     uint16_t l_frm;
      86             : 
      87             :     /* total number of processed input samples since apa_reset() */
      88             :     uint32_t l_in_total;
      89             : 
      90             :     /* time resolution in samples of the IVAS renderer*/
      91             :     uint16_t l_ts;
      92             : 
      93             :     /* samples already available in the renderer buffer */
      94             :     uint16_t l_r_buf;
      95             : 
      96             :     /* sum of inserted/removed samples since last apa_set_scale() */
      97             :     int32_t diffSinceSetScale;
      98             :     /* number of input frames since last apa_set_scale() */
      99             :     uint32_t nFramesSinceSetScale;
     100             : 
     101             :     /* current and previous  scaling ratio [%] */
     102             :     uint16_t scale;
     103             : 
     104             :     /* minimum pitch length [samples] */
     105             :     uint16_t p_min;
     106             : 
     107             :     /* search length [samples] */
     108             :     uint16_t l_search;
     109             : 
     110             :     uint16_t wss; /* waveform subsampling per channel */
     111             :     uint16_t css; /* correlation subsampling per channel */
     112             : 
     113             :     float targetQuality;
     114             :     uint16_t qualityred;  /* quality reduction threshold */
     115             :     uint16_t qualityrise; /* quality rising for adaptive quality thresholds */
     116             : 
     117             :     uint16_t last_pitch;       /* last pitch/sync position */
     118             :     uint16_t bad_frame_count;  /* # frames before quality threshold is lowered */
     119             :     uint16_t good_frame_count; /* # scaled frames */
     120             : 
     121             :     uint16_t num_channels; /* number of input/output channels */
     122             : };
     123             : 
     124             : 
     125             : /*---------------------------------------------------------------------*
     126             :  * Local function prototypes
     127             :  *---------------------------------------------------------------------*/
     128             : 
     129             : static float apa_corrEnergy2dB( float energy, uint16_t corr_len );
     130             : 
     131             : static float apa_getQualityIncreaseForLowEnergy( float energydB );
     132             : 
     133             : static bool logarithmic_search( const apa_state_t *ps, const float *signal, int16_t s_start, uint16_t inlen, uint16_t offset, uint16_t fixed_pos, uint16_t corr_len, uint16_t wss, uint16_t css, int16_t *synchpos );
     134             : 
     135             : static bool find_synch( apa_state_t *ps, const float *in, uint16_t l_in, int16_t s_start, uint16_t s_len, int16_t fixed_pos, uint16_t corr_len, uint16_t offset, float *energy, float *quality, int16_t *synch_pos );
     136             : 
     137             : static bool copy_frm( apa_state_t *ps, const float frm_in[], float frm_out[], uint16_t *l_frm_out );
     138             : 
     139             : static bool shrink_frm( apa_state_t *ps, const float frm_in[], uint16_t maxScaling, float frm_out[], uint16_t *l_frm_out );
     140             : 
     141             : static bool extend_frm( apa_state_t *ps, const float frm_in[], float frm_out[], uint16_t *l_frm_out );
     142             : 
     143             : /*---------------------------------------------------------------------*
     144             :  * Public functions
     145             :  *---------------------------------------------------------------------*/
     146             : 
     147             : /* Allocates memory for state struct and initializes elements. */
     148         105 : ivas_error apa_init(
     149             :     apa_state_t **pps,
     150             :     const int32_t num_channels )
     151             : {
     152         105 :     apa_state_t *ps = NULL;
     153             : 
     154             :     /* make sure pointer is valid */
     155         105 :     if ( !pps )
     156             :     {
     157           0 :         return 1;
     158             :     }
     159             : 
     160             :     /* allocate state struct */
     161         105 :     if ( ( ps = (apa_state_t *) malloc( sizeof( apa_state_t ) ) ) == NULL )
     162             :     {
     163           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for JBM\n" ) );
     164             :     }
     165             : 
     166         105 :     ps->num_channels = (uint16_t) num_channels;
     167         105 :     ps->buf_out_capacity = (uint16_t) ( APA_BUF_PER_CHANNEL * num_channels );
     168         105 :     if ( ( ps->buf_out = malloc( sizeof( float ) * ps->buf_out_capacity ) ) == NULL )
     169             :     {
     170           0 :         return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for JBM\n" ) );
     171             :     }
     172             : 
     173         105 :     ps->evs_compat_mode = false;
     174             : 
     175         105 :     apa_reset( ps );
     176         105 :     *pps = ps;
     177             : 
     178         105 :     return IVAS_ERR_OK;
     179             : }
     180             : 
     181             : 
     182             : /* Sets state variables to initial value. */
     183         210 : void apa_reset(
     184             :     apa_state_t *ps )
     185             : {
     186             :     /* init state struct */
     187         210 :     ps->l_buf_out = 0;
     188         210 :     ps->l_halfwin = 0;
     189         210 :     ps->rate = 0;
     190         210 :     ps->l_seg = 0;
     191         210 :     ps->l_frm = 0;
     192         210 :     ps->l_in_total = 0;
     193         210 :     ps->diffSinceSetScale = 0;
     194         210 :     ps->nFramesSinceSetScale = 0;
     195         210 :     ps->scale = 100;
     196         210 :     ps->p_min = 0;
     197         210 :     ps->l_search = 0;
     198         210 :     ps->wss = 1;
     199         210 :     ps->css = 1;
     200         210 :     ps->targetQuality = 0.0f;
     201         210 :     ps->qualityred = 0;
     202         210 :     ps->qualityrise = 0;
     203         210 :     ps->last_pitch = 0;
     204         210 :     ps->bad_frame_count = 0;
     205         210 :     ps->good_frame_count = 0;
     206             : 
     207         210 :     ps->l_ts = 1;
     208         210 :     ps->l_r_buf = 0;
     209         210 :     return;
     210             : }
     211             : 
     212        3303 : uint8_t apa_reconfigure(
     213             :     apa_state_t *ps,
     214             :     uint16_t num_channels,
     215             :     uint16_t l_ts )
     216             : {
     217             : 
     218             :     /* realloc buffer */
     219        3303 :     free( ps->buf_out );
     220        3303 :     ps->num_channels = (uint16_t) num_channels;
     221        3303 :     ps->buf_out_capacity = (uint16_t) ( APA_BUF_PER_CHANNEL * num_channels );
     222        3303 :     ps->buf_out = (float *) malloc( sizeof( float ) * ps->buf_out_capacity );
     223        3303 :     if ( !ps->buf_out )
     224             :     {
     225           0 :         return 2;
     226             :     }
     227        3303 :     ps->l_buf_out = 0;
     228        3303 :     ps->l_in_total = 0;
     229        3303 :     ps->l_ts = ps->num_channels * l_ts;
     230             : 
     231             :     /* set everything else dependent on the number of channels */
     232             :     /* set segment size */
     233             :     /* in the order of a pitch, set to 160 samples at 16 kHz */
     234             :     /* used for windowing and as the correlation length, i.e., */
     235             :     /* the size of the template segment. */
     236        3303 :     ps->l_seg = ( ps->rate / 100 ) * ps->num_channels;
     237             : 
     238             :     /* set frame size */
     239             :     /* set to 320 samples at 16 kHz */
     240        3303 :     ps->l_frm = ( ps->rate / FRAMES_PER_SEC ) * ps->num_channels;
     241             : 
     242             :     /* set minimum pitch */
     243             :     /* set to 40 samples at 16 kHz */
     244             :     /* (defines min change in number of samples, i.e., abs(l_in-l_out) >= p_min) */
     245        3303 :     ps->p_min = ( ps->rate / 400 ) * ps->num_channels;
     246             : 
     247             :     /* set search length */
     248             :     /* must cover one pitch, set to 200 samples at 16 kHz */
     249             :     /* (the resulting maximum pitch is then p_min+l_search = 240 samples at 16 kHz) */
     250        3303 :     ps->l_search = ( ps->rate / 80 ) * ps->num_channels;
     251             : 
     252        3303 :     return 0;
     253             : }
     254             : 
     255             : 
     256             : /* Sets the audio configuration. */
     257         105 : bool apa_set_rate(
     258             :     apa_state_t *ps,
     259             :     const int32_t output_Fs )
     260             : {
     261             :     /* make sure pointer is valid */
     262         105 :     if ( ps == NULL )
     263             :     {
     264           0 :         return 1;
     265             :     }
     266             : 
     267             :     /* check range */
     268         105 :     if ( ( output_Fs < APA_MIN_RATE ) || ( output_Fs > APA_MAX_RATE ) )
     269             :     {
     270           0 :         return 1;
     271             :     }
     272             : 
     273             :     /* reset state struct */
     274         105 :     apa_reset( ps );
     275             : 
     276             :     /* copy rate to state struct */
     277         105 :     ps->rate = (uint16_t) output_Fs;
     278             : 
     279         105 :     if ( ps->num_channels > APA_MAX_NUM_CHANNELS )
     280             :     {
     281           0 :         return 1;
     282             :     }
     283             : 
     284             :     /*
     285             :      * several other parameters depend on the sampling rate
     286             :      * and are set below. Some "magic numbers" are used here
     287             :      * which are based on typical values of a "pitch" in
     288             :      * human voice. The pitch length is the period of the
     289             :      * base frequency and is usually assumed to be 40-240
     290             :      * samples at 16 kHz.
     291             :      */
     292             : 
     293             :     /* set segment size */
     294             :     /* in the order of a pitch, set to 160 samples at 16 kHz */
     295             :     /* used for windowing and as the correlation length, i.e., */
     296             :     /* the size of the template segment. */
     297         105 :     ps->l_seg = ( ps->rate / 100 ) * ps->num_channels;
     298             : 
     299             :     /* init Hann window */
     300             :     /* Note: l_win < APA_BUF_PER_CHANNEL is required */
     301             :     /* Length of Hann window should be independent of
     302             :      * number of channels - same window applied to all channels */
     303         105 :     ps->l_halfwin = ps->rate / 100;
     304         105 :     hannWindow( ps->l_halfwin * 2, ps->win );
     305             : 
     306             :     /* set frame size */
     307             :     /* set to 320 samples at 16 kHz */
     308         105 :     ps->l_frm = ( ps->rate / FRAMES_PER_SEC ) * ps->num_channels;
     309             : 
     310             :     /* set minimum pitch */
     311             :     /* set to 40 samples at 16 kHz */
     312             :     /* (defines min change in number of samples, i.e., abs(l_in-l_out) >= p_min) */
     313         105 :     ps->p_min = ( ps->rate / 400 ) * ps->num_channels;
     314             : 
     315             :     /* set search length */
     316             :     /* must cover one pitch, set to 200 samples at 16 kHz */
     317             :     /* (the resulting maximum pitch is then p_min+l_search = 240 samples at 16 kHz) */
     318         105 :     ps->l_search = ( ps->rate / 80 ) * ps->num_channels;
     319             : 
     320         105 :     return 0;
     321             : }
     322             : 
     323             : 
     324             : /* Set scaling. */
     325       76596 : bool apa_set_scale(
     326             :     apa_state_t *ps,
     327             :     uint16_t scale )
     328             : {
     329             :     /* make sure pointer is valid */
     330       76596 :     if ( ps == NULL )
     331             :     {
     332           0 :         return 1;
     333             :     }
     334             : 
     335             :     /* check range */
     336       76596 :     if ( ( scale < APA_MIN_SCALE ) || ( scale > APA_MAX_SCALE ) )
     337             :     {
     338           0 :         return 1;
     339             :     }
     340             : 
     341             :     /* do nothing if same scale is set multiple times */
     342             :     /* (otherwise scale control is confused) */
     343       76596 :     if ( ps->scale == scale )
     344             :     {
     345       76224 :         return 0;
     346             :     }
     347             : 
     348             :     /* copy to state struct */
     349         372 :     ps->scale = scale;
     350             : 
     351             :     /* reset scaling statistics */
     352         372 :     ps->diffSinceSetScale = 0;
     353         372 :     ps->nFramesSinceSetScale = 0;
     354             : 
     355         372 :     return 0;
     356             : }
     357             : 
     358         105 : bool apa_set_renderer_granularity(
     359             :     apa_state_t *ps,
     360             :     uint16_t l_ts )
     361             : {
     362             :     /* make sure pointer is valid */
     363         105 :     if ( ps == NULL )
     364             :     {
     365           0 :         return 1;
     366             :     }
     367             : 
     368             : 
     369             :     /* copy to state struct */
     370         105 :     ps->l_ts = l_ts * ps->num_channels;
     371         105 :     return 0;
     372             : }
     373             : 
     374       76596 : bool apa_set_renderer_residual_samples(
     375             :     apa_state_t *ps,
     376             :     uint16_t l_r_buf )
     377             : {
     378             :     /* make sure pointer is valid */
     379       76596 :     if ( ps == NULL )
     380             :     {
     381           0 :         return 1;
     382             :     }
     383             : 
     384             : 
     385             :     /* copy to state struct */
     386       76596 :     ps->l_r_buf = l_r_buf * ps->num_channels;
     387       76596 :     return 0;
     388             : }
     389             : 
     390           0 : bool apa_set_evs_compat_mode(
     391             :     apa_state_t *ps,
     392             :     bool mode )
     393             : {
     394             :     /* make sure pointer is valid */
     395           0 :     if ( ps == NULL )
     396             :     {
     397           0 :         return 1;
     398             :     }
     399             : 
     400           0 :     ps->evs_compat_mode = mode;
     401             : 
     402           0 :     return 0;
     403             : }
     404             : 
     405             : /*
     406             : ********************************************************************************
     407             : *
     408             : *     Function        : apa_set_quality
     409             : *     Tables          : <none>
     410             : *     Compile Defines : <none>
     411             : *     Return          : 0 on success, 1 on failure
     412             : *     Information     : Set quality thresholds.
     413             : *
     414             : *                       quality is lower limit for minimum quality
     415             : *                       Range is [-2;2] - where positive values allow
     416             : *                       only pasting with same phase information
     417             : *                       Negative values would yield cross phased pasting
     418             : *
     419             : *                       qualityred allows dynamic lowering of lower quality
     420             : *                       bound - this gives better results for rhythmic signals
     421             : *                       Range is [0;20], meaning 0.1 lowering*qualityred
     422             : *
     423             : *                       undocumented: qualityrise (same as qualityred - other
     424             : *                       direction)
     425             : *
     426             : ********************************************************************************
     427             : */
     428         105 : bool apa_set_quality(
     429             :     apa_state_t *ps,
     430             :     float quality,
     431             :     uint16_t qualityred,
     432             :     uint16_t qualityrise )
     433             : {
     434         105 :     assert( ps != NULL );
     435         105 :     assert( -2.0f <= quality && quality <= 3.1f );
     436         105 :     assert( qualityred > 0 && qualityred <= 20 );
     437         105 :     assert( qualityrise > 0 && qualityrise <= 20 );
     438             : 
     439         105 :     ps->targetQuality = quality;
     440         105 :     ps->qualityred = qualityred;
     441         105 :     ps->qualityrise = qualityrise;
     442         105 :     ps->bad_frame_count = 0;
     443         105 :     ps->good_frame_count = 0;
     444             : 
     445         105 :     return 0;
     446             : }
     447             : 
     448             : /*
     449             : ********************************************************************************
     450             : *
     451             : *     Function        : apa_set_complexity_options
     452             : *     Tables          : <none>
     453             : *     Compile Defines : <none>
     454             : *     Return          : 0 on success, 1 on failure
     455             : *     Information     : Set complexity options
     456             : *                       Waveform subsampling computes the correlation function
     457             : *                         for certain positions only
     458             : *                       Correlation function subsampling computes the maxima
     459             : *                         for certain positions only
     460             : *
     461             : ********************************************************************************
     462             : */
     463         105 : bool apa_set_complexity_options(
     464             :     apa_state_t *ps,
     465             :     uint16_t wss,
     466             :     uint16_t css )
     467             : {
     468             :     /* make sure pointer is valid */
     469         105 :     if ( ps == NULL )
     470             :     {
     471           0 :         return 1;
     472             :     }
     473             : 
     474         105 :     if ( wss == 0 || wss > 1000 )
     475             :     {
     476           0 :         return 1;
     477             :     }
     478             : 
     479         105 :     if ( css == 0 || css > 1000 )
     480             :     {
     481           0 :         return 1;
     482             :     }
     483             : 
     484         105 :     ps->wss = wss;
     485         105 :     ps->css = css;
     486             : 
     487         105 :     return 0;
     488             : }
     489             : 
     490             : /*
     491             : ********************************************************************************
     492             : *
     493             : *     Function        : apa_exit
     494             : *     Tables          : <none>
     495             : *     Compile Defines : <none>
     496             : *     Return          : 0 on success, 1 on failure
     497             : *     Information     : The memory used for storing the state is freed.
     498             : *                       The state struct pointer is set to NULL.
     499             : *
     500             : ********************************************************************************
     501             : */
     502        1881 : bool apa_exit(
     503             :     apa_state_t **pps )
     504             : {
     505             :     /* ignore NULL pointer input */
     506        1881 :     if ( *pps == NULL )
     507             :     {
     508        1776 :         return 0;
     509             :     }
     510             : 
     511             :     /* deallocate state struct members */
     512         105 :     free( ( *pps )->buf_out );
     513             : 
     514             :     /* deallocate state struct */
     515         105 :     free( *pps );
     516             : 
     517             :     /* set pointer to NULL */
     518         105 :     *pps = NULL;
     519             : 
     520         105 :     return 0;
     521             : }
     522             : 
     523             : /*
     524             : ********************************************************************************
     525             : *
     526             : *     Function        : apa_exec
     527             : *     Tables          : <none>
     528             : *     Compile Defines : <none>
     529             : *     Return          : 0 on success, 1 on failure
     530             : *     Information     : Execute adaptive playout for audio, i.e., audio scaling.
     531             : *                       Will take l_in input samples from a_in[] and
     532             : *                       try to extend/shrink the amount of samples according
     533             : *                       to the last scaling set by using apa_set_scale().
     534             : *                       The actual amount of samples after scaling may vary
     535             : *                       and is given in l_out. The scaled audio samples
     536             : *                       are contained in a_out[]. Note that the scaling is
     537             : *                       achieved only in average. The input buffer must be
     538             : *                       filled with 20ms audio. The output buffer must be
     539             : *                       allocated externally and must be at least of size
     540             : *                       APA_BUF.
     541             : *                       Scaling can only be performed when a sampling rate
     542             : *                       is specified using apa_set_rate(). Otherwise,
     543             : *                       an error is returned.
     544             : *
     545             : *                       The amount of scaling is achieved by controlling the
     546             : *                       frequency of scaling. Note that the exact amount of
     547             : *                       scaling is signal dependent and is an integer
     548             : *                       multiple of a pitch. Hence, when we want to achieve
     549             : *                       a scaling of e.g. 110% then the APA module will typically
     550             : *                       forward several frames without any modification and
     551             : *                       then scale one frame by a higher amount, e.g. 143%.
     552             : *
     553             : ********************************************************************************
     554             : */
     555       76596 : uint8_t apa_exec(
     556             :     apa_state_t *ps,     /* i/o: state struct                                  */
     557             :     const float a_in[],  /* i  : input samples (ps->l_frm values are read)     */
     558             :     uint16_t l_in,       /* i  : number of input samples, must equal ps->l_frm */
     559             :     uint16_t maxScaling, /* i  : allowed number of inserted/removed samples per channel; 0 disables the discard check in this function */
     560             :     float a_out[],       /* o  : output samples                                */
     561             :     uint16_t *l_out      /* o  : number of output samples, stays 0 on every error return */
     562             : )
     563             : {
     564             :     uint16_t i;
     565             :     float frm_in[APA_BUF]; /* work buffer: [0, l_frm) = most recent output samples, [l_frm, 2*l_frm) = new input samples. TODO(mcjbm): this buffer could be smaller - always allocates space for 16 channels */
     566             :     uint16_t l_frm_out;
     567             :     int16_t l_rem;
     568             :     int32_t dl_scaled, dl_copied, l_frm_out_target;
     569             :     int32_t expScaling, actScaling;
     570             :     uint32_t statsResetThreshold, statsResetShift;
     571             : 
     572       76596 :     statsResetThreshold = 1637;
     573       76596 :     statsResetShift = 2;
     574             : 
     575             :     /* Convert maxScaling from "per channel" to total. NOTE(review): this dereferences ps before the NULL check below, and the product is stored back into a uint16_t */
     576       76596 :     maxScaling *= ps->num_channels;
     577             : 
     578             :     /* make sure no invalid output is used: *l_out is only updated again right before the successful return */
     579       76596 :     *l_out = 0;
     580       76596 :     l_frm_out = 0;
     581             : 
     582             :     /* make sure pointer is valid -> return code 1. NOTE(review): ps has already been dereferenced above, so this check cannot protect that access */
     583       76596 :     if ( ps == NULL )
     584             :     {
     585           0 :         return 1;
     586             :     }
     587             :     /* check available rate -> return code 2 */
     588       76596 :     if ( ps->rate == 0 )
     589             :     {
     590           0 :         return 2;
     591             :     }
     592             :     /* check size of input -> return code 3 */
     593       76596 :     if ( l_in != ps->l_frm )
     594             :     {
     595           0 :         return 3;
     596             :     }
     597             : 
     598             :     /* get target length: expScaling is the length change expected to have accumulated after this frame ( l_frm * ( scale - 100 ) / 100 per frame, rounded to nearest via +/-0.5f and the truncating cast ); the target is expScaling - diffSinceSetScale + l_frm */
     599       76596 :     if ( ps->scale > 100 )
     600             :     {
     601        1407 :         expScaling = (int32_t) ( ( ps->l_frm * ( ps->scale - 100.0f ) / 100.0f ) * ( ps->nFramesSinceSetScale + 1 ) + 0.5f );
     602             :     }
     603       75189 :     else if ( ps->scale < 100 )
     604             :     {
     605         804 :         expScaling = (int32_t) ( ( ps->l_frm * ( ps->scale - 100.0f ) / 100.0f ) * ( ps->nFramesSinceSetScale + 1 ) - 0.5f );
     606             :     }
     607             :     else
     608             :     {
     609       74385 :         expScaling = 0;
     610             :     }
     611       76596 :     actScaling = ps->diffSinceSetScale - ps->l_frm;
     612       76596 :     l_frm_out_target = expScaling - actScaling;
     613             : 
     614             :     /* Wait until we have l_frm output samples buffered in ps->buf_out */
     615             :     /* (required to search for correlation in the past). */
     616             :     /* If we don't have enough samples, simply copy input to output unmodified */
     617       76596 :     if ( ps->l_buf_out < ps->l_frm )
     618             :     {
     619     8430288 :         for ( i = 0; i < ps->l_frm; i++ )
     620             :         {
     621     8426880 :             a_out[i] = a_in[i];
     622             :         }
     623        3408 :         l_frm_out = ps->l_frm;
     624             :     }
     625             :     else
     626             :     {
     627       73188 :         float *buf_out_ptr = &( ps->buf_out[ps->l_buf_out - ps->l_frm] );
     628       73188 :         float *frm_in_ptr = &( frm_in[ps->l_frm] );
     629             : 
     630             :         /* fill input frame (2 * l_frm samples in frm_in) */
     631             :         /* 1st input frame: the last l_frm samples of the internal output buffer */
     632   160957668 :         for ( i = 0; i < ps->l_frm; i++ )
     633             :         {
     634   160884480 :             frm_in[i] = buf_out_ptr[i];
     635             :         }
     636             :         /* 2nd input frame: new input samples from a_in */
     637   160957668 :         for ( i = 0; i < ps->l_frm; i++ )
     638             :         {
     639   160884480 :             frm_in_ptr[i] = a_in[i];
     640             :         }
     641             :         /* no scaling: plain copy of the new input */
     642       73188 :         if ( ps->scale == 100 )
     643             :         {
     644       71331 :             copy_frm( ps, frm_in, a_out, &l_frm_out );
     645             :         }
     646             :         /* shrink (scale < 100) */
     647        1857 :         else if ( ps->scale < 100 )
     648             :         {
     649         768 :             shrink_frm( ps, frm_in, maxScaling, a_out, &l_frm_out );
     650             :         }
     651             :         /* extend (scale > 100) */
     652             :         else
     653             :         {
     654        1089 :             extend_frm( ps, frm_in, a_out, &l_frm_out );
     655             :         }
     656             :         /* control the amount/frequency of scaling: a scaled frame is replaced by a plain copy when it violates maxScaling or when the unscaled length is closer to the target */
     657       73188 :         if ( l_frm_out != ps->l_frm )
     658             :         {
     659         312 :             if ( maxScaling != 0U &&
     660         312 :                  abs( (int16_t) ( ps->l_frm - l_frm_out ) ) > maxScaling )
     661             :             {
     662             :                 /* maxScaling exceeded -> discard scaled frame and output the unmodified input instead */
     663         102 :                 copy_frm( ps, frm_in, a_out, &l_frm_out );
     664             :             }
     665         210 :             else if ( abs( l_frm_out_target ) > ps->l_frm ) /* ignore small difference: only compare against the target when its magnitude exceeds one frame length */
     666             :             {
     667         168 :                 dl_copied = l_frm_out_target - (int32_t) ps->l_frm;
     668         168 :                 dl_scaled = l_frm_out_target - (int32_t) l_frm_out;
     669             :                 /* discard scaled frame if copied frame is strictly closer to target length */
     670         168 :                 if ( abs( dl_copied ) < abs( dl_scaled ) )
     671             :                 {
     672           9 :                     copy_frm( ps, frm_in, a_out, &l_frm_out );
     673             :                 }
     674             :             }
     675             :         }
     676             :     }
     677             : 
     678             :     /* copy output to internal buffer */
     679             :     /* avoid buffer overflow: if appending would exceed buf_out_capacity, move the newest l_rem = max( l_frm - l_frm_out, 0 ) samples to the buffer start */
     680             :     /* discard old samples; always keep at least most recent l_frm samples (l_rem kept + l_frm_out appended) */
     681       76596 :     if ( ( ps->l_buf_out + l_frm_out ) > ps->buf_out_capacity )
     682             :     {
     683       20085 :         float *buf_out_ptr1 = ps->buf_out;
     684             :         float *buf_out_ptr2;
     685             : 
     686       20085 :         l_rem = ( ps->l_frm - l_frm_out );
     687       20085 :         if ( l_rem < 0 )
     688             :         {
     689          81 :             l_rem = 0;
     690             :         }
     691       20085 :         buf_out_ptr2 = &( ps->buf_out[ps->l_buf_out - l_rem] );
     692       24045 :         for ( i = 0; i < l_rem; i++ )
     693             :         {
     694        3960 :             buf_out_ptr1[i] = buf_out_ptr2[i];
     695             :         }
     696       20085 :         ps->l_buf_out = l_rem;
     697             :     }
     698             :     /* append new output samples; return code 5 if they still do not fit after the compaction above */
     699       76596 :     if ( ( ps->l_buf_out + l_frm_out ) > ps->buf_out_capacity )
     700             :     {
     701           0 :         return 5;
     702             :     }
     703             :     {
     704       76596 :         float *buf_out_ptr = &( ps->buf_out[ps->l_buf_out] );
     705   169547742 :         for ( i = 0; i < l_frm_out; i++ )
     706             :         {
     707   169471146 :             buf_out_ptr[i] = a_out[i];
     708             :         }
     709             :     }
     710       76596 :     ps->l_buf_out += l_frm_out;
     711             : 
     712       76596 :     *l_out = l_frm_out;
     713             :     /* update time: total number of consumed input samples, then the scaling statistics (accumulated length difference and frame count since the scale was set) */
     714       76596 :     ps->l_in_total += ps->l_frm;
     715             : 
     716       76596 :     if ( abs( ps->diffSinceSetScale ) < ( 0x7FFFFF - ( l_frm_out - ps->l_frm ) ) &&
     717       76596 :          ps->nFramesSinceSetScale < statsResetThreshold )
     718             :     {
     719       76596 :         ps->diffSinceSetScale += l_frm_out - ps->l_frm;
     720       76596 :         ++ps->nFramesSinceSetScale;
     721             :     }
     722             :     else /* scale statistics down to avoid overflow (both values >> statsResetShift). NOTE(review): the current frame's length difference is not accumulated on this path; diffSinceSetScale is presumably signed, in which case >> on a negative value is implementation-defined - confirm */
     723             :     {
     724           0 :         ps->diffSinceSetScale >>= statsResetShift;
     725           0 :         ps->nFramesSinceSetScale >>= statsResetShift;
     726             :     }
     727             : 
     728       76596 :     return 0;
     729             : }
     730             : 
     731             : 
     732             : /*---------------------------------------------------------------------*
     733             :  * Local functions
     734             :  *---------------------------------------------------------------------*/
     735             : 
     736             : /*
     737             : ********************************************************************************
     738             : *
     739             : *     Function        : get_scaling_quality
     740             : *     Tables          : <none>
     741             : *     Compile Defines : <none>
     742             : *     Return          : none (void); results are written to *energydB and *quality
     743             : *     Information     : Uses pitch, half pitch, three halves and double pitch
     744             : *                       to evaluate the quality of the scaled frame by checking
     745             : *                       periodicity.
     746             : *                       Silence can be detected as additional feature. This must
     747             : *                       be set in global struct apa_state.
     748             : *
     749             : *                       If search length is very narrow then use fewer points
     750             : *                       to evaluate periodicity and silence.
     751             : *
     752             : *                       Computationally not very efficient by using normalized
     753             : *                       cross-correlation: Using sqrt() for energy calculation
     754             : *                       adds complexity.
     755             : *
     756             : *     03-AUG-04  S.Doehla        initial version
     757             : *
     758             : ********************************************************************************
     759             : */
     760        1857 : static void get_scaling_quality(
     761             :     const apa_state_t *ps,
     762             :     const float *signal,
     763             :     uint16_t s_len,
     764             :     uint16_t offset,
     765             :     uint16_t corr_len,
     766             :     uint16_t pitch,
     767             :     float *energydB,
     768             :     float *quality )
     769             : {
     770        1857 :     float maxEnergy = 0.0f;
     771        1857 :     float qualityOfMaxEnergy = 0.0f; /* we measure the quality for all channels and select the one with highest energy; stays 0 if no channel has energy > 0 */
     772             : 
     773        1857 :     float half_pitch_cn = 0.0f;
     774        1857 :     float pitch_cn = 0.0f;
     775        1857 :     float three_halves_pitch_cn = 0.0f;
     776        1857 :     float double_pitch_cn = 0.0f;
     777             : 
     778        1857 :     float pitch_energy = 0.0f;
     779        1857 :     float half_pitch_energy = 0.0f;
     780        1857 :     float three_halves_pitch_energy = 0.0f;
     781        1857 :     float double_pitch_energy = 0.0f;
     782             : 
     783        1857 :     uint16_t i = 0;
     784             : 
     785        6660 :     for ( i = 0; i < ps->num_channels; i++ )
     786             :     {
     787             :         float energy;
     788        4803 :         offset = 0; /* NOTE(review): overrides the caller-supplied offset on every channel iteration */
     789             : 
     790        4803 :         pitch_cn = normalized_cross_correlation_self( signal, pitch + offset, offset, corr_len, ps->num_channels * 2, &pitch_energy );
     791        4803 :         if ( pitch_cn > 0.0f )
     792             :         {
     793             :             /* calculate correlation for double pitch; reuse the single-pitch result if the lagged window would run past s_len */
     794        3390 :             if ( 2 * pitch + offset + corr_len <= s_len )
     795             :             {
     796        2427 :                 double_pitch_cn = normalized_cross_correlation_self( signal, 2 * pitch + offset, offset, corr_len, ps->num_channels * 2, &double_pitch_energy );
     797             :             }
     798             :             else
     799             :             {
     800         963 :                 double_pitch_cn = pitch_cn;
     801         963 :                 double_pitch_energy = pitch_energy;
     802             :             }
     803             :             /* calculate correlation for three/half pitch; same fallback to the single-pitch result */
     804        3390 :             if ( ( 3 * pitch ) / 2 + offset + corr_len <= s_len )
     805             :             {
     806        2829 :                 three_halves_pitch_cn = normalized_cross_correlation_self( signal, ( 3 * pitch ) / 2 + offset, offset, corr_len, ps->num_channels * 2, &three_halves_pitch_energy );
     807             :             }
     808             :             else
     809             :             {
     810         561 :                 three_halves_pitch_cn = pitch_cn;
     811         561 :                 three_halves_pitch_energy = pitch_energy;
     812             :             }
     813             :             /* calculate correlation for half pitch; same fallback to the single-pitch result */
     814        3390 :             if ( pitch / 2 + offset + corr_len <= s_len )
     815             :             {
     816        3390 :                 half_pitch_cn = normalized_cross_correlation_self( signal, pitch / 2 + offset, offset, corr_len, ps->num_channels * 2, &half_pitch_energy );
     817             :             }
     818             :             else
     819             :             {
     820           0 :                 half_pitch_cn = pitch_cn;
     821           0 :                 half_pitch_energy = pitch_energy;
     822             :             }
     823             : 
     824             :             /* combine correlation results: quality is half * three-halves + pitch * double, energy is the sum of all four energies */
     825        3390 :             *quality = ( half_pitch_cn * three_halves_pitch_cn ) + ( pitch_cn * double_pitch_cn );
     826        3390 :             energy = pitch_energy + half_pitch_energy + three_halves_pitch_energy + double_pitch_energy;
     827             :         }
     828             :         else
     829             :         {
     830        1413 :             *quality = pitch_cn; /* value is <= 0 here, thus pass it on unchanged */
     831        1413 :             energy = pitch_energy;
     832             :         }
     833             : 
     834             :         /* update the quality by the quality of the signal with the highest energy (strictly greater, so the first channel wins ties) */
     835        4803 :         if ( energy > maxEnergy )
     836             :         {
     837        2667 :             qualityOfMaxEnergy = *quality;
     838        2667 :             maxEnergy = energy;
     839             :         }
     840             : 
     841             :         /* go to next channel: the signal pointer is advanced by one sample (presumably interleaved channels - confirm) */
     842        4803 :         ++signal;
     843             :     }
     844        1857 :     *quality = qualityOfMaxEnergy;
     845             : 
     846             :     /* increase calculated quality of signals with low energy. NOTE(review): if maxEnergy is still 0 the dB conversion evaluates log10 of 0 */
     847        1857 :     *energydB = apa_corrEnergy2dB( maxEnergy, corr_len );
     848        1857 :     *quality += apa_getQualityIncreaseForLowEnergy( *energydB );
     849             : 
     850        1857 :     return;
     851             : }
     852             : 
     853             : 
     854             : /* Converts the correlation energy to dB: 10 * log10( energy / ( PCM16_TO_FLT_FAC^2 * corr_len * 4 ) ). NOTE(review): energy == 0 and corr_len == 0 are not guarded here. */
     855        1857 : static float apa_corrEnergy2dB(
     856             :     float energy,
     857             :     uint16_t corr_len )
     858             : {
     859        1857 :     float energydB = 10.0f * (float) log10( energy / ( PCM16_TO_FLT_FAC * PCM16_TO_FLT_FAC * corr_len * 4.0f ) );
     860             : 
     861        1857 :     return energydB;
     862             : }
     863             : 
     864             : 
     865             : /* Increases the calculated quality of signals with low energy: returns 0 for energydB >= -40, otherwise energydB is clamped to [-65, -40] and mapped linearly to a bonus from 0 (at -40) up to 2 (at -65 and below). */
     866        1857 : static float apa_getQualityIncreaseForLowEnergy(
     867             :     float energydB )
     868             : {
     869        1857 :     const float qualIncreaseMinEnergy = -65;
     870        1857 :     const float qualIncreaseMaxEnergy = -40;
     871        1857 :     float qualIncForLowEnergy = 0;
     872             : 
     873        1857 :     if ( energydB < qualIncreaseMaxEnergy )
     874             :     {
     875         447 :         qualIncForLowEnergy = energydB;
     876         447 :         if ( qualIncForLowEnergy < qualIncreaseMinEnergy )
     877             :         {
     878           0 :             qualIncForLowEnergy = qualIncreaseMinEnergy;
     879             :         }
     880         447 :         if ( qualIncForLowEnergy > qualIncreaseMaxEnergy )
     881             :         {
     882           0 :             qualIncForLowEnergy = qualIncreaseMaxEnergy;
     883             :         }
     884         447 :         qualIncForLowEnergy = ( qualIncForLowEnergy - qualIncreaseMaxEnergy ) /
     885         447 :                               ( qualIncreaseMinEnergy - qualIncreaseMaxEnergy ) * 2;
     886         447 :         assert( qualIncForLowEnergy >= 0 && qualIncForLowEnergy <= 2 );
     887             :     }
     888             : 
     889        1857 :     return qualIncForLowEnergy;
     890             : }
     891             : 
     892             : 
     893             : /*
     894             : ********************************************************************************
     895             : *
     896             : *     Function        : logarithmic_search
     897             : *     Tables          : <none>
     898             : *     Compile Defines : <none>
     899             : *     Return          : always 0 (false); the best position is written to *synchpos
     900             : *     Information     : Search for best match of a template segment using
     901             : *                       hierarchical search method:
     902             : *                       Parameter css is used for sampling every css'th correlation
     903             : *                       value. The area around the best match so far is used for
     904             : *                       further correlation values with half css-value until css=1.
     905             : *                       Search area length is always half previous search length.
     906             : *                       Parameter wss is passed to the correlation computation.
     907             : *                       If the search area passes the boundaries, the search
     908             : *                       window is reduced so that it's entirely inside the
     909             : *                       boundaries.
     910             : *     NOTE(review)    : the loop below ends as soon as the halved css is <= 2, so css=1 is not reached by refinement; css is assumed >= 1 on entry
     911             : ********************************************************************************
     912             : */
     913        1857 : static bool logarithmic_search(
     914             :     const apa_state_t *ps,
     915             :     const float *signal,
     916             :     int16_t s_start,
     917             :     uint16_t inlen,
     918             :     uint16_t offset,
     919             :     uint16_t fixed_pos,
     920             :     uint16_t corr_len,
     921             :     uint16_t wss,
     922             :     uint16_t css,
     923             :     int16_t *synchpos )
     924             : {
     925             :     int16_t i;
     926             :     float coeff;
     927             :     float coeff_max;
     928        1857 :     int16_t s_start_old = 0;
     929        1857 :     uint16_t s_len_old = 0;
     930             : 
     931             :     do
     932             :     {
     933        1857 :         coeff_max = -FLT_MAX; /* will always be overwritten with result of first correlation; candidates are css * num_channels apart */
     934      311817 :         for ( i = s_start; i < s_start + inlen; i += css * ps->num_channels )
     935             :         {
     936      309960 :             if ( ( wss == 1 ) && ( ps->num_channels == 1 ) )
     937             :             {
     938           0 :                 coeff = cross_correlation_self( signal, i + offset, fixed_pos + offset, corr_len );
     939             :             }
     940             :             else
     941             :             {
     942      309960 :                 coeff = cross_correlation_subsampled_self( signal, i + offset, fixed_pos + offset, corr_len, wss * ps->num_channels );
     943             :             }
     944             : 
     945             :             /* update max corr; the tie-break direction depends on whether we shrink or extend */
     946      309960 :             if ( ps->scale < 100 )
     947             :             {
     948             :                 /* shrinking: prefer greater synchpos for equal coeff (hence >=) */
     949       92160 :                 if ( coeff >= coeff_max )
     950             :                 {
     951       13047 :                     coeff_max = coeff;
     952       13047 :                     *synchpos = i;
     953             :                 }
     954             :             }
     955             :             else
     956             :             {
     957             :                 /* extending: prefer smaller synchpos for equal coeff (hence >) */
     958      217800 :                 if ( coeff > coeff_max )
     959             :                 {
     960       20832 :                     coeff_max = coeff;
     961       20832 :                     *synchpos = i;
     962             :                 }
     963             :             }
     964             :         }
     965             :         /* backup old search range, then halve step and window, centre the new window on the best position and clip it to the old range */
     966        1857 :         s_start_old = s_start;
     967        1857 :         s_len_old = inlen;
     968             : 
     969        1857 :         css = css / 2;
     970        1857 :         inlen = inlen / 2;
     971        1857 :         s_start = *synchpos - inlen / 2;
     972        1857 :         if ( s_start < s_start_old )
     973             :         {
     974         525 :             s_start = s_start_old;
     975             :         }
     976        1857 :         if ( ( s_start + inlen ) > ( s_start_old + s_len_old ) )
     977             :         {
     978         432 :             inlen = s_start_old - s_start + s_len_old;
     979             :         }
     980        1857 :     } while ( css > 2 );
     981             : 
     982        1857 :     return 0;
     983             : }
     984             : 
     985             : 
     986             : /*
     987             : ********************************************************************************
     988             : *
     989             : *     Function        : find_synch
     990             : *     Tables          : <none>
     991             : *     Compile Defines : <none>
     992             : *     Return          : always 0 (false); results are written to *energy, *quality and *synch_pos
     993             : *     Information     : Find the best match of a template segment within
     994             : *                       a search region by similarity measures.
     995             : *
     996             : *                       Typical example:
     997             : *
     998             : *         0         10        20        30        40        50        60
     999             : *  in[] = abcdefghijk_abcdefghijk_abcdefghijk_abcdEFGHIJk_abcdefghijk_a
    1000             : *  l_in = 61
    1001             : *  offset = 30                          |
    1002             : *  s_start = -20    <-------------------|
    1003             : *  s_len   = 15     <------------->     |
    1004             : *  search range:    ***************     |
    1005             : *  fixed_pos = 10                       |--------->
    1006             : *  corr_len = 6                         |         <---->
    1007             : *  template segment:                    |         ******
    1008             : *  synch_pos: -14         <-------------|
    1009             : *
    1010             : *                       All positions are given relative to offset. The
    1011             : *                       search region starts at offset+s_start and ends
    1012             : *                       at offset+s_start+s_len. The template segment
    1013             : *                       starts at offset+fixed_pos and ends at
    1014             : *                       offset+fixed_pos+corr_len. For correlation, the
    1015             : *                       template segment (EFGHIJ) is matched against the
    1016             : *                       segment in the search region, e.g., against (k_abcd)
    1017             : *                       in the first search position. The search position
    1018             : *                       with the best match (-14: EFGHIJ <-> efghij) is
    1019             : *                       returned.
    1020             : *
    1021             : *      19-JUN-03  N.Faerber       initial version
    1022             : *      23-APR-04  S.Doehla        added subsampling
    1023             : *
    1024             : ********************************************************************************
    1025             : */
    1026        1857 : static bool find_synch(
    1027             :     apa_state_t *ps,
    1028             :     const float *in,
    1029             :     uint16_t l_in,
    1030             :     int16_t s_start,
    1031             :     uint16_t s_len,
    1032             :     int16_t fixed_pos,
    1033             :     uint16_t corr_len,
    1034             :     uint16_t offset,
    1035             :     float *energy,
    1036             :     float *quality,
    1037             :     int16_t *synch_pos )
    1038             : {
    1039        1857 :     assert( ( corr_len - 1 + s_start + s_len - 1 + offset ) < l_in );
    1040        1857 :     assert( ( corr_len - 1 + fixed_pos + offset ) < l_in );
    1041             : 
    1042             :     /* pass last pitch to search function as prediction value. NOTE(review): logarithmic_search only writes *synch_pos, so this value survives only if no search position is evaluated */
    1043        1857 :     *synch_pos = ps->last_pitch;
    1044             : 
    1045        1857 :     logarithmic_search( ps, in, s_start, s_len, offset, fixed_pos, corr_len, ps->wss, ps->css, synch_pos );
    1046             : 
    1047             :     /* assert synch_pos is cleanly divisible by number of channels */
    1048        1857 :     assert( *synch_pos % ps->num_channels == 0 );
    1049             : 
    1050        1857 :     *quality = 0;
    1051        1857 :     get_scaling_quality( ps, in, l_in, offset, corr_len, (uint16_t) abs( fixed_pos - *synch_pos ), energy, quality ); /* the pitch argument is the distance between template and best match */
    1052             : 
    1053        1857 :     ps->last_pitch = *synch_pos;
    1054             : 
    1055        1857 :     return 0;
    1056             : }
    1057             : 
    1058             : 
    1059             : /*
    1060             : ********************************************************************************
    1061             : *
    1062             : *     Function        : copy_frm
    1063             : *     Tables          : <none>
    1064             : *     Compile Defines : <none>
    1065             : *     Return          : always 0 (false)
    1066             : *     Information     : Copy an audio frame unmodified.
    1067             : *
    1068             : *                       The frame size is fixed to ps->l_frm. The input data
    1069             : *                       is stored in frm_in[], where the first ps->l_frm samples
    1070             : *                       shall include the previous output frame and the second
    1071             : *                       ps->l_frm samples shall contain the current input frame.
    1072             : *                       The output frame is stored in frm_out[] and contains
    1073             : *                       l_frm_out = ps->l_frm samples.
    1074             : *
    1075             : *                       The first ps->l_frm input samples are not used by
    1076             : *                       this function and are only provided for a consistent
    1077             : *                       function call with shrink_frm() and extend_frm().
    1078             : *
    1079             : ********************************************************************************
    1080             : */
    1081       71442 : static bool copy_frm(
    1082             :     apa_state_t *ps,
    1083             :     const float frm_in[],
    1084             :     float frm_out[],
    1085             :     uint16_t *l_frm_out )
    1086             : {
    1087             :     uint16_t i;
    1088             : 
    1089             :     /* only 2nd input frame is used: skip the first ps->l_frm samples */
    1090       71442 :     frm_in += ps->l_frm;
    1091             : 
    1092             :     /* copy frame sample by sample */
    1093   157047762 :     for ( i = 0; i < ps->l_frm; i++ )
    1094             :     {
    1095   156976320 :         frm_out[i] = frm_in[i];
    1096             :     }
    1097             : 
    1098             :     /* set output length: always exactly one frame */
    1099       71442 :     *l_frm_out = ps->l_frm;
    1100             : 
    1101       71442 :     return 0;
    1102             : }
    1103             : 
    1104             : 
    1105             : /*
    1106             : ********************************************************************************
    1107             : *
    1108             : *     Function        : shrink_frm
    1109             : *     Tables          : <none>
    1110             : *     Compile Defines : <none>
    1111             : *     Return          : 0 on success, 1 on failure
    1112             : *     Information     : Shrink the length of an audio frame using the WSOLA
    1113             : *                       algorithm.
    1114             : *
    1115             : *                       The frame size is fixed to ps->l_frm. The input data
    1116             : *                       is stored in frm_in[], where the first ps->l_frm samples
    1117             : *                       shall include the previous output frame and the second
    1118             : *                       ps->l_frm samples shall contain the current input frame.
    1119             : *                       The output frame is stored in frm_out[] and contains
    1120             : *                       l_frm_out samples. The amount of shrinking is signal
    1121             : *                       dependent.
    1122             : *
    1123             : *                       The first ps->l_frm input samples are not used by
    1124             : *                       this function and are only provided for a consistent
    1125             : *                       function call with extend_frm().
    1126             : *
    1127             : ********************************************************************************
    1128             : */
    1129         768 : static bool shrink_frm(
    1130             :     apa_state_t *ps,
    1131             :     const float frm_in[],
    1132             :     uint16_t maxScaling,
    1133             :     float frm_out[],
    1134             :     uint16_t *l_frm_out )
    1135             : {
    1136         768 :     bool findSynchResult = 0;
    1137             :     int16_t xtract, l_rem, s_start, s_end;
    1138             :     uint16_t i;
    1139             :     uint16_t over;
    1140         768 :     float energy, quality = 0.0f;
    1141             :     uint16_t l_frm;
    1142             :     uint16_t l_seg;
    1143             : 
    1144         768 :     l_frm = ps->l_frm;
    1145         768 :     l_seg = ps->l_seg;
    1146             : 
    1147             :     /* only 2nd input frame is used */
    1148         768 :     frm_in += l_frm;
    1149             : 
    1150             :     /* set search range */
    1151         768 :     s_start = ( ps->p_min / ps->num_channels ) * ps->num_channels;
    1152         768 :     s_end = s_start + ps->l_search;
    1153         768 :     if ( ( s_end + l_seg ) >= l_frm )
    1154             :     {
    1155         768 :         s_end = ( l_frm - l_seg );
    1156             :     }
    1157             : 
    1158             :     /* calculate overlap position */
    1159         768 :     if ( isSilence( frm_in, l_seg, 10 ) )
    1160             :     {
    1161             :         /* maximum scaling */
    1162           0 :         energy = -65;
    1163           0 :         quality = 5;
    1164           0 :         if ( ps->evs_compat_mode == false )
    1165             :         {
    1166             : 
    1167           0 :             xtract = maxScaling;
    1168             :             /* take samples already in the renderer buf into account */
    1169           0 :             xtract += ps->l_r_buf;
    1170             :             /* snap to renderer time slot borders */
    1171           0 :             xtract -= ( ps->l_ts - ( l_frm - xtract + ps->l_r_buf ) % ps->l_ts );
    1172           0 :             while ( xtract < 0 )
    1173             :             {
    1174           0 :                 xtract += ps->l_ts;
    1175             :             }
    1176           0 :             while ( xtract > ( s_end - ps->num_channels ) )
    1177             :             {
    1178             :                 /* exceeded the possible shrinking, go back one renderer time slot */
    1179           0 :                 xtract -= ps->l_ts;
    1180             :             }
    1181             :         }
    1182           0 :         else if ( maxScaling != 0U && s_end > maxScaling + 1 )
    1183             :         {
    1184           0 :             xtract = maxScaling;
    1185             :         }
    1186             :         else
    1187             :         {
    1188             :             /* set to last valid element (i.e. element[len - 1] but note for stereo last element is last pair of samples) */
    1189           0 :             xtract = s_end - ps->num_channels;
    1190             :         }
    1191             :     }
    1192             :     else
    1193             :     {
    1194             :         /* find synch */
    1195         768 :         findSynchResult = find_synch( ps, frm_in, l_frm, s_start, (uint16_t) ( s_end - s_start ), 0, l_seg, 0, &energy, &quality, &xtract );
    1196             :     }
    1197             : 
    1198             :     /* assert synch_pos is cleanly divisible by number of channels */
    1199         768 :     assert( xtract % ps->num_channels == 0 );
    1200             : 
    1201             :     /* set frame overlappable - reset if necessary */
    1202         768 :     over = 1;
    1203             : 
    1204             :     /* test whether frame has sufficient quality */
    1205         768 :     if ( quality < ( ps->targetQuality - ( ps->bad_frame_count * 0.1f ) + ( ps->good_frame_count * 0.2f ) ) )
    1206             :     {
    1207             :         /* not sufficient */
    1208         618 :         over = 0;
    1209         618 :         if ( ps->bad_frame_count < ps->qualityred )
    1210             :         {
    1211         189 :             ++ps->bad_frame_count;
    1212             :         }
    1213         618 :         if ( ps->good_frame_count > 0U )
    1214             :         {
    1215         159 :             --ps->good_frame_count;
    1216             :         }
    1217             :     }
    1218             :     else
    1219             :     {
    1220             :         /* sufficient quality */
    1221         150 :         if ( ps->bad_frame_count > 0U )
    1222             :         {
    1223         150 :             --ps->bad_frame_count;
    1224             :         }
    1225         150 :         if ( ps->good_frame_count < ps->qualityrise )
    1226             :         {
    1227         150 :             ++ps->good_frame_count;
    1228             :         }
    1229             :     }
    1230             : 
    1231             :     /* Calculate output data */
    1232         768 :     if ( over && xtract )
    1233             :     {
    1234         150 :         if ( findSynchResult == 1 )
    1235             :         {
    1236           0 :             return 1;
    1237             :         }
    1238         150 :         if ( ps->evs_compat_mode == true )
    1239             :         {
    1240           0 :             overlapAddEvs( frm_in, frm_in + xtract, frm_out, l_seg, ps->num_channels, ps->win + ps->l_halfwin, ps->win );
    1241             :         }
    1242             :         else
    1243             :         {
    1244         150 :             overlapAdd( frm_in, frm_in + xtract, frm_out, l_seg, ps->num_channels, ps->win + ps->l_halfwin, ps->win );
    1245             :         }
    1246             :     }
    1247             :     else
    1248             :     {
    1249         618 :         xtract = 0;
    1250      659658 :         for ( i = 0; i < l_seg; i++ )
    1251             :         {
    1252      659040 :             frm_out[i] = frm_in[i];
    1253             :         }
    1254             :     }
    1255             : 
    1256             :     /* append remaining samples */
    1257         768 :     l_rem = l_frm - xtract - l_seg;
    1258      722142 :     for ( i = 0; i < l_rem; i++ )
    1259             :     {
    1260      721374 :         frm_out[l_seg + i] = frm_in[l_frm - l_rem + i];
    1261             :     }
    1262             : 
    1263             :     /* set output length */
    1264         768 :     *l_frm_out = l_seg + l_rem;
    1265             : 
    1266         768 :     return 0;
    1267             : }
    1268             : 
    1269             : 
    1270             : /*
    1271             : ********************************************************************************
    1272             : *
    1273             : *     Function        : extend_frm
    1274             : *     Tables          : <none>
    1275             : *     Compile Defines : <none>
    1276             : *     Return          : 0 on success, 1 on failure
    1277             : *     Information     : Extend the length of an audio frame using the WSOLA
    1278             : *                       algorithm.
    1279             : *
    1280             : *                       The frame size is fixed to ps->l_frm. The input data
    1281             : *                       is stored in frm_in[], where the first ps->l_frm samples
    1282             : *                       shall include the previous output frame and the second
    1283             : *                       ps->l_frm samples shall contain the current input frame.
    1284             : *                       The output frame is stored in frm_out[] and contains
    1285             : *                       l_frm_out samples. The amount of extension is signal
    1286             : *                       dependent.
    1287             : *
    1288             : ********************************************************************************
    1289             : */
    1290        1089 : static bool extend_frm(
    1291             :     apa_state_t *ps,
    1292             :     const float frm_in[],
    1293             :     float frm_out[],
    1294             :     uint16_t *l_frm_out )
    1295             : {
    1296        1089 :     bool findSynchResult = 0;
    1297             :     uint16_t l_frm_out_target;
    1298             :     uint16_t n, i;
    1299             :     int16_t N;
    1300             :     int16_t s[MAXN + 2], s_max, s_min;
    1301             :     int16_t xtract[MAXN + 2], sync_start, s_end;
    1302             :     uint16_t over[MAXN + 2];
    1303             :     int16_t l_rem;
    1304        1089 :     int16_t s_start = 0;
    1305        1089 :     float energy, quality = 0.0f;
    1306             :     uint16_t l_frm, l_seg;
    1307             :     const float *fadeOut, *fadeIn;
    1308             :     float *out;
    1309             : 
    1310             : 
    1311        1089 :     l_frm = ps->l_frm;
    1312        1089 :     l_seg = ps->l_seg;
    1313             : 
    1314             :     /* number of segments/iterations */
    1315        1089 :     l_frm_out_target = (uint16_t) ( (float) l_frm * 1.5f );
    1316        1089 :     N = ( l_frm_out_target / l_seg ) - 1;
    1317        1089 :     if ( N < 1 )
    1318             :     {
    1319           0 :         N = 1;
    1320             :     }
    1321        1089 :     if ( N > MAXN )
    1322             :     {
    1323           0 :         return 1;
    1324             :     }
    1325             :     /* calculate equally spaced search regions */
    1326             :     /* s[n] are given relative to 2nd frame and point to the start of */
    1327             :     /* the search region. The first segment (n=1) will not be moved. */
    1328             :     /* Hence, the iterations will start with n=2. */
    1329        1089 :     s_min = -( ps->l_search ) - ( ps->p_min );
    1330             :     /* (make sure not to exceed array dimension) */
    1331        1089 :     if ( l_frm + s_min < 0 )
    1332             :     {
    1333           0 :         s_min = -( l_frm );
    1334             :     }
    1335        1089 :     s_max = l_frm - 2 * l_seg - ps->l_search;
    1336        1089 :     if ( s_max < s_min )
    1337             :     {
    1338           0 :         N = 1;
    1339             :     }
    1340             :     /* for just one segment start at s_min */
    1341        1089 :     if ( N == 1 )
    1342             :     {
    1343           0 :         s[2] = s_min;
    1344             :     }
    1345             :     /* else, spread linear in between s_min and s_max */
    1346             :     /* (including s_min and s_max) */
    1347             :     else
    1348             :     {
    1349        3267 :         for ( n = 2; n <= ( N + 1 ); n++ )
    1350             :         {
    1351        2178 :             s[n] = s_min + ( ( s_max - s_min ) * ( n - 2 ) ) / ( N - 1 );
    1352             :         }
    1353             :     }
    1354             : 
    1355             :     /*
    1356             :      *  Planning Phase
    1357             :      */
    1358             : 
    1359        1089 :     xtract[1] = -( l_seg ); /* make sync_start=0 in 1st iteration */
    1360        1089 :     n = 2;
    1361             : 
    1362             :     /* define synch segment (to be correlated with search region) */
    1363        1089 :     sync_start = xtract[n - 1] + l_seg;
    1364        1089 :     over[n] = 1; /* will be reset if overlap is not required */
    1365             :     /* check end of search region: should be at least p_min */
    1366             :     /* samples on the left of sync_start */
    1367        1089 :     if ( ( s[n] + ps->l_search ) < ( sync_start - ( ps->p_min ) ) )
    1368             :     {
    1369           0 :         s_start = s[n];
    1370           0 :         s_end = s_start + ps->l_search;
    1371             :     }
    1372             :     else
    1373             :     {
    1374             :         /* shrink search region to enforce minimum shift */
    1375        1089 :         s_end = sync_start - ( ps->p_min );
    1376        1089 :         if ( s[n] + ps->l_search < sync_start )
    1377             :         {
    1378        1089 :             s_start = s[n]; /* just do it with normal start position */
    1379             :         }
    1380           0 :         else if ( n == ( N + 1 ) ) /* move search region left for last segment */
    1381             :         {
    1382           0 :             s_start = s_end - ( ps->l_search - ps->p_min );
    1383             :         }
    1384             :         else
    1385             :         {
    1386           0 :             over[n] = 0; /* don't search/overlap (just copy down) */
    1387             :         }
    1388             :     }
    1389             : 
    1390        1089 :     if ( over[n] )
    1391             :     {
    1392             :         /* calculate overlap position */
    1393        1089 :         if ( isSilence( frm_in, l_seg, 10 ) )
    1394             :         {
    1395             :             /* maximum scaling */
    1396           0 :             energy = -65;
    1397           0 :             quality = 5;
    1398           0 :             xtract[n] = s_start + ps->num_channels;
    1399           0 :             if ( ps->evs_compat_mode == false )
    1400             :             {
    1401             :                 /* take renderer buffer samples into account */
    1402           0 :                 xtract[n] += ps->l_r_buf;
    1403             :                 /* snap to next renderer time slot border to resynchronize */
    1404           0 :                 xtract[n] -= ( ( N - 1 ) * l_seg - xtract[n] + ps->l_r_buf ) % ps->l_ts;
    1405             :             }
    1406             :         }
    1407             :         else
    1408             :         {
    1409             :             /* find synch */
    1410        1089 :             findSynchResult = find_synch( ps, frm_in, 2 * l_frm, s_start, s_end - s_start, sync_start, l_seg, l_frm, &energy, &quality, &xtract[n] );
    1411             :         }
    1412             :         /* assert synch_pos is cleanly divisible by number of channels */
    1413        1089 :         assert( xtract[n] % ps->num_channels == 0 );
    1414             : 
    1415             :         /* test for sufficient quality */
    1416        1089 :         if ( quality < ( ps->targetQuality - ( ps->bad_frame_count * 0.1f ) + ( ps->good_frame_count * 0.2f ) ) )
    1417             :         {
    1418             :             /* not sufficient */
    1419         927 :             over[n] = 0;
    1420         927 :             xtract[n] = sync_start;
    1421         927 :             if ( ps->bad_frame_count < ps->qualityred )
    1422             :             {
    1423         369 :                 ++ps->bad_frame_count;
    1424             :             }
    1425         927 :             if ( ps->good_frame_count > 0U )
    1426             :             {
    1427          60 :                 --ps->good_frame_count;
    1428             :             }
    1429             :         }
    1430             :         else
    1431             :         {
    1432             :             /* sufficient quality */
    1433         162 :             if ( ps->bad_frame_count > 0U )
    1434             :             {
    1435         153 :                 --ps->bad_frame_count;
    1436             :             }
    1437         162 :             if ( ps->good_frame_count < ps->qualityrise )
    1438             :             {
    1439         162 :                 ++ps->good_frame_count;
    1440             :             }
    1441             :         }
    1442        1089 :         if ( findSynchResult )
    1443             :         {
    1444           0 :             return 1;
    1445             :         }
    1446             :     }
    1447             :     else
    1448             :     {
    1449           0 :         xtract[n] = sync_start;
    1450             :     }
    1451             : 
    1452             : 
    1453             :     /* Calculate output data */
    1454        2178 :     for ( n = 2; n <= N; n++ )
    1455             :     {
    1456        1089 :         if ( over[n] && xtract[n - 1] + l_seg != xtract[n] )
    1457             :         {
    1458             :             /* mix 2nd half of previous segment with 1st half of current segment */
    1459         162 :             fadeOut = frm_in + l_frm + xtract[n - 1] + l_seg;
    1460         162 :             fadeIn = frm_in + l_frm + xtract[n];
    1461         162 :             out = frm_out + ( n - 2 ) * l_seg;
    1462         162 :             if ( ps->evs_compat_mode == true )
    1463             :             {
    1464           0 :                 overlapAddEvs( fadeOut, fadeIn, out, l_seg, ps->num_channels, ps->win + ps->l_halfwin, ps->win );
    1465             :             }
    1466             :             else
    1467             :             {
    1468         162 :                 overlapAdd( fadeOut, fadeIn, out, l_seg, ps->num_channels, ps->win + ps->l_halfwin, ps->win );
    1469             :             }
    1470             :         }
    1471             :         else
    1472             :         {
    1473             :             /* just copy down 1st half of current segment (= 2nd half of previous segment) */
    1474             :             float *frm_out_ptr;
    1475             :             const float *frm_in_ptr;
    1476         927 :             frm_out_ptr = &( frm_out[( n - 2 ) * l_seg] );
    1477         927 :             frm_in_ptr = &( frm_in[l_frm + xtract[n]] );
    1478     1048287 :             for ( i = 0; i < l_seg; i++ )
    1479             :             {
    1480     1047360 :                 frm_out_ptr[i] = frm_in_ptr[i];
    1481             :             }
    1482             :         }
    1483             :     }
    1484             : 
    1485             :     /* append remaining samples */
    1486        1089 :     l_rem = l_frm - ( xtract[N] + l_seg );
    1487     1442289 :     for ( i = 0; i < l_rem; i++ )
    1488             :     {
    1489     1441200 :         frm_out[( N - 1 ) * l_seg + i] = frm_in[2 * l_frm - l_rem + i];
    1490             :     }
    1491             : 
    1492             :     /* set output length */
    1493        1089 :     *l_frm_out = ( N - 1 ) * l_seg + l_rem;
    1494             : 
    1495        1089 :     return 0;
    1496             : }

Generated by: LCOV version 1.14