Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : /*! @file jbm_pcmdsp_apa.c Adaptive Playout for Audio (apa). */
38 :
39 : /* system headers */
40 : #include <assert.h>
41 : #include <math.h>
42 : #include <stdlib.h>
43 : #include <stdio.h>
44 : #include <stdint.h>
45 : #include "options.h"
46 : #include "prot.h"
47 : #ifdef DEBUGGING
48 : #include "debug.h"
49 : #endif
50 : #include "wmc_auto.h"
51 : /* local headers */
52 : #include "jbm_pcmdsp_apa.h"
53 : #include "jbm_pcmdsp_similarityestimation.h"
54 : #include "jbm_pcmdsp_window.h"
55 : #include "cnst.h"
56 :
57 :
58 : /*---------------------------------------------------------------------*
59 : * Local state structure
60 : *---------------------------------------------------------------------*/
61 :
62 : /* maximum number of segments/iterations in extend_frm() */
63 : #define MAXN 10
64 :
65 : /* Complete state of one APA (adaptive playout) instance. All sample counts are totals over all channels unless noted otherwise. */
66 : struct apa_state_t
67 : {
68 : /* EVS compatibility switch, followed by the output history buffer (storage, capacity and fill level in samples) */
69 : bool evs_compat_mode; /* false after apa_init(); changed via apa_set_evs_compat_mode() */
70 : float *buf_out; /* heap buffer holding previously produced output samples */
71 : uint16_t buf_out_capacity; /* APA_BUF_PER_CHANNEL * num_channels */
72 : uint16_t l_buf_out; /* number of valid samples currently stored in buf_out */
73 :
74 : /* Hann window of length 2 * l_halfwin, filled by apa_set_rate(); independent of the channel count */
75 : float win[APA_BUF_PER_CHANNEL];
76 : uint16_t l_halfwin;
77 :
78 : /* sampling rate [Hz]; 0 means "not configured" and makes apa_exec() fail */
79 : uint16_t rate;
80 :
81 : /* length of a segment [samples], ( rate / 100 ) * num_channels */
82 : uint16_t l_seg;
83 :
84 : /* length of a frame [samples], ( rate / FRAMES_PER_SEC ) * num_channels */
85 : uint16_t l_frm;
86 :
87 : /* total number of processed input samples since apa_reset() or apa_reconfigure() */
88 : uint32_t l_in_total;
89 :
90 : /* time resolution in samples of the IVAS renderer, stored multiplied by num_channels */
91 : uint16_t l_ts;
92 :
93 : /* samples already available in the renderer buffer, stored multiplied by num_channels */
94 : uint16_t l_r_buf;
95 :
96 : /* sum of inserted/removed samples since last apa_set_scale() */
97 : int32_t diffSinceSetScale;
98 : /* number of input frames since last apa_set_scale() */
99 : uint32_t nFramesSinceSetScale;
100 :
101 : /* current scaling ratio [%]; 100 means no time scaling */
102 : uint16_t scale;
103 :
104 : /* minimum pitch length [samples], ( rate / 400 ) * num_channels */
105 : uint16_t p_min;
106 :
107 : /* search length [samples], ( rate / 80 ) * num_channels */
108 : uint16_t l_search;
109 :
110 : uint16_t wss; /* waveform subsampling per channel */
111 : uint16_t css; /* correlation subsampling per channel */
112 :
113 : float targetQuality; /* lower quality limit set by apa_set_quality() */
114 : uint16_t qualityred; /* quality reduction threshold */
115 : uint16_t qualityrise; /* quality rising for adaptive quality thresholds */
116 :
117 : uint16_t last_pitch; /* last pitch/sync position */
118 : uint16_t bad_frame_count; /* # frames before quality threshold is lowered */
119 : uint16_t good_frame_count; /* # scaled frames */
120 :
121 : uint16_t num_channels; /* number of input/output channels */
122 : };
123 :
124 :
125 : /*---------------------------------------------------------------------*
126 : * Local function prototypes
127 : *---------------------------------------------------------------------*/
128 :
129 : static float apa_corrEnergy2dB( float energy, uint16_t corr_len ); /* correlation energy -> dB */
130 :
131 : static float apa_getQualityIncreaseForLowEnergy( float energydB ); /* quality bonus in [0;2] for low-energy signals */
132 :
133 : static bool logarithmic_search( const apa_state_t *ps, const float *signal, int16_t s_start, uint16_t inlen, uint16_t offset, uint16_t fixed_pos, uint16_t corr_len, uint16_t wss, uint16_t css, int16_t *synchpos ); /* hierarchical search for the best correlation position */
134 :
135 : static bool find_synch( apa_state_t *ps, const float *in, uint16_t l_in, int16_t s_start, uint16_t s_len, int16_t fixed_pos, uint16_t corr_len, uint16_t offset, float *energy, float *quality, int16_t *synch_pos ); /* best match of a template segment within a search region */
136 :
137 : static bool copy_frm( apa_state_t *ps, const float frm_in[], float frm_out[], uint16_t *l_frm_out ); /* used by apa_exec() when no scaling is applied */
138 :
139 : static bool shrink_frm( apa_state_t *ps, const float frm_in[], uint16_t maxScaling, float frm_out[], uint16_t *l_frm_out ); /* used by apa_exec() when scale < 100 */
140 :
141 : static bool extend_frm( apa_state_t *ps, const float frm_in[], float frm_out[], uint16_t *l_frm_out ); /* used by apa_exec() when scale > 100 */
142 :
143 : /*---------------------------------------------------------------------*
144 : * Public functions
145 : *---------------------------------------------------------------------*/
146 :
147 : /* Allocates memory for state struct and initializes elements. */
148 105 : ivas_error apa_init(
149 : apa_state_t **pps,
150 : const int32_t num_channels )
151 : {
152 105 : apa_state_t *ps = NULL;
153 :
154 : /* make sure pointer is valid */
155 105 : if ( !pps )
156 : {
157 0 : return 1;
158 : }
159 :
160 : /* allocate state struct */
161 105 : if ( ( ps = (apa_state_t *) malloc( sizeof( apa_state_t ) ) ) == NULL )
162 : {
163 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for JBM\n" ) );
164 : }
165 :
166 105 : ps->num_channels = (uint16_t) num_channels;
167 105 : ps->buf_out_capacity = (uint16_t) ( APA_BUF_PER_CHANNEL * num_channels );
168 105 : if ( ( ps->buf_out = malloc( sizeof( float ) * ps->buf_out_capacity ) ) == NULL )
169 : {
170 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for JBM\n" ) );
171 : }
172 :
173 105 : ps->evs_compat_mode = false;
174 :
175 105 : apa_reset( ps );
176 105 : *pps = ps;
177 :
178 105 : return IVAS_ERR_OK;
179 : }
180 :
181 :
182 : /* Sets state variables to initial value. */
183 210 : void apa_reset(
184 : apa_state_t *ps )
185 : {
186 : /* init state struct */
187 210 : ps->l_buf_out = 0;
188 210 : ps->l_halfwin = 0;
189 210 : ps->rate = 0;
190 210 : ps->l_seg = 0;
191 210 : ps->l_frm = 0;
192 210 : ps->l_in_total = 0;
193 210 : ps->diffSinceSetScale = 0;
194 210 : ps->nFramesSinceSetScale = 0;
195 210 : ps->scale = 100;
196 210 : ps->p_min = 0;
197 210 : ps->l_search = 0;
198 210 : ps->wss = 1;
199 210 : ps->css = 1;
200 210 : ps->targetQuality = 0.0f;
201 210 : ps->qualityred = 0;
202 210 : ps->qualityrise = 0;
203 210 : ps->last_pitch = 0;
204 210 : ps->bad_frame_count = 0;
205 210 : ps->good_frame_count = 0;
206 :
207 210 : ps->l_ts = 1;
208 210 : ps->l_r_buf = 0;
209 210 : return;
210 : }
211 :
212 3303 : uint8_t apa_reconfigure(
213 : apa_state_t *ps,
214 : uint16_t num_channels,
215 : uint16_t l_ts )
216 : {
217 :
218 : /* realloc buffer */
219 3303 : free( ps->buf_out );
220 3303 : ps->num_channels = (uint16_t) num_channels;
221 3303 : ps->buf_out_capacity = (uint16_t) ( APA_BUF_PER_CHANNEL * num_channels );
222 3303 : ps->buf_out = (float *) malloc( sizeof( float ) * ps->buf_out_capacity );
223 3303 : if ( !ps->buf_out )
224 : {
225 0 : return 2;
226 : }
227 3303 : ps->l_buf_out = 0;
228 3303 : ps->l_in_total = 0;
229 3303 : ps->l_ts = ps->num_channels * l_ts;
230 :
231 : /* set everything else dependent on the number of channels */
232 : /* set segment size */
233 : /* in the order of a pitch, set to 160 samples at 16 kHz */
234 : /* used for windowing and as the correlation length, i.e., */
235 : /* the size of the template segment. */
236 3303 : ps->l_seg = ( ps->rate / 100 ) * ps->num_channels;
237 :
238 : /* set frame size */
239 : /* set to 320 samples at 16 kHz */
240 3303 : ps->l_frm = ( ps->rate / FRAMES_PER_SEC ) * ps->num_channels;
241 :
242 : /* set minimum pitch */
243 : /* set to 40 samples at 16 kHz */
244 : /* (defines min change in number of samples, i.e., abs(l_in-l_out) >= p_min) */
245 3303 : ps->p_min = ( ps->rate / 400 ) * ps->num_channels;
246 :
247 : /* set search length */
248 : /* must cover one pitch, set to 200 samples at 16 kHz */
249 : /* (the resulting maximum pitch is then p_min+l_search = 240 samples at 16 kHz) */
250 3303 : ps->l_search = ( ps->rate / 80 ) * ps->num_channels;
251 :
252 3303 : return 0;
253 : }
254 :
255 :
256 : /* Sets the audio configuration. */
257 105 : bool apa_set_rate(
258 : apa_state_t *ps,
259 : const int32_t output_Fs )
260 : {
261 : /* make sure pointer is valid */
262 105 : if ( ps == NULL )
263 : {
264 0 : return 1;
265 : }
266 :
267 : /* check range */
268 105 : if ( ( output_Fs < APA_MIN_RATE ) || ( output_Fs > APA_MAX_RATE ) )
269 : {
270 0 : return 1;
271 : }
272 :
273 : /* reset state struct */
274 105 : apa_reset( ps );
275 :
276 : /* copy rate to state struct */
277 105 : ps->rate = (uint16_t) output_Fs;
278 :
279 105 : if ( ps->num_channels > APA_MAX_NUM_CHANNELS )
280 : {
281 0 : return 1;
282 : }
283 :
284 : /*
285 : * several other parameters depend on the sampling rate
286 : * and are set below. Some "magic numbers" are used here
287 : * which are based on typical values of a "pitch" in
288 : * human voice. The pitch length is the period of the
289 : * base frequency and is usually assumed to be 40-240
290 : * samples at 16 kHz.
291 : */
292 :
293 : /* set segment size */
294 : /* in the order of a pitch, set to 160 samples at 16 kHz */
295 : /* used for windowing and as the correlation length, i.e., */
296 : /* the size of the template segment. */
297 105 : ps->l_seg = ( ps->rate / 100 ) * ps->num_channels;
298 :
299 : /* init Hann window */
300 : /* Note: l_win < APA_BUF_PER_CHANNEL is required */
301 : /* Length of Hann window should be independent of
302 : * number of channels - same window applied to all channels */
303 105 : ps->l_halfwin = ps->rate / 100;
304 105 : hannWindow( ps->l_halfwin * 2, ps->win );
305 :
306 : /* set frame size */
307 : /* set to 320 samples at 16 kHz */
308 105 : ps->l_frm = ( ps->rate / FRAMES_PER_SEC ) * ps->num_channels;
309 :
310 : /* set minimum pitch */
311 : /* set to 40 samples at 16 kHz */
312 : /* (defines min change in number of samples, i.e., abs(l_in-l_out) >= p_min) */
313 105 : ps->p_min = ( ps->rate / 400 ) * ps->num_channels;
314 :
315 : /* set search length */
316 : /* must cover one pitch, set to 200 samples at 16 kHz */
317 : /* (the resulting maximum pitch is then p_min+l_search = 240 samples at 16 kHz) */
318 105 : ps->l_search = ( ps->rate / 80 ) * ps->num_channels;
319 :
320 105 : return 0;
321 : }
322 :
323 :
324 : /* Set scaling. */
325 76596 : bool apa_set_scale(
326 : apa_state_t *ps,
327 : uint16_t scale )
328 : {
329 : /* make sure pointer is valid */
330 76596 : if ( ps == NULL )
331 : {
332 0 : return 1;
333 : }
334 :
335 : /* check range */
336 76596 : if ( ( scale < APA_MIN_SCALE ) || ( scale > APA_MAX_SCALE ) )
337 : {
338 0 : return 1;
339 : }
340 :
341 : /* do nothing if same scale is set multiple times */
342 : /* (otherwise scale control is confused) */
343 76596 : if ( ps->scale == scale )
344 : {
345 76224 : return 0;
346 : }
347 :
348 : /* copy to state struct */
349 372 : ps->scale = scale;
350 :
351 : /* reset scaling statistics */
352 372 : ps->diffSinceSetScale = 0;
353 372 : ps->nFramesSinceSetScale = 0;
354 :
355 372 : return 0;
356 : }
357 :
358 105 : bool apa_set_renderer_granularity(
359 : apa_state_t *ps,
360 : uint16_t l_ts )
361 : {
362 : /* make sure pointer is valid */
363 105 : if ( ps == NULL )
364 : {
365 0 : return 1;
366 : }
367 :
368 :
369 : /* copy to state struct */
370 105 : ps->l_ts = l_ts * ps->num_channels;
371 105 : return 0;
372 : }
373 :
374 76596 : bool apa_set_renderer_residual_samples(
375 : apa_state_t *ps,
376 : uint16_t l_r_buf )
377 : {
378 : /* make sure pointer is valid */
379 76596 : if ( ps == NULL )
380 : {
381 0 : return 1;
382 : }
383 :
384 :
385 : /* copy to state struct */
386 76596 : ps->l_r_buf = l_r_buf * ps->num_channels;
387 76596 : return 0;
388 : }
389 :
390 0 : bool apa_set_evs_compat_mode(
391 : apa_state_t *ps,
392 : bool mode )
393 : {
394 : /* make sure pointer is valid */
395 0 : if ( ps == NULL )
396 : {
397 0 : return 1;
398 : }
399 :
400 0 : ps->evs_compat_mode = mode;
401 :
402 0 : return 0;
403 : }
404 :
405 : /*
406 : ********************************************************************************
407 : *
408 : * Function : apa_set_quality
409 : * Tables : <none>
410 : * Compile Defines : <none>
411 : * Return : 0 on success, 1 on failure
412 : * Information : Set quality thresholds.
413 : *
414 : * quality is lower limit for minimum quality
415 : * Range is [-2;2] - where positive values allow
416 : * only pasting with same phase information
417 : * Negative values would yield cross phased pasting
418 : *
419 : * qualityred allows dynamic lowering of lower quality
420 : * bound - this gives better results for rhythmic signals
421 : * Range is [0;20], meaning 0.1 lowering*qualityred
422 : *
423 : * undocumented: qualityrise (same as qualityred - other
424 : * direction)
425 : *
426 : ********************************************************************************
427 : */
428 105 : bool apa_set_quality(
429 : apa_state_t *ps,
430 : float quality,
431 : uint16_t qualityred,
432 : uint16_t qualityrise )
433 : {
434 105 : assert( ps != NULL );
435 105 : assert( -2.0f <= quality && quality <= 3.1f );
436 105 : assert( qualityred > 0 && qualityred <= 20 );
437 105 : assert( qualityrise > 0 && qualityrise <= 20 );
438 :
439 105 : ps->targetQuality = quality;
440 105 : ps->qualityred = qualityred;
441 105 : ps->qualityrise = qualityrise;
442 105 : ps->bad_frame_count = 0;
443 105 : ps->good_frame_count = 0;
444 :
445 105 : return 0;
446 : }
447 :
448 : /*
449 : ********************************************************************************
450 : *
451 : * Function : apa_set_complexity_options
452 : * Tables : <none>
453 : * Compile Defines : <none>
454 : * Return : 0 on success, 1 on failure
455 : * Information : Set complexity options
456 : * Waveform subsampling computes the correlation function
457 : * for certain positions only
458 : * Correlation function subsampling computes the maxima
459 : * for certain positions only
460 : *
461 : ********************************************************************************
462 : */
463 105 : bool apa_set_complexity_options(
464 : apa_state_t *ps,
465 : uint16_t wss,
466 : uint16_t css )
467 : {
468 : /* make sure pointer is valid */
469 105 : if ( ps == NULL )
470 : {
471 0 : return 1;
472 : }
473 :
474 105 : if ( wss == 0 || wss > 1000 )
475 : {
476 0 : return 1;
477 : }
478 :
479 105 : if ( css == 0 || css > 1000 )
480 : {
481 0 : return 1;
482 : }
483 :
484 105 : ps->wss = wss;
485 105 : ps->css = css;
486 :
487 105 : return 0;
488 : }
489 :
490 : /*
491 : ********************************************************************************
492 : *
493 : * Function : apa_exit
494 : * Tables : <none>
495 : * Compile Defines : <none>
496 : * Return : 0 on success, 1 on failure
497 : * Information : The memory used for storing the state is freed.
498 : * The state struct pointer is set to NULL.
499 : *
500 : ********************************************************************************
501 : */
502 1881 : bool apa_exit(
503 : apa_state_t **pps )
504 : {
505 : /* ignore NULL pointer input */
506 1881 : if ( *pps == NULL )
507 : {
508 1776 : return 0;
509 : }
510 :
511 : /* deallocate state struct members */
512 105 : free( ( *pps )->buf_out );
513 :
514 : /* deallocate state struct */
515 105 : free( *pps );
516 :
517 : /* set pointer to NULL */
518 105 : *pps = NULL;
519 :
520 105 : return 0;
521 : }
522 :
523 : /*
524 : ********************************************************************************
525 : *
526 : * Function : apa_exec
527 : * Tables : <none>
528 : * Compile Defines : <none>
529 : * Return : 0 on success, 1 on failure
530 : * Information : Execute adaptive playout for audio, i.e., audio scaling.
531 : * Will take l_in input samples from a_in[] and
532 : * try to extend/shrink the amount of samples according
533 : * to the last scaling set by using apa_set_scale().
534 : * The actual amount of samples after scaling may vary
535 : * and is given in l_out. The scaled audio samples
536 : * are contained in a_out[]. Note that the scaling is
537 : * achieved only in average. The input buffer must be
538 : * filled with 20ms audio. The output buffer must be
539 : * allocated externally and must be at least of size
540 : * APA_BUF.
541 : * Scaling can only be performed when a sampling rate
542 : * is specified using apa_set_rate(). Otherwise,
543 : * an error is returned.
544 : *
545 : * The amount of scaling is achieved by controlling the
546 : * frequency of scaling. Note that the exact amount of
547 : * scaling is signal dependent and is an integer
548 : * multiple of a pitch. Hence, when we want to achieve
549 : * a scaling of e.g. 110% then the APA module will typically
550 : * forward several frames without any modification and
551 : * then scale one frame by a higher amount, e.g. 143%.
552 : *
553 : ********************************************************************************
554 : */
555 76596 : uint8_t apa_exec(
556 : apa_state_t *ps, /* i/o: state struct */
557 : const float a_in[], /* i : input samples */
558 : uint16_t l_in, /* i : number of input samples */
559 : uint16_t maxScaling, /* i : allowed number of inserted/removed samples */
560 : float a_out[], /* o : output samples */
561 : uint16_t *l_out /* o : number of output samples */
562 : )
563 : {
564 : uint16_t i;
565 : float frm_in[APA_BUF]; /* TODO(mcjbm): this buffer could be smaller - always allocates space for 16 channels */
566 : uint16_t l_frm_out;
567 : int16_t l_rem;
568 : int32_t dl_scaled, dl_copied, l_frm_out_target;
569 : int32_t expScaling, actScaling;
570 : uint32_t statsResetThreshold, statsResetShift;
571 :
572 76596 : statsResetThreshold = 1637;
573 76596 : statsResetShift = 2;
574 :
575 : /* Convert max_scaling from "per channel" to total */
576 76596 : maxScaling *= ps->num_channels;
577 :
578 : /* make sure no invalid output is used */
579 76596 : *l_out = 0;
580 76596 : l_frm_out = 0;
581 :
582 : /* make sure pointer is valid */
583 76596 : if ( ps == NULL )
584 : {
585 0 : return 1;
586 : }
587 : /* check available rate */
588 76596 : if ( ps->rate == 0 )
589 : {
590 0 : return 2;
591 : }
592 : /* check size of input */
593 76596 : if ( l_in != ps->l_frm )
594 : {
595 0 : return 3;
596 : }
597 :
598 : /* get target length */
599 76596 : if ( ps->scale > 100 )
600 : {
601 1407 : expScaling = (int32_t) ( ( ps->l_frm * ( ps->scale - 100.0f ) / 100.0f ) * ( ps->nFramesSinceSetScale + 1 ) + 0.5f );
602 : }
603 75189 : else if ( ps->scale < 100 )
604 : {
605 804 : expScaling = (int32_t) ( ( ps->l_frm * ( ps->scale - 100.0f ) / 100.0f ) * ( ps->nFramesSinceSetScale + 1 ) - 0.5f );
606 : }
607 : else
608 : {
609 74385 : expScaling = 0;
610 : }
611 76596 : actScaling = ps->diffSinceSetScale - ps->l_frm;
612 76596 : l_frm_out_target = expScaling - actScaling;
613 :
614 : /* Wait until we have l_frm outputs samples */
615 : /* (required to search for correlation in the past). */
616 : /* If we don't have enough samples, simply copy input to output */
617 76596 : if ( ps->l_buf_out < ps->l_frm )
618 : {
619 8430288 : for ( i = 0; i < ps->l_frm; i++ )
620 : {
621 8426880 : a_out[i] = a_in[i];
622 : }
623 3408 : l_frm_out = ps->l_frm;
624 : }
625 : else
626 : {
627 73188 : float *buf_out_ptr = &( ps->buf_out[ps->l_buf_out - ps->l_frm] );
628 73188 : float *frm_in_ptr = &( frm_in[ps->l_frm] );
629 :
630 : /* fill input frame */
631 : /* 1st input frame: previous output samples */
632 160957668 : for ( i = 0; i < ps->l_frm; i++ )
633 : {
634 160884480 : frm_in[i] = buf_out_ptr[i];
635 : }
636 : /* 2nd input frame: new input samples */
637 160957668 : for ( i = 0; i < ps->l_frm; i++ )
638 : {
639 160884480 : frm_in_ptr[i] = a_in[i];
640 : }
641 : /* no scaling */
642 73188 : if ( ps->scale == 100 )
643 : {
644 71331 : copy_frm( ps, frm_in, a_out, &l_frm_out );
645 : }
646 : /* shrink */
647 1857 : else if ( ps->scale < 100 )
648 : {
649 768 : shrink_frm( ps, frm_in, maxScaling, a_out, &l_frm_out );
650 : }
651 : /* extend */
652 : else
653 : {
654 1089 : extend_frm( ps, frm_in, a_out, &l_frm_out );
655 : }
656 : /* control the amount/frequency of scaling */
657 73188 : if ( l_frm_out != ps->l_frm )
658 : {
659 312 : if ( maxScaling != 0U &&
660 312 : abs( (int16_t) ( ps->l_frm - l_frm_out ) ) > maxScaling )
661 : {
662 : /* maxScaling exceeded -> discard scaled frame */
663 102 : copy_frm( ps, frm_in, a_out, &l_frm_out );
664 : }
665 210 : else if ( abs( l_frm_out_target ) > ps->l_frm ) /* ignore small difference */
666 : {
667 168 : dl_copied = l_frm_out_target - (int32_t) ps->l_frm;
668 168 : dl_scaled = l_frm_out_target - (int32_t) l_frm_out;
669 : /* discard scaled frame if copied frame is closer to target length */
670 168 : if ( abs( dl_copied ) < abs( dl_scaled ) )
671 : {
672 9 : copy_frm( ps, frm_in, a_out, &l_frm_out );
673 : }
674 : }
675 : }
676 : }
677 :
678 : /* copy output to internal buffer */
679 : /* avoid buffer overflow: */
680 : /* discard old samples; always keep at least most recent l_frm samples */
681 76596 : if ( ( ps->l_buf_out + l_frm_out ) > ps->buf_out_capacity )
682 : {
683 20085 : float *buf_out_ptr1 = ps->buf_out;
684 : float *buf_out_ptr2;
685 :
686 20085 : l_rem = ( ps->l_frm - l_frm_out );
687 20085 : if ( l_rem < 0 )
688 : {
689 81 : l_rem = 0;
690 : }
691 20085 : buf_out_ptr2 = &( ps->buf_out[ps->l_buf_out - l_rem] );
692 24045 : for ( i = 0; i < l_rem; i++ )
693 : {
694 3960 : buf_out_ptr1[i] = buf_out_ptr2[i];
695 : }
696 20085 : ps->l_buf_out = l_rem;
697 : }
698 : /* append new output samples */
699 76596 : if ( ( ps->l_buf_out + l_frm_out ) > ps->buf_out_capacity )
700 : {
701 0 : return 5;
702 : }
703 : {
704 76596 : float *buf_out_ptr = &( ps->buf_out[ps->l_buf_out] );
705 169547742 : for ( i = 0; i < l_frm_out; i++ )
706 : {
707 169471146 : buf_out_ptr[i] = a_out[i];
708 : }
709 : }
710 76596 : ps->l_buf_out += l_frm_out;
711 :
712 76596 : *l_out = l_frm_out;
713 : /* update time */
714 76596 : ps->l_in_total += ps->l_frm;
715 :
716 76596 : if ( abs( ps->diffSinceSetScale ) < ( 0x7FFFFF - ( l_frm_out - ps->l_frm ) ) &&
717 76596 : ps->nFramesSinceSetScale < statsResetThreshold )
718 : {
719 76596 : ps->diffSinceSetScale += l_frm_out - ps->l_frm;
720 76596 : ++ps->nFramesSinceSetScale;
721 : }
722 : else /* scale statistics down to avoid overflow */
723 : {
724 0 : ps->diffSinceSetScale >>= statsResetShift;
725 0 : ps->nFramesSinceSetScale >>= statsResetShift;
726 : }
727 :
728 76596 : return 0;
729 : }
730 :
731 :
732 : /*---------------------------------------------------------------------*
733 : * Local functions
734 : *---------------------------------------------------------------------*/
735 :
736 : /*
737 : ********************************************************************************
738 : *
739 : * Function : get_scaling_quality
740 : * Tables : <none>
741 : * Compile Defines : <none>
742 : * Return : 0 on success, 1 on failure
743 : * Information : Uses pitch, half pitch, three halves and double pitch
744 : * to evaluate the quality of the scaled frame by checking
745 : * periodicity.
746 : * Silence can be detected as additional feature. This must
747 : * be set in global struct apa_state.
748 : *
749 : * If search length is very narrow then use fewer points
750 : * to evaluate periodicity and silence.
751 : *
752 : * Computationally not very efficient by using normalized
753 : * cross-correlation: Using sqrt() for energy calculation
754 : * adds complexity.
755 : *
756 : * 03-AUG-04 S.Doehla initial version
757 : *
758 : ********************************************************************************
759 : */
760 1857 : static void get_scaling_quality(
761 : const apa_state_t *ps,
762 : const float *signal,
763 : uint16_t s_len,
764 : uint16_t offset,
765 : uint16_t corr_len,
766 : uint16_t pitch,
767 : float *energydB,
768 : float *quality )
769 : {
770 1857 : float maxEnergy = 0.0f;
771 1857 : float qualityOfMaxEnergy = 0.0f; /* we measure the quality for all channels and select the one with highest energy */
772 :
773 1857 : float half_pitch_cn = 0.0f;
774 1857 : float pitch_cn = 0.0f;
775 1857 : float three_halves_pitch_cn = 0.0f;
776 1857 : float double_pitch_cn = 0.0f;
777 :
778 1857 : float pitch_energy = 0.0f;
779 1857 : float half_pitch_energy = 0.0f;
780 1857 : float three_halves_pitch_energy = 0.0f;
781 1857 : float double_pitch_energy = 0.0f;
782 :
783 1857 : uint16_t i = 0;
784 :
785 6660 : for ( i = 0; i < ps->num_channels; i++ )
786 : {
787 : float energy;
788 4803 : offset = 0;
789 :
790 4803 : pitch_cn = normalized_cross_correlation_self( signal, pitch + offset, offset, corr_len, ps->num_channels * 2, &pitch_energy );
791 4803 : if ( pitch_cn > 0.0f )
792 : {
793 : /* calculate correlation for double pitch */
794 3390 : if ( 2 * pitch + offset + corr_len <= s_len )
795 : {
796 2427 : double_pitch_cn = normalized_cross_correlation_self( signal, 2 * pitch + offset, offset, corr_len, ps->num_channels * 2, &double_pitch_energy );
797 : }
798 : else
799 : {
800 963 : double_pitch_cn = pitch_cn;
801 963 : double_pitch_energy = pitch_energy;
802 : }
803 : /* calculate correlation for three/half pitch */
804 3390 : if ( ( 3 * pitch ) / 2 + offset + corr_len <= s_len )
805 : {
806 2829 : three_halves_pitch_cn = normalized_cross_correlation_self( signal, ( 3 * pitch ) / 2 + offset, offset, corr_len, ps->num_channels * 2, &three_halves_pitch_energy );
807 : }
808 : else
809 : {
810 561 : three_halves_pitch_cn = pitch_cn;
811 561 : three_halves_pitch_energy = pitch_energy;
812 : }
813 : /* calculate correlation for half pitch */
814 3390 : if ( pitch / 2 + offset + corr_len <= s_len )
815 : {
816 3390 : half_pitch_cn = normalized_cross_correlation_self( signal, pitch / 2 + offset, offset, corr_len, ps->num_channels * 2, &half_pitch_energy );
817 : }
818 : else
819 : {
820 0 : half_pitch_cn = pitch_cn;
821 0 : half_pitch_energy = pitch_energy;
822 : }
823 :
824 : /* combine correlation results */
825 3390 : *quality = ( half_pitch_cn * three_halves_pitch_cn ) + ( pitch_cn * double_pitch_cn );
826 3390 : energy = pitch_energy + half_pitch_energy + three_halves_pitch_energy + double_pitch_energy;
827 : }
828 : else
829 : {
830 1413 : *quality = pitch_cn; /* value is negative, thus pass it */
831 1413 : energy = pitch_energy;
832 : }
833 :
834 : /* update the quality by the quality of the signal with the highest energy */
835 4803 : if ( energy > maxEnergy )
836 : {
837 2667 : qualityOfMaxEnergy = *quality;
838 2667 : maxEnergy = energy;
839 : }
840 :
841 : /* go to next channel */
842 4803 : ++signal;
843 : }
844 1857 : *quality = qualityOfMaxEnergy;
845 :
846 : /* increase calculated quality of signals with low energy */
847 1857 : *energydB = apa_corrEnergy2dB( maxEnergy, corr_len );
848 1857 : *quality += apa_getQualityIncreaseForLowEnergy( *energydB );
849 :
850 1857 : return;
851 : }
852 :
853 :
854 : /* Converts the correlation energy to dB. */
855 1857 : static float apa_corrEnergy2dB(
856 : float energy,
857 : uint16_t corr_len )
858 : {
859 1857 : float energydB = 10.0f * (float) log10( energy / ( PCM16_TO_FLT_FAC * PCM16_TO_FLT_FAC * corr_len * 4.0f ) );
860 :
861 1857 : return energydB;
862 : }
863 :
864 :
/*
 * Returns the bonus that is added to the calculated quality of signals with
 * low energy.
 *
 *   energydB : signal energy in dB
 *
 * The bonus is 0 for energies of -40 dB and above, rises linearly as the
 * energy drops and is capped at 2 for energies of -65 dB and below.
 */
static float apa_getQualityIncreaseForLowEnergy(
    float energydB )
{
    const float qualIncreaseMinEnergy = -65;
    const float qualIncreaseMaxEnergy = -40;
    float qualIncForLowEnergy = 0;

    if ( energydB < qualIncreaseMaxEnergy )
    {
        /* only the lower bound needs clamping: the upper bound is already
         * guaranteed by the enclosing condition */
        qualIncForLowEnergy = energydB;
        if ( qualIncForLowEnergy < qualIncreaseMinEnergy )
        {
            qualIncForLowEnergy = qualIncreaseMinEnergy;
        }

        /* map [-40;-65] dB linearly onto [0;2] */
        qualIncForLowEnergy = ( qualIncForLowEnergy - qualIncreaseMaxEnergy ) /
                              ( qualIncreaseMinEnergy - qualIncreaseMaxEnergy ) * 2;
        assert( qualIncForLowEnergy >= 0 && qualIncForLowEnergy <= 2 );
    }

    return qualIncForLowEnergy;
}
891 :
892 :
893 : /*
894 : ********************************************************************************
895 : *
896 : * Function : logarithmic_search
897 : * Tables : <none>
898 : * Compile Defines : <none>
899 : * Return : always 0 (there is no failure path in this function)
900 : * Information : Search for best match of a template segment using
901 : * hierarchical search method:
902 : * Candidate positions are visited in steps of css times
903 : * ps->num_channels. The area around the best match so far is
904 : * searched again with half the css value, as long as the
905 : * halved css is still > 2. Each pass halves the search length.
906 : * Parameter wss is passed to the correlation computation.
907 : * If the search area passes the boundaries, the search
908 : * window is reduced so that it's entirely inside the
909 : * boundaries.
910 : *
911 : ********************************************************************************
912 : */
913 1857 : static bool logarithmic_search(
914 : const apa_state_t *ps, /* only num_channels and scale are read here */
915 : const float *signal, /* sample buffer handed to the correlation helpers */
916 : int16_t s_start, /* first candidate position, relative to offset */
917 : uint16_t inlen, /* length of the search range */
918 : uint16_t offset, /* added to both positions before correlating */
919 : uint16_t fixed_pos, /* template position, relative to offset */
920 : uint16_t corr_len, /* length argument for the correlation helpers */
921 : uint16_t wss, /* forwarded (times num_channels) to the subsampled correlation */
922 : uint16_t css, /* step between candidate positions; halved after each pass */
923 : int16_t *synchpos ) /* out: candidate position with the highest correlation */
924 : {
925 : int16_t i;
926 : float coeff;
927 : float coeff_max;
928 1857 : int16_t s_start_old = 0;
929 1857 : uint16_t s_len_old = 0;
930 :
931 : do
932 : {
933 1857 : coeff_max = -FLT_MAX; /* restart the maximum for this pass; normally replaced by the first correlation result */
934 311817 : for ( i = s_start; i < s_start + inlen; i += css * ps->num_channels )
935 : {
936 309960 : if ( ( wss == 1 ) && ( ps->num_channels == 1 ) )
937 : {
938 0 : coeff = cross_correlation_self( signal, i + offset, fixed_pos + offset, corr_len );
939 : }
940 : else
941 : {
942 309960 : coeff = cross_correlation_subsampled_self( signal, i + offset, fixed_pos + offset, corr_len, wss * ps->num_channels );
943 : }
944 :
945 : /* update max corr; ties are broken differently depending on ps->scale */
946 309960 : if ( ps->scale < 100 )
947 : {
948 : /* shrinking: prefer greater synchpos for equal coeff */
949 92160 : if ( coeff >= coeff_max )
950 : {
951 13047 : coeff_max = coeff;
952 13047 : *synchpos = i;
953 : }
954 : }
955 : else
956 : {
957 : /* extending: prefer smaller synchpos for equal coeff */
958 217800 : if ( coeff > coeff_max )
959 : {
960 20832 : coeff_max = coeff;
961 20832 : *synchpos = i;
962 : }
963 : }
964 : }
965 : /* backup old search range */
966 1857 : s_start_old = s_start;
967 1857 : s_len_old = inlen;
968 :
969 1857 : css = css / 2; /* finer step for the next pass */
970 1857 : inlen = inlen / 2; /* next pass covers half the previous length */
971 1857 : s_start = *synchpos - inlen / 2; /* centre the new range on the best match so far */
972 1857 : if ( s_start < s_start_old ) /* do not start before the previous range */
973 : {
974 525 : s_start = s_start_old;
975 : }
976 1857 : if ( ( s_start + inlen ) > ( s_start_old + s_len_old ) ) /* do not end after the previous range */
977 : {
978 432 : inlen = s_start_old - s_start + s_len_old;
979 : }
980 1857 : } while ( css > 2 ); /* tested after halving, so the last pass uses a step of at least 3 */
981 :
982 1857 : return 0;
983 : }
984 :
985 :
986 : /*
987 : ********************************************************************************
988 : *
989 : * Function : find_synch
990 : * Tables : <none>
991 : * Compile Defines : <none>
992 : * Return : always 0 (the search result is returned via synch_pos)
993 : * Information : Find the best match of a template segment within
994 : * a search region by similarity measures.
995 : *
996 : * Typical example:
997 : *
998 : *                     0         10        20        30        40        50        60
999 : *  in[] =             abcdefghijk_abcdefghijk_abcdefghijk_abcdEFGHIJk_abcdefghijk_a
1000 : * l_in = 61
1001 : * offset = 30                                      |
1002 : * s_start = -20                <-------------------|
1003 : * s_len = 15                   <------------->     |
1004 : * search range:                ***************     |
1005 : * fixed_pos = 10                                   |--------->
1006 : * corr_len = 6                                     |         <---->
1007 : * template segment:                                |         ******
1008 : * synch_pos: -14                     <-------------|
1009 : *
1010 : * All positions are given relative to offset. The
1011 : * search region starts at offset+s_start and ends
1012 : * at offset+s_start+s_len. The template segment
1013 : * starts at offset+fixed_pos and ends at
1014 : * offset+fixed_pos+corr_len. For correlation, the
1015 : * template segment (EFGHIJ) is matched against the
1016 : * segment in the search region, e.g., against (k_abcd)
1017 : * in the first search position. The search position
1018 : * with the best match (-14: EFGHIJ <-> efghij) is
1019 : * returned.
1020 : *
1021 : * 19-JUN-03 N.Faerber initial version
1022 : * 23-APR-04 S.Doehla added subsampling
1023 : *
1024 : ********************************************************************************
1025 : */
1026 1857 : static bool find_synch(
1027 : apa_state_t *ps, /* reads last_pitch, wss, css, num_channels; last_pitch is updated */
1028 : const float *in, /* sample buffer */
1029 : uint16_t l_in, /* length of in[]; used by the bounds asserts below */
1030 : int16_t s_start, /* start of the search region, relative to offset */
1031 : uint16_t s_len, /* length of the search region */
1032 : int16_t fixed_pos, /* start of the template segment, relative to offset */
1033 : uint16_t corr_len, /* length of the template segment */
1034 : uint16_t offset, /* reference index into in[] */
1035 : float *energy, /* passed on to get_scaling_quality */
1036 : float *quality, /* reset to 0 here, then passed on to get_scaling_quality */
1037 : int16_t *synch_pos ) /* out: best-match position, relative to offset */
1038 : {
1039 1857 : assert( ( corr_len - 1 + s_start + s_len - 1 + offset ) < l_in ); /* last search position plus segment must fit in in[] */
1040 1857 : assert( ( corr_len - 1 + fixed_pos + offset ) < l_in ); /* template segment must fit in in[] */
1041 :
1042 : /* pass last pitch to search function as prediction value */
1043 : /* NOTE(review): logarithmic_search normally overwrites *synch_pos with a searched position, so this value appears to matter only when no candidate is accepted - confirm */
1044 1857 : *synch_pos = ps->last_pitch;
1045 :
1046 1857 : logarithmic_search( ps, in, s_start, s_len, offset, fixed_pos, corr_len, ps->wss, ps->css, synch_pos );
1047 : /* assert synch_pos is cleanly divisible by number of channels */
1048 1857 : assert( *synch_pos % ps->num_channels == 0 );
1049 :
1050 1857 : *quality = 0;
1051 1857 : get_scaling_quality( ps, in, l_in, offset, corr_len, (uint16_t) abs( fixed_pos - *synch_pos ), energy, quality ); /* distance between template and best match is passed as the sixth argument */
1052 :
1053 1857 : ps->last_pitch = *synch_pos; /* remember the result for the next call */
1054 :
1055 1857 : return 0;
1056 : }
1057 :
1058 :
1059 : /*
1060 : ********************************************************************************
1061 : *
1062 : * Function : copy_frm
1063 : * Tables : <none>
1064 : * Compile Defines : <none>
1065 : * Return : always 0
1066 : * Information : Copy an audio frame unchanged.
1067 : *
1068 : * The frame size is fixed to ps->l_frm. The input data
1069 : * is stored in frm_in[], where the first ps->l_frm samples
1070 : * shall include the previous output frame and the second
1071 : * ps->l_frm samples shall contain the current input frame.
1072 : * The output frame is stored in frm_out[] and contains
1073 : * l_frm_out = ps->l_frm.
1074 : *
1075 : * The first ps->l_frm input samples are not used by
1076 : * this function and are only provided for a consistent
1077 : * function call with shrink_frm() and extend_frm().
1078 : *
1079 : ********************************************************************************
1080 : */
1081 71442 : static bool copy_frm(
1082 : apa_state_t *ps, /* only l_frm is read */
1083 : const float frm_in[], /* 2 * ps->l_frm samples; only the second half is copied */
1084 : float frm_out[], /* receives ps->l_frm samples */
1085 : uint16_t *l_frm_out ) /* out: set to ps->l_frm */
1086 : {
1087 : uint16_t i;
1088 :
1089 : /* only 2nd input frame is used */
1090 71442 : frm_in += ps->l_frm;
1091 :
1092 : /* copy frame sample by sample */
1093 157047762 : for ( i = 0; i < ps->l_frm; i++ )
1094 : {
1095 156976320 : frm_out[i] = frm_in[i];
1096 : }
1097 :
1098 : /* set output length */
1099 71442 : *l_frm_out = ps->l_frm;
1100 :
1101 71442 : return 0;
1102 : }
1103 :
1104 :
1105 : /*
1106 : ********************************************************************************
1107 : *
1108 : * Function : shrink_frm
1109 : * Tables : <none>
1110 : * Compile Defines : <none>
1111 : * Return : 1 if find_synch reported failure and overlap is applied, else 0
1112 : * Information : Shrink the length of an audio frame using the WSOLA
1113 : * algorithm.
1114 : *
1115 : * The frame size is fixed to ps->l_frm. The input data
1116 : * is stored in frm_in[], where the first ps->l_frm samples
1117 : * shall include the previous output frame and the second
1118 : * ps->l_frm samples shall contain the current input frame.
1119 : * The output frame is stored in frm_out[] and contains
1120 : * l_frm_out samples. The amount of shrinking is signal
1121 : * dependent.
1122 : *
1123 : * The first ps->l_frm input samples are not used by
1124 : * this function and are only provided for a consistent
1125 : * function call with extend_frm().
1126 : *
1127 : ********************************************************************************
1128 : */
1129 768 : static bool shrink_frm(
1130 : apa_state_t *ps, /* scaler state; bad_frame_count and good_frame_count are updated */
1131 : const float frm_in[], /* 2 * ps->l_frm samples; only the second half is used */
1132 : uint16_t maxScaling, /* shift used on the silence path instead of a search */
1133 : float frm_out[], /* receives the shortened frame */
1134 : uint16_t *l_frm_out ) /* out: number of samples written to frm_out[] */
1135 : {
1136 768 : bool findSynchResult = 0;
1137 : int16_t xtract, l_rem, s_start, s_end;
1138 : uint16_t i;
1139 : uint16_t over;
1140 768 : float energy, quality = 0.0f;
1141 : uint16_t l_frm;
1142 : uint16_t l_seg;
1143 :
1144 768 : l_frm = ps->l_frm;
1145 768 : l_seg = ps->l_seg;
1146 :
1147 : /* only 2nd input frame is used */
1148 768 : frm_in += l_frm;
1149 :
1150 : /* set search range */
1151 768 : s_start = ( ps->p_min / ps->num_channels ) * ps->num_channels; /* p_min truncated to a multiple of num_channels */
1152 768 : s_end = s_start + ps->l_search;
1153 768 : if ( ( s_end + l_seg ) >= l_frm ) /* keep a segment starting at s_end inside the frame */
1154 : {
1155 768 : s_end = ( l_frm - l_seg );
1156 : }
1157 :
1158 : /* calculate overlap position */
1159 768 : if ( isSilence( frm_in, l_seg, 10 ) )
1160 : {
1161 : /* maximum scaling: no search, energy and quality are set to fixed values */
1162 0 : energy = -65;
1163 0 : quality = 5;
1164 0 : if ( ps->evs_compat_mode == false )
1165 : {
1166 :
1167 0 : xtract = maxScaling;
1168 : /* take samples already in the renderer buf into account */
1169 0 : xtract += ps->l_r_buf;
1170 : /* snap to renderer time slot borders */
1171 0 : xtract -= ( ps->l_ts - ( l_frm - xtract + ps->l_r_buf ) % ps->l_ts );
1172 0 : while ( xtract < 0 )
1173 : {
1174 0 : xtract += ps->l_ts;
1175 : }
1176 0 : while ( xtract > ( s_end - ps->num_channels ) )
1177 : {
1178 : /* exceeded the possible shrinking, go back one renderer time slot */
1179 0 : xtract -= ps->l_ts;
1180 : }
1181 : }
1182 0 : else if ( maxScaling != 0U && s_end > maxScaling + 1 )
1183 : {
1184 0 : xtract = maxScaling;
1185 : }
1186 : else
1187 : {
1188 : /* set to last valid element (i.e. element[len - 1] but note for stereo last element is last pair of samples) */
1189 0 : xtract = s_end - ps->num_channels;
1190 : }
1191 : }
1192 : else
1193 : {
1194 : /* find synch: template at position 0, search range [s_start, s_end), result in xtract */
1195 768 : findSynchResult = find_synch( ps, frm_in, l_frm, s_start, (uint16_t) ( s_end - s_start ), 0, l_seg, 0, &energy, &quality, &xtract );
1196 : }
1197 :
1198 : /* assert synch_pos is cleanly divisible by number of channels */
1199 768 : assert( xtract % ps->num_channels == 0 );
1200 :
1201 : /* set frame overlappable - reset if necessary */
1202 768 : over = 1;
1203 :
1204 : /* test whether frame has sufficient quality; the threshold drops with bad_frame_count and rises with good_frame_count */
1205 768 : if ( quality < ( ps->targetQuality - ( ps->bad_frame_count * 0.1f ) + ( ps->good_frame_count * 0.2f ) ) )
1206 : {
1207 : /* not sufficient */
1208 618 : over = 0;
1209 618 : if ( ps->bad_frame_count < ps->qualityred ) /* bad_frame_count is capped at qualityred */
1210 : {
1211 189 : ++ps->bad_frame_count;
1212 : }
1213 618 : if ( ps->good_frame_count > 0U )
1214 : {
1215 159 : --ps->good_frame_count;
1216 : }
1217 : }
1218 : else
1219 : {
1220 : /* sufficient quality */
1221 150 : if ( ps->bad_frame_count > 0U )
1222 : {
1223 150 : --ps->bad_frame_count;
1224 : }
1225 150 : if ( ps->good_frame_count < ps->qualityrise ) /* good_frame_count is capped at qualityrise */
1226 : {
1227 150 : ++ps->good_frame_count;
1228 : }
1229 : }
1230 :
1231 : /* Calculate output data: overlap-add only for sufficient quality and a non-zero shift */
1232 768 : if ( over && xtract )
1233 : {
1234 150 : if ( findSynchResult == 1 )
1235 : {
1236 0 : return 1;
1237 : }
1238 150 : if ( ps->evs_compat_mode == true )
1239 : {
1240 0 : overlapAddEvs( frm_in, frm_in + xtract, frm_out, l_seg, ps->num_channels, ps->win + ps->l_halfwin, ps->win );
1241 : }
1242 : else
1243 : {
1244 150 : overlapAdd( frm_in, frm_in + xtract, frm_out, l_seg, ps->num_channels, ps->win + ps->l_halfwin, ps->win );
1245 : }
1246 : }
1247 : else
1248 : {
1249 618 : xtract = 0; /* no shrinking: the first segment is copied as is */
1250 659658 : for ( i = 0; i < l_seg; i++ )
1251 : {
1252 659040 : frm_out[i] = frm_in[i];
1253 : }
1254 : }
1255 :
1256 : /* append remaining samples, i.e. everything after the segment that starts at xtract */
1257 768 : l_rem = l_frm - xtract - l_seg;
1258 722142 : for ( i = 0; i < l_rem; i++ )
1259 : {
1260 721374 : frm_out[l_seg + i] = frm_in[l_frm - l_rem + i];
1261 : }
1262 :
1263 : /* set output length (equals l_frm - xtract) */
1264 768 : *l_frm_out = l_seg + l_rem;
1265 :
1266 768 : return 0;
1267 : }
1268 :
1269 :
1270 : /*
1271 : ********************************************************************************
1272 : *
1273 : * Function : extend_frm
1274 : * Tables : <none>
1275 : * Compile Defines : <none>
1276 : * Return : 1 if N exceeds MAXN or find_synch reports failure, else 0
1277 : * Information : Extend the length of an audio frame using the WSOLA
1278 : * algorithm.
1279 : *
1280 : * The frame size is fixed to ps->l_frm. The input data
1281 : * is stored in frm_in[], where the first ps->l_frm samples
1282 : * shall include the previous output frame and the second
1283 : * ps->l_frm samples shall contain the current input frame.
1284 : * The output frame is stored in frm_out[] and contains
1285 : * l_frm_out samples. The amount of extension is signal
1286 : * dependent.
1287 : *
1288 : ********************************************************************************
1289 : */
1290 1089 : static bool extend_frm(
1291 : apa_state_t *ps, /* scaler state; bad_frame_count and good_frame_count are updated */
1292 : const float frm_in[], /* 2 * ps->l_frm samples: previous output frame, then current input frame */
1293 : float frm_out[], /* receives the extended frame */
1294 : uint16_t *l_frm_out ) /* out: number of samples written to frm_out[] */
1295 : {
1296 1089 : bool findSynchResult = 0;
1297 : uint16_t l_frm_out_target;
1298 : uint16_t n, i;
1299 : int16_t N;
1300 : int16_t s[MAXN + 2], s_max, s_min;
1301 : int16_t xtract[MAXN + 2], sync_start, s_end;
1302 : uint16_t over[MAXN + 2];
1303 : int16_t l_rem;
1304 1089 : int16_t s_start = 0;
1305 1089 : float energy, quality = 0.0f;
1306 : uint16_t l_frm, l_seg;
1307 : const float *fadeOut, *fadeIn;
1308 : float *out;
1309 :
1310 :
1311 1089 : l_frm = ps->l_frm;
1312 1089 : l_seg = ps->l_seg;
1313 :
1314 : /* number of segments/iterations, derived from a target length of 1.5 frames */
1315 1089 : l_frm_out_target = (uint16_t) ( (float) l_frm * 1.5f );
1316 1089 : N = ( l_frm_out_target / l_seg ) - 1;
1317 1089 : if ( N < 1 )
1318 : {
1319 0 : N = 1;
1320 : }
1321 1089 : if ( N > MAXN ) /* s[], xtract[] and over[] only hold MAXN + 2 entries */
1322 : {
1323 0 : return 1;
1324 : }
1325 : /* calculate equally spaced search regions */
1326 : /* s[n] are given relative to 2nd frame and point to the start of */
1327 : /* the search region. The first segment (n=1) will not be moved. */
1328 : /* Hence, the iterations will start with n=2. */
1329 1089 : s_min = -( ps->l_search ) - ( ps->p_min );
1330 : /* (make sure not to exceed array dimension) */
1331 1089 : if ( l_frm + s_min < 0 )
1332 : {
1333 0 : s_min = -( l_frm );
1334 : }
1335 1089 : s_max = l_frm - 2 * l_seg - ps->l_search;
1336 1089 : if ( s_max < s_min )
1337 : {
1338 0 : N = 1;
1339 : }
1340 : /* for just one segment start at s_min */
1341 1089 : if ( N == 1 )
1342 : {
1343 0 : s[2] = s_min;
1344 : }
1345 : /* else, spread linear in between s_min and s_max */
1346 : /* (including s_min and s_max) */
1347 : else
1348 : {
1349 3267 : for ( n = 2; n <= ( N + 1 ); n++ )
1350 : {
1351 2178 : s[n] = s_min + ( ( s_max - s_min ) * ( n - 2 ) ) / ( N - 1 );
1352 : }
1353 : }
1354 :
1355 : /*
1356 : * Planning Phase
1357 : */
1358 : /* NOTE(review): only n == 2 is planned below, while the output loop reads over[n] and xtract[n] for n = 2..N; entries above 2 are never assigned in this function - confirm N cannot exceed 2 */
1359 1089 : xtract[1] = -( l_seg ); /* make sync_start=0 in 1st iteration */
1360 1089 : n = 2;
1361 :
1362 : /* define synch segment (to be correlated with search region) */
1363 1089 : sync_start = xtract[n - 1] + l_seg;
1364 1089 : over[n] = 1; /* will be reset if overlap is not required */
1365 : /* check end of search region: should be at least p_min */
1366 : /* samples on the left of synch_start */
1367 1089 : if ( ( s[n] + ps->l_search ) < ( sync_start - ( ps->p_min ) ) )
1368 : {
1369 0 : s_start = s[n];
1370 0 : s_end = s_start + ps->l_search;
1371 : }
1372 : else
1373 : {
1374 : /* shrink search region to enforce minimum shift */
1375 1089 : s_end = sync_start - ( ps->p_min );
1376 1089 : if ( s[n] + ps->l_search < sync_start )
1377 : {
1378 1089 : s_start = s[n]; /* just do it with normal start position */
1379 : }
1380 0 : else if ( n == ( N + 1 ) ) /* move search region left for last segment */
1381 : {
1382 0 : s_start = s_end - ( ps->l_search - ps->p_min );
1383 : }
1384 : else
1385 : {
1386 0 : over[n] = 0; /* don't search/overlap (just copy down) */
1387 : }
1388 : }
1389 :
1390 1089 : if ( over[n] )
1391 : {
1392 : /* calculate overlap position */
1393 1089 : if ( isSilence( frm_in, l_seg, 10 ) )
1394 : {
1395 : /* maximum scaling: no search, energy and quality are set to fixed values */
1396 0 : energy = -65;
1397 0 : quality = 5;
1398 0 : xtract[n] = s_start + ps->num_channels;
1399 0 : if ( ps->evs_compat_mode == false )
1400 : {
1401 : /* take renderer buffer samples into account */
1402 0 : xtract[n] += ps->l_r_buf;
1403 : /* snap to next renderer time slot border to resynchronize */
1404 0 : xtract[n] -= ( ( N - 1 ) * l_seg - xtract[n] + ps->l_r_buf ) % ps->l_ts;
1405 : }
1406 : }
1407 : else
1408 : {
1409 : /* find synch: positions are relative to the 2nd frame (offset l_frm), result in xtract[n] */
1410 1089 : findSynchResult = find_synch( ps, frm_in, 2 * l_frm, s_start, s_end - s_start, sync_start, l_seg, l_frm, &energy, &quality, &xtract[n] );
1411 : }
1412 : /* assert synch_pos is cleanly divisible by number of channels */
1413 1089 : assert( xtract[n] % ps->num_channels == 0 );
1414 :
1415 : /* test for sufficient quality; the threshold drops with bad_frame_count and rises with good_frame_count */
1416 1089 : if ( quality < ( ps->targetQuality - ( ps->bad_frame_count * 0.1f ) + ( ps->good_frame_count * 0.2f ) ) )
1417 : {
1418 : /* not sufficient: fall back to a plain copy starting at sync_start */
1419 927 : over[n] = 0;
1420 927 : xtract[n] = sync_start;
1421 927 : if ( ps->bad_frame_count < ps->qualityred ) /* bad_frame_count is capped at qualityred */
1422 : {
1423 369 : ++ps->bad_frame_count;
1424 : }
1425 927 : if ( ps->good_frame_count > 0U )
1426 : {
1427 60 : --ps->good_frame_count;
1428 : }
1429 : }
1430 : else
1431 : {
1432 : /* sufficient quality */
1433 162 : if ( ps->bad_frame_count > 0U )
1434 : {
1435 153 : --ps->bad_frame_count;
1436 : }
1437 162 : if ( ps->good_frame_count < ps->qualityrise ) /* good_frame_count is capped at qualityrise */
1438 : {
1439 162 : ++ps->good_frame_count;
1440 : }
1441 : }
1442 1089 : if ( findSynchResult ) /* checked after the quality counters have been updated */
1443 : {
1444 0 : return 1;
1445 : }
1446 : }
1447 : else
1448 : {
1449 0 : xtract[n] = sync_start;
1450 : }
1451 :
1452 :
1453 : /* Calculate output data */
1454 2178 : for ( n = 2; n <= N; n++ )
1455 : {
1456 1089 : if ( over[n] && xtract[n - 1] + l_seg != xtract[n] ) /* overlap only if the segment is actually shifted */
1457 : {
1458 : /* mix 2nd half of previous segment with 1st half of current segment */
1459 162 : fadeOut = frm_in + l_frm + xtract[n - 1] + l_seg;
1460 162 : fadeIn = frm_in + l_frm + xtract[n];
1461 162 : out = frm_out + ( n - 2 ) * l_seg;
1462 162 : if ( ps->evs_compat_mode == true )
1463 : {
1464 0 : overlapAddEvs( fadeOut, fadeIn, out, l_seg, ps->num_channels, ps->win + ps->l_halfwin, ps->win );
1465 : }
1466 : else
1467 : {
1468 162 : overlapAdd( fadeOut, fadeIn, out, l_seg, ps->num_channels, ps->win + ps->l_halfwin, ps->win );
1469 : }
1470 : }
1471 : else
1472 : {
1473 : /* just copy down 1st half of current segment (= 2nd half of previous segment) */
1474 : float *frm_out_ptr;
1475 : const float *frm_in_ptr;
1476 927 : frm_out_ptr = &( frm_out[( n - 2 ) * l_seg] );
1477 927 : frm_in_ptr = &( frm_in[l_frm + xtract[n]] );
1478 1048287 : for ( i = 0; i < l_seg; i++ )
1479 : {
1480 1047360 : frm_out_ptr[i] = frm_in_ptr[i];
1481 : }
1482 : }
1483 : }
1484 :
1485 : /* append remaining samples: the tail of the 2nd input frame that follows segment N */
1486 1089 : l_rem = l_frm - ( xtract[N] + l_seg );
1487 1442289 : for ( i = 0; i < l_rem; i++ )
1488 : {
1489 1441200 : frm_out[( N - 1 ) * l_seg + i] = frm_in[2 * l_frm - l_rem + i];
1490 : }
1491 :
1492 : /* set output length */
1493 1089 : *l_frm_out = ( N - 1 ) * l_seg + l_rem;
1494 :
1495 1089 : return 0;
1496 : }
|