Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : /*! @file jbm_pcmdsp_apa.c Adaptive Playout for Audio (apa). */
38 :
39 : /* system headers */
40 : #include <assert.h>
41 : #include <math.h>
42 : #include <stdlib.h>
43 : #include <stdio.h>
44 : #include <stdint.h>
45 : #include "options.h"
46 : #include "prot.h"
47 : #ifdef DEBUGGING
48 : #include "debug.h"
49 : #endif
50 : #include "wmc_auto.h"
51 : /* local headers */
52 : #include "jbm_pcmdsp_apa.h"
53 : #include "jbm_pcmdsp_similarityestimation.h"
54 : #include "jbm_pcmdsp_window.h"
55 : #include "cnst.h"
56 :
57 :
58 : /*---------------------------------------------------------------------*
59 : * Local state structure
60 : *---------------------------------------------------------------------*/
61 :
/* maximum number of segments/iterations in extend_frm() */
#define MAXN 10

/* definition of state struct */
struct apa_state_t
{
    /* EVS compatibility flag: cleared by apa_init(), changed via apa_set_evs_compat_mode() */
    bool evs_compat_mode;

    /* output buffer: history of the most recent output samples of all channels */
    float *buf_out;
    /* number of floats allocated for buf_out (APA_BUF_PER_CHANNEL * num_channels) */
    uint16_t buf_out_capacity;
    /* number of samples currently stored in buf_out */
    uint16_t l_buf_out;

    /* Hann window (same window is applied to all channels) */
    float win[APA_BUF_PER_CHANNEL];
    uint16_t l_halfwin;

    /* sampling rate [Hz] */
    uint16_t rate;

    /* length of a segment [samples], stored multiplied by num_channels */
    uint16_t l_seg;

    /* length of a frame [samples], stored multiplied by num_channels */
    uint16_t l_frm;

    /* total number of processed input samples since apa_reset() */
    uint32_t l_in_total;

    /* time resolution in samples of the IVAS renderer, stored multiplied by num_channels */
    uint16_t l_ts;

    /* samples already available in the renderer buffer, stored multiplied by num_channels */
    uint16_t l_r_buf;

    /* sum of inserted/removed samples since last apa_set_scale() */
    int32_t diffSinceSetScale;
    /* number of input frames since last apa_set_scale() */
    uint32_t nFramesSinceSetScale;

    /* current scaling ratio [%]; 100 means frames are copied without scaling */
    uint16_t scale;

    /* minimum pitch length [samples], stored multiplied by num_channels */
    uint16_t p_min;

    /* search length [samples], stored multiplied by num_channels */
    uint16_t l_search;

    uint16_t wss; /* waveform subsampling per channel */
    uint16_t css; /* correlation subsampling per channel */

    float targetQuality;  /* quality threshold set by apa_set_quality() */
    uint16_t qualityred;  /* quality reduction threshold */
    uint16_t qualityrise; /* quality rising for adaptive quality thresholds */

    uint16_t last_pitch;       /* last pitch/sync position */
    uint16_t bad_frame_count;  /* # frames before quality threshold is lowered */
    uint16_t good_frame_count; /* # scaled frames */

    uint16_t num_channels; /* number of input/output channels */
};
123 :
124 :
125 : /*---------------------------------------------------------------------*
126 : * Local function prototypes
127 : *---------------------------------------------------------------------*/
128 :
129 : static float apa_corrEnergy2dB( float energy, uint16_t corr_len );
130 :
131 : static float apa_getQualityIncreaseForLowEnergy( float energydB );
132 :
133 : static bool logarithmic_search( const apa_state_t *ps, const float *signal, int16_t s_start, uint16_t inlen, uint16_t offset, uint16_t fixed_pos, uint16_t corr_len, uint16_t wss, uint16_t css, int16_t *synchpos );
134 :
135 : static bool find_synch( apa_state_t *ps, const float *in, uint16_t l_in, int16_t s_start, uint16_t s_len, int16_t fixed_pos, uint16_t corr_len, uint16_t offset, float *energy, float *quality, int16_t *synch_pos );
136 :
137 : static bool copy_frm( apa_state_t *ps, const float frm_in[], float frm_out[], uint16_t *l_frm_out );
138 :
139 : static bool shrink_frm( apa_state_t *ps, const float frm_in[], uint16_t maxScaling, float frm_out[], uint16_t *l_frm_out );
140 :
141 : static bool extend_frm( apa_state_t *ps, const float frm_in[], float frm_out[], uint16_t *l_frm_out );
142 :
143 : /*---------------------------------------------------------------------*
144 : * Public functions
145 : *---------------------------------------------------------------------*/
146 :
147 : /* Allocates memory for state struct and initializes elements. */
148 105 : ivas_error apa_init(
149 : apa_state_t **pps,
150 : const int32_t num_channels )
151 : {
152 105 : apa_state_t *ps = NULL;
153 :
154 : /* make sure pointer is valid */
155 105 : if ( !pps )
156 : {
157 0 : return 1;
158 : }
159 :
160 : /* allocate state struct */
161 105 : if ( ( ps = (apa_state_t *) malloc( sizeof( apa_state_t ) ) ) == NULL )
162 : {
163 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for JBM\n" ) );
164 : }
165 :
166 105 : ps->num_channels = (uint16_t) num_channels;
167 105 : ps->buf_out_capacity = (uint16_t) ( APA_BUF_PER_CHANNEL * num_channels );
168 105 : if ( ( ps->buf_out = malloc( sizeof( float ) * ps->buf_out_capacity ) ) == NULL )
169 : {
170 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for JBM\n" ) );
171 : }
172 :
173 105 : ps->evs_compat_mode = false;
174 :
175 105 : apa_reset( ps );
176 105 : *pps = ps;
177 :
178 105 : return IVAS_ERR_OK;
179 : }
180 :
181 :
182 : /* Sets state variables to initial value. */
183 210 : void apa_reset(
184 : apa_state_t *ps )
185 : {
186 : /* init state struct */
187 210 : ps->l_buf_out = 0;
188 210 : ps->l_halfwin = 0;
189 210 : ps->rate = 0;
190 210 : ps->l_seg = 0;
191 210 : ps->l_frm = 0;
192 210 : ps->l_in_total = 0;
193 210 : ps->diffSinceSetScale = 0;
194 210 : ps->nFramesSinceSetScale = 0;
195 210 : ps->scale = 100;
196 210 : ps->p_min = 0;
197 210 : ps->l_search = 0;
198 210 : ps->wss = 1;
199 210 : ps->css = 1;
200 210 : ps->targetQuality = 0.0f;
201 210 : ps->qualityred = 0;
202 210 : ps->qualityrise = 0;
203 210 : ps->last_pitch = 0;
204 210 : ps->bad_frame_count = 0;
205 210 : ps->good_frame_count = 0;
206 :
207 210 : ps->l_ts = 1;
208 210 : ps->l_r_buf = 0;
209 210 : return;
210 : }
211 :
212 3303 : uint8_t apa_reconfigure(
213 : apa_state_t *ps,
214 : uint16_t num_channels,
215 : uint16_t l_ts )
216 : {
217 :
218 : /* realloc buffer */
219 3303 : free( ps->buf_out );
220 3303 : ps->num_channels = (uint16_t) num_channels;
221 3303 : ps->buf_out_capacity = (uint16_t) ( APA_BUF_PER_CHANNEL * num_channels );
222 3303 : ps->buf_out = (float *) malloc( sizeof( float ) * ps->buf_out_capacity );
223 3303 : if ( !ps->buf_out )
224 : {
225 0 : return 2;
226 : }
227 3303 : ps->l_buf_out = 0;
228 3303 : ps->l_in_total = 0;
229 3303 : ps->l_ts = ps->num_channels * l_ts;
230 :
231 : /* set everything else dependent on the number of channels */
232 : /* set segment size */
233 : /* in the order of a pitch, set to 160 samples at 16 kHz */
234 : /* used for windowing and as the correlation length, i.e., */
235 : /* the size of the template segment. */
236 3303 : ps->l_seg = ( ps->rate / 100 ) * ps->num_channels;
237 :
238 : /* set frame size */
239 : /* set to 320 samples at 16 kHz */
240 3303 : ps->l_frm = ( ps->rate / FRAMES_PER_SEC ) * ps->num_channels;
241 :
242 : /* set minimum pitch */
243 : /* set to 40 samples at 16 kHz */
244 : /* (defines min change in number of samples, i.e., abs(l_in-l_out) >= p_min) */
245 3303 : ps->p_min = ( ps->rate / 400 ) * ps->num_channels;
246 :
247 : /* set search length */
248 : /* must cover one pitch, set to 200 samples at 16 kHz */
249 : /* (the resulting maximum pitch is then p_min+l_search = 240 samples at 16 kHz) */
250 3303 : ps->l_search = ( ps->rate / 80 ) * ps->num_channels;
251 :
252 3303 : return 0;
253 : }
254 :
255 :
256 : /* Sets the audio configuration. */
257 105 : bool apa_set_rate(
258 : apa_state_t *ps,
259 : const int32_t output_Fs )
260 : {
261 : /* make sure pointer is valid */
262 105 : if ( ps == NULL )
263 : {
264 0 : return 1;
265 : }
266 :
267 : /* check range */
268 105 : if ( ( output_Fs < APA_MIN_RATE ) || ( output_Fs > APA_MAX_RATE ) )
269 : {
270 0 : return 1;
271 : }
272 :
273 : /* reset state struct */
274 105 : apa_reset( ps );
275 :
276 : /* copy rate to state struct */
277 105 : ps->rate = (uint16_t) output_Fs;
278 :
279 105 : if ( ps->num_channels > APA_MAX_NUM_CHANNELS )
280 : {
281 0 : return 1;
282 : }
283 :
284 : /*
285 : * several other parameters depend on the sampling rate
286 : * and are set below. Some "magic numbers" are used here
287 : * which are based on typical values of a "pitch" in
288 : * human voice. The pitch length is the period of the
289 : * base frequency and is usually assumed to be 40-240
290 : * samples at 16 kHz.
291 : */
292 :
293 : /* set segment size */
294 : /* in the order of a pitch, set to 160 samples at 16 kHz */
295 : /* used for windowing and as the correlation length, i.e., */
296 : /* the size of the template segment. */
297 105 : ps->l_seg = ( ps->rate / 100 ) * ps->num_channels;
298 :
299 : /* init Hann window */
300 : /* Note: l_win < APA_BUF_PER_CHANNEL is required */
301 : /* Length of Hann window should be independent of
302 : * number of channels - same window applied to all channels */
303 105 : ps->l_halfwin = ps->rate / 100;
304 105 : hannWindow( ps->l_halfwin * 2, ps->win );
305 :
306 : /* set frame size */
307 : /* set to 320 samples at 16 kHz */
308 105 : ps->l_frm = ( ps->rate / FRAMES_PER_SEC ) * ps->num_channels;
309 :
310 : /* set minimum pitch */
311 : /* set to 40 samples at 16 kHz */
312 : /* (defines min change in number of samples, i.e., abs(l_in-l_out) >= p_min) */
313 105 : ps->p_min = ( ps->rate / 400 ) * ps->num_channels;
314 :
315 : /* set search length */
316 : /* must cover one pitch, set to 200 samples at 16 kHz */
317 : /* (the resulting maximum pitch is then p_min+l_search = 240 samples at 16 kHz) */
318 105 : ps->l_search = ( ps->rate / 80 ) * ps->num_channels;
319 :
320 105 : return 0;
321 : }
322 :
323 :
324 : /* Set scaling. */
325 76596 : bool apa_set_scale(
326 : apa_state_t *ps,
327 : uint16_t scale )
328 : {
329 : /* make sure pointer is valid */
330 76596 : if ( ps == NULL )
331 : {
332 0 : return 1;
333 : }
334 :
335 : /* check range */
336 76596 : if ( ( scale < APA_MIN_SCALE ) || ( scale > APA_MAX_SCALE ) )
337 : {
338 0 : return 1;
339 : }
340 :
341 : /* do nothing if same scale is set multiple times */
342 : /* (otherwise scale control is confused) */
343 76596 : if ( ps->scale == scale )
344 : {
345 76224 : return 0;
346 : }
347 :
348 : /* copy to state struct */
349 372 : ps->scale = scale;
350 :
351 : /* reset scaling statistics */
352 372 : ps->diffSinceSetScale = 0;
353 372 : ps->nFramesSinceSetScale = 0;
354 :
355 372 : return 0;
356 : }
357 :
358 105 : bool apa_set_renderer_granularity(
359 : apa_state_t *ps,
360 : uint16_t l_ts )
361 : {
362 : /* make sure pointer is valid */
363 105 : if ( ps == NULL )
364 : {
365 0 : return 1;
366 : }
367 :
368 :
369 : /* copy to state struct */
370 105 : ps->l_ts = l_ts * ps->num_channels;
371 105 : return 0;
372 : }
373 :
374 76596 : bool apa_set_renderer_residual_samples(
375 : apa_state_t *ps,
376 : uint16_t l_r_buf )
377 : {
378 : /* make sure pointer is valid */
379 76596 : if ( ps == NULL )
380 : {
381 0 : return 1;
382 : }
383 :
384 :
385 : /* copy to state struct */
386 76596 : ps->l_r_buf = l_r_buf * ps->num_channels;
387 76596 : return 0;
388 : }
389 :
390 0 : bool apa_set_evs_compat_mode(
391 : apa_state_t *ps,
392 : bool mode )
393 : {
394 : /* make sure pointer is valid */
395 0 : if ( ps == NULL )
396 : {
397 0 : return 1;
398 : }
399 :
400 0 : ps->evs_compat_mode = mode;
401 :
402 0 : return 0;
403 : }
404 :
405 : /*
406 : ********************************************************************************
407 : *
408 : * Function : apa_set_quality
409 : * Tables : <none>
410 : * Compile Defines : <none>
411 : * Return : 0 on success, 1 on failure
412 : * Information : Set quality thresholds.
413 : *
414 : * quality is lower limit for minimum quality
415 : * Range is [-2;2] - where positive values allow
416 : * only pasting with same phase information
417 : * Negative values would yield cross phased pasting
418 : *
419 : * qualityred allows dynamic lowering of lower quality
420 : * bound - this gives better results for rhythmic signals
421 : * Range is [0;20], meaning 0.1 lowering*qualityred
422 : *
423 : * undocumented: qualityrise (same as qualityred - other
424 : * direction)
425 : *
426 : ********************************************************************************
427 : */
428 105 : bool apa_set_quality(
429 : apa_state_t *ps,
430 : float quality,
431 : uint16_t qualityred,
432 : uint16_t qualityrise )
433 : {
434 105 : assert( ps != NULL );
435 105 : assert( -2.0f <= quality && quality <= 3.1f );
436 105 : assert( qualityred > 0 && qualityred <= 20 );
437 105 : assert( qualityrise > 0 && qualityrise <= 20 );
438 :
439 105 : ps->targetQuality = quality;
440 105 : ps->qualityred = qualityred;
441 105 : ps->qualityrise = qualityrise;
442 105 : ps->bad_frame_count = 0;
443 105 : ps->good_frame_count = 0;
444 :
445 105 : return 0;
446 : }
447 :
448 : /*
449 : ********************************************************************************
450 : *
451 : * Function : apa_set_complexity_options
452 : * Tables : <none>
453 : * Compile Defines : <none>
454 : * Return : 0 on success, 1 on failure
455 : * Information : Set complexity options
456 : * Waveform subsampling computes the correlation function
457 : * for certain positions only
458 : * Correlation function subsampling computes the maxima
459 : * for certain positions only
460 : *
461 : ********************************************************************************
462 : */
463 105 : bool apa_set_complexity_options(
464 : apa_state_t *ps,
465 : uint16_t wss,
466 : uint16_t css )
467 : {
468 : /* make sure pointer is valid */
469 105 : if ( ps == NULL )
470 : {
471 0 : return 1;
472 : }
473 :
474 105 : if ( wss == 0 || wss > 1000 )
475 : {
476 0 : return 1;
477 : }
478 :
479 105 : if ( css == 0 || css > 1000 )
480 : {
481 0 : return 1;
482 : }
483 :
484 105 : ps->wss = wss;
485 105 : ps->css = css;
486 :
487 105 : return 0;
488 : }
489 :
490 : /*
491 : ********************************************************************************
492 : *
493 : * Function : apa_exit
494 : * Tables : <none>
495 : * Compile Defines : <none>
496 : * Return : 0 on success, 1 on failure
497 : * Information : The memory used for storing the state is freed.
498 : * The state struct pointer is set to NULL.
499 : *
500 : ********************************************************************************
501 : */
502 1884 : bool apa_exit(
503 : apa_state_t **pps )
504 : {
505 : /* ignore NULL pointer input */
506 1884 : if ( *pps == NULL )
507 : {
508 1779 : return 0;
509 : }
510 :
511 : /* deallocate state struct members */
512 105 : free( ( *pps )->buf_out );
513 :
514 : /* deallocate state struct */
515 105 : free( *pps );
516 :
517 : /* set pointer to NULL */
518 105 : *pps = NULL;
519 :
520 105 : return 0;
521 : }
522 :
523 : /*
524 : ********************************************************************************
525 : *
526 : * Function : apa_exec
527 : * Tables : <none>
528 : * Compile Defines : <none>
529 : * Return : 0 on success, 1 on failure
530 : * Information : Execute adaptive playout for audio, i.e., audio scaling.
531 : * Will take l_in input samples from a_in[] and
532 : * try to extend/shrink the amount of samples according
533 : * to the last scaling set by using apa_set_scale().
534 : * The actual amount of samples after scaling may vary
535 : * and is given in l_out. The scaled audio samples
536 : * are contained in a_out[]. Note that the scaling is
537 : * achieved only in average. The input buffer must be
538 : * filled with 20ms audio. The output buffer must be
539 : * allocated externally and must be at least of size
540 : * APA_BUF.
541 : * Scaling can only be performed when a sampling rate
542 : * is specified using apa_set_rate(). Otherwise,
543 : * an error is returned.
544 : *
545 : * The amount of scaling is achieved by controlling the
546 : * frequency of scaling. Note that the exact amount of
547 : * scaling is signal dependent and is an integer
548 : * multiple of a pitch. Hence, when we want to achieve
549 : * a scaling of e.g. 110% then the APA module will typically
550 : * forward several frames without any modification and
551 : * then scale one frame by a higher amount, e.g. 143%.
552 : *
553 : ********************************************************************************
554 : */
555 76596 : uint8_t apa_exec(
556 : apa_state_t *ps, /* i/o: state struct */
557 : const float a_in[], /* i : input samples */
558 : uint16_t l_in, /* i : number of input samples */
559 : uint16_t maxScaling, /* i : allowed number of inserted/removed samples */
560 : float a_out[], /* o : output samples */
561 : uint16_t *l_out /* o : number of output samples */
562 : )
563 : {
564 : uint16_t i;
565 : float frm_in[APA_BUF]; /* NOTE: this buffer could be smaller if alocated dynamically based on the actual sampling rate and number of channels */
566 : uint16_t l_frm_out;
567 : int16_t l_rem;
568 : int32_t dl_scaled, dl_copied, l_frm_out_target;
569 : int32_t expScaling, actScaling;
570 : uint32_t statsResetThreshold, statsResetShift;
571 :
572 76596 : statsResetThreshold = 1637;
573 76596 : statsResetShift = 2;
574 :
575 : /* Convert max_scaling from "per channel" to total */
576 76596 : maxScaling *= ps->num_channels;
577 :
578 : /* make sure no invalid output is used */
579 76596 : *l_out = 0;
580 76596 : l_frm_out = 0;
581 :
582 : /* make sure pointer is valid */
583 76596 : if ( ps == NULL )
584 : {
585 0 : return 1;
586 : }
587 : /* check available rate */
588 76596 : if ( ps->rate == 0 )
589 : {
590 0 : return 2;
591 : }
592 : /* check size of input */
593 76596 : if ( l_in != ps->l_frm )
594 : {
595 0 : return 3;
596 : }
597 :
598 : /* get target length */
599 76596 : if ( ps->scale > 100 )
600 : {
601 1407 : expScaling = (int32_t) ( ( ps->l_frm * ( ps->scale - 100.0f ) / 100.0f ) * ( ps->nFramesSinceSetScale + 1 ) + 0.5f );
602 : }
603 75189 : else if ( ps->scale < 100 )
604 : {
605 804 : expScaling = (int32_t) ( ( ps->l_frm * ( ps->scale - 100.0f ) / 100.0f ) * ( ps->nFramesSinceSetScale + 1 ) - 0.5f );
606 : }
607 : else
608 : {
609 74385 : expScaling = 0;
610 : }
611 76596 : actScaling = ps->diffSinceSetScale - ps->l_frm;
612 76596 : l_frm_out_target = expScaling - actScaling;
613 :
614 : /* Wait until we have l_frm outputs samples */
615 : /* (required to search for correlation in the past). */
616 : /* If we don't have enough samples, simply copy input to output */
617 76596 : if ( ps->l_buf_out < ps->l_frm )
618 : {
619 8430288 : for ( i = 0; i < ps->l_frm; i++ )
620 : {
621 8426880 : a_out[i] = a_in[i];
622 : }
623 3408 : l_frm_out = ps->l_frm;
624 : }
625 : else
626 : {
627 73188 : float *buf_out_ptr = &( ps->buf_out[ps->l_buf_out - ps->l_frm] );
628 73188 : float *frm_in_ptr = &( frm_in[ps->l_frm] );
629 :
630 : /* fill input frame */
631 : /* 1st input frame: previous output samples */
632 160957668 : for ( i = 0; i < ps->l_frm; i++ )
633 : {
634 160884480 : frm_in[i] = buf_out_ptr[i];
635 : }
636 : /* 2nd input frame: new input samples */
637 160957668 : for ( i = 0; i < ps->l_frm; i++ )
638 : {
639 160884480 : frm_in_ptr[i] = a_in[i];
640 : }
641 : /* no scaling */
642 73188 : if ( ps->scale == 100 )
643 : {
644 71331 : copy_frm( ps, frm_in, a_out, &l_frm_out );
645 : }
646 : /* shrink */
647 1857 : else if ( ps->scale < 100 )
648 : {
649 768 : shrink_frm( ps, frm_in, maxScaling, a_out, &l_frm_out );
650 : }
651 : /* extend */
652 : else
653 : {
654 1089 : extend_frm( ps, frm_in, a_out, &l_frm_out );
655 : }
656 : /* control the amount/frequency of scaling */
657 73188 : if ( l_frm_out != ps->l_frm )
658 : {
659 312 : if ( maxScaling != 0U &&
660 312 : abs( (int16_t) ( ps->l_frm - l_frm_out ) ) > maxScaling )
661 : {
662 : /* maxScaling exceeded -> discard scaled frame */
663 102 : copy_frm( ps, frm_in, a_out, &l_frm_out );
664 : }
665 210 : else if ( abs( l_frm_out_target ) > ps->l_frm ) /* ignore small difference */
666 : {
667 168 : dl_copied = l_frm_out_target - (int32_t) ps->l_frm;
668 168 : dl_scaled = l_frm_out_target - (int32_t) l_frm_out;
669 : /* discard scaled frame if copied frame is closer to target length */
670 168 : if ( abs( dl_copied ) < abs( dl_scaled ) )
671 : {
672 9 : copy_frm( ps, frm_in, a_out, &l_frm_out );
673 : }
674 : }
675 : }
676 : }
677 :
678 : /* copy output to internal buffer */
679 : /* avoid buffer overflow: */
680 : /* discard old samples; always keep at least most recent l_frm samples */
681 76596 : if ( ( ps->l_buf_out + l_frm_out ) > ps->buf_out_capacity )
682 : {
683 29289 : float *buf_out_ptr1 = ps->buf_out;
684 : float *buf_out_ptr2;
685 :
686 29289 : l_rem = ( ps->l_frm - l_frm_out );
687 29289 : if ( l_rem < 0 )
688 : {
689 126 : l_rem = 0;
690 : }
691 29289 : buf_out_ptr2 = &( ps->buf_out[ps->l_buf_out - l_rem] );
692 38919 : for ( i = 0; i < l_rem; i++ )
693 : {
694 9630 : buf_out_ptr1[i] = buf_out_ptr2[i];
695 : }
696 29289 : ps->l_buf_out = l_rem;
697 : }
698 : /* append new output samples */
699 76596 : if ( ( ps->l_buf_out + l_frm_out ) > ps->buf_out_capacity )
700 : {
701 0 : return 5;
702 : }
703 : {
704 76596 : float *buf_out_ptr = &( ps->buf_out[ps->l_buf_out] );
705 169547742 : for ( i = 0; i < l_frm_out; i++ )
706 : {
707 169471146 : buf_out_ptr[i] = a_out[i];
708 : }
709 : }
710 76596 : ps->l_buf_out += l_frm_out;
711 :
712 76596 : *l_out = l_frm_out;
713 : /* update time */
714 76596 : ps->l_in_total += ps->l_frm;
715 :
716 76596 : if ( abs( ps->diffSinceSetScale ) < ( 0x7FFFFF - ( l_frm_out - ps->l_frm ) ) &&
717 76596 : ps->nFramesSinceSetScale < statsResetThreshold )
718 : {
719 76596 : ps->diffSinceSetScale += l_frm_out - ps->l_frm;
720 76596 : ++ps->nFramesSinceSetScale;
721 : }
722 : else /* scale statistics down to avoid overflow */
723 : {
724 0 : ps->diffSinceSetScale >>= statsResetShift;
725 0 : ps->nFramesSinceSetScale >>= statsResetShift;
726 : }
727 :
728 : #ifdef DEBUG_APA_SILENCE_NON_SCALED
729 : if ( l_in == *l_out )
730 : {
731 : set_zero( a_out, *l_out );
732 : }
733 : else
734 : {
735 : set_f( a_out, (float) INT16_MAX, *l_out );
736 : }
737 : #endif
738 :
739 76596 : return 0;
740 : }
741 :
742 :
743 : /*---------------------------------------------------------------------*
744 : * Local functions
745 : *---------------------------------------------------------------------*/
746 :
747 : /*
748 : ********************************************************************************
749 : *
750 : * Function : get_scaling_quality
751 : * Tables : <none>
752 : * Compile Defines : <none>
753 : * Return : 0 on success, 1 on failure
754 : * Information : Uses pitch, half pitch, three halves and double pitch
755 : * to evaluate the quality of the scaled frame by checking
756 : * periodicity.
757 : * Silence can be detected as additional feature. This must
758 : * be set in global struct apa_state.
759 : *
760 : * If search length is very narrow then use fewer points
761 : * to evaluate periodicity and silence.
762 : *
763 : * Computationally not very efficient by using normalized
764 : * cross-correlation: Using sqrt() for energy calculation
765 : * adds complexity.
766 : *
767 : * 03-AUG-04 S.Doehla initial version
768 : *
769 : ********************************************************************************
770 : */
771 1857 : static void get_scaling_quality(
772 : const apa_state_t *ps,
773 : const float *signal,
774 : uint16_t s_len,
775 : uint16_t offset,
776 : uint16_t corr_len,
777 : uint16_t pitch,
778 : float *energydB,
779 : float *quality )
780 : {
781 1857 : float maxEnergy = 0.0f;
782 1857 : float qualityOfMaxEnergy = 0.0f; /* we measure the quality for all channels and select the one with highest energy */
783 :
784 1857 : float half_pitch_cn = 0.0f;
785 1857 : float pitch_cn = 0.0f;
786 1857 : float three_halves_pitch_cn = 0.0f;
787 1857 : float double_pitch_cn = 0.0f;
788 :
789 1857 : float pitch_energy = 0.0f;
790 1857 : float half_pitch_energy = 0.0f;
791 1857 : float three_halves_pitch_energy = 0.0f;
792 1857 : float double_pitch_energy = 0.0f;
793 :
794 1857 : uint16_t i = 0;
795 :
796 6660 : for ( i = 0; i < ps->num_channels; i++ )
797 : {
798 : float energy;
799 4803 : offset = 0;
800 :
801 4803 : pitch_cn = normalized_cross_correlation_self( signal, pitch + offset, offset, corr_len, ps->num_channels * 2, &pitch_energy );
802 4803 : if ( pitch_cn > 0.0f )
803 : {
804 : /* calculate correlation for double pitch */
805 3390 : if ( 2 * pitch + offset + corr_len <= s_len )
806 : {
807 2427 : double_pitch_cn = normalized_cross_correlation_self( signal, 2 * pitch + offset, offset, corr_len, ps->num_channels * 2, &double_pitch_energy );
808 : }
809 : else
810 : {
811 963 : double_pitch_cn = pitch_cn;
812 963 : double_pitch_energy = pitch_energy;
813 : }
814 : /* calculate correlation for three/half pitch */
815 3390 : if ( ( 3 * pitch ) / 2 + offset + corr_len <= s_len )
816 : {
817 2829 : three_halves_pitch_cn = normalized_cross_correlation_self( signal, ( 3 * pitch ) / 2 + offset, offset, corr_len, ps->num_channels * 2, &three_halves_pitch_energy );
818 : }
819 : else
820 : {
821 561 : three_halves_pitch_cn = pitch_cn;
822 561 : three_halves_pitch_energy = pitch_energy;
823 : }
824 : /* calculate correlation for half pitch */
825 3390 : if ( pitch / 2 + offset + corr_len <= s_len )
826 : {
827 3390 : half_pitch_cn = normalized_cross_correlation_self( signal, pitch / 2 + offset, offset, corr_len, ps->num_channels * 2, &half_pitch_energy );
828 : }
829 : else
830 : {
831 0 : half_pitch_cn = pitch_cn;
832 0 : half_pitch_energy = pitch_energy;
833 : }
834 :
835 : /* combine correlation results */
836 3390 : *quality = ( half_pitch_cn * three_halves_pitch_cn ) + ( pitch_cn * double_pitch_cn );
837 3390 : energy = pitch_energy + half_pitch_energy + three_halves_pitch_energy + double_pitch_energy;
838 : }
839 : else
840 : {
841 1413 : *quality = pitch_cn; /* value is negative, thus pass it */
842 1413 : energy = pitch_energy;
843 : }
844 :
845 : /* update the quality by the quality of the signal with the highest energy */
846 4803 : if ( energy > maxEnergy )
847 : {
848 2667 : qualityOfMaxEnergy = *quality;
849 2667 : maxEnergy = energy;
850 : }
851 :
852 : /* go to next channel */
853 4803 : ++signal;
854 : }
855 1857 : *quality = qualityOfMaxEnergy;
856 :
857 : /* increase calculated quality of signals with low energy */
858 1857 : *energydB = apa_corrEnergy2dB( maxEnergy, corr_len );
859 1857 : *quality += apa_getQualityIncreaseForLowEnergy( *energydB );
860 :
861 1857 : return;
862 : }
863 :
864 :
865 : /* Converts the correlation energy to dB. */
866 1857 : static float apa_corrEnergy2dB(
867 : float energy,
868 : uint16_t corr_len )
869 : {
870 1857 : float energydB = 10.0f * (float) log10( energy / ( PCM16_TO_FLT_FAC * PCM16_TO_FLT_FAC * corr_len * 4.0f ) );
871 :
872 1857 : return energydB;
873 : }
874 :
875 :
/* Increases the calculated quality of signals with low energy.
 *
 * energydB : i : signal energy [dB]
 *
 * Returns the quality bonus: 0 for energies that are not below -40 dB, rising
 * linearly to 2 at -65 dB and staying at 2 for anything lower. */
static float apa_getQualityIncreaseForLowEnergy(
    float energydB )
{
    const float qualIncreaseMinEnergy = -65;
    const float qualIncreaseMaxEnergy = -40;
    float clampedEnergy;
    float qualIncForLowEnergy;

    /* no bonus at or above the upper energy limit */
    /* (written as a negated "<" so that NaN also yields no bonus) */
    if ( !( energydB < qualIncreaseMaxEnergy ) )
    {
        return 0;
    }

    /* only the lower limit needs clamping: energydB is already below the upper one */
    clampedEnergy = energydB;
    if ( clampedEnergy < qualIncreaseMinEnergy )
    {
        clampedEnergy = qualIncreaseMinEnergy;
    }

    /* linear interpolation between 0 (upper limit) and 2 (lower limit) */
    qualIncForLowEnergy = ( clampedEnergy - qualIncreaseMaxEnergy ) /
                          ( qualIncreaseMinEnergy - qualIncreaseMaxEnergy ) * 2;
    assert( qualIncForLowEnergy >= 0 && qualIncForLowEnergy <= 2 );

    return qualIncForLowEnergy;
}
902 :
903 :
904 : /*
905 : ********************************************************************************
906 : *
907 : * Function : logarithmic_search
908 : * Tables : <none>
909 : * Compile Defines : <none>
910 : * Return : 0 on success, 1 on failure
911 : * Information : Search for best match of a template segment using
912 : * hierarchical search method:
913 : * Parameter css is used for sampling every css'd correlation
914 : * value. The area around the best match so far is used for
915 : * further correlation value with half css-value until css=1.
916 : * Search area length is always half previous search length.
917 : * Parameter wss is passed to the correlation computation
918 : * If the search area passes the boundaries, the search
919 : * window is reduced so that it's entirely inside the
920 : * boundaries.
921 : *
922 : ********************************************************************************
923 : */
924 1857 : static bool logarithmic_search(
925 : const apa_state_t *ps,
926 : const float *signal,
927 : int16_t s_start,
928 : uint16_t inlen,
929 : uint16_t offset,
930 : uint16_t fixed_pos,
931 : uint16_t corr_len,
932 : uint16_t wss,
933 : uint16_t css,
934 : int16_t *synchpos )
935 : {
936 : int16_t i;
937 : float coeff;
938 : float coeff_max;
939 1857 : int16_t s_start_old = 0;
940 1857 : uint16_t s_len_old = 0;
941 :
942 : do
943 : {
944 1857 : coeff_max = -FLT_MAX; /* will always be overwritten with result of first correlation */
945 311817 : for ( i = s_start; i < s_start + inlen; i += css * ps->num_channels )
946 : {
947 309960 : if ( ( wss == 1 ) && ( ps->num_channels == 1 ) )
948 : {
949 0 : coeff = cross_correlation_self( signal, i + offset, fixed_pos + offset, corr_len );
950 : }
951 : else
952 : {
953 309960 : coeff = cross_correlation_subsampled_self( signal, i + offset, fixed_pos + offset, corr_len, wss * ps->num_channels );
954 : }
955 :
956 : /* update max corr */
957 309960 : if ( ps->scale < 100 )
958 : {
959 : /* shrinking: prefer greater synchpos for equal coeff */
960 92160 : if ( coeff >= coeff_max )
961 : {
962 13047 : coeff_max = coeff;
963 13047 : *synchpos = i;
964 : }
965 : }
966 : else
967 : {
968 : /* extending: prefer smaller synchpos for equal coeff */
969 217800 : if ( coeff > coeff_max )
970 : {
971 20832 : coeff_max = coeff;
972 20832 : *synchpos = i;
973 : }
974 : }
975 : }
976 : /* backup old search range */
977 1857 : s_start_old = s_start;
978 1857 : s_len_old = inlen;
979 :
980 1857 : css = css / 2;
981 1857 : inlen = inlen / 2;
982 1857 : s_start = *synchpos - inlen / 2;
983 1857 : if ( s_start < s_start_old )
984 : {
985 525 : s_start = s_start_old;
986 : }
987 1857 : if ( ( s_start + inlen ) > ( s_start_old + s_len_old ) )
988 : {
989 432 : inlen = s_start_old - s_start + s_len_old;
990 : }
991 1857 : } while ( css > 2 );
992 :
993 1857 : return 0;
994 : }
995 :
996 :
997 : /*
998 : ********************************************************************************
999 : *
1000 : * Function : find_synch
1001 : * Tables : <none>
1002 : * Compile Defines : <none>
1003 : * Return : 0 on success, 1 on failure
1004 : * Information : Find the best match of an template segment within
1005 : * a search region by similarity measures.
1006 : *
1007 : * Typical example:
1008 : *
1009 : * 0 10 20 30 40 50 60
1010 : * in[] = abcdefghijk_abcdefghijk_abcdefghijk_abcdEFGHIJk_abcdefghijk_a
1011 : * l_in = 61
1012 : * offset = 30 |
1013 : * s_start = -20 <-------------------|
1014 : * s_len = 15 <-------------> |
1015 : * search range: *************** |
1016 : * fixed_pos = 10 |--------->
1017 : * corr_len = 6 | <---->
1018 : * template segment: | ******
1019 : * synch_pos: -14 <-------------|
1020 : *
1021 : * All positions are given relative to offset. The
1022 : * search region starts at offset+s_start and ends
1023 : * at offset+s_start+s_len. The template segment
1024 : * starts at offset+fixed_pos and ends at
1025 : * offset+fixed_pos+corr_len. For correlation, the
1026 : * template segment (EFGHIJ) is matched against the
1027 : * segment in the search region, e.g., against (k_abcd)
1028 : * in the first search position. The search position
1029 : * with the best match (-14: EFGHIJ <-> efghij) is
1030 : * returned.
1031 : *
1032 : * 19-JUN-03 N.Faerber initial version
1033 : * 23-APR-04 S.Doehla added subsampling
1034 : *
1035 : ********************************************************************************
1036 : */
1037 1857 : static bool find_synch(
1038 : apa_state_t *ps,
1039 : const float *in,
1040 : uint16_t l_in,
1041 : int16_t s_start,
1042 : uint16_t s_len,
1043 : int16_t fixed_pos,
1044 : uint16_t corr_len,
1045 : uint16_t offset,
1046 : float *energy,
1047 : float *quality,
1048 : int16_t *synch_pos )
1049 : {
1050 1857 : assert( ( corr_len - 1 + s_start + s_len - 1 + offset ) < l_in );
1051 1857 : assert( ( corr_len - 1 + fixed_pos + offset ) < l_in );
1052 :
1053 : /* pass last pitch to search function as prediction value */
1054 1857 : *synch_pos = ps->last_pitch;
1055 :
1056 1857 : logarithmic_search( ps, in, s_start, s_len, offset, fixed_pos, corr_len, ps->wss, ps->css, synch_pos );
1057 :
1058 : /* assert synch_pos is cleanly divisible by number of channels */
1059 1857 : assert( *synch_pos % ps->num_channels == 0 );
1060 :
1061 1857 : *quality = 0;
1062 1857 : get_scaling_quality( ps, in, l_in, offset, corr_len, (uint16_t) abs( fixed_pos - *synch_pos ), energy, quality );
1063 :
1064 1857 : ps->last_pitch = *synch_pos;
1065 :
1066 1857 : return 0;
1067 : }
1068 :
1069 :
1070 : /*
1071 : ********************************************************************************
1072 : *
1073 : * Function : copy_frm
1074 : * Tables : <none>
1075 : * Compile Defines : <none>
1076 : * Return : 0 on success, 1 on failure
1077 : * Information : Copy an audio.
1078 : *
1079 : * The frame size is fixed to ps->l_frm. The input data
1080 : * is stored in frm_in[], where the first ps->l_frm samples
1081 : * shall include the previous output frame and the second
1082 : * ps->l_frm samples shall contain the current input frame.
1083 : * The output frame is stored in frm_out[] and contains
1084 : * l_frm_out = ps->l_frm.
1085 : *
1086 : * The first ps->l_frm input samples are not used by
1087 : * this function and are only provided for a consistent
1088 : * function call with shrink_frm() and extend_frm().
1089 : *
1090 : ********************************************************************************
1091 : */
1092 71442 : static bool copy_frm(
1093 : apa_state_t *ps,
1094 : const float frm_in[],
1095 : float frm_out[],
1096 : uint16_t *l_frm_out )
1097 : {
1098 : uint16_t i;
1099 :
1100 : /* only 2nd input frame is used */
1101 71442 : frm_in += ps->l_frm;
1102 :
1103 : /* copy frame */
1104 157047762 : for ( i = 0; i < ps->l_frm; i++ )
1105 : {
1106 156976320 : frm_out[i] = frm_in[i];
1107 : }
1108 :
1109 : /* set output length */
1110 71442 : *l_frm_out = ps->l_frm;
1111 :
1112 71442 : return 0;
1113 : }
1114 :
1115 :
1116 : /*
1117 : ********************************************************************************
1118 : *
1119 : * Function : shrink_frm
1120 : * Tables : <none>
1121 : * Compile Defines : <none>
1122 : * Return : 0 on success, 1 on failure
1123 : * Information : Shrink the length of an audio frame using the WSOLA
1124 : * algorithm.
1125 : *
1126 : * The frame size is fixed to ps->l_frm. The input data
1127 : * is stored in frm_in[], where the first ps->l_frm samples
1128 : * shall include the previous output frame and the second
1129 : * ps->l_frm samples shall contain the current input frame.
1130 : * The output frame is stored in frm_out[] and contains
1131 : * l_frm_out samples. The amount of shrinking is signal
1132 : * dependent.
1133 : *
1134 : * The first ps->l_frm input samples are not used by
1135 : * this function and are only provided for a consistent
1136 : * function call with extend_frm().
1137 : *
1138 : ********************************************************************************
1139 : */
1140 768 : static bool shrink_frm(
1141 : apa_state_t *ps,
1142 : const float frm_in[],
1143 : uint16_t maxScaling,
1144 : float frm_out[],
1145 : uint16_t *l_frm_out )
1146 : {
1147 768 : bool findSynchResult = 0;
1148 : int16_t xtract, l_rem, s_start, s_end;
1149 : uint16_t i;
1150 : uint16_t over;
1151 768 : float energy, quality = 0.0f;
1152 : uint16_t l_frm;
1153 : uint16_t l_seg;
1154 :
1155 768 : l_frm = ps->l_frm;
1156 768 : l_seg = ps->l_seg;
1157 :
1158 : /* only 2nd input frame is used */
1159 768 : frm_in += l_frm;
1160 :
1161 : /* set search range */
1162 768 : s_start = ( ps->p_min / ps->num_channels ) * ps->num_channels;
1163 768 : s_end = s_start + ps->l_search;
1164 768 : if ( ( s_end + l_seg ) >= l_frm )
1165 : {
1166 768 : s_end = ( l_frm - l_seg );
1167 : }
1168 :
1169 : /* calculate overlap position */
1170 768 : if ( isSilence( frm_in, l_seg, 10 ) )
1171 : {
1172 : /* maximum scaling */
1173 0 : energy = -65;
1174 0 : quality = 5;
1175 0 : if ( ps->evs_compat_mode == false )
1176 : {
1177 :
1178 0 : xtract = maxScaling;
1179 : /* take samples already in the renderer buf into account */
1180 0 : xtract += ps->l_r_buf;
1181 : /* snap to renderer time slot borders */
1182 0 : xtract -= ( ps->l_ts - ( l_frm - xtract + ps->l_r_buf ) % ps->l_ts );
1183 0 : while ( xtract < 0 )
1184 : {
1185 0 : xtract += ps->l_ts;
1186 : }
1187 0 : while ( xtract > ( s_end - ps->num_channels ) )
1188 : {
1189 : /* exceeded the possible shrinking, go back one renderer ts*/
1190 0 : xtract -= ps->l_ts;
1191 : }
1192 : }
1193 0 : else if ( maxScaling != 0U && s_end > maxScaling + 1 )
1194 : {
1195 0 : xtract = maxScaling;
1196 : }
1197 : else
1198 : {
1199 : /* set to last valid element (i.e. element[len - 1] but note for stereo last element is last pair of samples) */
1200 0 : xtract = s_end - ps->num_channels;
1201 : }
1202 : }
1203 : else
1204 : {
1205 : /* find synch */
1206 768 : findSynchResult = find_synch( ps, frm_in, l_frm, s_start, (uint16_t) ( s_end - s_start ), 0, l_seg, 0, &energy, &quality, &xtract );
1207 : }
1208 :
1209 : /* assert synch_pos is cleanly divisible by number of channels */
1210 768 : assert( xtract % ps->num_channels == 0 );
1211 :
1212 : /* set frame overlappable - reset if necessary */
1213 768 : over = 1;
1214 :
1215 : /* test whether frame has sufficient quality */
1216 768 : if ( quality < ( ps->targetQuality - ( ps->bad_frame_count * 0.1f ) + ( ps->good_frame_count * 0.2f ) ) )
1217 : {
1218 : /* not sufficient */
1219 618 : over = 0;
1220 618 : if ( ps->bad_frame_count < ps->qualityred )
1221 : {
1222 189 : ++ps->bad_frame_count;
1223 : }
1224 618 : if ( ps->good_frame_count > 0U )
1225 : {
1226 159 : --ps->good_frame_count;
1227 : }
1228 : }
1229 : else
1230 : {
1231 : /* sufficient quality */
1232 150 : if ( ps->bad_frame_count > 0U )
1233 : {
1234 150 : --ps->bad_frame_count;
1235 : }
1236 150 : if ( ps->good_frame_count < ps->qualityrise )
1237 : {
1238 150 : ++ps->good_frame_count;
1239 : }
1240 : }
1241 :
1242 : /* Calculate output data */
1243 768 : if ( over && xtract )
1244 : {
1245 150 : if ( findSynchResult == 1 )
1246 : {
1247 0 : return 1;
1248 : }
1249 150 : if ( ps->evs_compat_mode == true )
1250 : {
1251 0 : overlapAddEvs( frm_in, frm_in + xtract, frm_out, l_seg, ps->num_channels, ps->win + ps->l_halfwin, ps->win );
1252 : }
1253 : else
1254 : {
1255 150 : overlapAdd( frm_in, frm_in + xtract, frm_out, l_seg, ps->num_channels, ps->win + ps->l_halfwin, ps->win );
1256 : }
1257 : }
1258 : else
1259 : {
1260 618 : xtract = 0;
1261 659658 : for ( i = 0; i < l_seg; i++ )
1262 : {
1263 659040 : frm_out[i] = frm_in[i];
1264 : }
1265 : }
1266 :
1267 : /* append remaining samples */
1268 768 : l_rem = l_frm - xtract - l_seg;
1269 722142 : for ( i = 0; i < l_rem; i++ )
1270 : {
1271 721374 : frm_out[l_seg + i] = frm_in[l_frm - l_rem + i];
1272 : }
1273 :
1274 : /* set output length */
1275 768 : *l_frm_out = l_seg + l_rem;
1276 :
1277 768 : return 0;
1278 : }
1279 :
1280 :
1281 : /*
1282 : ********************************************************************************
1283 : *
1284 : * Function : extend_frm
1285 : * Tables : <none>
1286 : * Compile Defines : <none>
1287 : * Return : 0 on success, 1 on failure
1288 : * Information : Extend the length of an audio frame using the WSOLA
1289 : * algorithm.
1290 : *
1291 : * The frame size is fixed to ps->l_frm. The input data
1292 : * is stored in frm_in[], where the first ps->l_frm samples
1293 : * shall include the previous output frame and the second
1294 : * ps->l_frm samples shall contain the current input frame.
1295 : * The output frame is stored in frm_out[] and contains
1296 : * l_frm_out samples. The amount of extension is signal
1297 : * dependent.
1298 : *
1299 : ********************************************************************************
1300 : */
1301 1089 : static bool extend_frm(
1302 : apa_state_t *ps,
1303 : const float frm_in[],
1304 : float frm_out[],
1305 : uint16_t *l_frm_out )
1306 : {
1307 1089 : bool findSynchResult = 0;
1308 : uint16_t l_frm_out_target;
1309 : uint16_t n, i;
1310 : int16_t N;
1311 : int16_t s[MAXN + 2], s_max, s_min;
1312 : int16_t xtract[MAXN + 2], sync_start, s_end;
1313 : uint16_t over[MAXN + 2];
1314 : int16_t l_rem;
1315 1089 : int16_t s_start = 0;
1316 1089 : float energy, quality = 0.0f;
1317 : uint16_t l_frm, l_seg;
1318 : const float *fadeOut, *fadeIn;
1319 : float *out;
1320 :
1321 :
1322 1089 : l_frm = ps->l_frm;
1323 1089 : l_seg = ps->l_seg;
1324 :
1325 : /* number of segments/iterations */
1326 1089 : l_frm_out_target = (uint16_t) ( (float) l_frm * 1.5f );
1327 1089 : N = ( l_frm_out_target / l_seg ) - 1;
1328 1089 : if ( N < 1 )
1329 : {
1330 0 : N = 1;
1331 : }
1332 1089 : if ( N > MAXN )
1333 : {
1334 0 : return 1;
1335 : }
1336 : /* calculate equally spaced search regions */
1337 : /* s[n] are given relative to 2nd frame and point to the start of */
1338 : /* the search region. The first segment (n=1) will not be moved. */
1339 : /* Hence, the iterations will start with n=2. */
1340 1089 : s_min = -( ps->l_search ) - ( ps->p_min );
1341 : /* (make sure not to exceed array dimension) */
1342 1089 : if ( l_frm + s_min < 0 )
1343 : {
1344 0 : s_min = -( l_frm );
1345 : }
1346 1089 : s_max = l_frm - 2 * l_seg - ps->l_search;
1347 1089 : if ( s_max < s_min )
1348 : {
1349 0 : N = 1;
1350 : }
1351 : /* for just one segment start at s_min */
1352 1089 : if ( N == 1 )
1353 : {
1354 0 : s[2] = s_min;
1355 : }
1356 : /* else, spread linear in between s_min and s_max */
1357 : /* (including s_min and s_max) */
1358 : else
1359 : {
1360 3267 : for ( n = 2; n <= ( N + 1 ); n++ )
1361 : {
1362 2178 : s[n] = s_min + ( ( s_max - s_min ) * ( n - 2 ) ) / ( N - 1 );
1363 : }
1364 : }
1365 :
1366 : /*
1367 : * Planning Phase
1368 : */
1369 :
1370 1089 : xtract[1] = -( l_seg ); /* make sync_start=0 in 1st iteration */
1371 1089 : n = 2;
1372 :
1373 : /* define synch segment (to be correlated with search region) */
1374 1089 : sync_start = xtract[n - 1] + l_seg;
1375 1089 : over[n] = 1; /* will be reset if overlap is not required */
1376 : /* check end of search region: should be at least p_min */
1377 : /* samples on the left of synch_start */
1378 1089 : if ( ( s[n] + ps->l_search ) < ( sync_start - ( ps->p_min ) ) )
1379 : {
1380 0 : s_start = s[n];
1381 0 : s_end = s_start + ps->l_search;
1382 : }
1383 : else
1384 : {
1385 : /* shrink search region to enforce minimum shift */
1386 1089 : s_end = sync_start - ( ps->p_min );
1387 1089 : if ( s[n] + ps->l_search < sync_start )
1388 : {
1389 1089 : s_start = s[n]; /* just do it with normal start position */
1390 : }
1391 0 : else if ( n == ( N + 1 ) ) /* move search region left for last segment */
1392 : {
1393 0 : s_start = s_end - ( ps->l_search - ps->p_min );
1394 : }
1395 : else
1396 : {
1397 0 : over[n] = 0; /* don't search/overlap (just copy down) */
1398 : }
1399 : }
1400 :
1401 1089 : if ( over[n] )
1402 : {
1403 : /* calculate overlap position */
1404 1089 : if ( isSilence( frm_in, l_seg, 10 ) )
1405 : {
1406 : /* maximum scaling */
1407 0 : energy = -65;
1408 0 : quality = 5;
1409 0 : xtract[n] = s_start + ps->num_channels;
1410 0 : if ( ps->evs_compat_mode == false )
1411 : {
1412 : /* take renderer buffer samples into accout */
1413 0 : xtract[n] += ps->l_r_buf;
1414 : /* snap to next renderer time slot border to resynchronize */
1415 0 : xtract[n] -= ( ( N - 1 ) * l_seg - xtract[n] + ps->l_r_buf ) % ps->l_ts;
1416 : }
1417 : }
1418 : else
1419 : {
1420 : /* find synch */
1421 1089 : findSynchResult = find_synch( ps, frm_in, 2 * l_frm, s_start, s_end - s_start, sync_start, l_seg, l_frm, &energy, &quality, &xtract[n] );
1422 : }
1423 : /* assert synch_pos is cleanly divisible by number of channels */
1424 1089 : assert( xtract[n] % ps->num_channels == 0 );
1425 :
1426 : /* test for sufficient quality */
1427 1089 : if ( quality < ( ps->targetQuality - ( ps->bad_frame_count * 0.1f ) + ( ps->good_frame_count * 0.2f ) ) )
1428 : {
1429 : /* not sufficient */
1430 927 : over[n] = 0;
1431 927 : xtract[n] = sync_start;
1432 927 : if ( ps->bad_frame_count < ps->qualityred )
1433 : {
1434 369 : ++ps->bad_frame_count;
1435 : }
1436 927 : if ( ps->good_frame_count > 0U )
1437 : {
1438 60 : --ps->good_frame_count;
1439 : }
1440 : }
1441 : else
1442 : {
1443 : /* sufficient quality */
1444 162 : if ( ps->bad_frame_count > 0U )
1445 : {
1446 153 : --ps->bad_frame_count;
1447 : }
1448 162 : if ( ps->good_frame_count < ps->qualityrise )
1449 : {
1450 162 : ++ps->good_frame_count;
1451 : }
1452 : }
1453 1089 : if ( findSynchResult )
1454 : {
1455 0 : return 1;
1456 : }
1457 : }
1458 : else
1459 : {
1460 0 : xtract[n] = sync_start;
1461 : }
1462 :
1463 :
1464 : /* Calculate output data */
1465 2178 : for ( n = 2; n <= N; n++ )
1466 : {
1467 1089 : if ( over[n] && xtract[n - 1] + l_seg != xtract[n] )
1468 : {
1469 : /* mix 2nd half of previous segment with 1st half of current segment */
1470 162 : fadeOut = frm_in + l_frm + xtract[n - 1] + l_seg;
1471 162 : fadeIn = frm_in + l_frm + xtract[n];
1472 162 : out = frm_out + ( n - 2 ) * l_seg;
1473 162 : if ( ps->evs_compat_mode == true )
1474 : {
1475 0 : overlapAddEvs( fadeOut, fadeIn, out, l_seg, ps->num_channels, ps->win + ps->l_halfwin, ps->win );
1476 : }
1477 : else
1478 : {
1479 162 : overlapAdd( fadeOut, fadeIn, out, l_seg, ps->num_channels, ps->win + ps->l_halfwin, ps->win );
1480 : }
1481 : }
1482 : else
1483 : {
1484 : /* just copy down 1st half of current segment (= 2nd half of previous segment) */
1485 : float *frm_out_ptr;
1486 : const float *frm_in_ptr;
1487 927 : frm_out_ptr = &( frm_out[( n - 2 ) * l_seg] );
1488 927 : frm_in_ptr = &( frm_in[l_frm + xtract[n]] );
1489 1048287 : for ( i = 0; i < l_seg; i++ )
1490 : {
1491 1047360 : frm_out_ptr[i] = frm_in_ptr[i];
1492 : }
1493 : }
1494 : }
1495 :
1496 : /* append remaining samples */
1497 1089 : l_rem = l_frm - ( xtract[N] + l_seg );
1498 1442289 : for ( i = 0; i < l_rem; i++ )
1499 : {
1500 1441200 : frm_out[( N - 1 ) * l_seg + i] = frm_in[2 * l_frm - l_rem + i];
1501 : }
1502 :
1503 : /* set output length */
1504 1089 : *l_frm_out = ( N - 1 ) * l_seg + l_rem;
1505 :
1506 1089 : return 0;
1507 : }
|