Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include "options.h"
35 : #ifdef DEBUGGING
36 : #include "debug.h"
37 : #endif
38 : #include "cnst.h"
39 : #include "ivas_cnst.h"
40 : #include "ivas_prot.h"
41 : #include "rom_enc.h"
42 : #include "rom_com.h"
43 : #include "prot.h"
44 : #include "wmc_auto.h"
45 :
46 :
47 : /*-------------------------------------------------------------------*
48 : * pre_proc_ivas()
49 : *
50 : * Encoder pre-processing of one channel: selection of the internal frame length (st->L_frame), rewriting of st->coder_type on switching and in inactive frames, decision matrix (ivas_decision_matrix_enc()), TCX/HQ configuration, update of counters and flags,
51 : * computation of the core-coder buffers through ivas_compute_core_buffers() and setting of *inp. Returns IVAS_ERR_OK, or the error code reported by ivas_compute_core_buffers().
52 : *--------------------------------------------------------------------*/
53 :
54 1129814 : ivas_error pre_proc_ivas(
55 : Encoder_State *st, /* i/o: encoder state structure */
56 : const int16_t last_element_mode, /* i : last element mode */
57 : const int32_t element_brate, /* i : element bitrate */
58 : const int32_t last_element_brate, /* i : last element bitrate */
59 : const int16_t input_frame, /* i : frame length */
60 : float old_inp_12k8[], /* i/o: buffer of old input signal */
61 : float old_inp_16k[], /* i/o: buffer of old input signal @ 16kHz */
62 : float **inp, /* o : ptr. to input signal in the current frame (inp_12k8 if st->L_frame == L_FRAME, otherwise inp_16k) */
63 : float *ener, /* o : residual energy from Levinson-Durbin */
64 : float A[NB_SUBFR16k * ( M + 1 )], /* i/o: A(z) unquantized for the 4 subframes */
65 : float Aw[NB_SUBFR16k * ( M + 1 )], /* i/o: weighted A(z) unquantized for subframes */
66 : float epsP[M + 1], /* i/o: LP prediction errors */
67 : float lsp_new[M], /* i/o: LSPs at the end of the frame */
68 : float lsp_mid[M], /* i/o: LSPs in the middle of the frame */
69 : float *new_inp_resamp16k, /* o : new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */
70 : int16_t *Voicing_flag, /* o : voicing flag for HQ FEC */
71 : const float old_wsp[], /* i : weighted input signal buffer */
72 : const int16_t loc_harm, /* i : harmonicity flag */
73 : const float cor_map_sum, /* i : speech/music classification parameter */
74 : const int16_t vad_flag_dtx, /* i : HE-SAD flag with additional DTX HO */
75 : const float enerBuffer[CLDFB_NO_CHANNELS_MAX], /* i : energy buffer */
76 : const float fft_buff[2 * L_FFT], /* i : FFT buffer */
77 : const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */
78 : const int16_t vad_hover_flag, /* i : VAD hangover flag */
79 : const int16_t flag_16k_smc /* i : flag to indicate if the OL SMC is run at 16 kHz */
80 : )
81 : {
82 : int16_t L_look, element_mode, lMemRecalc_12k8;
83 : float *inp_12k8, *new_inp_12k8, *inp_16k; /* pointers to current frame and new data */
84 : const float *wsp; /* weighted input signal buffer */
85 : int32_t sr_core_tmp, total_brate_tmp;
86 : ivas_error error;
87 :
88 1129814 : push_wmops( "pre_proc" );
89 :
90 1129814 : error = IVAS_ERR_OK;
91 :
92 : /*----------------------------------------------------------------*
93 : * Initialization
94 : *----------------------------------------------------------------*/
95 :
96 1129814 : element_mode = st->element_mode;
97 :
98 1129814 : new_inp_12k8 = old_inp_12k8 + L_INP_MEM; /* pointer to new samples of the input signal in 12.8kHz core */
99 1129814 : inp_12k8 = new_inp_12k8 - L_LOOK_12k8;
100 :
101 1129814 : if ( element_mode != IVAS_CPE_DFT )
102 : {
103 1070135 : new_inp_12k8 -= L_FILT; /* all element modes except DFT stereo: shift the pointer back by L_FILT samples */
104 : }
105 :
106 1129814 : wsp = old_wsp + L_WSP_MEM; /* pointer to the current frame of weighted signal in 12.8kHz core */
107 :
108 1129814 : lMemRecalc_12k8 = 0;
109 1129814 : if ( element_mode == IVAS_CPE_TD )
110 : {
111 7582 : lMemRecalc_12k8 = NS2SA( INT_FS_12k8, L_MEM_RECALC_NS );
112 : }
113 :
114 : /*----------------------------------------------------------------*
115 : * Selection of the internal frame length st->L_frame (L_FRAME, L_FRAME16k, L_FRAME25_6k or L_FRAME32k)
116 : *----------------------------------------------------------------*/
117 :
118 1129814 : if ( st->core_brate == FRAME_NO_DATA )
119 : {
120 : /* prevent "L_frame" changes in CNG segments */
121 20571 : st->L_frame = st->last_L_frame;
122 : }
123 1109243 : else if ( st->core_brate == SID_2k40 && st->bwidth >= WB && st->hDtxEnc->first_CNG && ( st->hTdCngEnc != NULL && st->hTdCngEnc->act_cnt2 < MIN_ACT_CNG_UPD ) )
124 : {
125 : /* prevent "L_frame" changes in SID frame after short segment of active frames */
126 1366 : st->L_frame = st->hDtxEnc->last_CNG_L_frame;
127 : }
128 1107877 : else if ( ( ( st->element_mode == IVAS_CPE_MDCT && st->element_brate >= IVAS_64k && st->bwidth >= SWB ) || ( element_mode == IVAS_SCE && st->total_brate > MAX_ACELP_BRATE && st->bwidth >= SWB ) ) && st->core_brate != SID_2k40 )
129 : {
130 638945 : st->L_frame = L_FRAME32k;
131 : }
132 468932 : else if ( st->bwidth >= SWB && st->total_brate > MAX_ACELP_BRATE_ISM && st->total_brate <= MAX_ACELP_BRATE && element_mode == IVAS_SCE && st->is_ism_format && st->tcxonly && st->core_brate != SID_2k40 )
133 : {
134 9150 : st->L_frame = L_FRAME25_6k;
135 : }
136 459782 : else if ( st->flag_ACELP16k )
137 : {
138 321834 : st->L_frame = L_FRAME16k;
139 : }
140 : else
141 : {
142 137948 : st->L_frame = L_FRAME;
143 : }
144 :
145 1129814 : if ( st->hFdCngEnc != NULL && st->element_mode != IVAS_CPE_MDCT && ( ( st->hFdCngEnc->hFdCngCom->frameSize != st->L_frame ) || ( st->hFdCngEnc->hFdCngCom->CngBandwidth != st->input_bwidth ) ) )
146 : {
147 30940 : configureFdCngEnc( st->hFdCngEnc, max( st->input_bwidth, WB ), st->L_frame == L_FRAME16k ? ACELP_16k40 : ACELP_9k60 ); /* FD-CNG frame size or bandwidth differs from the current setting: reconfigure */
148 : }
149 :
150 1129814 : if ( st->ini_frame == 0 )
151 : {
152 : /* avoid switching of internal ACELP Fs in the very first frame */
153 8724 : st->last_L_frame = st->L_frame;
154 : }
155 :
156 1129814 : if ( st->L_frame == L_FRAME )
157 : {
158 144839 : st->gamma = GAMMA1;
159 144839 : st->preemph_fac = PREEMPH_FAC;
160 : }
161 984975 : else if ( st->L_frame == L_FRAME32k )
162 : {
163 638945 : st->gamma = GAMMA16k;
164 638945 : st->preemph_fac = PREEMPH_FAC_SWB;
165 : }
166 : else
167 : {
168 346030 : st->gamma = GAMMA16k;
169 346030 : st->preemph_fac = PREEMPH_FAC_16k;
170 : }
171 :
172 1129814 : st->sr_core = st->L_frame * FRAMES_PER_SEC;
173 1129814 : st->encoderLookahead_enc = NS2SA( st->sr_core, ACELP_LOOK_NS );
174 1129814 : st->encoderPastSamples_enc = ( st->L_frame * 9 ) >> 4; /* 9/16 of st->L_frame */
175 :
176 :
177 : /*-----------------------------------------------------------------*
178 : * coder_type rewriting in case of switching
179 : * IC frames selection
180 : * enforce TC frames in case of switching
181 : *-----------------------------------------------------------------*/
182 :
183 : /* enforce TRANSITION frames */
184 1129814 : if ( !( st->element_mode == IVAS_CPE_TD && st->idchan == 1 ) && st->last_L_frame != st->L_frame && st->core_brate != FRAME_NO_DATA && st->core_brate != SID_2k40 && st->last_core_brate != FRAME_NO_DATA && st->last_core_brate != SID_2k40 && st->coder_type_raw != VOICED )
185 : {
186 : /* enforce TC frame when the internal frame length has changed between active frames (e.g. ACELP@12k8 <-> ACELP@16k core switching) */
187 186365 : st->coder_type = TRANSITION;
188 : }
189 943449 : else if ( st->last_core == HQ_CORE && st->coder_type_raw != VOICED )
190 : {
191 : /* enforce TC frame in case of HQ -> ACELP core switching */
192 4940 : st->coder_type = TRANSITION;
193 : }
194 938509 : else if ( st->last_core_brate <= SID_2k40 && st->cng_type == FD_CNG && !( element_mode == IVAS_CPE_TD ) )
195 : {
196 : /* enforce TC frame in case of FD_CNG -> ACELP switching (past excitation not available) */
197 22687 : st->coder_type = TRANSITION;
198 : }
199 : /* select INACTIVE frames */
200 915822 : else if ( st->total_brate <= MAX_GSC_INACTIVE_BRATE && st->vad_flag == 0 && st->element_mode != IVAS_CPE_MDCT )
201 : {
202 : /* inactive frames will be coded by GSC technology */
203 : /* except for the VBR mode. VBR mode uses NELP for that */
204 18908 : if ( !( st->Opt_SC_VBR && vad_flag_dtx ) && ( st->idchan == 0 || element_mode != IVAS_CPE_TD ) )
205 : {
206 18908 : st->coder_type = INACTIVE;
207 18908 : st->hGSCEnc->noise_lev = NOISE_LEVEL_SP3;
208 : }
209 : }
210 896914 : else if ( st->total_brate > MAX_GSC_INACTIVE_BRATE && ( ( st->vad_flag == 0 && st->bwidth >= SWB && st->max_bwidth >= SWB ) || ( st->localVAD == 0 && ( st->bwidth <= WB || st->max_bwidth <= WB ) ) ) )
211 : {
212 : /* inactive frames will be coded by AVQ technology (exceptionally it can be later rewritten to GSC technology in ivas_combined_format_brate_sanity()) */
213 67566 : st->coder_type = INACTIVE;
214 : }
215 :
216 :
217 : /*---------------------------------------------------------------------*
218 : * Decision matrix (selection of technologies)
219 : *---------------------------------------------------------------------*/
220 :
221 1129814 : st->mdct_sw = MODE1;
222 1129814 : st->mdct_sw_enable = MODE1;
223 1129814 : if ( ( st->total_brate <= MIN_BRATE_GSC_NOISY_FLAG || st->bwidth < SWB || st->flag_ACELP16k ) && st->GSC_IVAS_mode == 0 )
224 : {
225 1050106 : st->GSC_noisy_speech = 0;
226 : }
227 :
228 : /* core selection */
229 1129814 : ivas_decision_matrix_enc( st, element_brate, fft_buff, enerBuffer, last_element_mode );
230 :
231 1129814 : if ( st->L_frame == L_FRAME16k && ( st->coder_type == VOICED || st->coder_type == UNVOICED ) ) /* VOICED and UNVOICED are not supported in ACELP@16k */
232 : {
233 0 : st->coder_type = GENERIC;
234 : }
235 :
236 1129814 : if ( st->core == TCX_20_CORE || st->core == HQ_CORE )
237 : {
238 949127 : st->Nb_ACELP_frames = 0;
239 : /* Configure TCX with the same bitrate as given when (re-)initializing TCX */
240 949127 : total_brate_tmp = st->total_brate; /* saved here and restored after the configuration calls below */
241 949127 : st->total_brate = st->bits_frame_nominal * FRAMES_PER_SEC;
242 949127 : SetModeIndex( st, st->last_bits_frame_nominal * FRAMES_PER_SEC, last_element_mode, MCT_flag );
243 :
244 949127 : st->sr_core = getCoreSamplerateMode2( element_mode, st->total_brate, st->bwidth, st->flag_ACELP16k, st->rf_mode, st->is_ism_format );
245 949127 : st->total_brate = total_brate_tmp; /* restore the saved total bitrate */
246 :
247 949127 : st->L_frame = (int16_t) ( st->sr_core / FRAMES_PER_SEC );
248 949127 : st->encoderLookahead_enc = NS2SA( st->sr_core, ACELP_LOOK_NS );
249 949127 : st->encoderPastSamples_enc = ( st->L_frame * 9 ) >> 4; /* 9/16 of st->L_frame */
250 :
251 949127 : if ( st->sr_core == INT_FS_12k8 )
252 : {
253 54300 : st->preemph_fac = PREEMPH_FAC;
254 54300 : st->gamma = GAMMA1;
255 : }
256 894827 : else if ( st->sr_core == INT_FS_16k )
257 : {
258 148496 : st->preemph_fac = PREEMPH_FAC_16k;
259 148496 : st->gamma = GAMMA16k;
260 : }
261 : else /* any other st->sr_core (>= 25600 expected) */
262 : {
263 746331 : st->preemph_fac = PREEMPH_FAC_SWB;
264 746331 : st->gamma = GAMMA16k;
265 : }
266 :
267 949127 : if ( st->vad_flag == 0 )
268 : {
269 104947 : st->coder_type = INACTIVE;
270 : }
271 844180 : else if ( st->coder_type > GENERIC )
272 : {
273 189730 : st->coder_type = GENERIC; /* coder types above GENERIC are mapped to GENERIC */
274 : }
275 949127 : if ( st->element_mode != IVAS_CPE_MDCT )
276 : {
277 243229 : SetTCXModeInfo( st, st->hTranDet, &st->hTcxCfg->tcx_curr_overlap_mode );
278 : }
279 : }
280 180687 : else if ( st->element_mode == IVAS_CPE_MDCT )
281 : {
282 8872 : st->hTcxEnc->tfm_mem = 0.75f;
283 : }
284 171815 : else if ( element_brate != last_element_brate )
285 : {
286 36 : if ( st->core_brate != FRAME_NO_DATA )
287 : {
288 34 : SetModeIndex( st, st->bits_frame_nominal * FRAMES_PER_SEC, element_mode, MCT_flag );
289 : }
290 :
291 36 : if ( st->extl != -1 && st->extl != IGF_BWE && st->igf == 1 )
292 : {
293 34 : st->igf = 0; /* st->extl is neither -1 nor IGF_BWE: clear the IGF flag */
294 : }
295 : }
296 :
297 :
298 : /*-----------------------------------------------------------------*
299 : * Update of ACELP harmonicity counter (used in ACELP transform codebook @32kbps)
300 : *-----------------------------------------------------------------*/
301 :
302 1129814 : if ( loc_harm == 1 && cor_map_sum > 50 && st->clas == VOICED_CLAS && st->coder_type == GENERIC )
303 : {
304 212835 : st->last_harm_flag_acelp++;
305 :
306 212835 : if ( st->last_harm_flag_acelp > 10 )
307 : {
308 31422 : st->last_harm_flag_acelp = 10; /* saturate the counter at 10 */
309 : }
310 : }
311 : else
312 : {
313 916979 : st->last_harm_flag_acelp = 0;
314 : }
315 :
316 : /*-----------------------------------------------------------------*
317 : * Update audio frames counter (used for UV decision)
318 : *-----------------------------------------------------------------*/
319 :
320 1129814 : if ( st->coder_type == AUDIO )
321 : {
322 11096 : st->audio_frame_cnt += AUDIO_COUNTER_STEP;
323 : }
324 1118718 : else if ( st->coder_type != INACTIVE )
325 : {
326 995895 : st->audio_frame_cnt--;
327 : }
328 :
329 1129814 : if ( st->audio_frame_cnt > AUDIO_COUNTER_MAX )
330 : {
331 3806 : st->audio_frame_cnt = AUDIO_COUNTER_MAX;
332 : }
333 :
334 1129814 : if ( st->audio_frame_cnt < 0 )
335 : {
336 587825 : st->audio_frame_cnt = 0;
337 : }
338 :
339 : /*-----------------------------------------------------------------*
340 : * Set formant sharpening flag
341 : *-----------------------------------------------------------------*/
342 :
343 1129814 : st->sharpFlag = 0;
344 :
345 1129814 : if ( st->coder_type == GENERIC || st->coder_type == VOICED || st->coder_type == TRANSITION )
346 : {
347 991607 : if ( element_brate >= FRMT_SHP_MIN_BRATE_IVAS && st->lp_noise > FORMANT_SHARPENING_NOISE_THRESHOLD )
348 : {
349 69128 : st->sharpFlag = 0; /* same value as the default assigned above */
350 : }
351 : else
352 : {
353 922479 : st->sharpFlag = 1;
354 : }
355 : }
356 :
357 : /* channel-aware mode - due to lack of signaling bit, sharpFlag is always 1 in RF mode for VOICED and GENERIC frames */
358 1129814 : if ( st->rf_mode && ( st->coder_type == VOICED || st->coder_type == GENERIC ) )
359 : {
360 0 : st->sharpFlag = 1;
361 : }
362 :
363 : /* TD stereo, secondary channel - due to lack of signaling bits, sharpFlag is forced to 1 for GENERIC and VOICED frames and to 0 for all other coder types (overrides the decisions above) */
364 1129814 : if ( element_mode == IVAS_CPE_TD && st->idchan == 1 )
365 : {
366 3791 : st->sharpFlag = 0;
367 3791 : if ( st->coder_type == GENERIC || st->coder_type == VOICED )
368 : {
369 3696 : st->sharpFlag = 1;
370 : }
371 : }
372 :
373 : /*-----------------------------------------------------------------*
374 : * Set voicing flag for HQ FEC
375 : *-----------------------------------------------------------------*/
376 :
377 1129814 : if ( st->sp_aud_decision1 == 0 && ( st->coder_type == VOICED || st->coder_type == GENERIC ) )
378 : {
379 461744 : *Voicing_flag = 1;
380 : }
381 : else
382 : {
383 668070 : *Voicing_flag = 0;
384 : }
385 :
386 : /*-----------------------------------------------------------------*
387 : * Compute core-coder buffers at internal sampling rate
388 : *-----------------------------------------------------------------*/
389 :
390 1129814 : sr_core_tmp = ( st->tcxonly == 0 ) ? INT_FS_16k : max( INT_FS_16k, st->sr_core ); /* indicates the ACELP sampling rate */
391 :
392 1129814 : L_look = NS2SA( sr_core_tmp, ACELP_LOOK_NS ); /* lookahead at other sampling rate (16kHz, 25.6kHz, 32kHz) */
393 :
394 1129814 : inp_16k = old_inp_16k + L_INP_MEM - L_look;
395 :
396 1129814 : if ( !flag_16k_smc )
397 : {
398 1103252 : error = ivas_compute_core_buffers( st, &inp_16k, old_inp_16k, new_inp_resamp16k, input_frame, last_element_mode, sr_core_tmp, ener, A, Aw, epsP, lsp_new, lsp_mid );
399 1103252 : if ( error != IVAS_ERR_OK )
400 : {
401 0 : return error; /* NOTE(review): this path returns without a pop_wmops() matching the push_wmops() at the top of the function - confirm this is intended */
402 : }
403 : }
404 :
405 1129814 : if ( !( st->L_frame == L_FRAME16k && element_mode != IVAS_CPE_MDCT ) )
406 : {
407 : /* update signal buffers */
408 966009 : mvr2r( new_inp_12k8, st->buf_speech_enc_pe + st->L_frame, L_FRAME );
409 966009 : mvr2r( st->buf_speech_enc + L_FRAME32k, st->buf_speech_enc + st->L_frame, L_FRAME );
410 :
411 966009 : if ( element_mode == IVAS_CPE_DFT )
412 : {
413 26401 : mvr2r( st->buf_speech_enc + L_FRAME32k - STEREO_DFT_OVL_12k8, st->buf_speech_enc + st->L_frame - STEREO_DFT_OVL_12k8, STEREO_DFT_OVL_12k8 );
414 : }
415 939608 : else if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
416 : {
417 720243 : mvr2r( st->buf_speech_enc + L_FRAME32k - lMemRecalc_12k8 - L_FILT, st->buf_speech_enc + st->L_frame - lMemRecalc_12k8 - L_FILT, lMemRecalc_12k8 + L_FILT );
418 : }
419 219365 : else if ( element_mode == IVAS_SCE )
420 : {
421 219365 : mvr2r( st->buf_speech_enc + L_FRAME32k - L_FILT, st->buf_speech_enc + st->L_frame - L_FILT, L_FILT );
422 : }
423 :
424 966009 : if ( st->tcxonly == 0 )
425 : {
426 139926 : mvr2r( wsp, st->wspeech_enc, L_FRAME + L_LOOK_12k8 );
427 : }
428 : }
429 :
430 1129814 : if ( flag_16k_smc )
431 : {
432 26562 : mvr2r( st->buf_speech_enc + L_FRAME16k, new_inp_resamp16k, L_FRAME16k ); /* ivas_compute_core_buffers() is skipped in this case: copy L_FRAME16k samples from st->buf_speech_enc into new_inp_resamp16k */
433 : }
434 :
435 : /*-----------------------------------------------------------------*
436 : * Updates
437 : *-----------------------------------------------------------------*/
438 :
439 : /* update old weighted speech buffer - for OL pitch analysis */
440 1129814 : mvr2r( &old_wsp[L_FRAME], st->old_wsp, L_WSP_MEM );
441 :
442 :
443 : /* set the pointer of the current frame for the ACELP core */
444 1129814 : if ( st->L_frame == L_FRAME )
445 : {
446 144839 : *inp = inp_12k8;
447 : }
448 : else
449 : {
450 984975 : *inp = inp_16k;
451 : }
452 :
453 : /* Update VAD hangover frame counter in active frames */
454 1129814 : if ( !( st->core_brate == SID_2k40 || st->core_brate == FRAME_NO_DATA ) && st->tcxonly == 0 )
455 : {
456 286909 : if ( st->hTdCngEnc != NULL && st->Opt_DTX_ON && vad_hover_flag )
457 : {
458 2254 : st->hTdCngEnc->burst_ho_cnt++;
459 2254 : if ( st->hTdCngEnc->burst_ho_cnt > HO_HIST_SIZE )
460 : {
461 44 : st->hTdCngEnc->burst_ho_cnt = HO_HIST_SIZE;
462 : }
463 : }
464 284655 : else if ( st->hTdCngEnc != NULL && vad_flag_dtx )
465 : {
466 30533 : st->hTdCngEnc->burst_ho_cnt = 0;
467 : }
468 : }
469 :
470 : #ifdef DEBUG_MODE_ACELP
471 : dbgwrite( inp_12k8, sizeof( float ), L_FRAME, 1, fname( debug_dir, "inp_12k8", st->idchan, st->id_element, ENC ) );
472 : dbgwrite( inp_16k, sizeof( float ), L_FRAME, 1, fname( debug_dir, "inp_16k", st->idchan, st->id_element, ENC ) );
473 : #endif
474 :
475 1129814 : pop_wmops();
476 1129814 : return error;
477 : }
478 :
479 :
480 : /*-------------------------------------------------------------------*
481 : * ivas_compute_core_buffers()
482 : *
483 : * Compute core-coder buffers at internal sampling rate
484 : *--------------------------------------------------------------------*/
485 :
486 1129814 : ivas_error ivas_compute_core_buffers(
487 : Encoder_State *st, /* i/o: encoder state structure */
488 : float **inp16k_out, /* o : ptr. to inp. signal in the current frame */
489 : float *old_inp_16k, /* i/o: buffer of old input signal @ 16kHz */
490 : float new_inp_resamp16k_out[], /* o : new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */
491 : const int16_t input_frame, /* i : frame length */
492 : const int16_t last_element_mode, /* i : last element mode */
493 : const int32_t sr_core, /* i : core-coder sampling rate */
494 : float *ener, /* o : residual energy from Levinson-Durbin */
495 : float A[NB_SUBFR16k * ( M + 1 )], /* i/o: A(z) unquantized for the 4 subframes */
496 : float Aw[NB_SUBFR16k * ( M + 1 )], /* i/o: weighted A(z) unquantized for subframes */
497 : float epsP[M + 1], /* i/o: LP prediction errors */
498 : float lsp_new[M], /* i/o: LSPs at the end of the frame */
499 : float lsp_mid[M] /* i/o: LSPs in the middle of the frame */
500 : )
501 : {
502 : float *inp_16k, *new_inp_16k;
503 : float tmp, mem_decim16k_dummy[2 * L_FILT_MAX];
504 : const float *signal_in;
505 : int16_t delay, element_mode;
506 : float temp1F_icatdmResampBuf[L_FILT_MAX]; /* temp buffers for ICA TDM resamplers */
507 : float new_inp_resamp16k[L_FRAME16k];
508 : int16_t lMemRecalc, lMemRecalc_16k, L_frame_tmp, L_look;
509 : int32_t input_Fs;
510 :
511 1129814 : signal_in = st->input;
512 :
513 1129814 : input_Fs = st->input_Fs;
514 1129814 : element_mode = st->element_mode;
515 :
516 1129814 : lMemRecalc_16k = 0;
517 1129814 : lMemRecalc = 0;
518 1129814 : if ( element_mode == IVAS_CPE_TD )
519 : {
520 7582 : lMemRecalc_16k = NS2SA( INT_FS_16k, L_MEM_RECALC_NS );
521 7582 : lMemRecalc = NS2SA( input_Fs, L_MEM_RECALC_NS );
522 : }
523 :
524 : /*---------------------------------------------------------------*
525 : * Preprocessing at other sampling frequency rate (16/25.6/32kHz)
526 : *----------------------------------------------------------------*/
527 :
528 1129814 : L_frame_tmp = ( st->tcxonly == 0 ) ? L_FRAME16k : max( L_FRAME16k, st->L_frame );
529 :
530 1129814 : L_look = NS2SA( sr_core, ACELP_LOOK_NS ); /* lookahead at other sampling rate (16kHz, 25.6kHz, 32kHz) */
531 :
532 1129814 : new_inp_16k = old_inp_16k + L_INP_MEM; /* pointer to new samples of the input signal in 16kHz core */
533 1129814 : inp_16k = new_inp_16k - L_look; /* pointer to the current frame of input signal in 16kHz core */
534 :
535 : /* shift the pointer back to take care of resampler memory update */
536 1129814 : if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_SCE )
537 : {
538 355365 : new_inp_16k -= NS2SA( sr_core, DELAY_FIR_RESAMPL_NS );
539 : }
540 :
541 1129814 : if ( element_mode == IVAS_CPE_DFT )
542 : {
543 59679 : mvr2r( st->old_inp_16k, old_inp_16k, L_INP_MEM - STEREO_DFT_OVL_16k );
544 : }
545 1070135 : else if ( element_mode == IVAS_CPE_TD )
546 : {
547 7582 : mvr2r( st->old_inp_16k, old_inp_16k, L_INP_MEM - L_MEM_RECALC_16K - L_FILT16k );
548 : }
549 : else
550 : {
551 1062553 : mvr2r( st->old_inp_16k, old_inp_16k, L_INP_MEM - L_FILT16k );
552 : }
553 :
554 : /*---------------------------------------------------------------*
555 : * Change the sampling frequency to 16/25.6/32 kHz
556 : *----------------------------------------------------------------*/
557 :
558 1129814 : if ( element_mode == IVAS_SCE )
559 : {
560 347783 : if ( input_Fs == sr_core )
561 : {
562 : /* no resampling needed, only delay adjustment to account for the FIR resampling delay */
563 6782 : delay = NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS );
564 6782 : mvr2r( st->mem_decim16k + delay, new_inp_16k, delay );
565 6782 : mvr2r( signal_in, new_inp_16k + delay, input_frame );
566 6782 : mvr2r( signal_in + input_frame - 2 * delay, st->mem_decim16k, 2 * delay );
567 : }
568 341001 : else if ( input_Fs == 32000 || input_Fs == 48000 )
569 : {
570 341001 : modify_Fs( signal_in, input_frame, input_Fs, new_inp_16k, sr_core, st->mem_decim16k, 0 );
571 :
572 341001 : mvr2r( st->mem_decim16k, mem_decim16k_dummy, 2 * L_FILT_MAX );
573 341001 : set_f( temp1F_icatdmResampBuf, 0, L_FILT_MAX );
574 341001 : modify_Fs( temp1F_icatdmResampBuf, NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), input_Fs, new_inp_16k + NS2SA( sr_core, FRAME_SIZE_NS ), sr_core, mem_decim16k_dummy, 0 );
575 : }
576 : }
577 782031 : else if ( element_mode == IVAS_CPE_TD )
578 : {
579 7582 : if ( input_Fs == sr_core )
580 : {
581 : /* no resampling needed, only delay adjustment to account for the FIR resampling delay */
582 1490 : delay = NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS );
583 1490 : mvr2r( st->mem_decim16k + delay, new_inp_16k - lMemRecalc - delay + L_FILT16k, delay );
584 1490 : mvr2r( signal_in - lMemRecalc, new_inp_16k - lMemRecalc + L_FILT16k, input_frame + lMemRecalc );
585 1490 : mvr2r( signal_in + input_frame - lMemRecalc - 2 * delay, st->mem_decim16k, 2 * delay );
586 : }
587 6092 : else if ( input_Fs == 32000 || input_Fs == 48000 )
588 : {
589 : /* reconstruct past segment of input signal when switching from MDCT stereo */
590 6092 : if ( last_element_mode == IVAS_CPE_MDCT /*|| st->idchan == 1*/ )
591 : {
592 16 : int16_t length_inp = NS2SA( input_Fs, L_MEM_RECALC_SCH_NS - DELAY_FIR_RESAMPL_NS );
593 16 : int16_t length_16k = NS2SA( INT_FS_16k, L_MEM_RECALC_SCH_NS - DELAY_FIR_RESAMPL_NS );
594 :
595 16 : mvr2r( signal_in - lMemRecalc - length_inp - 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), st->mem_decim16k, 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ) );
596 16 : modify_Fs( signal_in - lMemRecalc - length_inp, length_inp, input_Fs, new_inp_16k - lMemRecalc_16k - length_16k, sr_core, st->mem_decim16k, 0 );
597 : }
598 :
599 6092 : modify_Fs( signal_in - lMemRecalc, input_frame, input_Fs, new_inp_16k - ( lMemRecalc * sr_core ) / st->input_Fs, sr_core, st->mem_decim16k, 0 );
600 6092 : mvr2r( st->mem_decim16k, mem_decim16k_dummy, 2 * L_FILT_MAX );
601 :
602 6092 : if ( lMemRecalc > 0 )
603 : {
604 6092 : modify_Fs( signal_in - lMemRecalc + input_frame, lMemRecalc, input_Fs, new_inp_16k + NS2SA( sr_core, FRAME_SIZE_NS ) - ( lMemRecalc * sr_core ) / st->input_Fs, sr_core, mem_decim16k_dummy, 0 );
605 : }
606 :
607 6092 : set_f( temp1F_icatdmResampBuf, 0, L_FILT_MAX );
608 6092 : modify_Fs( temp1F_icatdmResampBuf, NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), input_Fs, new_inp_16k + NS2SA( sr_core, FRAME_SIZE_NS ), sr_core, mem_decim16k_dummy, 0 );
609 : }
610 : #ifdef DEBUGGING
611 : else
612 : {
613 : return ( IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, "Wrong internal sampling rate. Exiting..." ) );
614 : }
615 : #endif
616 : }
617 774449 : else if ( st->idchan == 0 )
618 : {
619 : /* update the FIR resampling filter memory, needed for switching to time-domain (FIR) resampling */
620 417064 : mvr2r( signal_in + input_frame - NS2SA( input_Fs, L_MEM_RECALC_NS ) - 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), st->mem_decim16k, 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ) );
621 : }
622 :
623 : /*------------------------------------------------*
624 : * Update BWE memories *
625 : *------------------------------------------------*/
626 :
627 1129814 : if ( sr_core == INT_FS_16k && element_mode != IVAS_CPE_MDCT )
628 : {
629 308643 : delay = NS2SA( INT_FS_16k, DELAY_FD_BWE_ENC_12k8_NS );
630 :
631 308643 : if ( element_mode == IVAS_CPE_DFT )
632 : {
633 : /* save input resampled at 16kHz, non-preemphasised.*/
634 59679 : mvr2r( new_inp_16k, new_inp_resamp16k, L_FRAME16k );
635 :
636 59679 : if ( st->bwidth == WB )
637 : {
638 12915 : mvr2r( new_inp_16k - delay, st->hBWE_FD->old_input_wb, delay );
639 12915 : mvr2r( new_inp_16k - STEREO_DFT_OVL_16k, st->hBWE_FD->old_wtda_swb + L_FRAME16k - STEREO_DFT_OVL_16k + delay, STEREO_DFT_OVL_16k - delay );
640 : }
641 : }
642 248964 : else if ( element_mode == IVAS_CPE_TD )
643 : {
644 : /* save input resampled at 16kHz, non-preemphasised */
645 7582 : mvr2r( new_inp_16k + L_FILT16k, new_inp_resamp16k, L_FRAME16k );
646 :
647 7582 : if ( st->bwidth == WB && st->hBWE_FD != NULL )
648 : {
649 1589 : mvr2r( new_inp_16k + L_FILT16k - delay, st->hBWE_FD->old_input_wb, delay );
650 1589 : mvr2r( new_inp_16k - L_MEM_RECALC_16K, st->hBWE_FD->old_wtda_swb + L_FRAME16k - L_MEM_RECALC_16K - L_FILT16k + delay, L_MEM_RECALC_16K + L_FILT16k - delay );
651 : }
652 : }
653 241382 : else if ( element_mode == IVAS_SCE )
654 : {
655 : /* save input resampled at 16kHz, non-preemphasised */
656 241382 : mvr2r( new_inp_16k + L_FILT16k, new_inp_resamp16k, L_FRAME16k );
657 :
658 241382 : if ( st->bwidth == WB )
659 : {
660 29218 : mvr2r( new_inp_16k, st->hBWE_FD->old_input_wb + delay - L_FILT16k, L_FILT16k );
661 : /* all buffer st->hBWE_FD->old_wtda_swb is correct and does not need to be updated */
662 : }
663 : }
664 : }
665 821171 : else if ( sr_core > INT_FS_16k )
666 : {
667 : /* reset the buffer, the signal is needed for WB BWEs */
668 746331 : set_f( new_inp_resamp16k, 0.0f, L_FRAME16k );
669 : }
670 :
671 : /*------------------------------------------------------------------*
672 : * Perform fixed preemphasis (16kHz signal) through 1 - g*z^-1
673 : *-----------------------------------------------------------------*/
674 :
675 1129814 : if (
676 1129814 : st->tcxonly == 0 &&
677 302218 : !( ( element_mode == IVAS_CPE_TD && st->idchan == 1 ) || element_mode == IVAS_CPE_MDCT ) )
678 : {
679 298554 : st->mem_preemph_enc = new_inp_16k[L_frame_tmp - 1];
680 : }
681 :
682 1129814 : if ( input_Fs > 8000 && sr_core == INT_FS_16k && element_mode != IVAS_CPE_MDCT )
683 : {
684 308643 : if ( element_mode == IVAS_CPE_DFT )
685 : {
686 59679 : mvr2r( new_inp_16k - STEREO_DFT_OVL_16k + L_FRAME16k, st->inp_16k_mem_stereo_sw, STEREO_DFT_OVL_16k - L_MEM_RECALC_16K - L_FILT16k ); /* update for TD/DFT stereo switching */
687 :
688 59679 : st->mem_preemph16k = st->mem_preemph16k_DFT;
689 59679 : st->mem_preemph16k_DFT = old_inp_16k[L_INP_MEM - STEREO_DFT_OVL_16k + L_FRAME16k - 1];
690 :
691 59679 : if ( st->L_frame == L_FRAME16k )
692 : {
693 33278 : mvr2r( new_inp_16k - STEREO_DFT_OVL_16k, st->buf_speech_enc + L_FRAME16k - STEREO_DFT_OVL_16k, L_FRAME16k + STEREO_DFT_OVL_16k );
694 : }
695 59679 : preemph( new_inp_16k - STEREO_DFT_OVL_16k, PREEMPH_FAC_16k, L_FRAME16k, &( st->mem_preemph16k ) );
696 59679 : tmp = st->mem_preemph16k;
697 59679 : preemph( new_inp_16k - STEREO_DFT_OVL_16k + L_FRAME16k, PREEMPH_FAC_16k, STEREO_DFT_OVL_16k, &tmp );
698 : }
699 248964 : else if ( st->element_mode == IVAS_CPE_TD )
700 : {
701 7582 : if ( last_element_mode == IVAS_CPE_DFT )
702 : {
703 102 : st->mem_preemph16k = st->mem_preemph16k_DFT;
704 102 : mvr2r( st->inp_16k_mem_stereo_sw, new_inp_16k - L_MEM_RECALC_16K - ( STEREO_DFT_OVL_16k - L_MEM_RECALC_16K - L_FILT16k ), STEREO_DFT_OVL_16k - L_MEM_RECALC_16K - L_FILT16k );
705 102 : preemph( new_inp_16k - L_MEM_RECALC_16K - ( STEREO_DFT_OVL_16k - L_MEM_RECALC_16K - L_FILT16k ), PREEMPH_FAC_16k, STEREO_DFT_OVL_16k - L_MEM_RECALC_16K - L_FILT16k, &st->mem_preemph16k );
706 : }
707 :
708 7582 : st->mem_preemph16k_DFT = old_inp_16k[L_INP_MEM - STEREO_DFT_OVL_16k + L_FRAME16k - 1];
709 :
710 : /* preemphasise past segment of input signal when switching from MDCT stereo */
711 7582 : if ( last_element_mode == IVAS_CPE_MDCT )
712 : {
713 16 : int16_t length_16k = NS2SA( INT_FS_16k, L_MEM_RECALC_SCH_NS - DELAY_FIR_RESAMPL_NS );
714 16 : preemph( new_inp_16k - lMemRecalc_16k - length_16k, PREEMPH_FAC, length_16k, &st->mem_preemph16k );
715 : }
716 :
717 7582 : if ( st->L_frame == L_FRAME16k )
718 : {
719 2109 : mvr2r( new_inp_16k - lMemRecalc_16k, st->buf_speech_enc + L_FRAME16k - lMemRecalc_16k - L_FILT16k, L_FRAME16k + lMemRecalc_16k + L_FILT16k );
720 : }
721 7582 : preemph( new_inp_16k - lMemRecalc_16k, PREEMPH_FAC_16k, L_FRAME16k, &( st->mem_preemph16k ) );
722 7582 : tmp = st->mem_preemph16k;
723 7582 : preemph( new_inp_16k - lMemRecalc_16k + L_FRAME16k, PREEMPH_FAC_16k, lMemRecalc_16k + L_FILT16k, &tmp );
724 : }
725 241382 : else if ( element_mode == IVAS_SCE )
726 : {
727 241382 : preemph( new_inp_16k, PREEMPH_FAC_16k, L_FRAME16k, &( st->mem_preemph16k ) );
728 241382 : tmp = st->mem_preemph16k;
729 241382 : preemph( new_inp_16k + L_FRAME16k, PREEMPH_FAC_16k, L_FILT16k, &tmp );
730 : }
731 : }
732 821171 : else if ( input_Fs > 8000 ) /* keep memory up-to-date in case of bitrate switching */
733 : {
734 821171 : if ( element_mode == IVAS_CPE_DFT )
735 : {
736 0 : st->mem_preemph16k = new_inp_16k[L_frame_tmp - STEREO_DFT_OVL_16k - 1];
737 : }
738 821171 : else if ( element_mode == IVAS_CPE_TD )
739 : {
740 0 : st->mem_preemph16k = new_inp_16k[L_frame_tmp - lMemRecalc_16k - 1];
741 : }
742 821171 : else if ( element_mode == IVAS_CPE_MDCT )
743 : {
744 714770 : st->mem_preemph16k = 0;
745 : }
746 : else /* SCE */
747 : {
748 106401 : st->mem_preemph16k = new_inp_16k[L_frame_tmp - 1];
749 : }
750 821171 : st->mem_preemph16k_DFT = st->mem_preemph16k;
751 : }
752 :
753 : /*-----------------------------------------------------------------*
754 : * LP analysis at 16kHz if ACELP@16k core was selected
755 : * update buffers
756 : *-----------------------------------------------------------------*/
757 :
758 1129814 : if ( st->L_frame == L_FRAME16k && element_mode != IVAS_CPE_MDCT )
759 : {
760 : /* update signal buffers */
761 163805 : if ( element_mode == IVAS_CPE_DFT )
762 : {
763 33278 : mvr2r( new_inp_16k - STEREO_DFT_OVL_16k, st->buf_speech_enc_pe + L_FRAME16k - STEREO_DFT_OVL_16k, L_FRAME16k + STEREO_DFT_OVL_16k );
764 : }
765 130527 : else if ( element_mode == IVAS_CPE_TD )
766 : {
767 2109 : mvr2r( new_inp_16k - lMemRecalc_16k, st->buf_speech_enc_pe + L_FRAME16k - lMemRecalc_16k - L_FILT16k, L_FRAME16k + lMemRecalc_16k + L_FILT16k );
768 : }
769 : else
770 : {
771 128418 : mvr2r( new_inp_resamp16k, st->buf_speech_enc + L_FRAME16k, L_FRAME16k );
772 128418 : mvr2r( new_inp_16k, st->buf_speech_enc_pe + L_FRAME16k, L_FRAME16k );
773 : }
774 :
775 : /*--------------------------------------------------------------*
776 : * LPC analysis
777 : *---------------------------------------------------------------*/
778 :
779 163805 : if ( st->last_L_frame == L_FRAME )
780 : {
781 : /* this is just an approximation, but it is sufficient */
782 1027 : mvr2r( st->lsp_old1, st->lspold_enc, M );
783 : }
784 :
785 163805 : analy_lp( inp_16k, L_FRAME16k, L_look, ener, A, epsP, lsp_new, lsp_mid, st->lspold_enc, st->pitch, st->voicing, INT_FS_16k, 0 );
786 :
787 : /*--------------------------------------------------------------*
788 : * Compute Weighted Input
789 : *---------------------------------------------------------------*/
790 :
791 163805 : find_wsp( L_FRAME16k, L_SUBFR, NB_SUBFR16k, A, Aw, st->speech_enc_pe, PREEMPH_FAC_16k, st->wspeech_enc, &st->mem_wsp_enc, st->gamma, L_LOOK_16k );
792 : }
793 :
794 : /*-----------------------------------------------------------------*
795 : * Updates
796 : *-----------------------------------------------------------------*/
797 :
798 : /* update old input signal @16kHz buffer */
799 1129814 : if ( ( element_mode == IVAS_CPE_TD && st->idchan == 1 ) || element_mode == IVAS_CPE_MDCT )
800 : {
801 718561 : set_f( st->old_inp_16k, 0, L_INP_MEM );
802 : }
803 411253 : else if ( input_Fs > 8000 && sr_core == INT_FS_16k )
804 : {
805 304852 : mvr2r( &old_inp_16k[L_frame_tmp], st->old_inp_16k, L_INP_MEM );
806 : }
807 106401 : else if ( input_Fs > 8000 )
808 : {
809 106401 : lerp( st->old_inp_12k8 + L_INP_MEM - L_INP_MEM * 4 / 5, st->old_inp_16k, L_INP_MEM, L_INP_MEM * 4 / 5 );
810 : }
811 :
812 1129814 : if ( inp16k_out != NULL )
813 : {
814 1103252 : *inp16k_out = inp_16k;
815 : }
816 :
817 1129814 : if ( new_inp_resamp16k_out != NULL )
818 : {
819 1103252 : mvr2r( new_inp_resamp16k, new_inp_resamp16k_out, L_FRAME16k );
820 : }
821 :
822 1129814 : return IVAS_ERR_OK;
823 : }
|