Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2026 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include "options.h"
35 : #ifdef DEBUGGING
36 : #include "debug.h"
37 : #endif
38 : #include "cnst.h"
39 : #include "ivas_cnst.h"
40 : #include "ivas_prot.h"
41 : #include "rom_enc.h"
42 : #include "rom_com.h"
43 : #include "prot.h"
44 : #include "wmc_auto.h"
45 :
46 :
47 : /*-------------------------------------------------------------------*
48 : * pre_proc_ivas()
49 : *
50 : * Pre-processing (Selection of internal Fs, classification, SC VBR decision,
51 : * Decision matrix, Preprocessing at other Fs, core switching decision, ...)
52 : *--------------------------------------------------------------------*/
53 :
54 : #ifdef FIX_2344_ALIGN_PREPROC
55 3098934 : void pre_proc_ivas(
56 : #else
57 : ivas_error pre_proc_ivas(
58 : #endif
59 : Encoder_State *st, /* i/o: encoder state structure */
60 : const int16_t last_element_mode, /* i : last element mode */
61 : const int32_t element_brate, /* i : element bitrate */
62 : const int32_t last_element_brate, /* i : last element bitrate */
63 : const int16_t input_frame, /* i : frame length */
64 : float old_inp_12k8[], /* i/o: buffer of old input signal */
65 : float old_inp_16k[], /* i/o: buffer of old input signal @ 16kHz */
66 : float **inp, /* o : ptr. to inp. signal in the current frame*/
67 : float *ener, /* o : residual energy from Levinson-Durbin */
68 : float A[NB_SUBFR16k * ( M + 1 )], /* i/o: A(z) unquantized for the 4 subframes */
69 : float Aw[NB_SUBFR16k * ( M + 1 )], /* i/o: weighted A(z) unquantized for subframes */
70 : float epsP[M + 1], /* i/o: LP prediction errors */
71 : float lsp_new[M], /* i/o: LSPs at the end of the frame */
72 : float lsp_mid[M], /* i/o: LSPs in the middle of the frame */
73 : float *new_inp_resamp16k, /* o : new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */
74 : int16_t *Voicing_flag, /* o : voicing flag for HQ FEC */
75 : const float old_wsp[], /* i : weighted input signal buffer */
76 : const int16_t loc_harm, /* i : harmonicity flag */
77 : const float cor_map_sum, /* i : speech/music classif. parameter */
78 : const int16_t vad_flag_dtx, /* i : HE-SAD flag with additional DTX HO */
79 : const float enerBuffer[CLDFB_NO_CHANNELS_MAX], /* i : energy buffer */
80 : const float fft_buff[2 * L_FFT], /* i : FFT buffer */
81 : const int16_t MCT_flag, /* i : hMCT handle allocated (1) or not (0) */
82 : const int16_t vad_hover_flag, /* i : VAD hangover flag */
83 : const int16_t flag_16k_smc /* i : flag to indicate if the OL SMC is run at 16 kHz */
84 : )
85 : { /* returns nothing when FIX_2344_ALIGN_PREPROC is defined, otherwise an ivas_error code */
86 : int16_t L_look, element_mode, lMemRecalc_12k8;
87 : float *inp_12k8, *new_inp_12k8, *inp_16k; /* pointers to current frame and new data */
88 : const float *wsp; /* weighted input signal buffer */
89 : int32_t sr_core_tmp, total_brate_tmp;
90 : #ifndef FIX_2344_ALIGN_PREPROC
91 : ivas_error error;
92 : #endif
93 :
94 3098934 : push_wmops( "pre_proc" );
95 :
96 : #ifndef FIX_2344_ALIGN_PREPROC
97 : error = IVAS_ERR_OK;
98 : #endif
99 : /*----------------------------------------------------------------*
100 : * Initialization
101 : *----------------------------------------------------------------*/
102 :
103 3098934 : element_mode = st->element_mode;
104 :
105 3098934 : new_inp_12k8 = old_inp_12k8 + L_INP_MEM; /* pointer to new samples of the input signal in 12.8kHz core */
106 3098934 : inp_12k8 = new_inp_12k8 - L_LOOK_12k8; /* taken before the L_FILT shift of new_inp_12k8 applied below */
107 :
108 3098934 : if ( element_mode != IVAS_CPE_DFT )
109 : {
110 2964309 : new_inp_12k8 -= L_FILT; /* every mode except DFT stereo reads the new samples L_FILT earlier */
111 : }
112 :
113 3098934 : wsp = old_wsp + L_WSP_MEM; /* pointer to the current frame of weighted signal in 12.8kHz core */
114 :
115 3098934 : lMemRecalc_12k8 = 0; /* stays 0 unless the element is TD stereo */
116 3098934 : if ( element_mode == IVAS_CPE_TD )
117 : {
118 9182 : lMemRecalc_12k8 = NS2SA( INT_FS_12k8, L_MEM_RECALC_NS );
119 : }
120 :
121 : /*----------------------------------------------------------------*
122 : * Selection of internal ACELP Fs (12.8 kHz or 16 kHz)
123 : *----------------------------------------------------------------*/
124 :
125 3098934 : if ( st->core_brate == FRAME_NO_DATA )
126 : {
127 : /* prevent "L_frame" changes in CNG segments */
128 21246 : st->L_frame = st->last_L_frame;
129 : }
130 3077688 : else if ( st->core_brate == SID_2k40 && st->bwidth >= WB && st->hDtxEnc->first_CNG && ( st->hTdCngEnc != NULL && st->hTdCngEnc->act_cnt2 < MIN_ACT_CNG_UPD ) )
131 : {
132 : /* prevent "L_frame" changes in SID frame after short segment of active frames */
133 1473 : st->L_frame = st->hDtxEnc->last_CNG_L_frame;
134 : }
135 3076215 : else if ( ( ( st->element_mode == IVAS_CPE_MDCT && st->element_brate >= IVAS_64k && st->bwidth >= SWB ) || ( element_mode == IVAS_SCE && st->total_brate > MAX_ACELP_BRATE && st->bwidth >= SWB ) ) && st->core_brate != SID_2k40 )
136 : {
137 1938860 : st->L_frame = L_FRAME32k;
138 : }
139 1137355 : else if ( st->bwidth >= SWB && st->total_brate > MAX_ACELP_BRATE_ISM && st->total_brate <= MAX_ACELP_BRATE && element_mode == IVAS_SCE && st->is_ism_format && st->tcxonly && st->core_brate != SID_2k40 )
140 : {
141 19040 : st->L_frame = L_FRAME25_6k;
142 : }
143 1118315 : else if ( st->flag_ACELP16k )
144 : {
145 841623 : st->L_frame = L_FRAME16k;
146 : }
147 : else
148 : {
149 276692 : st->L_frame = L_FRAME;
150 : }
151 :
152 3098934 : if ( st->hFdCngEnc != NULL && st->element_mode != IVAS_CPE_MDCT && ( ( st->hFdCngEnc->hFdCngCom->frameSize != st->L_frame ) || ( st->hFdCngEnc->hFdCngCom->CngBandwidth != st->input_bwidth ) ) )
153 : {
154 60364 : configureFdCngEnc( st->hFdCngEnc, max( st->input_bwidth, WB ), st->L_frame == L_FRAME16k ? ACELP_16k40 : ACELP_9k60 ); /* re-configure FD-CNG on a frame size or bandwidth mismatch; the bandwidth passed is at least WB */
155 : }
156 :
157 3098934 : if ( st->ini_frame == 0 )
158 : {
159 : /* avoid switching of internal ACELP Fs in the very first frame */
160 32855 : st->last_L_frame = st->L_frame;
161 : }
162 :
163 3098934 : if ( st->L_frame == L_FRAME )
164 : {
165 284274 : st->gamma = GAMMA1;
166 284274 : st->preemph_fac = PREEMPH_FAC;
167 : }
168 2814660 : else if ( st->L_frame == L_FRAME32k )
169 : {
170 1938860 : st->gamma = GAMMA16k;
171 1938860 : st->preemph_fac = PREEMPH_FAC_SWB;
172 : }
173 : else
174 : {
175 875800 : st->gamma = GAMMA16k;
176 875800 : st->preemph_fac = PREEMPH_FAC_16k;
177 : }
178 :
179 3098934 : st->sr_core = st->L_frame * FRAMES_PER_SEC; /* core sampling rate follows the selected frame length */
180 3098934 : st->encoderLookahead_enc = NS2SA( st->sr_core, ACELP_LOOK_NS );
181 3098934 : st->encoderPastSamples_enc = ( st->L_frame * 9 ) >> 4;
182 :
183 :
184 : /*-----------------------------------------------------------------*
185 : * coder_type rewriting in case of switching
186 : * IC frames selection
187 : * enforce TC frames in case of switching
188 : *-----------------------------------------------------------------*/
189 :
190 : /* enforce TRANSITION frames */
191 3098934 : if ( !( st->element_mode == IVAS_CPE_TD && st->idchan == 1 ) && st->last_L_frame != st->L_frame && st->core_brate != FRAME_NO_DATA && st->core_brate != SID_2k40 && st->last_core_brate != FRAME_NO_DATA && st->last_core_brate != SID_2k40 && st->coder_type_raw != VOICED )
192 : {
193 : /* enforce TC frame in case of ACELP@12k8 <-> ACELP@16k core switching */
194 612958 : st->coder_type = TRANSITION;
195 : }
196 2485976 : else if ( st->last_core == HQ_CORE && st->coder_type_raw != VOICED )
197 : {
198 : /* enforce TC frame in case of HQ -> ACELP core switching */
199 9055 : st->coder_type = TRANSITION;
200 : }
201 2476921 : else if ( st->last_core_brate <= SID_2k40 && st->cng_type == FD_CNG && !( element_mode == IVAS_CPE_TD ) )
202 : {
203 : /* enforce TC frame in case of FD_CNG -> ACELP switching (past excitation not available) */
204 23687 : st->coder_type = TRANSITION;
205 : }
206 : /* select INACTIVE frames */
207 2453234 : else if ( st->total_brate <= MAX_GSC_INACTIVE_BRATE && st->vad_flag == 0 && st->element_mode != IVAS_CPE_MDCT )
208 : {
209 : /* inactive frames will be coded by GSC technology */
210 : /* except for the VBR mode. VBR mode uses NELP for that */
211 29355 : if ( !( st->Opt_SC_VBR && vad_flag_dtx ) && ( st->idchan == 0 || element_mode != IVAS_CPE_TD ) )
212 : {
213 29337 : st->coder_type = INACTIVE;
214 29337 : st->hGSCEnc->noise_lev = NOISE_LEVEL_SP3;
215 : }
216 : }
217 2423879 : else if ( st->total_brate > MAX_GSC_INACTIVE_BRATE && ( ( st->vad_flag == 0 && st->bwidth >= SWB && st->max_bwidth >= SWB ) || ( st->localVAD == 0 && ( st->bwidth <= WB || st->max_bwidth <= WB ) ) ) )
218 : {
219 : /* inactive frames will be coded by AVQ technology (exceptionally it can be later rewritten to GSC technology in ivas_combined_format_brate_sanity()) */
220 207190 : st->coder_type = INACTIVE;
221 : }
222 :
223 : /*---------------------------------------------------------------------*
224 : * Decision matrix (selection of technologies)
225 : *---------------------------------------------------------------------*/
226 :
227 3098934 : st->mdct_sw = MODE1;
228 3098934 : st->mdct_sw_enable = MODE1;
229 3098934 : if ( ( st->total_brate <= MIN_BRATE_GSC_NOISY_FLAG || st->bwidth < SWB || st->flag_ACELP16k ) && st->GSC_IVAS_mode == 0 )
230 : {
231 2933307 : st->GSC_noisy_speech = 0;
232 : }
233 :
234 : /* core selection */
235 3098934 : ivas_decision_matrix_enc( st, element_brate, fft_buff, enerBuffer, last_element_mode );
236 :
237 3098934 : if ( st->L_frame == L_FRAME16k && ( st->coder_type == VOICED || st->coder_type == UNVOICED ) ) /* VOICED and UNVOICED are not supported in ACELP@16k */
238 : {
239 0 : st->coder_type = GENERIC;
240 : }
241 :
242 3098934 : if ( st->core == TCX_20_CORE || st->core == HQ_CORE )
243 : {
244 2755014 : st->Nb_ACELP_frames = 0;
245 : /* Configure TCX with the same bitrate as given when (re-)initializing TCX */
246 2755014 : total_brate_tmp = st->total_brate; /* saved so that it can be restored after the mode/sampling-rate setup */
247 2755014 : st->total_brate = st->bits_frame_nominal * FRAMES_PER_SEC;
248 2755014 : SetModeIndex( st, st->last_bits_frame_nominal * FRAMES_PER_SEC, last_element_mode, MCT_flag );
249 :
250 2755014 : st->sr_core = getCoreSamplerateMode2( element_mode, st->total_brate, st->bwidth, st->flag_ACELP16k, st->rf_mode, st->is_ism_format );
251 2755014 : st->total_brate = total_brate_tmp; /* restore the total bitrate saved above */
252 :
253 2755014 : st->L_frame = (int16_t) ( st->sr_core / FRAMES_PER_SEC ); /* frame length and the derived values below follow the TCX/HQ core sampling rate */
254 2755014 : st->encoderLookahead_enc = NS2SA( st->sr_core, ACELP_LOOK_NS );
255 2755014 : st->encoderPastSamples_enc = ( st->L_frame * 9 ) >> 4;
256 :
257 2755014 : if ( st->sr_core == INT_FS_12k8 )
258 : {
259 106557 : st->preemph_fac = PREEMPH_FAC;
260 106557 : st->gamma = GAMMA1;
261 : }
262 2648457 : else if ( st->sr_core == INT_FS_16k )
263 : {
264 385966 : st->preemph_fac = PREEMPH_FAC_16k;
265 385966 : st->gamma = GAMMA16k;
266 : }
267 : else /* st->sr_core >=25600 */
268 : {
269 2262491 : st->preemph_fac = PREEMPH_FAC_SWB;
270 2262491 : st->gamma = GAMMA16k;
271 : }
272 :
273 2755014 : if ( st->vad_flag == 0 )
274 : {
275 340523 : st->coder_type = INACTIVE;
276 : }
277 2414491 : else if ( st->coder_type > GENERIC )
278 : {
279 589161 : st->coder_type = GENERIC; /* coder types above GENERIC are mapped to GENERIC for the TCX/HQ cores */
280 : }
281 2755014 : if ( st->element_mode != IVAS_CPE_MDCT )
282 : {
283 537246 : SetTCXModeInfo( st, st->hTranDet, &st->hTcxCfg->tcx_curr_overlap_mode );
284 : }
285 : }
286 343920 : else if ( st->element_mode == IVAS_CPE_MDCT )
287 : {
288 8870 : st->hTcxEnc->tfm_mem = 0.75f;
289 : }
290 335050 : else if ( element_brate != last_element_brate )
291 : {
292 52 : if ( st->core_brate != FRAME_NO_DATA )
293 : {
294 52 : SetModeIndex( st, st->bits_frame_nominal * FRAMES_PER_SEC, element_mode, MCT_flag );
295 : }
296 :
297 52 : if ( st->extl != -1 && st->extl != IGF_BWE && st->igf == 1 )
298 : {
299 52 : st->igf = 0; /* IGF is cleared when extl is neither -1 nor IGF_BWE */
300 : }
301 : }
302 :
303 : /*-----------------------------------------------------------------*
304 : * Update of ACELP harmonicity counter (used in ACELP transform codebook @32kbps)
305 : *-----------------------------------------------------------------*/
306 :
307 3098934 : if ( loc_harm == 1 && cor_map_sum > 50 && st->clas == VOICED_CLAS && st->coder_type == GENERIC )
308 : {
309 515862 : st->last_harm_flag_acelp++;
310 :
311 515862 : if ( st->last_harm_flag_acelp > 10 )
312 : {
313 82421 : st->last_harm_flag_acelp = 10; /* saturate the counter at 10 */
314 : }
315 : }
316 : else
317 : {
318 2583072 : st->last_harm_flag_acelp = 0; /* any frame failing the condition resets the counter */
319 : }
320 :
321 : /*-----------------------------------------------------------------*
322 : * Update audio frames counter (used for UV decision)
323 : *-----------------------------------------------------------------*/
324 :
325 3098934 : if ( st->coder_type == AUDIO )
326 : {
327 24434 : st->audio_frame_cnt += AUDIO_COUNTER_STEP;
328 : }
329 3074500 : else if ( st->coder_type != INACTIVE )
330 : {
331 2705745 : st->audio_frame_cnt--; /* INACTIVE frames leave the counter unchanged */
332 : }
333 :
334 3098934 : if ( st->audio_frame_cnt > AUDIO_COUNTER_MAX )
335 : {
336 5749 : st->audio_frame_cnt = AUDIO_COUNTER_MAX; /* upper clamp */
337 : }
338 :
339 3098934 : if ( st->audio_frame_cnt < 0 )
340 : {
341 889323 : st->audio_frame_cnt = 0; /* lower clamp */
342 : }
343 :
344 : /*-----------------------------------------------------------------*
345 : * Set formant sharpening flag
346 : *-----------------------------------------------------------------*/
347 :
348 3098934 : st->sharpFlag = 0;
349 :
350 3098934 : if ( st->coder_type == GENERIC || st->coder_type == VOICED || st->coder_type == TRANSITION )
351 : {
352 2699136 : if ( element_brate >= FRMT_SHP_MIN_BRATE_IVAS && st->lp_noise > FORMANT_SHARPENING_NOISE_THRESHOLD )
353 : {
354 123820 : st->sharpFlag = 0; /* no sharpening: bitrate at/above FRMT_SHP_MIN_BRATE_IVAS and lp_noise above the threshold */
355 : }
356 : else
357 : {
358 2575316 : st->sharpFlag = 1;
359 : }
360 : }
361 :
362 : #ifndef FIX_2344_ALIGN_PREPROC
363 : /* channel-aware mode - due to lack of signaling bit, sharpFlag is 1 always in RF mode */
364 : if ( st->rf_mode && ( st->coder_type == VOICED || st->coder_type == GENERIC ) )
365 : {
366 : st->sharpFlag = 1;
367 : }
368 : #endif
369 : /* TD stereo, secondary channel - due to lack of signaling bits, sharpFlag is forced to 1 for GENERIC and VOICED frames and to 0 for all other coder types */
370 3098934 : if ( element_mode == IVAS_CPE_TD && st->idchan == 1 )
371 : {
372 4591 : st->sharpFlag = 0;
373 4591 : if ( st->coder_type == GENERIC || st->coder_type == VOICED )
374 : {
375 4478 : st->sharpFlag = 1;
376 : }
377 : }
378 :
379 : /*-----------------------------------------------------------------*
380 : * Set voicing flag for HQ FEC
381 : *-----------------------------------------------------------------*/
382 :
383 3098934 : if ( st->sp_aud_decision1 == 0 && ( st->coder_type == VOICED || st->coder_type == GENERIC ) )
384 : {
385 1534728 : *Voicing_flag = 1;
386 : }
387 : else
388 : {
389 1564206 : *Voicing_flag = 0;
390 : }
391 :
392 : /*-----------------------------------------------------------------*
393 : * Compute core-coder buffers at internal sampling rate
394 : *-----------------------------------------------------------------*/
395 :
396 3098934 : sr_core_tmp = ( st->tcxonly == 0 ) ? INT_FS_16k : max( INT_FS_16k, st->sr_core ); /* indicates the ACELP sampling rate */
397 :
398 3098934 : L_look = NS2SA( sr_core_tmp, ACELP_LOOK_NS ); /* lookahead at other sampling rate (16kHz, 25.6kHz, 32kHz) */
399 :
400 3098934 : inp_16k = old_inp_16k + L_INP_MEM - L_look; /* new-sample position minus the lookahead computed above */
401 :
402 3098934 : if ( !flag_16k_smc ) /* NOTE(review): with flag_16k_smc set the core buffers are not recomputed here - presumably done earlier by the caller, confirm */
403 : {
404 : #ifdef FIX_2344_ALIGN_PREPROC
405 3067420 : ivas_compute_core_buffers( st, &inp_16k, old_inp_16k, new_inp_resamp16k, input_frame, last_element_mode, sr_core_tmp, ener, A, Aw, epsP, lsp_new, lsp_mid );
406 : #else
407 : error = ivas_compute_core_buffers( st, &inp_16k, old_inp_16k, new_inp_resamp16k, input_frame, last_element_mode, sr_core_tmp, ener, A, Aw, epsP, lsp_new, lsp_mid );
408 : if ( error != IVAS_ERR_OK )
409 : {
410 : return error;
411 : }
412 : #endif
413 : }
414 :
415 3098934 : if ( !( st->L_frame == L_FRAME16k && element_mode != IVAS_CPE_MDCT ) )
416 : {
417 : /* update signal buffers */
418 2718510 : mvr2r( new_inp_12k8, st->buf_speech_enc_pe + st->L_frame, L_FRAME );
419 2718510 : mvr2r( st->buf_speech_enc + L_FRAME32k, st->buf_speech_enc + st->L_frame, L_FRAME );
420 :
421 2718510 : if ( element_mode == IVAS_CPE_DFT )
422 : {
423 74290 : mvr2r( st->buf_speech_enc + L_FRAME32k - STEREO_DFT_OVL_12k8, st->buf_speech_enc + st->L_frame - STEREO_DFT_OVL_12k8, STEREO_DFT_OVL_12k8 );
424 : }
425 2644220 : else if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_CPE_MDCT )
426 : {
427 2233026 : mvr2r( st->buf_speech_enc + L_FRAME32k - lMemRecalc_12k8 - L_FILT, st->buf_speech_enc + st->L_frame - lMemRecalc_12k8 - L_FILT, lMemRecalc_12k8 + L_FILT );
428 : }
429 411194 : else if ( element_mode == IVAS_SCE )
430 : {
431 411194 : mvr2r( st->buf_speech_enc + L_FRAME32k - L_FILT, st->buf_speech_enc + st->L_frame - L_FILT, L_FILT );
432 : }
433 :
434 2718510 : if ( st->tcxonly == 0 )
435 : {
436 275037 : mvr2r( wsp, st->wspeech_enc, L_FRAME + L_LOOK_12k8 );
437 : }
438 : }
439 :
440 3098934 : if ( flag_16k_smc )
441 : {
442 31514 : mvr2r( st->buf_speech_enc + L_FRAME16k, new_inp_resamp16k, L_FRAME16k ); /* new_inp_resamp16k is filled from buf_speech_enc in this case */
443 : }
444 :
445 : /*-----------------------------------------------------------------*
446 : * Updates
447 : *-----------------------------------------------------------------*/
448 :
449 : /* update old weighted speech buffer - for OL pitch analysis */
450 3098934 : mvr2r( &old_wsp[L_FRAME], st->old_wsp, L_WSP_MEM );
451 :
452 : /* set the pointer of the current frame for the ACELP core */
453 3098934 : if ( st->L_frame == L_FRAME )
454 : {
455 284274 : *inp = inp_12k8;
456 : }
457 : else
458 : {
459 2814660 : *inp = inp_16k;
460 : }
461 :
462 : /* Update VAD hangover frame counter in active frames */
463 3098934 : if ( !( st->core_brate == SID_2k40 || st->core_brate == FRAME_NO_DATA ) && st->tcxonly == 0 )
464 : {
465 623116 : if ( st->hTdCngEnc != NULL && st->Opt_DTX_ON && vad_hover_flag )
466 : {
467 2443 : st->hTdCngEnc->burst_ho_cnt++;
468 2443 : if ( st->hTdCngEnc->burst_ho_cnt > HO_HIST_SIZE )
469 : {
470 47 : st->hTdCngEnc->burst_ho_cnt = HO_HIST_SIZE; /* saturate at HO_HIST_SIZE */
471 : }
472 : }
473 620673 : else if ( st->hTdCngEnc != NULL && vad_flag_dtx )
474 : {
475 38354 : st->hTdCngEnc->burst_ho_cnt = 0; /* reset when vad_flag_dtx is set and the hangover branch above was not taken */
476 : }
477 : }
478 :
479 : #ifdef DEBUG_MODE_ACELP
480 : dbgwrite( inp_12k8, sizeof( float ), L_FRAME, 1, fname( debug_dir, "inp_12k8", st->idchan, st->id_element, ENC ) );
481 : dbgwrite( inp_16k, sizeof( float ), L_FRAME, 1, fname( debug_dir, "inp_16k", st->idchan, st->id_element, ENC ) );
482 : #endif
483 :
484 3098934 : pop_wmops();
485 : #ifdef FIX_2344_ALIGN_PREPROC
486 3098934 : return;
487 : #else
488 : return error;
489 : #endif
490 : }
491 :
492 :
493 : /*-------------------------------------------------------------------*
494 : * ivas_compute_core_buffers()
495 : *
496 : * Compute core-coder buffers at internal sampling rate
497 : *--------------------------------------------------------------------*/
498 :
499 : #ifdef FIX_2344_ALIGN_PREPROC
500 3098934 : void ivas_compute_core_buffers(
501 : #else
502 : ivas_error ivas_compute_core_buffers(
503 : #endif
504 : Encoder_State *st, /* i/o: encoder state structure */
505 : float **inp16k_out, /* o : ptr. to inp. signal in the current frame */
506 : float *old_inp_16k, /* i/o: buffer of old input signal @ 16kHz */
507 : float new_inp_resamp16k_out[], /* o : new input signal @16kHz, non pre-emphasised, used by the WB TBE/BWE */
508 : const int16_t input_frame, /* i : frame length */
509 : const int16_t last_element_mode, /* i : last element mode */
510 : const int32_t sr_core, /* i : core-coder sampling rate */
511 : float *ener, /* o : residual energy from Levinson-Durbin */
512 : float A[NB_SUBFR16k * ( M + 1 )], /* i/o: A(z) unquantized for the 4 subframes */
513 : float Aw[NB_SUBFR16k * ( M + 1 )], /* i/o: weighted A(z) unquantized for subframes */
514 : float epsP[M + 1], /* i/o: LP prediction errors */
515 : float lsp_new[M], /* i/o: LSPs at the end of the frame */
516 : float lsp_mid[M] /* i/o: LSPs in the middle of the frame */
517 : )
518 : {
519 : float *inp_16k, *new_inp_16k;
520 : float tmp, mem_decim16k_dummy[2 * L_FILT_MAX];
521 : const float *signal_in;
522 : int16_t delay, element_mode;
523 : float temp1F_icatdmResampBuf[L_FILT_MAX]; /* temp buffers for ICA TDM resamplers */
524 : float new_inp_resamp16k[L_FRAME16k];
525 : int16_t lMemRecalc, lMemRecalc_16k, L_frame_tmp, L_look;
526 : int32_t input_Fs;
527 :
528 3098934 : signal_in = st->input;
529 :
530 3098934 : input_Fs = st->input_Fs;
531 3098934 : element_mode = st->element_mode;
532 :
533 3098934 : lMemRecalc_16k = 0;
534 3098934 : lMemRecalc = 0;
535 3098934 : if ( element_mode == IVAS_CPE_TD )
536 : {
537 9182 : lMemRecalc_16k = NS2SA( INT_FS_16k, L_MEM_RECALC_NS );
538 9182 : lMemRecalc = NS2SA( input_Fs, L_MEM_RECALC_NS );
539 : }
540 :
541 : /*---------------------------------------------------------------*
542 : * Preprocessing at other sampling frequency rate (16/25.6/32kHz)
543 : *----------------------------------------------------------------*/
544 :
545 3098934 : L_frame_tmp = ( st->tcxonly == 0 ) ? L_FRAME16k : max( L_FRAME16k, st->L_frame );
546 :
547 3098934 : L_look = NS2SA( sr_core, ACELP_LOOK_NS ); /* lookahead at other sampling rate (16kHz, 25.6kHz, 32kHz) */
548 :
549 3098934 : new_inp_16k = old_inp_16k + L_INP_MEM; /* pointer to new samples of the input signal in 16kHz core */
550 3098934 : inp_16k = new_inp_16k - L_look; /* pointer to the current frame of input signal in 16kHz core */
551 :
552 : /* shift the pointer back to take care of resampler memory update */
553 3098934 : if ( element_mode == IVAS_CPE_TD || element_mode == IVAS_SCE )
554 : {
555 737671 : new_inp_16k -= NS2SA( sr_core, DELAY_FIR_RESAMPL_NS );
556 : }
557 :
558 3098934 : if ( element_mode == IVAS_CPE_DFT )
559 : {
560 134625 : mvr2r( st->old_inp_16k, old_inp_16k, L_INP_MEM - STEREO_DFT_OVL_16k );
561 : }
562 2964309 : else if ( element_mode == IVAS_CPE_TD )
563 : {
564 9182 : mvr2r( st->old_inp_16k, old_inp_16k, L_INP_MEM - L_MEM_RECALC_16K - L_FILT16k );
565 : }
566 : else
567 : {
568 2955127 : mvr2r( st->old_inp_16k, old_inp_16k, L_INP_MEM - L_FILT16k );
569 : }
570 :
571 : /*---------------------------------------------------------------*
572 : * Change the sampling frequency to 16/25.6/32 kHz
573 : *----------------------------------------------------------------*/
574 :
575 3098934 : if ( element_mode == IVAS_SCE )
576 : {
577 728489 : if ( input_Fs == sr_core )
578 : {
579 : /* no resampling needed, only delay adjustment to account for the FIR resampling delay */
580 58732 : delay = NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS );
581 58732 : mvr2r( st->mem_decim16k + delay, new_inp_16k, delay );
582 58732 : mvr2r( signal_in, new_inp_16k + delay, input_frame );
583 58732 : mvr2r( signal_in + input_frame - 2 * delay, st->mem_decim16k, 2 * delay );
584 : }
585 669757 : else if ( input_Fs == 32000 || input_Fs == 48000 )
586 : {
587 669757 : modify_Fs( signal_in, input_frame, input_Fs, new_inp_16k, sr_core, st->mem_decim16k, 0 );
588 :
589 669757 : mvr2r( st->mem_decim16k, mem_decim16k_dummy, 2 * L_FILT_MAX );
590 669757 : set_f( temp1F_icatdmResampBuf, 0, L_FILT_MAX );
591 669757 : modify_Fs( temp1F_icatdmResampBuf, NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), input_Fs, new_inp_16k + NS2SA( sr_core, FRAME_SIZE_NS ), sr_core, mem_decim16k_dummy, 0 );
592 : }
593 : }
594 2370445 : else if ( element_mode == IVAS_CPE_TD )
595 : {
596 9182 : if ( input_Fs == sr_core )
597 : {
598 : /* no resampling needed, only delay adjustment to account for the FIR resampling delay */
599 1554 : delay = NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS );
600 1554 : mvr2r( st->mem_decim16k + delay, new_inp_16k - lMemRecalc - delay + L_FILT16k, delay );
601 1554 : mvr2r( signal_in - lMemRecalc, new_inp_16k - lMemRecalc + L_FILT16k, input_frame + lMemRecalc );
602 1554 : mvr2r( signal_in + input_frame - lMemRecalc - 2 * delay, st->mem_decim16k, 2 * delay );
603 : }
604 7628 : else if ( input_Fs == 32000 || input_Fs == 48000 )
605 : {
606 : /* reconstruct past segment of input signal when switching from MDCT stereo */
607 7628 : if ( last_element_mode == IVAS_CPE_MDCT /*|| st->idchan == 1*/ )
608 : {
609 52 : int16_t length_inp = NS2SA( input_Fs, L_MEM_RECALC_SCH_NS - DELAY_FIR_RESAMPL_NS );
610 52 : int16_t length_16k = NS2SA( INT_FS_16k, L_MEM_RECALC_SCH_NS - DELAY_FIR_RESAMPL_NS );
611 :
612 52 : mvr2r( signal_in - lMemRecalc - length_inp - 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), st->mem_decim16k, 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ) );
613 52 : modify_Fs( signal_in - lMemRecalc - length_inp, length_inp, input_Fs, new_inp_16k - lMemRecalc_16k - length_16k, sr_core, st->mem_decim16k, 0 );
614 : }
615 :
616 : #ifdef FIX_2344_ALIGN_PREPROC
617 7628 : modify_Fs( signal_in - lMemRecalc, input_frame, input_Fs, new_inp_16k - lMemRecalc_16k, sr_core, st->mem_decim16k, 0 );
618 : #else
619 : modify_Fs( signal_in - lMemRecalc, input_frame, input_Fs, new_inp_16k - ( lMemRecalc * sr_core ) / st->input_Fs, sr_core, st->mem_decim16k, 0 );
620 : #endif
621 7628 : mvr2r( st->mem_decim16k, mem_decim16k_dummy, 2 * L_FILT_MAX );
622 :
623 7628 : if ( lMemRecalc > 0 )
624 : {
625 : #ifdef FIX_2344_ALIGN_PREPROC
626 7628 : modify_Fs( signal_in - lMemRecalc + input_frame, lMemRecalc, input_Fs, new_inp_16k + NS2SA( sr_core, FRAME_SIZE_NS ) - lMemRecalc_16k, sr_core, mem_decim16k_dummy, 0 );
627 : #else
628 : modify_Fs( signal_in - lMemRecalc + input_frame, lMemRecalc, input_Fs, new_inp_16k + NS2SA( sr_core, FRAME_SIZE_NS ) - ( lMemRecalc * sr_core ) / st->input_Fs, sr_core, mem_decim16k_dummy, 0 );
629 : #endif
630 : }
631 :
632 7628 : set_f( temp1F_icatdmResampBuf, 0, L_FILT_MAX );
633 7628 : modify_Fs( temp1F_icatdmResampBuf, NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), input_Fs, new_inp_16k + NS2SA( sr_core, FRAME_SIZE_NS ), sr_core, mem_decim16k_dummy, 0 );
634 : }
635 : #ifndef FIX_2344_ALIGN_PREPROC
636 : #ifdef DEBUGGING
637 : else
638 : {
639 : return ( IVAS_ERROR( IVAS_ERR_INTERNAL_FATAL, "Wrong internal sampling rate. Exiting..." ) );
640 : }
641 : #endif
642 : #endif
643 : }
644 2361263 : else if ( st->idchan == 0 )
645 : {
646 : /* update the FIR resampling filter memory, needed for switching to time-domain (FIR) resampling */
647 1247944 : mvr2r( signal_in + input_frame - NS2SA( input_Fs, L_MEM_RECALC_NS ) - 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ), st->mem_decim16k, 2 * NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS ) );
648 : }
649 :
650 : /*------------------------------------------------*
651 : * Update BWE memories *
652 : *------------------------------------------------*/
653 :
654 3098934 : if ( sr_core == INT_FS_16k && element_mode != IVAS_CPE_MDCT )
655 : {
656 664697 : delay = NS2SA( INT_FS_16k, DELAY_FD_BWE_ENC_12k8_NS );
657 :
658 664697 : if ( element_mode == IVAS_CPE_DFT )
659 : {
660 : /* save input resampled at 16kHz, non-preemphasised.*/
661 134625 : mvr2r( new_inp_16k, new_inp_resamp16k, L_FRAME16k );
662 :
663 134625 : if ( st->bwidth == WB )
664 : {
665 16443 : mvr2r( new_inp_16k - delay, st->hBWE_FD->old_input_wb, delay );
666 16443 : mvr2r( new_inp_16k - STEREO_DFT_OVL_16k, st->hBWE_FD->old_wtda_swb + L_FRAME16k - STEREO_DFT_OVL_16k + delay, STEREO_DFT_OVL_16k - delay );
667 : }
668 : }
669 530072 : else if ( element_mode == IVAS_CPE_TD )
670 : {
671 : /* save input resampled at 16kHz, non-preemphasised */
672 9182 : mvr2r( new_inp_16k + L_FILT16k, new_inp_resamp16k, L_FRAME16k );
673 :
674 9182 : if ( st->bwidth == WB && st->hBWE_FD != NULL )
675 : {
676 1645 : mvr2r( new_inp_16k + L_FILT16k - delay, st->hBWE_FD->old_input_wb, delay );
677 1645 : mvr2r( new_inp_16k - L_MEM_RECALC_16K, st->hBWE_FD->old_wtda_swb + L_FRAME16k - L_MEM_RECALC_16K - L_FILT16k + delay, L_MEM_RECALC_16K + L_FILT16k - delay );
678 : }
679 : }
680 520890 : else if ( element_mode == IVAS_SCE )
681 : {
682 : /* save input resampled at 16kHz, non-preemphasised */
683 520890 : mvr2r( new_inp_16k + L_FILT16k, new_inp_resamp16k, L_FRAME16k );
684 :
685 520890 : if ( st->bwidth == WB )
686 : {
687 73041 : mvr2r( new_inp_16k, st->hBWE_FD->old_input_wb + delay - L_FILT16k, L_FILT16k );
688 : /* all buffer st->hBWE_FD->old_wtda_swb is correct and does not need to be updated */
689 : }
690 : }
691 : }
692 2434237 : else if ( sr_core > INT_FS_16k )
693 : {
694 : /* reset the buffer, the signal is needed for WB BWEs */
695 2262491 : set_f( new_inp_resamp16k, 0.0f, L_FRAME16k );
696 : }
697 :
698 : /*------------------------------------------------------------------*
699 : * Perform fixed preemphasis (16kHz signal) through 1 - g*z^-1
700 : *-----------------------------------------------------------------*/
701 :
702 3098934 : if ( st->tcxonly == 0 && !( ( element_mode == IVAS_CPE_TD && st->idchan == 1 ) || element_mode == IVAS_CPE_MDCT ) )
703 : {
704 635092 : st->mem_preemph_enc = new_inp_16k[L_frame_tmp - 1];
705 : }
706 :
707 : #ifdef FIX_2344_ALIGN_PREPROC
708 3098934 : if ( sr_core == INT_FS_16k && element_mode != IVAS_CPE_MDCT )
709 : #else
710 : if ( input_Fs > 8000 && sr_core == INT_FS_16k && element_mode != IVAS_CPE_MDCT )
711 : #endif
712 : {
713 664697 : if ( element_mode == IVAS_CPE_DFT )
714 : {
715 134625 : mvr2r( new_inp_16k - STEREO_DFT_OVL_16k + L_FRAME16k, st->inp_16k_mem_stereo_sw, STEREO_DFT_OVL_16k - L_MEM_RECALC_16K - L_FILT16k ); /* update for TD/DFT stereo switching */
716 :
717 134625 : st->mem_preemph16k = st->mem_preemph16k_DFT;
718 134625 : st->mem_preemph16k_DFT = old_inp_16k[L_INP_MEM - STEREO_DFT_OVL_16k + L_FRAME16k - 1];
719 :
720 134625 : if ( st->L_frame == L_FRAME16k )
721 : {
722 60335 : mvr2r( new_inp_16k - STEREO_DFT_OVL_16k, st->buf_speech_enc + L_FRAME16k - STEREO_DFT_OVL_16k, L_FRAME16k + STEREO_DFT_OVL_16k );
723 : }
724 134625 : preemph( new_inp_16k - STEREO_DFT_OVL_16k, PREEMPH_FAC_16k, L_FRAME16k, &( st->mem_preemph16k ) );
725 134625 : tmp = st->mem_preemph16k;
726 134625 : preemph( new_inp_16k - STEREO_DFT_OVL_16k + L_FRAME16k, PREEMPH_FAC_16k, STEREO_DFT_OVL_16k, &tmp );
727 : }
728 530072 : else if ( st->element_mode == IVAS_CPE_TD )
729 : {
730 9182 : if ( last_element_mode == IVAS_CPE_DFT )
731 : {
732 194 : st->mem_preemph16k = st->mem_preemph16k_DFT;
733 194 : mvr2r( st->inp_16k_mem_stereo_sw, new_inp_16k - L_MEM_RECALC_16K - ( STEREO_DFT_OVL_16k - L_MEM_RECALC_16K - L_FILT16k ), STEREO_DFT_OVL_16k - L_MEM_RECALC_16K - L_FILT16k );
734 194 : preemph( new_inp_16k - L_MEM_RECALC_16K - ( STEREO_DFT_OVL_16k - L_MEM_RECALC_16K - L_FILT16k ), PREEMPH_FAC_16k, STEREO_DFT_OVL_16k - L_MEM_RECALC_16K - L_FILT16k, &st->mem_preemph16k );
735 : }
736 :
737 9182 : st->mem_preemph16k_DFT = old_inp_16k[L_INP_MEM - STEREO_DFT_OVL_16k + L_FRAME16k - 1];
738 :
739 : /* preemphasise past segment of input signal when switching from MDCT stereo */
740 9182 : if ( last_element_mode == IVAS_CPE_MDCT )
741 : {
742 52 : int16_t length_16k = NS2SA( INT_FS_16k, L_MEM_RECALC_SCH_NS - DELAY_FIR_RESAMPL_NS );
743 52 : preemph( new_inp_16k - lMemRecalc_16k - length_16k, PREEMPH_FAC, length_16k, &st->mem_preemph16k );
744 : }
745 :
746 9182 : if ( st->L_frame == L_FRAME16k )
747 : {
748 2794 : mvr2r( new_inp_16k - lMemRecalc_16k, st->buf_speech_enc + L_FRAME16k - lMemRecalc_16k - L_FILT16k, L_FRAME16k + lMemRecalc_16k + L_FILT16k );
749 : }
750 9182 : preemph( new_inp_16k - lMemRecalc_16k, PREEMPH_FAC_16k, L_FRAME16k, &( st->mem_preemph16k ) );
751 9182 : tmp = st->mem_preemph16k;
752 9182 : preemph( new_inp_16k - lMemRecalc_16k + L_FRAME16k, PREEMPH_FAC_16k, lMemRecalc_16k + L_FILT16k, &tmp );
753 : }
754 520890 : else if ( element_mode == IVAS_SCE )
755 : {
756 520890 : preemph( new_inp_16k, PREEMPH_FAC_16k, L_FRAME16k, &( st->mem_preemph16k ) );
757 520890 : tmp = st->mem_preemph16k;
758 520890 : preemph( new_inp_16k + L_FRAME16k, PREEMPH_FAC_16k, L_FILT16k, &tmp );
759 : }
760 : }
761 : #ifdef FIX_2344_ALIGN_PREPROC
762 : else /* keep memory up-to-date in case of bitrate switching */
763 : #else
764 : else if ( input_Fs > 8000 ) /* keep memory up-to-date in case of bitrate switching */
765 : #endif
766 : {
767 : #ifndef FIX_2344_ALIGN_PREPROC
768 : if ( element_mode == IVAS_CPE_DFT )
769 : {
770 : st->mem_preemph16k = new_inp_16k[L_frame_tmp - STEREO_DFT_OVL_16k - 1];
771 : }
772 : else if ( element_mode == IVAS_CPE_TD )
773 : {
774 : st->mem_preemph16k = new_inp_16k[L_frame_tmp - lMemRecalc_16k - 1];
775 : }
776 : else
777 : #endif
778 2434237 : if ( element_mode == IVAS_CPE_MDCT )
779 : {
780 2226638 : st->mem_preemph16k = 0;
781 : }
782 : else /* SCE */
783 : {
784 207599 : st->mem_preemph16k = new_inp_16k[L_frame_tmp - 1];
785 : }
786 2434237 : st->mem_preemph16k_DFT = st->mem_preemph16k;
787 : }
788 :
789 : /*-----------------------------------------------------------------*
790 : * LP analysis at 16kHz if ACELP@16k core was selected
791 : * update buffers
792 : *-----------------------------------------------------------------*/
793 :
794 3098934 : if ( st->L_frame == L_FRAME16k && element_mode != IVAS_CPE_MDCT )
795 : {
796 : /* update signal buffers */
797 380424 : if ( element_mode == IVAS_CPE_DFT )
798 : {
799 60335 : mvr2r( new_inp_16k - STEREO_DFT_OVL_16k, st->buf_speech_enc_pe + L_FRAME16k - STEREO_DFT_OVL_16k, L_FRAME16k + STEREO_DFT_OVL_16k );
800 : }
801 320089 : else if ( element_mode == IVAS_CPE_TD )
802 : {
803 2794 : mvr2r( new_inp_16k - lMemRecalc_16k, st->buf_speech_enc_pe + L_FRAME16k - lMemRecalc_16k - L_FILT16k, L_FRAME16k + lMemRecalc_16k + L_FILT16k );
804 : }
805 : else
806 : {
807 317295 : mvr2r( new_inp_resamp16k, st->buf_speech_enc + L_FRAME16k, L_FRAME16k );
808 317295 : mvr2r( new_inp_16k, st->buf_speech_enc_pe + L_FRAME16k, L_FRAME16k );
809 : }
810 :
811 : /*--------------------------------------------------------------*
812 : * LPC analysis
813 : *---------------------------------------------------------------*/
814 :
815 380424 : if ( st->last_L_frame == L_FRAME )
816 : {
817 : /* this is just an approximation, but it is sufficient */
818 2823 : mvr2r( st->lsp_old1, st->lspold_enc, M );
819 : }
820 :
821 380424 : analy_lp( inp_16k, L_FRAME16k, L_look, ener, A, epsP, lsp_new, lsp_mid, st->lspold_enc, st->pitch, st->voicing, INT_FS_16k, 0 );
822 :
823 : /*--------------------------------------------------------------*
824 : * Compute Weighted Input
825 : *---------------------------------------------------------------*/
826 :
827 380424 : find_wsp( L_FRAME16k, L_SUBFR, NB_SUBFR16k, A, Aw, st->speech_enc_pe, PREEMPH_FAC_16k, st->wspeech_enc, &st->mem_wsp_enc, st->gamma, L_LOOK_16k );
828 : }
829 :
830 : /*-----------------------------------------------------------------*
831 : * Updates
832 : *-----------------------------------------------------------------*/
833 :
834 : /* update old input signal @16kHz buffer */
835 3098934 : if ( ( element_mode == IVAS_CPE_TD && st->idchan == 1 ) || element_mode == IVAS_CPE_MDCT )
836 : {
837 2231229 : set_f( st->old_inp_16k, 0, L_INP_MEM );
838 : }
839 : #ifdef FIX_2344_ALIGN_PREPROC
840 867705 : else if ( sr_core == INT_FS_16k )
841 : #else
842 : else if ( input_Fs > 8000 && sr_core == INT_FS_16k )
843 : #endif
844 : {
845 660106 : mvr2r( &old_inp_16k[L_frame_tmp], st->old_inp_16k, L_INP_MEM );
846 : }
847 : #ifdef FIX_2344_ALIGN_PREPROC
848 : else
849 : #else
850 : else if ( input_Fs > 8000 )
851 : #endif
852 : {
853 207599 : lerp( st->old_inp_12k8 + L_INP_MEM - L_INP_MEM * 4 / 5, st->old_inp_16k, L_INP_MEM, L_INP_MEM * 4 / 5 );
854 : }
855 :
856 3098934 : if ( inp16k_out != NULL )
857 : {
858 3067420 : *inp16k_out = inp_16k;
859 : }
860 :
861 3098934 : if ( new_inp_resamp16k_out != NULL )
862 : {
863 3067420 : mvr2r( new_inp_resamp16k, new_inp_resamp16k_out, L_FRAME16k );
864 : }
865 :
866 : #ifdef FIX_2344_ALIGN_PREPROC
867 3098934 : return;
868 : #else
869 : return IVAS_ERR_OK;
870 : #endif
871 : }
|