Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include <math.h>
43 : #include "cnst.h"
44 : #include "rom_enc.h"
45 : #include "rom_com.h"
46 : #include "prot.h"
47 : #include "wmc_auto.h"
48 :
49 : /*---------------------------------------------------------------------*
50 : * Local function prototypes
51 : *---------------------------------------------------------------------*/
52 :
53 : static void encod_gen_voic_core_switch( Encoder_State *st, const int16_t L_frame, const float inp[], const float Aq[], const float A[], const int16_t T_op[], const float voicing[], float *exc, const int32_t core_bitrate );
54 :
55 : static void bwe_switch_enc( Encoder_State *st, const float *old_input );
56 :
57 :
58 : /*-------------------------------------------------------------------*
59 : * acelp_core_switch_enc()
60 : *
61 : * ACELP core encoder in the ACELP->HQ switching frame
62 : *--------------------------------------------------------------------*/
63 :
64 25 : void acelp_core_switch_enc(
65 : Encoder_State *st, /* i/o: encoder state structure */
66 : const float inp12k8[], /* i : input signal @12.8 kHz */
67 : const float inp16k[], /* i : input signal @16 kHz */
68 : const float A[NB_SUBFR16k * ( M + 1 )] /* i : A(z) unquantized for the 4 subframes*/
69 : )
70 : {
71 : int16_t i, j, T_op[2];
72 : float old_exc[L_EXC], *exc; /* excitation signal buffer */
73 : const float *inp;
74 : int32_t cbrate;
75 : float Aq[2 * ( M + 1 )];
76 : uint16_t value;
77 : int16_t nb_bits;
78 25 : BSTR_ENC_HANDLE hBstr = st->hBstr;
79 :
80 : /* initializations */
81 25 : exc = old_exc + L_EXC_MEM; /* pointer to excitation signal in the current frame */
82 25 : mvr2r( st->hLPDmem->old_exc, old_exc, L_EXC_MEM );
83 :
84 25 : mvr2r( st->old_Aq_12_8, Aq, M + 1 );
85 25 : mvr2r( st->old_Aq_12_8, Aq + ( M + 1 ), M + 1 );
86 :
87 25 : T_op[0] = st->pitch[0];
88 25 : T_op[1] = st->pitch[1];
89 :
90 :
91 : /*----------------------------------------------------------------*
92 : * set switching frame bitrate
93 : *----------------------------------------------------------------*/
94 :
95 25 : if ( st->last_L_frame == L_FRAME ) /* ACELP@12k8 core */
96 : {
97 5 : inp = inp12k8;
98 :
99 5 : if ( st->core_brate > ACELP_24k40 )
100 : {
101 0 : cbrate = ACELP_24k40;
102 : }
103 : else
104 : {
105 5 : cbrate = st->core_brate;
106 : }
107 : }
108 : else /* ACELP@16k core */
109 : {
110 20 : inp = inp16k;
111 :
112 20 : if ( st->core_brate <= ACELP_8k00 )
113 : {
114 0 : cbrate = ACELP_8k00;
115 : }
116 20 : else if ( st->core_brate <= ACELP_14k80 )
117 : {
118 0 : cbrate = ACELP_14k80;
119 : }
120 : else
121 : {
122 20 : cbrate = min( st->core_brate, ACELP_22k60 );
123 : }
124 : }
125 :
126 : #ifdef FIX_I4_OL_PITCH
127 : if ( st->last_codec_mode == MODE1 )
128 : {
129 : /* in MODE1 T_op is at 12.8 kHz */
130 : if ( st->last_L_frame != L_FRAME ) /* ACELP@16k core -> convert T_op to 16 kHz */
131 : {
132 : T_op[0] = (short) ( 1.25f * T_op[0] + 0.5f );
133 : T_op[1] = (short) ( 1.25f * T_op[1] + 0.5f );
134 : }
135 : }
136 : else
137 : {
138 : /* in MODE2 T_op is at 16 kHz */
139 : if ( st->last_L_frame == L_FRAME ) /* ACELP@12.8k core -> convert T_op to 12.8 kHz */
140 : {
141 : T_op[0] = (short) ( 0.8f * T_op[0] + 0.5f );
142 : T_op[1] = (short) ( 0.8f * T_op[1] + 0.5f );
143 : }
144 : }
145 : #else
146 25 : if ( st->last_L_frame != L_FRAME ) /* ACELP@16k core */
147 : {
148 20 : T_op[0] = (short) ( 1.25f * T_op[0] + 0.5f );
149 20 : T_op[1] = (short) ( 1.25f * T_op[1] + 0.5f );
150 : }
151 : #endif
152 : /*----------------------------------------------------------------*
153 : * Excitation encoding
154 : *----------------------------------------------------------------*/
155 :
156 25 : config_acelp1( ENC, st->total_brate, cbrate, st->core, -1, -1, st->last_L_frame, -1, &( st->acelp_cfg ), hBstr->nb_bits_tot, GENERIC, st->inactive_coder_type_flag, -1, -1, &j, &i, st->element_mode, &i /*dummy*/, 0 /*tdm_lp_reuse_flag*/, 0 /*tdm_low_rate_mode*/, st->idchan, st->active_cnt, 0 /*tdm_Pitch_reuse_flag*/, 0, 0 /*GSC_IVAS_mode*/ );
157 :
158 25 : encod_gen_voic_core_switch( st, st->last_L_frame, inp, Aq, A, T_op, st->voicing, exc, cbrate );
159 :
160 : /*----------------------------------------------------------------*
161 : * Manipulate ACELP subframe indices (move them to their proper place)
162 : *----------------------------------------------------------------*/
163 :
164 25 : i = find_indice( hBstr, TAG_ACELP_SUBFR_LOOP_START, &value, &nb_bits );
165 : #ifdef DEBUGGING
166 : assert( i >= 0 && "Internal error in ACELP core switching - unable to find ACELP subframe indices!" );
167 : #endif
168 195 : while ( hBstr->ind_list[i].id == TAG_ACELP_SUBFR_LOOP_START )
169 : {
170 170 : push_indice( hBstr, IND_CORE_SWITCHING_CELP_SUBFRAME, hBstr->ind_list[i].value, hBstr->ind_list[i].nb_bits );
171 170 : i++;
172 : }
173 25 : delete_indice( hBstr, TAG_ACELP_SUBFR_LOOP_START );
174 :
175 : /*----------------------------------------------------------------*
176 : * BWE encoding
177 : *----------------------------------------------------------------*/
178 :
179 25 : if ( !( ( st->last_L_frame == L_FRAME16k && inner_frame_tbl[st->bwidth] == L_FRAME16k ) || inner_frame_tbl[st->bwidth] == L_FRAME8k ) )
180 : {
181 25 : bwe_switch_enc( st, (const float *) st->old_input_signal );
182 : }
183 :
184 25 : return;
185 : }
186 :
187 :
188 : /*-------------------------------------------------------------------*
189 : * encod_gen_voic_core_switch()
190 : *
191 : * Encode excitation signal in ACELP->HQ switching frame
192 : *-------------------------------------------------------------------*/
193 :
194 25 : static void encod_gen_voic_core_switch(
195 : Encoder_State *st, /* i/o: state structure */
196 : const int16_t L_frame, /* i : length of the frame */
197 : const float inp[], /* i : input signal */
198 : const float Aq[], /* i : LP coefficients */
199 : const float A[], /* i : unquantized A(z) filter */
200 : const int16_t T_op[], /* i : open loop pitch */
201 : const float voicing[], /* i : voicing */
202 : float *exc, /* i/o: current non-enhanced excitation */
203 : const int32_t core_bitrate /* i : switching frame bitrate */
204 : )
205 : {
206 : float res[L_SUBFR]; /* residual signal */
207 : float Ap[M + 1]; /* A(z) with spectral expansion */
208 : float xn[L_SUBFR]; /* Target vector for pitch search */
209 : float xn2[L_SUBFR]; /* Target vector for codebook search */
210 : float cn[L_SUBFR]; /* Target vector in residual domain */
211 : float h1[L_SUBFR + ( M + 1 )]; /* Impulse response vector */
212 : float code[L_SUBFR]; /* Fixed codebook excitation */
213 : float y1[L_SUBFR]; /* Filtered adaptive excitation */
214 : float y2[L_SUBFR]; /* Filtered algebraic excitation */
215 : float gain_pit; /* Pitch gain */
216 : float voice_fac; /* Voicing factor */
217 : float gain_code; /* Gain of code */
218 : float gain_inov; /* inovation gain */
219 : int16_t i; /* tmp variables */
220 : int16_t T0, T0_frac; /* close loop integer pitch and fractional part */
221 : int16_t T0_min, T0_max; /* pitch variables */
222 : float pitch; /* floating pitch value */
223 : float g_corr[6]; /* ACELP correl, values + gain pitch */
224 : int16_t clip_gain; /* ISF clip gain */
225 : int16_t unbits; /* number of unused bits for EVS_PI */
226 : float norm_gain_code;
227 : int16_t pitch_limit_flag;
228 : float tmpF, dummyF[NB_SUBFR16k];
229 : int16_t lp_select, lp_flag;
230 :
231 25 : LPD_state_HANDLE hLPDmem = st->hLPDmem;
232 :
233 25 : BSTR_ENC_HANDLE hBstr = st->hBstr;
234 :
235 : /*------------------------------------------------------------------*
236 : * Initializations
237 : *------------------------------------------------------------------*/
238 :
239 25 : unbits = 0;
240 :
241 25 : if ( L_frame == L_FRAME )
242 : {
243 5 : T0_max = PIT_MAX;
244 5 : T0_min = PIT_MIN;
245 : }
246 : else /* L_frame == L_FRAME16k */
247 : {
248 20 : T0_max = PIT16k_MAX;
249 20 : T0_min = PIT16k_MIN;
250 : }
251 :
252 : /*------------------------------------------------------------------*
253 : * Calculation of LP residual (filtering through A[z] filter)
254 : *------------------------------------------------------------------*/
255 :
256 25 : residu( Aq, M, inp, res, L_SUBFR );
257 :
258 : /*------------------------------------------------------------------*
259 : * ACELP subframe loop
260 : *------------------------------------------------------------------*/
261 :
262 25 : mvr2r( res, exc, L_SUBFR );
263 :
264 25 : if ( L_frame == L_FRAME16k )
265 : {
266 20 : weight_a( A, Ap, GAMMA16k, M ); /* Bandwidth expansion of A(z) filter coefficients */
267 20 : find_targets( inp, hLPDmem->mem_syn, 0, &( hLPDmem->mem_w0 ), Aq, res, L_SUBFR, Ap, PREEMPH_FAC_16k, xn, cn, h1 );
268 : }
269 : else
270 : {
271 5 : weight_a( A, Ap, GAMMA1, M ); /* Bandwidth expansion of A(z) filter coefficients */
272 5 : find_targets( inp, hLPDmem->mem_syn, 0, &( hLPDmem->mem_w0 ), Aq, res, L_SUBFR, Ap, PREEMPH_FAC, xn, cn, h1 );
273 : }
274 :
275 : /*----------------------------------------------------------------*
276 : * Close-loop pitch search and quantization
277 : * Adaptive exc. construction
278 : *----------------------------------------------------------------*/
279 :
280 25 : set_f( dummyF, -1, NB_SUBFR16k ); /* hack to signal ACELP->HQ switching frame */
281 25 : pitch = pit_encode( hBstr, st->acelp_cfg.pitch_bits, core_bitrate, 0, L_frame, GENERIC, &pitch_limit_flag, 0, exc, L_SUBFR, T_op, &T0_min, &T0_max, &T0, &T0_frac, h1, xn, 0 /*hStereoTD->tdm_Pitch_reuse_flag*/, dummyF /*hStereoTD->tdm_Pri_pitch_buf*/ );
282 :
283 : /*-----------------------------------------------------------------*
284 : * Find adaptive exitation
285 : *-----------------------------------------------------------------*/
286 :
287 25 : pred_lt4( exc, exc, T0, T0_frac, L_SUBFR + 1, inter4_2, L_INTERPOL2, PIT_UP_SAMP );
288 :
289 : /*-----------------------------------------------------------------*
290 : * Gain clipping test to avoid unstable synthesis on frame erasure
291 : * or in case of floating point encoder & fixed p. decoder
292 : *-----------------------------------------------------------------*/
293 :
294 25 : clip_gain = gp_clip( st->element_mode, core_bitrate, voicing, 0, GENERIC, xn, st->clip_var );
295 :
296 : /*-----------------------------------------------------------------*
297 : * LP filtering of the adaptive excitation, codebook target computation
298 : *-----------------------------------------------------------------*/
299 :
300 25 : lp_flag = st->acelp_cfg.ltf_mode;
301 :
302 25 : lp_select = lp_filt_exc_enc( MODE1, GENERIC, 0, exc, h1, xn, y1, xn2, L_SUBFR, L_frame, g_corr, clip_gain, &gain_pit, &lp_flag );
303 :
304 25 : if ( lp_flag == NORMAL_OPERATION )
305 : {
306 25 : push_indice( hBstr, IND_LP_FILT_SELECT, lp_select, 1 );
307 : }
308 :
309 : /*-----------------------------------------------------------------*
310 : * Innovation encoding
311 : *-----------------------------------------------------------------*/
312 :
313 25 : inov_encode( st, core_bitrate, 0, L_frame, L_frame, GENERIC, st->bwidth, 0, 0, -1, Aq, gain_pit, cn, exc, h1, hLPDmem->tilt_code, pitch, xn2, code, y2, &unbits, L_SUBFR );
314 :
315 : /*-----------------------------------------------------------------*
316 : * Gain encoding
317 : *-----------------------------------------------------------------*/
318 :
319 25 : if ( L_frame == L_FRAME )
320 : {
321 5 : gain_enc_mless( hBstr, st->acelp_cfg.gains_mode, st->element_mode, L_frame, 0, -1, xn, y1, y2, code, st->old_Es_pred, &gain_pit, &gain_code, &gain_inov, &norm_gain_code, g_corr, clip_gain );
322 : }
323 : else
324 : {
325 20 : gain_enc_mless( hBstr, st->acelp_cfg.gains_mode, st->element_mode, L_frame, 0, -1, xn, y1, y2, code, st->old_Es_pred, &gain_pit, &gain_code, &gain_inov, &norm_gain_code, g_corr, clip_gain );
326 : }
327 :
328 25 : gp_clip_test_gain_pit( st->element_mode, core_bitrate, gain_pit, st->clip_var );
329 :
330 25 : hLPDmem->tilt_code = est_tilt( exc, gain_pit, code, gain_code, &voice_fac, L_SUBFR, 0 );
331 :
332 : /*-----------------------------------------------------------------*
333 : * Construct adaptive part of the excitation
334 : *-----------------------------------------------------------------*/
335 :
336 1625 : for ( i = 0; i < L_SUBFR; i++ )
337 : {
338 1600 : tmpF = gain_pit * exc[i];
339 1600 : exc[i] = tmpF + gain_code * code[i];
340 : }
341 :
342 : /* write reserved bits */
343 25 : if ( unbits )
344 : {
345 0 : push_indice( hBstr, IND_UNUSED, 0, unbits );
346 : }
347 :
348 : /*-----------------------------------------------------------------*
349 : * long term prediction on the 2nd sub frame
350 : *-----------------------------------------------------------------*/
351 :
352 25 : pred_lt4( &exc[L_SUBFR], &exc[L_SUBFR], T0, T0_frac, L_SUBFR + 1, inter4_2, L_INTERPOL2, PIT_UP_SAMP );
353 :
354 1625 : for ( i = 0; i < L_SUBFR; i++ )
355 : {
356 1600 : exc[i + L_SUBFR] *= gain_pit;
357 : }
358 :
359 25 : return;
360 : }
361 :
362 :
363 : /*-------------------------------------------------------------------*
364 : * bwe_switch_enc()
365 : *
366 : * Encode BWE in ACELP->HQ switching frame
367 : *-------------------------------------------------------------------*/
368 :
369 25 : static void bwe_switch_enc(
370 : Encoder_State *st, /* i/o: encoder state structure */
371 : const float *new_speech /* i : original input signal */
372 : )
373 : {
374 : int16_t i, k, delta, Loverlapp, d1, d1m, maxd1, ind1, fdelay, gapsize;
375 : float accA, accB, min_sq_cross, min_corr, E1, E2, gain;
376 : float tmp_mem[2 * L_FILT48k], tmp_mem2[2 * L_FILT48k], hb_synth_tmp[NS2SA( 48000, 10000000L )];
377 : const float *hp_filter;
378 : float synth_subfr_bwe[SWITCH_MAX_GAP]; /* synthesized bwe for core switching */
379 : int16_t n, L, input_frame;
380 :
381 25 : input_frame = (int16_t) ( st->input_Fs / FRAMES_PER_SEC );
382 :
383 25 : L = NS2SA( st->input_Fs, FRAME_SIZE_NS );
384 25 : n = (int16_t) ( (float) L * N_ZERO_MDCT_NS / FRAME_SIZE_NS );
385 :
386 : /* set multiplication factor according to the sampling rate */
387 25 : hp_filter = hp16000_48000;
388 25 : fdelay = 48;
389 25 : if ( st->input_Fs == 16000 )
390 : {
391 0 : delta = 1;
392 0 : if ( st->last_L_frame == L_FRAME )
393 : {
394 0 : hp_filter = hp12800_16000;
395 0 : fdelay = 20;
396 : }
397 : }
398 25 : else if ( st->input_Fs == 32000 )
399 : {
400 5 : delta = 2;
401 5 : if ( st->last_L_frame == L_FRAME )
402 : {
403 5 : hp_filter = hp12800_32000;
404 5 : fdelay = 40;
405 : }
406 : else
407 : {
408 0 : hp_filter = hp16000_32000;
409 0 : fdelay = 32;
410 : }
411 : }
412 : else
413 : {
414 20 : delta = 3;
415 20 : if ( st->last_L_frame == L_FRAME )
416 : {
417 0 : hp_filter = hp12800_48000;
418 0 : fdelay = 60;
419 : }
420 : }
421 :
422 25 : set_f( tmp_mem, 0, 2 * L_FILT48k );
423 25 : set_f( tmp_mem2, 0, 2 * L_FILT48k );
424 :
425 25 : Loverlapp = delta * SWITCH_OVERLAP_8k * 2;
426 25 : gapsize = delta * ( NS2SA( 16000, SWITCH_GAP_LENGTH_NS ) );
427 25 : set_f( synth_subfr_bwe, 0, SWITCH_MAX_GAP );
428 :
429 8185 : for ( i = 0; i < gapsize + fdelay; i++ )
430 : {
431 : /* target */
432 8160 : synth_subfr_bwe[i] = new_speech[i + L / 2 + n + Loverlapp - gapsize];
433 : }
434 :
435 1185 : for ( i = 0; i < fdelay; i++ )
436 : {
437 : /* put the 40 past samples into the memory */
438 1160 : tmp_mem[i] = new_speech[i + L / 2 + n + Loverlapp - gapsize - fdelay];
439 : }
440 :
441 : /* HP filtered target */
442 25 : fir( synth_subfr_bwe, hp_filter, synth_subfr_bwe, tmp_mem, gapsize + fdelay, fdelay, 0 );
443 25 : mvr2r( synth_subfr_bwe + (int16_t) ( fdelay / 2 ), synth_subfr_bwe, delta * ( NS2SA( 16000, SWITCH_GAP_LENGTH_NS ) ) - (int16_t) ( fdelay / 2 ) );
444 :
445 : /* codebook */
446 25 : fir( new_speech, hp_filter, hb_synth_tmp, tmp_mem2, input_frame >> 1, fdelay, 1 );
447 :
448 25 : min_sq_cross = -1;
449 25 : min_corr = 0;
450 25 : d1m = 0;
451 :
452 25 : maxd1 = (int16_t) ( ( ( input_frame >> 1 ) - gapsize - fdelay ) / delta );
453 :
454 : /* find delay */
455 1105 : for ( k = 0, d1 = 0; k < maxd1; d1 += delta, k++ )
456 : {
457 1080 : accA = accB = 0;
458 109080 : for ( i = 0; i < gapsize; i += delta )
459 : {
460 108000 : accA += hb_synth_tmp[d1 + i + fdelay] * hb_synth_tmp[d1 + i + fdelay];
461 108000 : accB += hb_synth_tmp[d1 + i + fdelay] * synth_subfr_bwe[i];
462 : }
463 1080 : if ( accB * accB * min_corr >= min_sq_cross * accA )
464 : {
465 135 : d1m = k;
466 135 : min_corr = accA;
467 135 : min_sq_cross = accB * accB;
468 : }
469 : }
470 :
471 25 : push_indice( st->hBstr, IND_CORE_SWITCHING_AUDIO_DELAY, d1m, AUDIODELAYBITS );
472 :
473 : /* find gain */
474 25 : E1 = 0.0f;
475 25 : E2 = 1.0f; /* to avoid /0 */
476 :
477 7025 : for ( i = 0; i < gapsize; i++ )
478 : {
479 7000 : E1 += synth_subfr_bwe[i] * synth_subfr_bwe[i];
480 7000 : E2 += hb_synth_tmp[i + d1m * delta + fdelay] * hb_synth_tmp[i + d1m * delta + fdelay];
481 : }
482 :
483 25 : gain = (float) sqrt( (float) ( E1 / E2 ) );
484 :
485 25 : ind1 = usquant( gain, &gain, MINVALUEOFFIRSTGAIN, DELTAOFFIRSTGAIN, ( 1 << NOOFGAINBITS1 ) );
486 25 : push_indice( st->hBstr, IND_CORE_SWITCHING_AUDIO_GAIN, ind1, NOOFGAINBITS1 );
487 :
488 25 : return;
489 : }
|