Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #include <math.h>
40 : #include "cnst.h"
41 : #include "prot.h"
42 : #include "rom_com.h"
43 : #include "wmc_auto.h"
44 :
45 :
46 : /*-------------------------------------------------------------------*
47 : * DTFS_getSpEngyFromResAmp()
48 : *
49 : * Get speech energy from the DTFS
50 : *-------------------------------------------------------------------*/
51 :
52 1268 : static float DTFS_getSpEngyFromResAmp(
53 : DTFS_STRUCTURE X, /* i : DTFS */
54 : float lband, /* i : Low band end to get energy from */
55 : float hband, /* i : High band end to get energy from */
56 : const float *curr_lsp /* i : LPCs */
57 : )
58 : {
59 : int16_t i, k;
60 : float w, tmp, Re, Im;
61 : double en, freq, fdiff;
62 :
63 1268 : fdiff = (float) INT_FS_12k8 / X.lag;
64 :
65 1268 : if ( hband == X.upper_cut_off_freq )
66 : {
67 634 : hband = 4001.0;
68 : }
69 :
70 1268 : en = 0.0f;
71 25504 : for ( freq = 0.0, k = 0; k <= min( X.lag >> 1, X.nH_4kHz ); k++, freq += fdiff )
72 : {
73 24236 : if ( X.a[k] < 0.0 )
74 : {
75 0 : X.a[k] = 0.0;
76 : }
77 :
78 24236 : if ( freq < hband && freq >= lband )
79 : {
80 12118 : Re = 1.0f;
81 12118 : Im = 0.0f;
82 12118 : tmp = (float) ( PI2 * freq / (float) INT_FS_12k8 );
83 218124 : for ( i = 0, w = tmp; i < M + 1; i++, w += tmp )
84 : {
85 206006 : Re += (float) ( curr_lsp[i] * cos( w ) );
86 206006 : Im -= (float) ( curr_lsp[i] * sin( w ) );
87 : }
88 12118 : if ( k == 0 || ( X.lag % 2 == 0 && k == X.lag >> 1 ) )
89 : {
90 634 : en += X.a[k] * X.a[k] / ( Re * Re + Im * Im );
91 : }
92 : else
93 : {
94 11484 : en += 2.0 * X.a[k] * X.a[k] / ( Re * Re + Im * Im );
95 : }
96 : }
97 : }
98 1268 : return ( (float) en );
99 : }
100 :
101 : /*-------------------------------------------------------------------*
102 : * DTFS_quant_cw()
103 : *
104 : * DTFS quantization
105 : *-------------------------------------------------------------------*/
106 :
107 : #define P_CBSIZE 64
108 :
109 317 : static int16_t DTFS_quant_cw(
110 : DTFS_STRUCTURE *X, /* i/o: DTFS unquant inp, quant out */
111 : const int16_t pl, /* i : Previous lag */
112 : const float *curr_lpc, /* i : LPC */
113 : int16_t *POWER_IDX, /* o : Power index */
114 : int16_t *AMP_IDX, /* o : Amplitude index */
115 : float *lastLgainE, /* i/o: last frame lowband gain */
116 : float *lastHgainE, /* i/o: last frame highband gain */
117 : float *lasterbE /* i/o: last frame ERB vector */
118 : )
119 : {
120 317 : int16_t num_erb = 0;
121 317 : const float( *PowerCB )[2] = NULL;
122 : float G_CURR_ERB[NUM_ERB_WB];
123 : float G_A_POWER[2];
124 : float tmp, w[2], target1, target2, error, minerror;
125 : float mfreq[NUM_ERB_WB], diff_erb[NUM_ERB_WB], curr_erb[NUM_ERB_WB];
126 : int16_t j, slot[NUM_ERB_WB], bincount;
127 317 : int16_t returnFlag = 1;
128 : float amperror;
129 :
130 317 : if ( X->upper_cut_off_freq == 4000.0 )
131 : {
132 137 : num_erb = NUM_ERB_NB;
133 137 : PowerCB = PowerCB_NB;
134 : }
135 180 : else if ( X->upper_cut_off_freq == 6400.0 )
136 : {
137 180 : num_erb = NUM_ERB_WB;
138 180 : PowerCB = PowerCB_WB;
139 : }
140 :
141 : /* Getting the Speech Domain Energy LOG Ratio */
142 317 : w[0] = (float) max( 1E-10, log10( DTFS_getSpEngyFromResAmp( *X, 0.0, 1104.5, curr_lpc ) ) );
143 317 : w[1] = (float) max( 1E-10, log10( DTFS_getSpEngyFromResAmp( *X, 1104.5, X->upper_cut_off_freq, curr_lpc ) ) );
144 317 : tmp = w[0] + w[1];
145 317 : w[0] /= tmp;
146 317 : w[1] /= tmp;
147 :
148 : /* Power Quantization */
149 317 : G_A_POWER[0] = (float) log10( X->lag * DTFS_setEngyHarm( 92.0, 1104.5, 0.0, 1104.5, 1.0, X ) );
150 317 : G_A_POWER[1] = (float) log10( X->lag * DTFS_setEngyHarm( 1104.5, X->upper_cut_off_freq_of_interest, 1104.5, X->upper_cut_off_freq, 1.0, X ) );
151 317 : target1 = G_A_POWER[0] - *lastLgainE;
152 317 : target2 = G_A_POWER[1] - *lastHgainE;
153 317 : minerror = (float) HUGE_VAL;
154 317 : *POWER_IDX = -1;
155 20605 : for ( j = 0; j < P_CBSIZE; j++ )
156 : {
157 20288 : error = (float) ( w[0] * fabs( target1 - PowerCB[j][0] ) + w[1] * fabs( target2 - PowerCB[j][1] ) );
158 20288 : if ( ( target1 >= PowerCB[j][0] ) && ( target2 >= PowerCB[j][1] ) )
159 : {
160 8654 : error *= 0.8f;
161 : }
162 20288 : if ( error < minerror )
163 : {
164 1653 : minerror = error;
165 1653 : *POWER_IDX = j;
166 : }
167 : }
168 317 : DTFS_to_erb( *X, curr_erb );
169 :
170 7651 : for ( j = 0; j < num_erb; j++ )
171 : {
172 7334 : G_CURR_ERB[j] = curr_erb[j];
173 : }
174 317 : erb_slot( X->lag, slot, mfreq, num_erb );
175 : /* Amplitude Quantization */
176 317 : erb_diff( lasterbE, pl, curr_erb, X->lag, curr_lpc, diff_erb, AMP_IDX, num_erb );
177 :
178 : /* Amplitude Dequantization */
179 317 : erb_add( curr_erb, X->lag, lasterbE, pl, AMP_IDX, num_erb );
180 317 : curr_erb[0] = curr_erb[1] * 0.3f;
181 317 : curr_erb[num_erb - 2] = curr_erb[num_erb - 3] * 0.3f;
182 317 : curr_erb[num_erb - 1] = 0;
183 : /* Determine if the amplitude quantization is good enough */
184 317 : amperror = 0.0;
185 317 : bincount = 0;
186 3170 : for ( j = 1; j < 10; j++ )
187 : {
188 2853 : if ( slot[j] != 0 )
189 : {
190 1237 : amperror += (float) ( fabs( G_CURR_ERB[j] - curr_erb[j] ) );
191 1237 : bincount++;
192 : }
193 : }
194 317 : amperror /= bincount;
195 :
196 317 : if ( ( amperror > 0.47 ) && ( target1 > -0.4 ) )
197 : {
198 2 : returnFlag = 0; /* Bumping up */
199 : }
200 :
201 317 : DTFS_erb_inv( curr_erb, slot, mfreq, X, num_erb );
202 :
203 : /* Back up the lasterbE memory after power normalization */
204 317 : DTFS_setEngyHarm( 92.0, 1104.5, 0.0, 1104.5, 1.0, X );
205 317 : DTFS_setEngyHarm( 1104.5, X->upper_cut_off_freq_of_interest, 1104.5, X->upper_cut_off_freq, 1.0, X );
206 :
207 317 : DTFS_to_erb( *X, lasterbE );
208 :
209 : /* Power Dequantization */
210 317 : *lastLgainE += (float) PowerCB[*POWER_IDX][0];
211 317 : *lastHgainE += (float) PowerCB[*POWER_IDX][1];
212 317 : target1 = (float) pow( 10.0, (double) ( *lastLgainE ) ) / X->lag;
213 :
214 317 : if ( !( target1 >= 0.0 ) )
215 : {
216 0 : target1 = 0;
217 : }
218 :
219 317 : DTFS_setEngyHarm( 92.0f, 1104.5f, 0.0f, 1104.5f, target1, X );
220 317 : target2 = (float) pow( 10.0, (double) ( *lastHgainE ) ) / X->lag;
221 :
222 317 : if ( !( target2 >= 0.0 ) )
223 : {
224 0 : target2 = 0;
225 : }
226 :
227 317 : DTFS_setEngyHarm( 1104.5, X->upper_cut_off_freq_of_interest, 1104.5, X->upper_cut_off_freq, target2, X );
228 :
229 317 : return returnFlag; /* amp quant performance pass/fail */
230 : }
231 :
232 : /*-------------------------------------------------------------------*
233 : * DTFS_alignment_fine_new()
234 : *
235 : * Shift value for DTFS finer alignment.
236 : *-------------------------------------------------------------------*/
237 :
238 317 : static float DTFS_alignment_fine_new(
239 : DTFS_STRUCTURE X1_DTFS, /* i : X1 the reference DTFS to keep fixed */
240 : DTFS_STRUCTURE X2_DTFS, /* i : X2 the test DTFS to shift to find best match */
241 : float Eshift /* i : Expected shift - coarse value */
242 : )
243 : {
244 : int16_t k;
245 : float maxcorr, corr, Adiff, diff, tmp, tmp1, fshift, n;
246 :
247 317 : if ( X1_DTFS.lag < X2_DTFS.lag )
248 : {
249 0 : DTFS_zeroPadd( X2_DTFS.lag, &X1_DTFS );
250 : }
251 317 : maxcorr = (float) -HUGE_VAL;
252 317 : fshift = Eshift;
253 317 : Adiff = 20.0f;
254 317 : diff = 1.0f;
255 12997 : for ( n = Eshift - Adiff + 1; n <= Eshift + Adiff; n += diff )
256 : {
257 12680 : corr = tmp = 0.0;
258 : /* bit-exact optimization - PI2/X2_DTFS.lag should be counted as a single divide outside loops */
259 12680 : tmp1 = (float) ( PI2 * n / X2_DTFS.lag );
260 :
261 334520 : for ( k = 0; k <= min( X2_DTFS.lag >> 1, X2_DTFS.nH ); k++, tmp += tmp1 )
262 : {
263 321840 : corr += (float) ( ( X1_DTFS.a[k] * X2_DTFS.a[k] + X1_DTFS.b[k] * X2_DTFS.b[k] ) * cos( tmp ) );
264 321840 : corr += (float) ( ( X1_DTFS.b[k] * X2_DTFS.a[k] - X1_DTFS.a[k] * X2_DTFS.b[k] ) * sin( tmp ) );
265 : }
266 12680 : if ( corr * ( 1.0f - 0.01f * fabs( n - Eshift ) ) > maxcorr )
267 : {
268 1747 : fshift = n;
269 1747 : maxcorr = corr;
270 : }
271 : }
272 :
273 317 : return fshift; /* o : shift value to shift X2 by */
274 : }
275 :
276 :
277 : /*-------------------------------------------------------------------*
278 : * ppp_quarter_encoder()
279 : *
280 : * PPP quarter encoder
281 : *--------------------------------------------------------------------*/
282 :
283 317 : ivas_error ppp_quarter_encoder(
284 : int16_t *returnFlag, /* o : return value */
285 : BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */
286 : DTFS_STRUCTURE *CURRCW_Q, /* o : Quantized (amp/phase) DTFS */
287 : DTFS_STRUCTURE *TARGETCW, /* o : DTFS with quant phase but unquant Amp */
288 : const int16_t prevCW_lag, /* i : previous lag */
289 : DTFS_STRUCTURE vCURRCW_NQ, /* i : Unquantized DTFS */
290 : const float *curr_lpc, /* i : LPCS */
291 : float *lastLgainE, /* i/o: last low band gain */
292 : float *lastHgainE, /* i/o: last high band gain */
293 : float *lasterbE, /* i/o: last ERB vector */
294 : DTFS_STRUCTURE PREV_CW_E /* i : past DTFS */
295 : )
296 : {
297 : DTFS_STRUCTURE *PREVDTFS;
298 : float tmp, temp_pl, temp_l;
299 : int16_t l;
300 : int16_t POWER_IDX; /* Codebook index for the power quantization for PPP */
301 : int16_t AMP_IDX[2]; /* Codebook index for the Amplitude quantization for PPP */
302 317 : float Erot = 0.0, z = 0.0;
303 : ivas_error error;
304 :
305 317 : error = IVAS_ERR_OK;
306 317 : *returnFlag = 1;
307 :
308 317 : if ( ( error = DTFS_new( &PREVDTFS ) ) != IVAS_ERR_OK )
309 : {
310 0 : return error;
311 : }
312 :
313 :
314 317 : DTFS_copy( CURRCW_Q, vCURRCW_NQ );
315 317 : DTFS_copy( PREVDTFS, PREV_CW_E );
316 :
317 317 : l = CURRCW_Q->lag;
318 317 : temp_l = (float) CURRCW_Q->lag;
319 317 : temp_pl = (float) prevCW_lag;
320 :
321 317 : DTFS_adjustLag( PREVDTFS, l );
322 :
323 317 : z = ( ( L_FRAME - temp_l ) * ( temp_l + temp_pl ) ) / ( 2 * temp_l * temp_pl );
324 :
325 317 : Erot = (float) ( temp_l - rint_new( temp_l * ( z - floor( z ) ) ) );
326 :
327 317 : DTFS_phaseShift( PREVDTFS, (float) ( PI2 * Erot / CURRCW_Q->lag ) );
328 317 : DTFS_car2pol( PREVDTFS );
329 :
330 : /* Amplitude Quantization */
331 317 : DTFS_car2pol( CURRCW_Q ); /* at this point currCW_q=curr_nq */
332 :
333 317 : *returnFlag = DTFS_quant_cw( CURRCW_Q, prevCW_lag, curr_lpc, &POWER_IDX, AMP_IDX, lastLgainE, lastHgainE, lasterbE );
334 :
335 317 : push_indice( hBstr, IND_AMP0, AMP_IDX[0], 6 );
336 317 : push_indice( hBstr, IND_AMP1, AMP_IDX[1], 6 );
337 317 : push_indice( hBstr, IND_POWER, POWER_IDX, 6 );
338 :
339 317 : DTFS_copy( TARGETCW, *CURRCW_Q );
340 :
341 : /* Copying phase spectrum over */
342 317 : mvr2r( PREVDTFS->b, CURRCW_Q->b, (int16_t) ( CURRCW_Q->lag >> 1 ) + 1 );
343 :
344 317 : DTFS_pol2car( CURRCW_Q );
345 317 : DTFS_pol2car( TARGETCW );
346 317 : tmp = DTFS_alignment_fine_new( *TARGETCW, *CURRCW_Q, 0.0 );
347 :
348 317 : if ( ( ( tmp + 3 ) > 7 ) || ( ( tmp + 3 ) < 0 ) )
349 : {
350 138 : tmp = 0;
351 138 : *returnFlag = 0;
352 : }
353 :
354 317 : DTFS_phaseShift( CURRCW_Q, (float) ( PI2 * tmp / CURRCW_Q->lag ) );
355 :
356 317 : push_indice( hBstr, IND_GLOBAL_ALIGNMENT, (int16_t) ( tmp + 3 ), 3 );
357 :
358 317 : free( PREVDTFS );
359 :
360 317 : return error;
361 : }
362 :
363 :
364 : /*-------------------------------------------------------------------*
365 : * set_ppp_mode()
366 : *
367 : * Determine if the current frame should be coded by PPP or not
368 : * Impose PPP - CELP - CELP pattern
369 : *-------------------------------------------------------------------*/
370 :
371 2390 : void set_ppp_mode(
372 : Encoder_State *st, /* i/o: encoder state structure */
373 : const int16_t noisy_speech_HO, /* i : SC-VBR noisy speech HO flag */
374 : const int16_t clean_speech_HO, /* i : SC-VBR clean speech HO flag */
375 : const int16_t NB_speech_HO, /* i : SC-VBR NB speech HO flag */
376 : const int16_t localVAD_he /* i : HE-SAD flag without hangover*/
377 : )
378 : {
379 2390 : SC_VBR_ENC_HANDLE hSC_VBR = st->hSC_VBR;
380 :
381 2390 : if ( st->vad_flag == 1 &&
382 2330 : ( noisy_speech_HO == 1 || clean_speech_HO == 1 || NB_speech_HO == 1 ) &&
383 3 : ( st->localVAD == 0 || localVAD_he == 0 ) )
384 : {
385 3 : st->coder_type = UNVOICED;
386 : }
387 :
388 2390 : if ( st->coder_type == INACTIVE && st->vad_flag == 0 && hSC_VBR->last_nelp_mode == 1 ) /* avoid HO frame go to GSC */
389 : {
390 0 : st->coder_type = UNVOICED;
391 : }
392 :
393 : /* force the coder to NELP mode during the first five frames */
394 : /* this will indicate the decoder that the coder is operating in the VBR mode */
395 2390 : if ( st->ini_frame < 5 )
396 : {
397 30 : st->coder_type = UNVOICED;
398 30 : st->vad_flag = 1;
399 : }
400 :
401 : /* Pattern PPP-CELP-CELP (pppcountE holds number of consecutive PPP frames) */
402 2390 : if ( st->coder_type != VOICED || st->last_coder_type == TRANSITION )
403 : {
404 : /* ensure no transient to PPP transition */
405 1909 : hSC_VBR->pppcountE = 0;
406 : }
407 : else
408 : {
409 : /* current mode is voiced */
410 481 : hSC_VBR->pppcountE++;
411 :
412 481 : if ( ( hSC_VBR->pppcountE == 1 && hSC_VBR->last_last_ppp_mode != 1 && !hSC_VBR->rate_control ) ||
413 174 : ( hSC_VBR->pppcountE == 1 && hSC_VBR->mode_QQF ) )
414 : {
415 397 : hSC_VBR->ppp_mode = 1;
416 397 : st->core_brate = PPP_NELP_2k80;
417 : }
418 84 : else if ( hSC_VBR->pppcountE == 2 )
419 : {
420 74 : if ( hSC_VBR->last_ppp_mode == 1 && !hSC_VBR->mode_QQF )
421 : {
422 : /* QFF mode */
423 0 : hSC_VBR->ppp_mode = 0;
424 : }
425 : else
426 : {
427 : /* QQF Mode */
428 74 : hSC_VBR->ppp_mode = 1;
429 74 : st->core_brate = PPP_NELP_2k80;
430 : }
431 : }
432 : else
433 : {
434 10 : hSC_VBR->ppp_mode = 0;
435 10 : hSC_VBR->pppcountE = 0;
436 : }
437 : }
438 :
439 2390 : if ( hSC_VBR->ppp_mode == 0 && hSC_VBR->set_ppp_generic == 1 )
440 : {
441 0 : hSC_VBR->set_ppp_generic = 0;
442 0 : st->coder_type = GENERIC;
443 : }
444 :
445 2390 : if ( st->last_core == HQ_CORE )
446 : {
447 1 : hSC_VBR->ppp_mode = 0;
448 1 : hSC_VBR->set_ppp_generic = 0;
449 1 : st->coder_type = TRANSITION;
450 : }
451 :
452 2390 : if ( hSC_VBR->last_ppp_mode && !hSC_VBR->ppp_mode && st->sp_aud_decision1 && st->bwidth == NB && st->Opt_SC_VBR ) /*if it were about to go from ppp->HQ*/
453 : {
454 0 : hSC_VBR->avoid_HQ_VBR_NB = 1;
455 0 : st->coder_type = GENERIC;
456 : }
457 :
458 2390 : if ( hSC_VBR->last_nelp_mode && st->sp_aud_decision1 && st->bwidth == NB && st->Opt_SC_VBR ) /*if it were about to go from nelp->HQ*/
459 : {
460 0 : hSC_VBR->avoid_HQ_VBR_NB = 1;
461 0 : st->coder_type = GENERIC;
462 : }
463 :
464 2390 : if ( ( ( st->old_pitch_buf[( 2 * NB_SUBFR ) - 1] > PPP_LAG_THRLD ) || ( st->pitch[1] > PPP_LAG_THRLD ) || !st->last_Opt_SC_VBR ) && ( hSC_VBR->ppp_mode == 1 ) )
465 : {
466 4 : hSC_VBR->ppp_mode = 0;
467 4 : st->core_brate = ACELP_7k20;
468 : }
469 :
470 2390 : return;
471 : }
|