Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #include <math.h>
40 : #include "cnst.h"
41 : #include "prot.h"
42 : #include "rom_com.h"
43 : #include "wmc_auto.h"
44 :
45 :
46 : /*-------------------------------------------------------------------*
47 : * DTFS_getSpEngyFromResAmp()
48 : *
49 : * Get speech energy from the DTFS
50 : *-------------------------------------------------------------------*/
51 :
52 0 : static float DTFS_getSpEngyFromResAmp(
53 : DTFS_STRUCTURE X, /* i : DTFS */
54 : float lband, /* i : Low band end to get energy from */
55 : float hband, /* i : High band end to get energy from */
56 : const float *curr_lsp /* i : LPCs */
57 : )
58 : {
59 : int16_t i, k;
60 : float w, tmp, Re, Im;
61 : double en, freq, fdiff;
62 :
63 0 : fdiff = (float) INT_FS_12k8 / X.lag;
64 :
65 0 : if ( hband == X.upper_cut_off_freq )
66 : {
67 0 : hband = 4001.0;
68 : }
69 :
70 0 : en = 0.0f;
71 0 : for ( freq = 0.0, k = 0; k <= min( X.lag >> 1, X.nH_4kHz ); k++, freq += fdiff )
72 : {
73 0 : if ( X.a[k] < 0.0 )
74 : {
75 0 : X.a[k] = 0.0;
76 : }
77 :
78 0 : if ( freq < hband && freq >= lband )
79 : {
80 0 : Re = 1.0f;
81 0 : Im = 0.0f;
82 0 : tmp = (float) ( PI2 * freq / (float) INT_FS_12k8 );
83 0 : for ( i = 0, w = tmp; i < M + 1; i++, w += tmp )
84 : {
85 0 : Re += (float) ( curr_lsp[i] * cos( w ) );
86 0 : Im -= (float) ( curr_lsp[i] * sin( w ) );
87 : }
88 0 : if ( k == 0 || ( X.lag % 2 == 0 && k == X.lag >> 1 ) )
89 : {
90 0 : en += X.a[k] * X.a[k] / ( Re * Re + Im * Im );
91 : }
92 : else
93 : {
94 0 : en += 2.0 * X.a[k] * X.a[k] / ( Re * Re + Im * Im );
95 : }
96 : }
97 : }
98 0 : return ( (float) en );
99 : }
100 :
101 : /*-------------------------------------------------------------------*
102 : * DTFS_quant_cw()
103 : *
104 : * DTFS quantization
105 : *-------------------------------------------------------------------*/
106 :
107 : #define P_CBSIZE 64
108 :
109 0 : static int16_t DTFS_quant_cw(
110 : DTFS_STRUCTURE *X, /* i/o: DTFS unquant inp, quant out */
111 : const int16_t pl, /* i : Previous lag */
112 : const float *curr_lpc, /* i : LPC */
113 : int16_t *POWER_IDX, /* o : Power index */
114 : int16_t *AMP_IDX, /* o : Amplitude index */
115 : float *lastLgainE, /* i/o: last frame lowband gain */
116 : float *lastHgainE, /* i/o: last frame highband gain */
117 : float *lasterbE /* i/o: last frame ERB vector */
118 : )
119 : {
120 0 : int16_t num_erb = 0;
121 0 : const float( *PowerCB )[2] = NULL;
122 : float G_CURR_ERB[NUM_ERB_WB];
123 : float G_A_POWER[2];
124 : float tmp, w[2], target1, target2, error, minerror;
125 : float mfreq[NUM_ERB_WB], diff_erb[NUM_ERB_WB], curr_erb[NUM_ERB_WB];
126 : int16_t j, slot[NUM_ERB_WB], bincount;
127 0 : int16_t returnFlag = 1;
128 : float amperror;
129 :
130 0 : if ( X->upper_cut_off_freq == 4000.0 )
131 : {
132 0 : num_erb = NUM_ERB_NB;
133 0 : PowerCB = PowerCB_NB;
134 : }
135 0 : else if ( X->upper_cut_off_freq == 6400.0 )
136 : {
137 0 : num_erb = NUM_ERB_WB;
138 0 : PowerCB = PowerCB_WB;
139 : }
140 :
141 : /* Getting the Speech Domain Energy LOG Ratio */
142 0 : w[0] = (float) max( 1E-10, log10( DTFS_getSpEngyFromResAmp( *X, 0.0, 1104.5, curr_lpc ) ) );
143 0 : w[1] = (float) max( 1E-10, log10( DTFS_getSpEngyFromResAmp( *X, 1104.5, X->upper_cut_off_freq, curr_lpc ) ) );
144 0 : tmp = w[0] + w[1];
145 0 : w[0] /= tmp;
146 0 : w[1] /= tmp;
147 :
148 : /* Power Quantization */
149 0 : G_A_POWER[0] = (float) log10( X->lag * DTFS_setEngyHarm( 92.0, 1104.5, 0.0, 1104.5, 1.0, X ) );
150 0 : G_A_POWER[1] = (float) log10( X->lag * DTFS_setEngyHarm( 1104.5, X->upper_cut_off_freq_of_interest, 1104.5, X->upper_cut_off_freq, 1.0, X ) );
151 0 : target1 = G_A_POWER[0] - *lastLgainE;
152 0 : target2 = G_A_POWER[1] - *lastHgainE;
153 0 : minerror = (float) HUGE_VAL;
154 0 : *POWER_IDX = -1;
155 0 : for ( j = 0; j < P_CBSIZE; j++ )
156 : {
157 0 : error = (float) ( w[0] * fabs( target1 - PowerCB[j][0] ) + w[1] * fabs( target2 - PowerCB[j][1] ) );
158 0 : if ( ( target1 >= PowerCB[j][0] ) && ( target2 >= PowerCB[j][1] ) )
159 : {
160 0 : error *= 0.8f;
161 : }
162 0 : if ( error < minerror )
163 : {
164 0 : minerror = error;
165 0 : *POWER_IDX = j;
166 : }
167 : }
168 0 : DTFS_to_erb( *X, curr_erb );
169 :
170 0 : for ( j = 0; j < num_erb; j++ )
171 : {
172 0 : G_CURR_ERB[j] = curr_erb[j];
173 : }
174 0 : erb_slot( X->lag, slot, mfreq, num_erb );
175 : /* Amplitude Quantization */
176 0 : erb_diff( lasterbE, pl, curr_erb, X->lag, curr_lpc, diff_erb, AMP_IDX, num_erb );
177 :
178 : /* Amplitude Dequantization */
179 0 : erb_add( curr_erb, X->lag, lasterbE, pl, AMP_IDX, num_erb );
180 0 : curr_erb[0] = curr_erb[1] * 0.3f;
181 0 : curr_erb[num_erb - 2] = curr_erb[num_erb - 3] * 0.3f;
182 0 : curr_erb[num_erb - 1] = 0;
183 : /* Determine if the amplitude quantization is good enough */
184 0 : amperror = 0.0;
185 0 : bincount = 0;
186 0 : for ( j = 1; j < 10; j++ )
187 : {
188 0 : if ( slot[j] != 0 )
189 : {
190 0 : amperror += (float) ( fabs( G_CURR_ERB[j] - curr_erb[j] ) );
191 0 : bincount++;
192 : }
193 : }
194 0 : amperror /= bincount;
195 :
196 0 : if ( ( amperror > 0.47 ) && ( target1 > -0.4 ) )
197 : {
198 0 : returnFlag = 0; /* Bumping up */
199 : }
200 :
201 0 : DTFS_erb_inv( curr_erb, slot, mfreq, X, num_erb );
202 :
203 : /* Back up the lasterbE memory after power normalization */
204 0 : DTFS_setEngyHarm( 92.0, 1104.5, 0.0, 1104.5, 1.0, X );
205 0 : DTFS_setEngyHarm( 1104.5, X->upper_cut_off_freq_of_interest, 1104.5, X->upper_cut_off_freq, 1.0, X );
206 :
207 0 : DTFS_to_erb( *X, lasterbE );
208 :
209 : /* Power Dequantization */
210 0 : *lastLgainE += (float) PowerCB[*POWER_IDX][0];
211 0 : *lastHgainE += (float) PowerCB[*POWER_IDX][1];
212 0 : target1 = (float) pow( 10.0, (double) ( *lastLgainE ) ) / X->lag;
213 :
214 0 : if ( !( target1 >= 0.0 ) )
215 : {
216 0 : target1 = 0;
217 : }
218 :
219 0 : DTFS_setEngyHarm( 92.0f, 1104.5f, 0.0f, 1104.5f, target1, X );
220 0 : target2 = (float) pow( 10.0, (double) ( *lastHgainE ) ) / X->lag;
221 :
222 0 : if ( !( target2 >= 0.0 ) )
223 : {
224 0 : target2 = 0;
225 : }
226 :
227 0 : DTFS_setEngyHarm( 1104.5, X->upper_cut_off_freq_of_interest, 1104.5, X->upper_cut_off_freq, target2, X );
228 :
229 0 : return returnFlag; /* amp quant performance pass/fail */
230 : }
231 :
232 : /*-------------------------------------------------------------------*
233 : * DTFS_alignment_fine_new()
234 : *
235 : * Shift value for DTFS finer alignment.
236 : *-------------------------------------------------------------------*/
237 :
238 0 : static float DTFS_alignment_fine_new(
239 : DTFS_STRUCTURE X1_DTFS, /* i : X1 the reference DTFS to keep fixed */
240 : DTFS_STRUCTURE X2_DTFS, /* i : X2 the test DTFS to shift to find best match */
241 : float Eshift /* i : Expected shift - coarse value */
242 : )
243 : {
244 : int16_t k;
245 : float maxcorr, corr, Adiff, diff, tmp, tmp1, fshift, n;
246 :
247 0 : if ( X1_DTFS.lag < X2_DTFS.lag )
248 : {
249 0 : DTFS_zeroPadd( X2_DTFS.lag, &X1_DTFS );
250 : }
251 0 : maxcorr = (float) -HUGE_VAL;
252 0 : fshift = Eshift;
253 0 : Adiff = 20.0f;
254 0 : diff = 1.0f;
255 0 : for ( n = Eshift - Adiff + 1; n <= Eshift + Adiff; n += diff )
256 : {
257 0 : corr = tmp = 0.0;
258 : /* bit-exact optimization - PI2/X2_DTFS.lag should be counted as a single divide outside loops */
259 0 : tmp1 = (float) ( PI2 * n / X2_DTFS.lag );
260 :
261 0 : for ( k = 0; k <= min( X2_DTFS.lag >> 1, X2_DTFS.nH ); k++, tmp += tmp1 )
262 : {
263 0 : corr += (float) ( ( X1_DTFS.a[k] * X2_DTFS.a[k] + X1_DTFS.b[k] * X2_DTFS.b[k] ) * cos( tmp ) );
264 0 : corr += (float) ( ( X1_DTFS.b[k] * X2_DTFS.a[k] - X1_DTFS.a[k] * X2_DTFS.b[k] ) * sin( tmp ) );
265 : }
266 0 : if ( corr * ( 1.0f - 0.01f * fabs( n - Eshift ) ) > maxcorr )
267 : {
268 0 : fshift = n;
269 0 : maxcorr = corr;
270 : }
271 : }
272 :
273 0 : return fshift; /* o : shift value to shift X2 by */
274 : }
275 :
276 :
277 : /*-------------------------------------------------------------------*
278 : * ppp_quarter_encoder()
279 : *
280 : * PPP quarter encoder
281 : *--------------------------------------------------------------------*/
282 :
283 0 : ivas_error ppp_quarter_encoder(
284 : int16_t *returnFlag, /* o : return value */
285 : BSTR_ENC_HANDLE hBstr, /* i/o: encoder bitstream handle */
286 : DTFS_STRUCTURE *CURRCW_Q, /* o : Quantized (amp/phase) DTFS */
287 : DTFS_STRUCTURE *TARGETCW, /* o : DTFS with quant phase but unquant Amp */
288 : const int16_t prevCW_lag, /* i : previous lag */
289 : DTFS_STRUCTURE vCURRCW_NQ, /* i : Unquantized DTFS */
290 : const float *curr_lpc, /* i : LPCS */
291 : float *lastLgainE, /* i/o: last low band gain */
292 : float *lastHgainE, /* i/o: last high band gain */
293 : float *lasterbE, /* i/o: last ERB vector */
294 : DTFS_STRUCTURE PREV_CW_E /* i : past DTFS */
295 : )
296 : {
297 : DTFS_STRUCTURE *PREVDTFS;
298 : float tmp, temp_pl, temp_l;
299 : int16_t l;
300 : int16_t POWER_IDX; /* Codebook index for the power quantization for PPP */
301 : int16_t AMP_IDX[2]; /* Codebook index for the Amplitude quantization for PPP */
302 0 : float Erot = 0.0, z = 0.0;
303 : ivas_error error;
304 :
305 0 : error = IVAS_ERR_OK;
306 0 : *returnFlag = 1;
307 :
308 0 : if ( ( error = DTFS_new( &PREVDTFS ) ) != IVAS_ERR_OK )
309 : {
310 0 : return error;
311 : }
312 :
313 :
314 0 : DTFS_copy( CURRCW_Q, vCURRCW_NQ );
315 0 : DTFS_copy( PREVDTFS, PREV_CW_E );
316 :
317 0 : l = CURRCW_Q->lag;
318 0 : temp_l = (float) CURRCW_Q->lag;
319 0 : temp_pl = (float) prevCW_lag;
320 :
321 0 : DTFS_adjustLag( PREVDTFS, l );
322 :
323 0 : z = ( ( L_FRAME - temp_l ) * ( temp_l + temp_pl ) ) / ( 2 * temp_l * temp_pl );
324 :
325 0 : Erot = (float) ( temp_l - rint_new( temp_l * ( z - floor( z ) ) ) );
326 :
327 0 : DTFS_phaseShift( PREVDTFS, (float) ( PI2 * Erot / CURRCW_Q->lag ) );
328 0 : DTFS_car2pol( PREVDTFS );
329 :
330 : /* Amplitude Quantization */
331 0 : DTFS_car2pol( CURRCW_Q ); /* at this point currCW_q=curr_nq */
332 :
333 0 : *returnFlag = DTFS_quant_cw( CURRCW_Q, prevCW_lag, curr_lpc, &POWER_IDX, AMP_IDX, lastLgainE, lastHgainE, lasterbE );
334 :
335 0 : push_indice( hBstr, IND_AMP0, AMP_IDX[0], 6 );
336 0 : push_indice( hBstr, IND_AMP1, AMP_IDX[1], 6 );
337 0 : push_indice( hBstr, IND_POWER, POWER_IDX, 6 );
338 :
339 0 : DTFS_copy( TARGETCW, *CURRCW_Q );
340 :
341 : /* Copying phase spectrum over */
342 0 : mvr2r( PREVDTFS->b, CURRCW_Q->b, (int16_t) ( CURRCW_Q->lag >> 1 ) + 1 );
343 :
344 0 : DTFS_pol2car( CURRCW_Q );
345 0 : DTFS_pol2car( TARGETCW );
346 0 : tmp = DTFS_alignment_fine_new( *TARGETCW, *CURRCW_Q, 0.0 );
347 :
348 0 : if ( ( ( tmp + 3 ) > 7 ) || ( ( tmp + 3 ) < 0 ) )
349 : {
350 0 : tmp = 0;
351 0 : *returnFlag = 0;
352 : }
353 :
354 0 : DTFS_phaseShift( CURRCW_Q, (float) ( PI2 * tmp / CURRCW_Q->lag ) );
355 :
356 0 : push_indice( hBstr, IND_GLOBAL_ALIGNMENT, (int16_t) ( tmp + 3 ), 3 );
357 :
358 0 : free( PREVDTFS );
359 :
360 0 : return error;
361 : }
362 :
363 :
364 : /*-------------------------------------------------------------------*
365 : * set_ppp_mode()
366 : *
367 : * Determine if the current frame should be coded by PPP or not
368 : * Impose PPP - CELP - CELP pattern
369 : *-------------------------------------------------------------------*/
370 :
371 0 : void set_ppp_mode(
372 : Encoder_State *st, /* i/o: encoder state structure */
373 : const int16_t noisy_speech_HO, /* i : SC-VBR noisy speech HO flag */
374 : const int16_t clean_speech_HO, /* i : SC-VBR clean speech HO flag */
375 : const int16_t NB_speech_HO, /* i : SC-VBR NB speech HO flag */
376 : const int16_t localVAD_he /* i : HE-SAD flag without hangover*/
377 : )
378 : {
379 0 : SC_VBR_ENC_HANDLE hSC_VBR = st->hSC_VBR;
380 :
381 0 : if ( st->vad_flag == 1 &&
382 0 : ( noisy_speech_HO == 1 || clean_speech_HO == 1 || NB_speech_HO == 1 ) &&
383 0 : ( st->localVAD == 0 || localVAD_he == 0 ) )
384 : {
385 0 : st->coder_type = UNVOICED;
386 : }
387 :
388 0 : if ( st->coder_type == INACTIVE && st->vad_flag == 0 && hSC_VBR->last_nelp_mode == 1 ) /* avoid HO frame go to GSC */
389 : {
390 0 : st->coder_type = UNVOICED;
391 : }
392 :
393 : /* force the coder to NELP mode during the first five frames */
394 : /* this will indicate the decoder that the coder is operating in the VBR mode */
395 0 : if ( st->ini_frame < 5 )
396 : {
397 0 : st->coder_type = UNVOICED;
398 0 : st->vad_flag = 1;
399 : }
400 :
401 : /* Pattern PPP-CELP-CELP (pppcountE holds number of consecutive PPP frames) */
402 0 : if ( st->coder_type != VOICED || st->last_coder_type == TRANSITION )
403 : {
404 : /* ensure no transient to PPP transition */
405 0 : hSC_VBR->pppcountE = 0;
406 : }
407 : else
408 : {
409 : /* current mode is voiced */
410 0 : hSC_VBR->pppcountE++;
411 :
412 0 : if ( ( hSC_VBR->pppcountE == 1 && hSC_VBR->last_last_ppp_mode != 1 && !hSC_VBR->rate_control ) ||
413 0 : ( hSC_VBR->pppcountE == 1 && hSC_VBR->mode_QQF ) )
414 : {
415 0 : hSC_VBR->ppp_mode = 1;
416 0 : st->core_brate = PPP_NELP_2k80;
417 : }
418 0 : else if ( hSC_VBR->pppcountE == 2 )
419 : {
420 0 : if ( hSC_VBR->last_ppp_mode == 1 && !hSC_VBR->mode_QQF )
421 : {
422 : /* QFF mode */
423 0 : hSC_VBR->ppp_mode = 0;
424 : }
425 : else
426 : {
427 : /* QQF Mode */
428 0 : hSC_VBR->ppp_mode = 1;
429 0 : st->core_brate = PPP_NELP_2k80;
430 : }
431 : }
432 : else
433 : {
434 0 : hSC_VBR->ppp_mode = 0;
435 0 : hSC_VBR->pppcountE = 0;
436 : }
437 : }
438 :
439 0 : if ( hSC_VBR->ppp_mode == 0 && hSC_VBR->set_ppp_generic == 1 )
440 : {
441 0 : hSC_VBR->set_ppp_generic = 0;
442 0 : st->coder_type = GENERIC;
443 : }
444 :
445 0 : if ( st->last_core == HQ_CORE )
446 : {
447 0 : hSC_VBR->ppp_mode = 0;
448 0 : hSC_VBR->set_ppp_generic = 0;
449 0 : st->coder_type = TRANSITION;
450 : }
451 :
452 0 : if ( hSC_VBR->last_ppp_mode && !hSC_VBR->ppp_mode && st->sp_aud_decision1 && st->bwidth == NB && st->Opt_SC_VBR ) /*if it were about to go from ppp->HQ*/
453 : {
454 0 : hSC_VBR->avoid_HQ_VBR_NB = 1;
455 0 : st->coder_type = GENERIC;
456 : }
457 :
458 0 : if ( hSC_VBR->last_nelp_mode && st->sp_aud_decision1 && st->bwidth == NB && st->Opt_SC_VBR ) /*if it were about to go from nelp->HQ*/
459 : {
460 0 : hSC_VBR->avoid_HQ_VBR_NB = 1;
461 0 : st->coder_type = GENERIC;
462 : }
463 :
464 0 : if ( ( ( st->old_pitch_buf[( 2 * NB_SUBFR ) - 1] > PPP_LAG_THRLD ) || ( st->pitch[1] > PPP_LAG_THRLD ) || !st->last_Opt_SC_VBR ) && ( hSC_VBR->ppp_mode == 1 ) )
465 : {
466 0 : hSC_VBR->ppp_mode = 0;
467 0 : st->core_brate = ACELP_7k20;
468 : }
469 :
470 0 : return;
471 : }
|