Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include <math.h>
43 : #include "cnst.h"
44 : #include "rom_com.h"
45 : #include "prot.h"
46 : #include "wmc_auto.h"
47 :
48 : /*-------------------------------------------------------------------*
49 : * Local constants
50 : *-------------------------------------------------------------------*/
51 :
52 : #define BAND3k 15
53 : #define BIN_1k2 48
54 : #define BAND_2k 12
55 : #define BAND_0k4 4
56 : #define ALP 0.7f
57 : #define MALP ( 1.0f - ALP )
58 : #define ALPMY ( 0.86f )
59 : #define ALPY 1.5f
60 :
61 : #define CONTR_LIMIT 3012 /* Threshold to allow an increase in the contribution length */
62 :
63 :
64 : /*-------------------------------------------------------------------*
65 : * NoiseFill()
66 : *
67 : * noise fill function for unvoiced/inactive frames (used only in AMR-WB IO mode)
68 : *-------------------------------------------------------------------*/
69 :
70 0 : static void NoiseFill(
71 : float *exc_diffQ, /* i/o: Noise per band */
72 : int16_t *seed_tcx, /* i : Seed for noise */
73 : const int16_t Mbands_gn /* i : number of bands */
74 : )
75 : {
76 : int16_t StartBin, NB_Qbins, i_band, k;
77 0 : StartBin = 0;
78 0 : NB_Qbins = 0;
79 :
80 0 : for ( i_band = 0; i_band < Mbands_gn; i_band++ )
81 : {
82 0 : StartBin += NB_Qbins;
83 0 : NB_Qbins = crit_bins[i_band];
84 0 : for ( k = StartBin; k < NB_Qbins + StartBin; k++ )
85 : {
86 0 : exc_diffQ[k] += 0.75f * ( (float) own_random( seed_tcx ) / PCM16_TO_FLT_FAC );
87 : }
88 : }
89 :
90 0 : return;
91 : }
92 :
93 : /*-------------------------------------------------------------------*
94 : * Ener_per_band()
95 : *
96 : * Computed the energy per band (used only in AMR-WB IO mode)
97 : *-------------------------------------------------------------------*/
98 :
99 0 : static void Ener_per_band(
100 : const float exc_diff[], /* i : target signal */
101 : float y_gain4[] /* o : Energy per band to quantize */
102 : )
103 : {
104 : float etmp;
105 : const float *pt;
106 : int16_t i, j;
107 :
108 0 : pt = exc_diff;
109 0 : for ( j = 0; j < CRIT_NOIS_BAND; j++ )
110 : {
111 0 : etmp = 0.01f;
112 0 : for ( i = 0; i < crit_bins[j]; i++ )
113 : {
114 0 : etmp += ( *pt * *pt );
115 0 : pt++;
116 : }
117 0 : etmp = max( etmp, .01f );
118 0 : y_gain4[j] = (float) sqrt( etmp );
119 : }
120 :
121 0 : return;
122 : }
123 :
124 :
125 : /*-------------------------------------------------------------------*
126 : * Apply_gain()
127 : *
128 : * Rescaling of the modified excitation vector (used only in AMR-WB IO mode)
129 : *-------------------------------------------------------------------*/
130 :
131 0 : static void Apply_gain(
132 : float exc_diffQ[], /* i/o: Quantized excitation */
133 : float Ener_per_bd_iQ[], /* o : Target ener per band */
134 : float Ener_per_bd_yQ[] /* o : Ener per band for norm vector */
135 : )
136 : {
137 : int16_t i, i_band;
138 : int16_t StartBin, NB_Qbins;
139 : float y_gain;
140 :
141 : /*------------------------------------------------------------------
142 : * For all the bands
143 : * Find the energy ratio between modified vector and original vector
144 : *------------------------------------------------------------------*/
145 :
146 0 : StartBin = 0;
147 0 : NB_Qbins = 0;
148 0 : for ( i_band = 0; i_band < CRIT_NOIS_BAND; i_band++ )
149 : {
150 0 : StartBin += NB_Qbins;
151 0 : NB_Qbins = crit_bins[i_band];
152 0 : y_gain = (float) ( Ener_per_bd_iQ[i_band] / Ener_per_bd_yQ[i_band] );
153 :
154 : /*------------------------------------------------------------------
155 : * For bands below 400 Hz or for unvoiced/inactive frames
156 : * only apply the energy ratio
157 : *------------------------------------------------------------------*/
158 :
159 0 : for ( i = StartBin; i < NB_Qbins + StartBin; i++ )
160 : {
161 0 : exc_diffQ[i] *= y_gain;
162 : }
163 : }
164 :
165 0 : return;
166 : }
167 :
/*-------------------------------------------------------------------*
 * normalize_spec()
 *
 * Spectrum normalization: the whole section is multiplied by one
 * common factor so that the bin selected by emaximum() ends up with
 * the magnitude |fac_up| (used only in AMR-WB IO mode)
 *-------------------------------------------------------------------*/

static void normalize_spec(
    float fac_up,         /* i  : magnitude given to the reference bin   */
    float fy_norm[],      /* i/o: spectrum section, scaled in place      */
    const int16_t L_frame /* i  : Section length                         */
)
{
    float scale;
    int16_t ref, bin;

    /* NOTE(review): emaximum() presumably returns the index of the dominant bin - confirm;
       the value it writes to 'scale' is not used, it is overwritten below */
    ref = emaximum( fy_norm, L_frame, &scale );

    /* avoid a division by zero; the bin can be negative, so only an exact zero is replaced */
    if ( fy_norm[ref] == 0 )
    {
        fy_norm[ref] = 1;
    }

    scale = (float) fabs( fac_up / fy_norm[ref] );

    for ( bin = 0; bin < L_frame; bin++ )
    {
        fy_norm[bin] *= scale;
    }

    return;
}
197 :
198 : /*-------------------------------------------------------------------*
199 : * gs_dec_amr_wb()
200 : *
201 : * Modification of decoded excitation vector depending of the content type (used only in AMR-WB IO mode)
202 : *-------------------------------------------------------------------*/
203 :
204 0 : static void gs_dec_amr_wb(
205 : const int32_t core_brate, /* i : bitrate allocated to the core */
206 : int16_t *seed_tcx, /* i/o: seed used for noise generation */
207 : const float dct_in[], /* i : cdt of residual signal */
208 : float dct_out[], /* i/o: dct of pitch only excitation */
209 : const float pitch[], /* i : pitch buffer */
210 : const float voice_fac, /* i : gain pitch */
211 : const int16_t clas, /* i : signal frame class */
212 : const int16_t coder_type, /* i : coder type */
213 : const int16_t VeryLowRateSTflag /* i : Enable the noise enhancement for very low rate stereo generic mode */
214 : )
215 : {
216 : int16_t i, mDiff_len;
217 : float exc_diffQ[L_FRAME16k];
218 : int16_t j;
219 : float etmp14;
220 : float ftmp, ftmp1;
221 : float Ener_per_bd_iQ[CRIT_NOIS_BAND];
222 : float Ener_per_bd_yQ[CRIT_NOIS_BAND];
223 :
224 : /*--------------------------------------------------------------------------------------*
225 : * compute the energy per band for the decoded excitation (in frequency domain)
226 : *--------------------------------------------------------------------------------------*/
227 :
228 0 : Ener_per_band( dct_in, Ener_per_bd_iQ );
229 :
230 : /*--------------------------------------------------------------------------------------*
231 : * adjust quantization noise for the low level to compensate for the poor 6 bit gainQ
232 : *--------------------------------------------------------------------------------------*/
233 :
234 0 : if ( core_brate < ACELP_12k65 )
235 : {
236 0 : ftmp = 0;
237 0 : for ( i = 0; i < CRIT_NOIS_BAND; i++ )
238 : {
239 0 : ftmp = max( Ener_per_bd_iQ[i], ftmp );
240 : }
241 :
242 0 : if ( ( coder_type == INACTIVE || clas == VOICED_TRANSITION ) && ftmp < 20.0f )
243 : {
244 0 : for ( i = 0; i < CRIT_NOIS_BAND; i++ )
245 : {
246 0 : Ener_per_bd_iQ[i] *= crit_bins_corr[i];
247 : }
248 : }
249 : }
250 :
251 : /*--------------------------------------------------------------------------------------*
252 : * Find the length of the temporal contribution, with a minimum contribution of 1.2kHz
253 : *--------------------------------------------------------------------------------------*/
254 :
255 0 : minimum( pitch, NB_SUBFR, &etmp14 );
256 0 : etmp14 = 12800.0f / etmp14;
257 0 : etmp14 *= 8.0f;
258 :
259 0 : if ( etmp14 <= CONTR_LIMIT && ( VeryLowRateSTflag || core_brate >= ACELP_12k65 ) )
260 : {
261 0 : etmp14 *= 2;
262 : }
263 :
264 0 : mDiff_len = (int16_t) ( etmp14 + 0.5f );
265 0 : etmp14 = PCM16_TO_FLT_FAC;
266 0 : j = 0;
267 0 : for ( i = 0; i < CRIT_NOIS_BAND; i++ )
268 : {
269 0 : if ( fabs( crit_bands_loc[i] - mDiff_len ) < etmp14 )
270 : {
271 0 : etmp14 = (float) fabs( crit_bands_loc[i] - mDiff_len );
272 0 : j += crit_bins[i];
273 : }
274 : }
275 :
276 0 : mDiff_len = j;
277 :
278 0 : if ( mDiff_len < BIN_1k2 )
279 : {
280 0 : mDiff_len = BIN_1k2;
281 : }
282 :
283 0 : if ( ( VeryLowRateSTflag && ( clas == VOICED_CLAS || clas == AUDIO_CLAS ) ) ) /* Do not apply normalization on VOICED signal in case of stereo */
284 : {
285 0 : mvr2r( dct_in, exc_diffQ, L_FRAME );
286 :
287 : /* normalization of the spectrum and noise fill */
288 0 : normalize_spec( 1.0f, exc_diffQ + mDiff_len, L_FRAME - mDiff_len );
289 : }
290 : else
291 : {
292 0 : mvr2r( dct_in, exc_diffQ, mDiff_len );
293 0 : set_f( exc_diffQ + mDiff_len, 0, L_FRAME - mDiff_len );
294 :
295 : /* normalization of the spectrum and noise fill */
296 0 : normalize_spec( 4, exc_diffQ, mDiff_len );
297 : }
298 0 : NoiseFill( exc_diffQ, seed_tcx, CRIT_NOIS_BAND );
299 :
300 :
301 : /*--------------------------------------------------------------------------------------*
302 : * Recompute energy per band of the modified excitation vector (in frequency domain)
303 : *--------------------------------------------------------------------------------------*/
304 :
305 0 : Ener_per_band( exc_diffQ, Ener_per_bd_yQ );
306 :
307 : /*--------------------------------------------------------------------------------------*
308 : * Compute tilt factor and amplify HF accordingly
309 : *--------------------------------------------------------------------------------------*/
310 :
311 0 : ftmp = ( 0.5f * ( 1.0f - voice_fac ) ); /* 1=unvoiced, 0=voiced */
312 :
313 0 : for ( i = 240; i < L_FRAME; i++ )
314 : {
315 0 : ftmp1 = ftmp * ( 0.067f * i - 15.0f );
316 0 : ftmp1 = max( 1.0f, ftmp1 );
317 0 : exc_diffQ[i] *= ftmp1;
318 : }
319 :
320 : /*--------------------------------------------------------------------------------------*
321 : * Match the energy of the modified excitation vector to the decoded excitation
322 : *--------------------------------------------------------------------------------------*/
323 :
324 0 : Apply_gain( exc_diffQ, Ener_per_bd_iQ, Ener_per_bd_yQ );
325 :
326 : /*--------------------------------------------------------------------------------------*
327 : * Copy to the output vector
328 : *--------------------------------------------------------------------------------------*/
329 :
330 0 : mvr2r( exc_diffQ, dct_out, L_FRAME );
331 :
332 0 : return;
333 : }
334 :
335 : /*-------------------------------------------------------------------*
336 : * improv_amr_wb_gs()
337 : *
338 : * Modify the decoded excitation to increase quality of
339 : * unvoiced and audio signals (used only in AMR-WB IO mode)
340 : *-------------------------------------------------------------------*/
341 :
342 0 : void improv_amr_wb_gs(
343 : const int16_t clas, /* i : signal frame class */
344 : const int16_t coder_type, /* i : coder type */
345 : const int32_t core_brate, /* i : bitrate allocated to the core */
346 : int16_t *seed_tcx, /* i/o: Seed used for noise generation */
347 : float *old_Aq, /* i/o: old LPC filter coefficient */
348 : float *mem_syn2, /* i/o: synthesis memory */
349 : const float lt_voice_fac, /* i/o: long term voice factor */
350 : const int16_t locattack, /* i : Flag for a detected attack */
351 : float *Aq, /* i/o: Decoded LP filter coefficient */
352 : float *exc2, /* i/o: Decoded complete excitation */
353 : float *mem_tmp, /* i/o: synthesis temporary memory */
354 : float *syn, /* i/o: Decoded synthesis to be updated */
355 : const float *pitch_buf, /* i : Decoded pitch buffer */
356 : const float Last_ener, /* i : Last energy */
357 : const int16_t rate_switching_reset, /* i : rate switching reset flag */
358 : const int16_t last_coder_type, /* i : Last coder_type */
359 : const int16_t VeryLowRateSTflag /* i : Enable the noise enhancement for very low rate stereo generic mode */
360 : )
361 : {
362 : int16_t i;
363 : float dct_exc_in[L_FRAME], dct_exc_out[L_FRAME];
364 : float Aq_orig[NB_SUBFR * ( M + 1 )], enr_LP_old, enr_LP_new;
365 :
366 : /*------------------------------------------------------------*
367 : * Condition to enter the section on excitation modification
368 : *------------------------------------------------------------*/
369 :
370 : /* Enter the modification for all inactive frames and also for unvoiced frames if bitrate is below 8k85 */
371 0 : if ( VeryLowRateSTflag ||
372 0 : ( ( locattack == 0 && core_brate <= ACELP_12k65 ) &&
373 0 : ( ( core_brate < ACELP_8k85 && clas != AUDIO_CLAS && ( clas == UNVOICED_CLAS || clas == VOICED_TRANSITION ) ) || coder_type == INACTIVE ) ) )
374 : {
375 : /*------------------------------------------------------------*
376 : * two differents paths:
377 : * unvoiced or inactive
378 : * generic audio sound
379 : * LP filter smoothing for inactive parts
380 : *------------------------------------------------------------*/
381 0 : *seed_tcx = (int16_t) ( (int16_t) ( pitch_buf[0] * 64.0f ) * (int16_t) ( pitch_buf[3] * 64.0f ) );
382 :
383 0 : if ( coder_type == INACTIVE && Last_ener > -3.0f && last_coder_type == UNVOICED && rate_switching_reset == 0 )
384 : {
385 0 : mvr2r( Aq, Aq_orig, NB_SUBFR * ( M + 1 ) );
386 :
387 0 : for ( i = 0; i < NB_SUBFR * ( M + 1 ); i++ )
388 : {
389 0 : Aq[i] = ALP * old_Aq[i] + MALP * Aq[i];
390 : }
391 :
392 : /* check the smoothed LP filter stability */
393 0 : enr_LP_old = enr_1_Az( old_Aq, L_SUBFR );
394 0 : for ( i = 0; i < NB_SUBFR; i++ )
395 : {
396 0 : enr_LP_new = enr_1_Az( Aq + i * ( M + 1 ), L_SUBFR );
397 :
398 0 : if ( enr_LP_new > 128 * enr_LP_old )
399 : {
400 : /* filter is unstable, do not modify the excitation */
401 0 : mvr2r( Aq_orig, Aq, NB_SUBFR * ( M + 1 ) );
402 0 : return;
403 : }
404 :
405 0 : enr_LP_old = enr_LP_new;
406 : }
407 : }
408 :
409 : /*------------------------------------------------------------*
410 : * Find frequency representation of the excitation
411 : * Do the excitation modification according to the content
412 : * Go back to time domain -> Overwrite excitation
413 : *------------------------------------------------------------*/
414 :
415 0 : edct( exc2, dct_exc_in, L_FRAME, EVS_MONO );
416 :
417 0 : set_f( exc2, 0, L_FRAME );
418 0 : set_f( dct_exc_out, 0, L_FRAME );
419 :
420 0 : gs_dec_amr_wb( core_brate, seed_tcx, dct_exc_in, dct_exc_out, pitch_buf, lt_voice_fac, clas, coder_type, VeryLowRateSTflag );
421 :
422 0 : edct( dct_exc_out, exc2, L_FRAME, EVS_MONO );
423 :
424 : /*------------------------------------------------------------*
425 : * Redo core synthesis at 12k8 Hz with the modified excitation
426 : *------------------------------------------------------------*/
427 :
428 0 : mvr2r( mem_tmp, mem_syn2, M );
429 0 : syn_12k8( L_FRAME, Aq, exc2, syn, mem_syn2, 1 );
430 : }
431 :
432 0 : return;
433 : }
|