Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include <math.h>
43 : #include "cnst.h"
44 : #include "prot.h"
45 : #include "wmc_auto.h"
46 :
47 : #define SCLSYN_LAMBDA 0.3f
48 :
49 : /*-------------------------------------------------------------------*
50 : * FEC_scale_syn()
51 : *
52 : * Smooth speech energy evolution when recovering after erasure(s): when energy control applies, exc[] and exc2[] are scaled by a gain evolving from gain1 to gain2 and synth[] is recomputed from exc2[]; *scaling_flag, *lp_ener_FEC_av, *lp_ener_FEC_max and *old_enr_LP are updated
53 : *-------------------------------------------------------------------*/
54 :
55 461775 : void FEC_scale_syn(
56 : const int16_t L_frame, /* i : length of the frame (L_FRAME; any other value is handled as L_FRAME16k) */
57 : int16_t clas, /* i : frame classification (passed by value, not modified in this function) */
58 : const int16_t last_good, /* i : last good frame classification */
59 : float *synth, /* i/o: synthesized speech at Fs = 12k8 Hz */
60 : const float *pitch, /* i : pitch values for each subframe */
61 : float enr_old, /* i : energy at the end of previous frame */
62 : float enr_q, /* i : transmitted energy for current frame (0 = E info not available in the bitstream) */
63 : const int16_t coder_type, /* i : coder type */
64 : const int16_t LSF_Q_prediction, /* i : LSF prediction mode */
65 : int16_t *scaling_flag, /* i/o: flag to indicate energy control of syn (0 = off, 1 or 2 = active) */
66 : float *lp_ener_FEC_av, /* i/o: averaged voiced signal energy */
67 : float *lp_ener_FEC_max, /* i/o: smoothed enr2_max energy of voiced frames (IIR-updated at the end of this function) */
68 : const int16_t bfi, /* i : current frame BFI */
69 : const int32_t total_brate, /* i : total bitrate */
70 : const int16_t prev_bfi, /* i : previous frame BFI */
71 : const int32_t last_core_brate, /* i : previous frame core bitrate */
72 : float *exc, /* i/o: excitation signal without enhancement */
73 : float *exc2, /* i/o: excitation signal with enhancement */
74 : const float Aq[], /* i : LP filter coefs */
75 : float *old_enr_LP, /* i/o: LP filter E of last good voiced frame */
76 : const float *mem_tmp, /* i : temp. initial synthesis filter states */
77 : float *mem_syn, /* o : initial synthesis filter states */
78 : const int16_t avoid_lpc_burst_on_recovery, /* i : if true the excitation energy is limited if LP has big gain */
79 : const int16_t force_scaling /* i : force scaling: apply the enr_q smoothing toward enr_old even outside voiced -> voiced recovery */
80 : )
81 : {
82 : int16_t i;
83 : float enr1, enr2, gain1, gain2; /* energies and gains at the beginning (1) and at the end (2) of the frame */
84 : float scaling, ener_max, enr2_av, enr2_max; /* scaling: factor limiting how far enr_q may exceed the reference energy */
85 : float enr_LP;
86 : float h1[L_FRAME / 2], tilt, rr0, rr1, pitch_dist, mean_pitch; /* h1: work buffer for the filter response used in the tilt estimate */
87 : int16_t k;
88 :
89 461775 : gain2 = 0.0f;
90 461775 : gain1 = 0.0f;
91 461775 : scaling = 1.0f;
92 461775 : enr_LP = 0.0f;
93 :
94 : /*-----------------------------------------------------------------*
95 : * Find the synthesis filter impulse response on voiced (enr_1_Az() result is kept in enr_LP)
96 : *-----------------------------------------------------------------*/
97 :
98 461775 : if ( clas >= VOICED_TRANSITION && clas < INACTIVE_CLAS )
99 : {
100 265935 : if ( L_frame == L_FRAME )
101 : {
102 147465 : enr_LP = enr_1_Az( Aq + ( NB_SUBFR - 1 ) * ( M + 1 ), L_SUBFR ); /* presumably the LP coefs of the last subframe (M + 1 coefs per subframe) */
103 : }
104 : else /* L_frame == L_FRAME16k */
105 : {
106 118470 : enr_LP = enr_1_Az( Aq + ( NB_SUBFR16k - 1 ) * ( M + 1 ), L_SUBFR );
107 : }
108 : }
109 :
110 : /*-----------------------------------------------------------------*
111 : * Define when to scale the synthesis
112 : *-----------------------------------------------------------------*/
113 :
114 461775 : if ( bfi )
115 : {
116 9 : *scaling_flag = 1; /* Always check synthesis on bad frames */
117 : }
118 461766 : else if ( prev_bfi )
119 : {
120 7221 : if ( ( LSF_Q_prediction == AUTO_REGRESSIVE ) || ( LSF_Q_prediction == MOVING_AVERAGE ) )
121 : {
122 6063 : *scaling_flag = 2; /* Decoded LSFs affected */
123 : }
124 1158 : else if ( coder_type != TRANSITION )
125 : {
126 498 : *scaling_flag = 1; /* SN, but not TC mode - LSF still affected by the interpolation */
127 : }
128 : else
129 : {
130 660 : *scaling_flag = 0; /* TC frame without LSF prediction: energy control switched off (LSF still possibly affected due to interpolation) */
131 : }
132 7221 : scaling = 1.5f; /* first good frame after an erasure: tighter limit on the allowed energy increase */
133 : }
134 : else
135 : {
136 454545 : if ( ( LSF_Q_prediction == AUTO_REGRESSIVE ) && ( *scaling_flag == 2 ) )
137 : {
138 2619 : *scaling_flag = 2; /* Continue with energy control till the end of AR prediction */
139 : }
140 451926 : else if ( *scaling_flag > 0 )
141 : {
142 9291 : ( *scaling_flag )--; /* If scaling flag was equal to 2, add one control frame to account for the LSF interpolation */
143 : }
144 454545 : scaling = 2.0f; /* later good frames: looser limit on the allowed energy increase */
145 : }
146 :
147 : /*-----------------------------------------------------------------*
148 : * Find the energy/gain at the end of the frame
149 : *-----------------------------------------------------------------*/
150 :
151 461775 : fer_energy( L_frame, clas, synth, pitch[( L_frame >> 6 ) - 1], &enr2, L_frame ); /* uses the pitch entry at index ( L_frame >> 6 ) - 1; result in enr2 */
152 :
153 461775 : if ( bfi || ( total_brate == ACELP_7k20 ) || ( total_brate == ACELP_8k00 ) )
154 : {
155 : /* energy control requested by the scaling flag set above */
156 174 : if ( *scaling_flag > 0 )
157 : {
158 9 : enr2 += 0.01f; /* offset keeps the divisions by enr2 below away from zero */
159 :
160 9 : if ( bfi ) /* In all bad frames, limit the gain to 1 */
161 : {
162 9 : gain2 = (float) sqrt( enr_old / enr2 );
163 9 : if ( gain2 > 1.0f )
164 : {
165 9 : gain2 = 1.0f;
166 : }
167 :
168 : /* find the energy/gain at the beginning of the frame */
169 9 : fer_energy( L_frame, clas, synth, pitch[0], &enr1, 0 );
170 :
171 9 : enr1 += 0.1f; /* offset keeps the division by enr1 away from zero */
172 9 : gain1 = (float) sqrt( enr_old / enr1 );
173 9 : if ( gain1 > 1.0f )
174 : {
175 0 : gain1 = 1.0f;
176 : }
177 : }
178 : else /* good frame */
179 : {
180 0 : if ( enr_q == 0 ) /* If E info (FEC protection bits) is not available in the bitstream */
181 : {
182 0 : enr_q = enr2; /* fall back to the measured end-of-frame energy, then limit it below */
183 :
184 0 : set_f( h1, 0.0f, L_FRAME / 2 );
185 0 : h1[0] = 1.0f; /* unit pulse as filter input */
186 0 : syn_filt( Aq + ( 3 * ( M + 1 ) ), M, h1, h1, L_FRAME / 2, h1 + ( M + 1 ), 0 ); /* presumably the synthesis filter impulse response; LP set at fixed offset 3 * ( M + 1 ) regardless of L_frame */
187 0 : rr0 = dotp( h1, h1, L_FRAME / 2 - 1 ) + 0.001f; /* small offset keeps the tilt division away from zero */
188 0 : rr1 = dotp( h1, h1 + 1, L_FRAME / 2 - 1 );
189 0 : tilt = rr1 / rr0; /* ratio of the lag-1 to the lag-0 product sum of h1 (assuming dotp() is a dot product) */
190 :
191 0 : pitch_dist = 0.0f;
192 0 : mean_pitch = pitch[0];
193 0 : for ( k = 0; k < ( NB_SUBFR - 1 ); k++ ) /* only the first NB_SUBFR pitch values are examined */
194 : {
195 0 : pitch_dist += abs( (int16_t) ( pitch[k + 1] + 0.5f ) - (int16_t) ( pitch[k] + 0.5f ) ); /* absolute difference of the rounded neighbouring pitch values */
196 0 : mean_pitch += pitch[k + 1];
197 : }
198 0 : pitch_dist /= (float) ( NB_SUBFR - 1 );
199 0 : mean_pitch /= (float) ( NB_SUBFR );
200 :
201 0 : if ( ( tilt > 0.7f ) && /* HF resonant filter */
202 0 : ( ( pitch_dist > 8.0f ) || ( mean_pitch < PIT_MIN ) ) && /* pitch unstable or very short */
203 0 : ( ( prev_bfi ) || ( ( coder_type == GENERIC ) && ( LSF_Q_prediction == AUTO_REGRESSIVE ) ) ) )
204 : {
205 0 : if ( enr_q > scaling * enr_old ) /* limit relative to the energy at the end of the previous frame */
206 : {
207 0 : enr_q = scaling * enr_old;
208 : }
209 : }
210 : else
211 : {
212 0 : if ( ( clas <= VOICED_TRANSITION ) || ( clas >= INACTIVE_CLAS ) )
213 : {
214 0 : ener_max = *lp_ener_FEC_av;
215 : }
216 : else
217 : {
218 0 : ener_max = *lp_ener_FEC_max;
219 : }
220 :
221 0 : if ( enr_old > ener_max ) /* reference energy is the larger of the long-term value and enr_old */
222 : {
223 0 : ener_max = enr_old;
224 : }
225 0 : if ( enr_q > scaling * ener_max )
226 : {
227 0 : enr_q = scaling * ener_max;
228 : }
229 : }
230 : }
231 :
232 0 : gain2 = (float) sqrt( enr_q / enr2 );
233 :
234 :
235 : /*-----------------------------------------------------------------*
236 : * Find the energy/gain at the beginning of the frame to ensure smooth transition after erasure(s)
237 : *-----------------------------------------------------------------*/
238 :
239 0 : if ( ( ( last_good >= VOICED_TRANSITION && last_good < INACTIVE_CLAS && ( clas == UNVOICED_CLAS || clas == INACTIVE_CLAS ) ) ||
240 0 : last_core_brate == SID_1k75 || last_core_brate == SID_2k40 || last_core_brate == FRAME_NO_DATA ) &&
241 : prev_bfi )
242 : {
243 : /* voiced -> unvoiced signal transition */
244 : /* CNG -> active signal transition */
245 0 : gain1 = gain2; /* constant gain over the frame: no evolution from a separate start gain */
246 : }
247 : else
248 : {
249 : /* find the energy at the beginning of the frame */
250 0 : fer_energy( L_frame, clas, synth, pitch[0], &enr1, 0 );
251 :
252 0 : enr1 += 0.1f;
253 0 : gain1 = (float) sqrt( enr_old / enr1 );
254 0 : if ( gain1 > 1.2f )
255 : {
256 : /* prevent clipping */
257 0 : gain1 = 1.2f;
258 : }
259 :
260 : /* prevent amplifying the unvoiced or inactive part of the frame in case an offset is followed by an onset */
261 0 : if ( clas == ONSET && gain1 > gain2 && prev_bfi )
262 : {
263 0 : gain1 = gain2;
264 : }
265 : }
266 :
267 0 : enr2 = enr_q; /* Set the end frame energy to the scaled energy, to be used in the lp_ener_FEC */
268 : }
269 :
270 : /*------------------------------------------------------------------------------*
271 : * Smooth the energy evolution by exponentially evolving from gain1 to gain2
272 : *------------------------------------------------------------------------------*/
273 :
274 9 : gain2 *= ( 1.0f - AGC ); /* pre-scale so that the recursion below converges to the original gain2 */
275 2313 : for ( i = 0; i < L_frame; i++ )
276 : {
277 2304 : gain1 = gain1 * AGC + gain2; /* first-order recursion with coefficient AGC */
278 2304 : exc[i] *= gain1;
279 2304 : exc2[i] *= gain1;
280 : }
281 :
282 : /* smoothing is done in excitation domain, so redo synthesis */
283 9 : mvr2r( mem_tmp, mem_syn, M ); /* presumably copies the M saved filter states from mem_tmp into mem_syn */
284 9 : syn_12k8( L_frame, Aq, exc2, synth, mem_syn, 1 );
285 : }
286 : }
287 : else
288 : {
289 : /* previous frame erased and no TC frame */
290 461601 : if ( prev_bfi && coder_type != TRANSITION )
291 : {
292 6561 : enr2 += 0.01f; /* offset keeps the division by enr2 below away from zero */
293 6561 : if ( enr_q == 0 ) /* If E info (FEC protection bits) is not available in the bitstream */
294 : {
295 2877 : enr_q = enr2; /* fall back to the measured end-of-frame energy, then limit it below */
296 :
297 2877 : set_f( h1, 0.0f, L_FRAME / 2 );
298 2877 : h1[0] = 1.0f; /* unit pulse as filter input */
299 2877 : syn_filt( Aq + ( 3 * ( M + 1 ) ), M, h1, h1, L_FRAME / 2, h1 + ( M + 1 ), 0 ); /* presumably the synthesis filter impulse response; LP set at fixed offset 3 * ( M + 1 ) regardless of L_frame */
300 2877 : rr0 = dotp( h1, h1, L_FRAME / 2 - 1 ) + 0.001f; /* small offset keeps the tilt division away from zero */
301 2877 : rr1 = dotp( h1, h1 + 1, L_FRAME / 2 - 1 );
302 2877 : tilt = rr1 / rr0; /* ratio of the lag-1 to the lag-0 product sum of h1 (assuming dotp() is a dot product) */
303 :
304 2877 : if ( ( ( ( total_brate == ACELP_13k20 ) || ( total_brate == ACELP_12k85 ) || ( total_brate == ACELP_12k15 ) || ( total_brate == ACELP_11k60 ) ||
305 48 : ( total_brate == ACELP_9k60 ) ) &&
306 42 : ( tilt > 0.7f ) && /* HF resonant filter */
307 36 : ( ( clas == UNVOICED_CLAS ) || ( clas == INACTIVE_CLAS ) ) ) ) /* unvoiced classification */
308 : {
309 9 : if ( enr_q > scaling * enr_old ) /* limit relative to the energy at the end of the previous frame */
310 : {
311 3 : enr_q = scaling * enr_old;
312 : }
313 : }
314 2868 : else if ( last_good >= VOICED_TRANSITION && last_good < INACTIVE_CLAS && clas >= VOICED_TRANSITION && clas < INACTIVE_CLAS )
315 : {
316 : /* voiced -> voiced recovery */
317 1656 : if ( ( *old_enr_LP != 0.0f ) && ( enr_LP > 2 * *old_enr_LP ) )
318 : {
319 219 : enr_q /= enr_LP; /* rescale enr_q by the ratio 2 * old_enr_LP / enr_LP (this line and the next) */
320 219 : enr_q *= 2 * *old_enr_LP;
321 : }
322 1437 : else if ( avoid_lpc_burst_on_recovery && enr_LP > 20.0f )
323 : {
324 9 : enr_q = (float) ( enr_q * sqrt( 20.0f / enr_LP ) ); /* attenuate enr_q when enr_LP exceeds 20 */
325 : }
326 : }
327 :
328 2877 : if ( ( last_good >= VOICED_TRANSITION && last_good < INACTIVE_CLAS && clas >= VOICED_TRANSITION && clas < INACTIVE_CLAS ) || force_scaling )
329 : {
330 1656 : if ( enr_q > enr_old )
331 : {
332 669 : enr_q = ( 1 - SCLSYN_LAMBDA ) * enr_old + SCLSYN_LAMBDA * enr_q; /* weighted mix of enr_old and enr_q, weight SCLSYN_LAMBDA on enr_q */
333 : }
334 : }
335 : }
336 :
337 6561 : gain2 = (float) sqrt( enr_q / enr2 );
338 :
339 : /* do not allow E increase if enr_q index == 0 (lower end Q clipping) */
340 6561 : if ( enr_q < 1.1f )
341 : {
342 195 : if ( gain2 > 1.0f )
343 : {
344 3 : gain2 = 1.0f;
345 : }
346 : }
347 : else
348 : {
349 6366 : if ( gain2 > 1.2f ) /* otherwise cap the end gain at 1.2 */
350 : {
351 1311 : gain2 = 1.2f;
352 : }
353 : }
354 :
355 : /*-----------------------------------------------------------------*
356 : * Find the energy/gain at the beginning of the frame to ensure smooth transition after erasure(s)
357 : *-----------------------------------------------------------------*/
358 :
359 6561 : if ( clas == SIN_ONSET )
360 : {
361 : /* allow only slow increase */
362 69 : gain1 = 0.5f * gain2;
363 : }
364 6492 : else if ( ( last_good >= VOICED_TRANSITION && last_good < INACTIVE_CLAS && ( clas == UNVOICED_CLAS || clas == INACTIVE_CLAS ) ) || last_core_brate == SID_1k75 || last_core_brate == SID_2k40 || last_core_brate == FRAME_NO_DATA )
365 : {
366 : /* voiced -> unvoiced signal transition */
367 : /* CNG -> active signal transition */
368 603 : gain1 = gain2; /* constant gain over the frame: no evolution from a separate start gain */
369 : }
370 : else
371 : {
372 : /* find the energy at the beginning of the frame */
373 5889 : fer_energy( L_frame, clas, synth, pitch[0], &enr1, 0 );
374 :
375 5889 : enr1 += 0.1f; /* offset keeps the division by enr1 away from zero */
376 5889 : gain1 = (float) sqrt( enr_old / enr1 );
377 5889 : if ( gain1 > 1.2f )
378 : {
379 : /* prevent clipping */
380 1710 : gain1 = 1.2f;
381 : }
382 5889 : if ( avoid_lpc_burst_on_recovery && ( enr_LP > 20.0f ) && ( enr_LP <= 2 * *old_enr_LP ) && ( gain1 > 1.0f ) ) /* no start-gain amplification when enr_LP is large but not above twice the stored value */
383 : {
384 9 : gain1 = 1.0f;
385 : }
386 :
387 : /* prevent amplifying the unvoiced or inactive part of the frame in case an offset is followed by an onset */
388 5889 : if ( clas == ONSET && gain1 > gain2 )
389 : {
390 39 : gain1 = gain2;
391 : }
392 : }
393 :
394 : /*-----------------------------------------------------------------*
395 : * Smooth the energy evolution by exponentially evolving from gain1 to gain2
396 : *-----------------------------------------------------------------*/
397 :
398 6561 : gain2 *= ( 1.0f - AGC ); /* pre-scale so that the recursion below converges to the original gain2 */
399 1933665 : for ( i = 0; i < L_frame; i++ )
400 : {
401 1927104 : gain1 = gain1 * AGC + gain2; /* first-order recursion with coefficient AGC */
402 1927104 : exc[i] *= gain1;
403 1927104 : exc2[i] *= gain1;
404 : }
405 :
406 : /* smoothing is done in excitation domain, so redo synthesis */
407 6561 : mvr2r( mem_tmp, mem_syn, M ); /* presumably copies the M saved filter states from mem_tmp into mem_syn */
408 6561 : syn_12k8( L_frame, Aq, exc2, synth, mem_syn, 1 );
409 : }
410 : }
411 :
412 : /*-----------------------------------------------------------------*
413 : * Update low-pass filtered energy for voiced frames
414 : *-----------------------------------------------------------------*/
415 :
416 461775 : if ( !bfi && ( clas >= VOICED_TRANSITION && clas < INACTIVE_CLAS ) )
417 : {
418 265926 : if ( clas == VOICED_TRANSITION )
419 : {
420 19173 : enr2_av = enr2; /* enr2 serves as the "av" value; the "max" value is re-measured with VOICED_CLAS */
421 19173 : fer_energy( L_frame, VOICED_CLAS, synth, pitch[( L_frame >> 6 ) - 1], &enr2_max, L_frame );
422 : }
423 : else
424 : {
425 246753 : enr2_max = enr2; /* enr2 serves as the "max" value; the "av" value is re-measured with UNVOICED_CLAS */
426 246753 : fer_energy( L_frame, UNVOICED_CLAS, synth, pitch[( L_frame >> 6 ) - 1], &enr2_av, L_frame );
427 : }
428 :
429 265926 : *lp_ener_FEC_av = 0.05f * enr2_av + 0.95f * *lp_ener_FEC_av; /* first-order smoothing: 0.05 new value, 0.95 previous state */
430 265926 : *lp_ener_FEC_max = 0.05f * enr2_max + 0.95f * *lp_ener_FEC_max; /* same smoothing for the "max" energy */
431 : }
432 :
433 : /*-----------------------------------------------------------------*
434 : * Update the LP filter energy for voiced frames
435 : *-----------------------------------------------------------------*/
436 :
437 461775 : if ( clas >= VOICED_TRANSITION && clas < INACTIVE_CLAS ) /* note: no bfi test here, unlike the energy update above */
438 : {
439 265935 : *old_enr_LP = enr_LP;
440 : }
441 :
442 :
443 461775 : return;
444 : }
|