Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include <math.h>
43 : #include "cnst.h"
44 : #include "prot.h"
45 : #include "wmc_auto.h"
46 :
47 : /*---------------------------------------------------------------------*
48 : * Local constants
49 : *---------------------------------------------------------------------*/
50 :
51 : #define K_COR_ENC 2.857f /* slope of the linear map: average voicing -> normalized voicing */
52 : #define C_COR_ENC -1.286f /* offset of the voicing map */
53 : #define K_EE_ENC 0.04167f /* slope of the linear map: average spectral tilt (dB) -> normalized tilt */
54 : #define C_EE_ENC 0.0f /* offset of the spectral tilt map */
55 : #define K_ZC_ENC -0.04f /* slope of the linear map: zero-crossing count -> normalized value (negative: more crossings lower it) */
56 : #define C_ZC_ENC 2.4f /* offset of the zero-crossing map */
57 : #define K_RELE_ENC 0.05f /* slope of the linear map: relative frame energy -> normalized value */
58 : #define C_RELE_ENC 0.45f /* offset of the relative energy map */
59 : #define K_PC_ENC -0.07143f /* slope of the linear map: pitch variation -> normalized value (negative: more variation lowers it) */
60 : #define C_PC_ENC 1.857f /* offset of the pitch variation map */
61 : #define K_SNR_ENC 0.1111f /* NOTE(review): presumably the slope of an SNR map; not referenced in the visible code - confirm use */
62 : #define C_SNR_ENC -0.3333f /* NOTE(review): presumably the offset of an SNR map; not referenced in the visible code - confirm use */
63 :
64 : /*-------------------------------------------------------------------*
65 : * signal_clas()
66 : *
67 : * Classification state machine for FEC
68 : * Coder type modification
69 : *-------------------------------------------------------------------*/
70 :
71 : /*! r: classification for current frames */
72 1136044 : int16_t signal_clas(
73 : Encoder_State *st, /* i/o: encoder state structure */
74 : const float *speech, /* i : pointer to speech signal for E computation */
75 : const float *ee, /* i : lf/hf E ration for 2 half-frames */
76 : const float relE, /* i : frame relative E to the long term average */
77 : const int16_t L_look, /* i : look-ahead */
78 : int16_t *clas_mod /* o : class flag for NOOP detection */
79 : )
80 : {
81 : float mean_voi2, mean_ee2, tmp;
82 : float een, corn, zcn, relEn, pcn, fmerit1;
83 : int16_t i, clas, pc, zc;
84 :
85 : /*----------------------------------------------------------------*
86 : * Calculate average voicing
87 : * Calculate average spectral tilt
88 : * Calculate zero-crossing rate
89 : * Calculate pitch stability
90 : *----------------------------------------------------------------*/
91 :
92 : /* average voicing on second half-frame and look-ahead */
93 1136044 : mean_voi2 = 0.5f * ( st->voicing[1] + st->voicing[2] );
94 :
95 : /* average spectral tilt in dB */
96 1136044 : tmp = ee[0] * ee[1];
97 1136044 : if ( tmp < 1.0f )
98 : {
99 108184 : tmp = 1.0f;
100 : }
101 :
102 1136044 : mean_ee2 = 0.5f * 20.0f * (float) log10( tmp );
103 :
104 : /* compute zero crossing rate */
105 1136044 : zc = 0;
106 291963308 : for ( i = L_look; i < L_FRAME + L_look; i++ )
107 : {
108 290827264 : if ( speech[i] <= 0.0f && speech[i - 1] > 0.0f )
109 : {
110 31328536 : zc++;
111 : }
112 : }
113 :
114 : /* compute pitch stability */
115 1136044 : pc = (int16_t) ( abs( st->pitch[1] - st->pitch[0] ) + abs( st->pitch[2] - st->pitch[1] ) );
116 1136044 : st->tdm_pc = pc;
117 :
118 : /*-----------------------------------------------------------------*
119 : * Transform parameters to the range <0:1>
120 : * Compute the merit function
121 : *-----------------------------------------------------------------*/
122 :
123 1136044 : een = K_EE_ENC * mean_ee2 + C_EE_ENC;
124 1136044 : if ( een > 1.0f )
125 : {
126 735726 : een = 1.0f;
127 : }
128 400318 : else if ( een < 0.0f )
129 : {
130 0 : een = 0.0f;
131 : }
132 :
133 1136044 : corn = K_COR_ENC * mean_voi2 + C_COR_ENC;
134 1136044 : if ( corn > 1.0f )
135 : {
136 330045 : corn = 1.0f;
137 : }
138 805999 : else if ( corn < 0.0f )
139 : {
140 172025 : corn = 0.0f;
141 : }
142 :
143 1136044 : zcn = K_ZC_ENC * zc + C_ZC_ENC;
144 1136044 : if ( zcn > 1.0f )
145 : {
146 794948 : zcn = 1.0f;
147 : }
148 341096 : else if ( zcn < 0.0f )
149 : {
150 148222 : zcn = 0.0f;
151 : }
152 :
153 1136044 : relEn = K_RELE_ENC * relE + C_RELE_ENC;
154 1136044 : if ( relEn > 1.0f )
155 : {
156 36934 : relEn = 1.0f;
157 : }
158 1099110 : else if ( relEn < 0.5f )
159 : {
160 811946 : relEn = 0.5f;
161 : }
162 :
163 1136044 : pcn = K_PC_ENC * pc + C_PC_ENC;
164 1136044 : if ( pcn > 1.0f )
165 : {
166 688199 : pcn = 1.0f;
167 : }
168 447845 : else if ( pcn < 0.0f )
169 : {
170 354708 : pcn = 0.0f;
171 : }
172 :
173 1136044 : fmerit1 = ( 1.0f / 6.0f ) * ( een + 2.0f * corn + zcn + relEn + pcn );
174 :
175 : /*-----------------------------------------------------------------*
176 : * FEC classification
177 : *-----------------------------------------------------------------*/
178 :
179 1136044 : st->fmerit_dt = st->prev_fmerit - fmerit1;
180 1136044 : st->prev_fmerit = fmerit1;
181 :
182 : /* FEC classification */
183 1136044 : if ( st->localVAD == 0 || st->coder_type == UNVOICED || relE < -6.0f )
184 : {
185 541961 : clas = UNVOICED_CLAS;
186 541961 : *clas_mod = clas;
187 : }
188 : else
189 : {
190 594083 : switch ( st->last_clas )
191 : {
192 452372 : case VOICED_CLAS:
193 : case ONSET:
194 : case VOICED_TRANSITION:
195 452372 : if ( fmerit1 < 0.49f )
196 : {
197 20440 : clas = UNVOICED_CLAS;
198 : }
199 431932 : else if ( fmerit1 < 0.66f )
200 : {
201 55127 : clas = VOICED_TRANSITION;
202 : }
203 : else
204 : {
205 376805 : clas = VOICED_CLAS;
206 : }
207 :
208 452372 : if ( fmerit1 < 0.45f )
209 : {
210 13018 : *clas_mod = UNVOICED_CLAS;
211 : }
212 439354 : else if ( fmerit1 < 0.66f )
213 : {
214 62549 : *clas_mod = VOICED_TRANSITION;
215 : }
216 : else
217 : {
218 376805 : *clas_mod = VOICED_CLAS;
219 : }
220 452372 : break;
221 :
222 141711 : case UNVOICED_CLAS:
223 : case UNVOICED_TRANSITION:
224 141711 : if ( fmerit1 > 0.63f )
225 : {
226 57681 : clas = ONSET;
227 : }
228 84030 : else if ( fmerit1 > 0.585f )
229 : {
230 11561 : clas = UNVOICED_TRANSITION;
231 : }
232 : else
233 : {
234 72469 : clas = UNVOICED_CLAS;
235 : }
236 141711 : *clas_mod = clas;
237 141711 : break;
238 :
239 0 : default:
240 0 : clas = UNVOICED_CLAS;
241 0 : *clas_mod = clas;
242 0 : break;
243 : }
244 : }
245 :
246 : /*-----------------------------------------------------------------*
247 : * Onset classification
248 : *-----------------------------------------------------------------*/
249 :
250 : /* tc_cnt == -1: frame after TC frame in continuous block of GC/VC frames */
251 : /* tc_cnt == 0: UC frame */
252 : /* tc_cnt == 1: onset/transition frame, coded by GC coder type */
253 : /* tc_cnt == 2: frame after onset/transition frame, coded by TC coder type */
254 :
255 1136044 : if ( clas == UNVOICED_CLAS )
256 : {
257 634870 : st->tc_cnt = 0;
258 : }
259 :
260 1136044 : if ( clas >= VOICED_TRANSITION && st->tc_cnt >= 0 )
261 : {
262 143936 : st->tc_cnt += 1;
263 : }
264 :
265 1136044 : if ( st->tc_cnt > 2 )
266 : {
267 39720 : st->tc_cnt = -1;
268 : }
269 :
270 1136044 : return clas;
271 : }
272 :
273 :
274 : /*-------------------------------------------------------------------*
275 : * select_TC()
276 : *
277 : * Select TC coder type for appropriate frames which is in general VOICED_TRANSITION,
278 : * VOICED_CLAS or ONSET frames following UNVOICED_CLAS frames
279 : *-------------------------------------------------------------------*/
280 :
281 1136044 : void select_TC(
282 : const int16_t codec_mode, /* i : codec mode */
283 : const int16_t tc_cnt, /* i : TC frame counter */
284 : int16_t *coder_type, /* i/o: coder type */
285 : const int16_t localVAD /* i : VAD without hangover */
286 : )
287 : {
288 1136044 : if ( codec_mode == MODE1 )
289 : {
290 : /*---------------------------------------------------------------------*
291 : * Select TC coder type for appropriate frames which is in general VOICED_TRANSITION,
292 : * VOICED_CLAS or ONSET frames following UNVOICED_CLAS frames
293 : *---------------------------------------------------------------------*/
294 :
295 1134994 : if ( localVAD != 0 && tc_cnt >= 1 )
296 : {
297 104127 : if ( tc_cnt == 1 )
298 : {
299 : /* onset/transition frame is always coded using GC coder type */
300 57639 : *coder_type = GENERIC;
301 : }
302 : else
303 : {
304 : /* frame after onset/transition frame is coded by TC coder type */
305 46488 : *coder_type = TRANSITION;
306 : }
307 : }
308 : }
309 :
310 1136044 : return;
311 : }
312 :
313 :
314 : /*-------------------------------------------------------------------*
315 : * coder_type_modif()
316 : *
317 : * Coder type modification
318 : *-------------------------------------------------------------------*/
319 :
320 1132253 : void coder_type_modif(
321 : Encoder_State *st, /* i/o: encoder state structure */
322 : const float relE /* i : frame relative E to the long term average */
323 : )
324 : {
325 : int16_t unmod_coder_type, vbr_generic_ho;
326 :
327 1132253 : if ( st->Opt_SC_VBR )
328 : {
329 0 : vbr_generic_ho = st->hSC_VBR->vbr_generic_ho;
330 : }
331 : else
332 : {
333 1132253 : vbr_generic_ho = -1;
334 : }
335 :
336 1132253 : if ( st->codec_mode == MODE1 )
337 : {
338 : /*---------------------------------------------------------------------*
339 : * Coder type modification
340 : *
341 : * Prevent UC coder type in certain conditions
342 : * Prevent VC coder type in certain conditions
343 : * Select TC coder type in appropriate frames
344 : *---------------------------------------------------------------------*/
345 :
346 : /* At higher rates, use GC coding instead of UC coding to improve quality */
347 1131203 : if ( ( st->element_mode == EVS_MONO && st->total_brate > ACELP_9k60 && st->coder_type == UNVOICED ) ||
348 1131108 : ( st->element_mode > EVS_MONO && st->total_brate > MAX_UNVOICED_BRATE && st->coder_type == UNVOICED ) )
349 : {
350 69941 : st->coder_type = GENERIC;
351 : }
352 :
353 : /* Prevent UC coding on mixed content at 9.6 kb/s */
354 1131203 : if ( st->total_brate >= ACELP_9k60 && st->coder_type == UNVOICED && st->audio_frame_cnt != 0 )
355 : {
356 2291 : st->coder_type = GENERIC;
357 : }
358 :
359 1131203 : unmod_coder_type = st->coder_type;
360 :
361 : /* Enforce GC coder type on inactive signal (this can be later overwritten to INACTIVE) */
362 1131203 : if ( st->localVAD == 0 && ( ( st->coder_type == UNVOICED && ( !st->Opt_SC_VBR ||
363 0 : ( st->Opt_SC_VBR && vbr_generic_ho == 0 && st->last_coder_type > UNVOICED ) ) ) ||
364 163148 : st->coder_type == TRANSITION || st->coder_type == VOICED ) )
365 : {
366 504 : st->coder_type = GENERIC;
367 : }
368 :
369 :
370 1131203 : if ( st->Opt_SC_VBR )
371 : {
372 0 : if ( st->coder_type == GENERIC && unmod_coder_type == UNVOICED && st->Opt_SC_VBR )
373 : {
374 0 : st->hSC_VBR->vbr_generic_ho = 1;
375 : }
376 :
377 0 : if ( st->coder_type > UNVOICED && st->Opt_SC_VBR )
378 : {
379 0 : st->hSC_VBR->vbr_generic_ho = 0;
380 : }
381 :
382 0 : if ( st->localVAD == 0 && st->coder_type == UNVOICED )
383 : {
384 0 : st->hSC_VBR->last_7k2_coder_type = GENERIC;
385 : }
386 : else
387 : {
388 0 : st->hSC_VBR->last_7k2_coder_type = st->coder_type;
389 : }
390 : }
391 :
392 1131203 : if ( st->element_mode == EVS_MONO )
393 : {
394 : /* At higher rates and with 16kHz core, allow only GC and TC coder type */
395 2050 : if ( st->total_brate > ACELP_16k40 && st->coder_type != GENERIC && st->coder_type != TRANSITION )
396 : {
397 496 : st->coder_type = GENERIC;
398 : }
399 : }
400 : else
401 : {
402 : /* At higher bitrates, disable UC and VC coder type; note that IC coder type is classified later */
403 1129153 : if ( ( st->total_brate > MAX_VOICED_BRATE && st->coder_type == VOICED ) ||
404 805172 : ( st->total_brate > MAX_UNVOICED_BRATE && st->coder_type == UNVOICED ) )
405 : {
406 323981 : st->coder_type = GENERIC;
407 : }
408 : }
409 :
410 : /* Patch for certain low-level signals for which the gain quantizer sometimes goes out of its dynamic range */
411 1131203 : if ( st->coder_type == VOICED && st->input_bwidth == NB && relE < -10.0f && st->total_brate <= ACELP_8k00 )
412 : {
413 0 : st->coder_type = GENERIC;
414 : }
415 : }
416 :
417 1132253 : return;
418 : }
|