Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include "cnst.h"
43 : #include "prot.h"
44 : #include "wmc_auto.h"
45 : #include <assert.h>
46 :
47 :
48 : /*--------------------------------------------------------------------------*
49 : * Local constants
50 : *--------------------------------------------------------------------------*/
51 :
52 : #define SMOOTH_FILT_COEFF 0.8f /* weight of the previous smoothed decision (clas_sec_old); the new instantaneous decision gets 1 - this */
53 : #define THRESH_UP 1.6f /* a rising smoothed decision above this value selects HQ_CORE */
54 : #define THRESH_DOWN 1.1f /* a smoothed decision below this value selects TCX_20_CORE */
55 : #define HQ_LOCAL 3 /* instantaneous decision value fed to the smoother when the HQ conditions hold */
56 : #define TCX_LOCAL 1 /* instantaneous decision value fed to the smoother otherwise */
57 :
58 : #define GAIN2_START_WB 6 /* 16 kHz input: first enerBuffer band of gain2; also the band count averaged for gain1..gain3 */
59 : #define GAIN3_START_WB 12 /* 16 kHz input: first enerBuffer band of gain3 */
60 : #define GAIN4_START_WB 9 /* 16 kHz input: first enerBuffer band of gain4; also the band count averaged for gain4 */
61 : #define H1_START_WB 17 /* 16 kHz input: first enerBuffer band of the H1 peak/sum window */
62 : #define H2_START_WB 14 /* 16 kHz input: first enerBuffer band of the H2 peak/sum window */
63 : #define H_LENGTH_WB 3 /* 16 kHz input: number of bands in the H1 and H2 windows */
64 : #define GAIN2_START_SWB 8 /* 32/48 kHz input: first enerBuffer band of gain2; also the band count averaged for gain1..gain3 */
65 : #define GAIN3_START_SWB 16 /* 32/48 kHz input: first enerBuffer band of gain3 */
66 : #define GAIN4_START_SWB 12 /* 32/48 kHz input: first enerBuffer band of gain4; also the band count averaged for gain4 */
67 : #define H1_START_SWB 25 /* 32/48 kHz input: first enerBuffer band of the H1 peak/sum window */
68 : #define H2_START_SWB 20 /* 32/48 kHz input: first enerBuffer band of the H2 peak/sum window */
69 : #define H_LENGTH_SWB 5 /* 32/48 kHz input: number of bands in the H1 and H2 windows */
70 :
71 :
72 : /*-------------------------------------------------------------------*
73 : * mdct_classifier()
74 : *
75 : * MDCT signal classifier for HQ_CORE/TCX_20_CORE
76 : *-------------------------------------------------------------------*/
77 :
78 : /*! r: HQ_CORE/TCX_20_CORE decision */
79 979795 : int16_t mdct_classifier(
80 : Encoder_State *st, /* i/o: Encoder state variable */
81 : const float *fft_buff, /* i : FFT spectrum from fft_rel */
82 : const float enerBuffer[], /* i : energy buffer */
83 : const int32_t brate /* i : current brate, IVAS: nominal bitrate, EVS: st->total_brate */
84 : )
85 : {
86 : int16_t c;
87 : float X[129];
88 : int16_t k;
89 : float y[2];
90 : float nf;
91 : float pe;
92 : int16_t np;
93 : float max_cand;
94 : int16_t max_i;
95 : float p_energy;
96 : float n_energy;
97 : int16_t d_acc;
98 : int16_t pos_last;
99 : float clas_sec;
100 : int16_t clas_final;
101 : int16_t i;
102 : float gain1, gain2, gain3, gain11, gain4;
103 : float peak_l, peak_h, avrg_l, avrg_h, peak_H1, avrg_H1, peak_H2, avrg_H2;
104 : int16_t condition1, condition2;
105 : int16_t condition3, condition4;
106 : int16_t gain2_start, gain3_start, gain4_start, H1_start, H2_start, H_length;
107 979795 : TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc;
108 :
109 979795 : if ( st->input_Fs == 32000 || st->input_Fs == 48000 )
110 : {
111 880199 : gain2_start = GAIN2_START_SWB;
112 880199 : gain3_start = GAIN3_START_SWB;
113 880199 : gain4_start = GAIN4_START_SWB;
114 880199 : H1_start = H1_START_SWB;
115 880199 : H2_start = H2_START_SWB;
116 880199 : H_length = H_LENGTH_SWB;
117 : }
118 99596 : else if ( st->input_Fs == 16000 )
119 : {
120 99596 : gain2_start = GAIN2_START_WB;
121 99596 : gain3_start = GAIN3_START_WB;
122 99596 : gain4_start = GAIN4_START_WB;
123 99596 : H1_start = H1_START_WB;
124 99596 : H2_start = H2_START_WB;
125 99596 : H_length = H_LENGTH_WB;
126 : }
127 : else
128 : {
129 0 : assert( !"Unknown sampling frequency in MDCT_classifier" );
130 : H1_start = -1; /* to avoid compilation warning */
131 : H2_start = -1; /* to avoid compilation warning */
132 : H_length = -1; /* to avoid compilation warning */
133 : gain2_start = -1; /* to avoid compilation warning */
134 : gain3_start = -1; /* to avoid compilation warning */
135 : gain4_start = -1; /* to avoid compilation warning */
136 : }
137 :
138 979795 : if ( st->element_mode != IVAS_CPE_DFT )
139 : {
140 94144512 : for ( k = 0; k < 127; k++ )
141 : {
142 93409008 : X[1 + k] = fft_buff[1 + k] * fft_buff[1 + k] + fft_buff[255 - k] * fft_buff[255 - k];
143 : }
144 :
145 2206512 : for ( k = 0; k < 2; k++ )
146 : {
147 1471008 : y[k] = fft_buff[k << 7] * fft_buff[k << 7];
148 : }
149 :
150 2206512 : for ( k = 0; k < 2; k++ )
151 : {
152 1471008 : X[k << 7] = y[k];
153 : }
154 : }
155 : else
156 : {
157 : float norm_val;
158 :
159 244291 : norm_val = ( L_FFT * L_FFT ) / 4.f;
160 31513539 : for ( k = 0; k < 128; k++ )
161 : {
162 31269248 : X[k + 1] = st->Bin_E_old[k] * norm_val;
163 : }
164 244291 : X[0] = X[1];
165 : }
166 :
167 979795 : nf = X[0];
168 979795 : pe = X[0];
169 979795 : np = 0;
170 979795 : max_cand = -1.0F;
171 979795 : max_i = 0;
172 979795 : p_energy = 0.0F;
173 979795 : n_energy = 0.0F;
174 979795 : d_acc = 0;
175 979795 : pos_last = -1;
176 :
177 126393555 : for ( k = 0; k < 128; k++ )
178 : {
179 125413760 : if ( X[k + 1] > nf )
180 : {
181 61461746 : nf = 0.9578F * nf + 0.0422F * X[k + 1];
182 : }
183 : else
184 : {
185 63952014 : nf = 0.6472F * nf + 0.3528F * X[k + 1];
186 : }
187 :
188 125413760 : if ( X[k + 1] > pe )
189 : {
190 22329119 : pe = 0.42237F * pe + 0.57763F * X[k + 1];
191 : }
192 : else
193 : {
194 103084641 : pe = 0.80285F * pe + 0.19715F * X[k + 1];
195 : }
196 :
197 125413760 : if ( X[k + 1] > pe * 0.64F )
198 : {
199 35442453 : if ( X[k + 1] > max_cand )
200 : {
201 28459488 : max_cand = X[k + 1];
202 28459488 : max_i = (int16_t) ( 2 + k );
203 : }
204 : }
205 : else
206 : {
207 89971307 : if ( max_i > 0 )
208 : {
209 17031340 : if ( np > 0 )
210 : {
211 16051637 : d_acc = (int16_t) ( (int16_t) ( d_acc + max_i ) - pos_last );
212 : }
213 17031340 : np++;
214 17031340 : pos_last = max_i;
215 : }
216 :
217 89971307 : max_cand = -1.0F;
218 89971307 : max_i = 0;
219 : }
220 :
221 125413760 : p_energy += pe * pe;
222 125413760 : n_energy += nf * nf;
223 : }
224 :
225 979795 : if ( np > 1 )
226 : {
227 970333 : nf = (float) d_acc / ( (float) np - 1.0F );
228 : }
229 : else
230 : {
231 9462 : nf = 0.0F;
232 : }
233 979795 : gain1 = 0.0f;
234 979795 : gain2 = 0.0f;
235 979795 : gain3 = 0.0f;
236 :
237 8618963 : for ( i = 0; i < gain2_start; i++ )
238 : {
239 7639168 : gain1 += enerBuffer[i] / gain2_start;
240 7639168 : gain2 += enerBuffer[gain2_start + i] / gain2_start;
241 7639168 : gain3 += enerBuffer[gain3_start + i] / gain2_start;
242 : }
243 :
244 979795 : gain11 = gain2_start * ( gain1 - enerBuffer[0] / gain2_start ) / ( gain2_start - 1 );
245 979795 : gain4 = 0.0f;
246 12438547 : for ( i = 0; i < gain4_start; i++ )
247 : {
248 11458752 : gain4 += enerBuffer[gain4_start + i] / gain4_start;
249 : }
250 :
251 979795 : peak_H1 = enerBuffer[H1_start];
252 979795 : avrg_H1 = enerBuffer[H1_start];
253 4699783 : for ( i = 1; i < H_length; i++ )
254 : {
255 3719988 : if ( enerBuffer[H1_start + i] > peak_H1 )
256 : {
257 980760 : peak_H1 = enerBuffer[H1_start + i];
258 : }
259 3719988 : avrg_H1 += enerBuffer[H1_start + i];
260 : }
261 :
262 979795 : peak_H2 = enerBuffer[H2_start];
263 979795 : avrg_H2 = enerBuffer[H2_start];
264 4699783 : for ( i = 1; i < H_length; i++ )
265 : {
266 3719988 : if ( enerBuffer[H2_start + i] > peak_H2 )
267 : {
268 872014 : peak_H2 = enerBuffer[H2_start + i];
269 : }
270 3719988 : avrg_H2 += enerBuffer[H2_start + i];
271 : }
272 :
273 979795 : peak_l = 0.0f;
274 979795 : avrg_l = EPSILON;
275 979795 : peak_h = 0.0f;
276 979795 : avrg_h = EPSILON;
277 32333235 : for ( i = 0; i < 32; i++ )
278 : {
279 31353440 : avrg_l += X[20 + i];
280 31353440 : avrg_h += X[96 + i];
281 31353440 : if ( X[20 + i] > peak_l )
282 : {
283 4378589 : peak_l = X[20 + i];
284 : }
285 31353440 : if ( X[96 + i] > peak_h )
286 : {
287 4043550 : peak_h = X[96 + i];
288 : }
289 : }
290 :
291 979795 : condition1 = nf > 12.0F;
292 979795 : condition2 = p_energy - n_energy * 147.87276f > 0;
293 2824450 : condition3 = gain3 > 1.2f * gain2 ||
294 1828055 : ( gain3 >= 0.8f * gain2 && 5 * peak_H1 > 2.0f * avrg_H1 ) ||
295 848260 : ( 2.6f * peak_l * avrg_h < peak_h * avrg_l || peak_l * avrg_h > 2.6f * peak_h * avrg_l );
296 42794 : condition4 = ( gain4 > 0.8f * gain11 && 2.56f * peak_l * avrg_h > peak_h * avrg_l && peak_l * avrg_h < 5.12f * peak_h * avrg_l ) ||
297 939341 : ( gain4 > 0.3f * gain11 && 32 * peak_h < 1.5f * avrg_h && 5 * peak_H2 < 1.5f * avrg_H2 ) ||
298 1959590 : ( 2.56f * peak_l * avrg_h < peak_h * avrg_l && 32 * peak_h > 1.5f * avrg_h ) || ( peak_l * avrg_h > 2.56f * peak_h * avrg_l && 32 * peak_h < 1.5f * avrg_h );
299 :
300 979795 : if ( ( ( brate >= HQ_MDCTCLASS_CROSSOVER_BRATE ) && st->input_Fs > 16000 && ( ( !condition1 && condition2 ) || ( condition1 && !condition2 ) || condition3 ) ) || ( ( ( brate < HQ_MDCTCLASS_CROSSOVER_BRATE ) || st->input_Fs == 16000 ) && condition4 ) )
301 : {
302 79585 : c = HQ_LOCAL;
303 : }
304 : else
305 : {
306 900210 : c = TCX_LOCAL;
307 : }
308 :
309 : /* Smooth decision from instantaneous decision*/
310 979795 : clas_sec = ( SMOOTH_FILT_COEFF * hTcxEnc->clas_sec_old ) + ( ( 1 - SMOOTH_FILT_COEFF ) * c );
311 :
312 : /* Do thresholding with hysteresis */
313 979795 : if ( ( hTcxEnc->clas_final_old == HQ_CORE || hTcxEnc->clas_final_old == TCX_20_CORE ) && ( ( hTcxEnc->last_gain1 > 0.5f * gain1 && hTcxEnc->last_gain1 < 2.0f * gain1 ) && ( hTcxEnc->last_gain2 > 0.5f * gain2 && hTcxEnc->last_gain2 < 2.0f * gain2 ) ) )
314 : {
315 625472 : clas_final = hTcxEnc->clas_final_old;
316 : }
317 354323 : else if ( clas_sec > hTcxEnc->clas_sec_old && clas_sec > THRESH_UP ) /* Going up? */
318 : {
319 15897 : clas_final = HQ_CORE;
320 : }
321 338426 : else if ( clas_sec < THRESH_DOWN ) /* Going down */
322 : {
323 276069 : clas_final = TCX_20_CORE;
324 : }
325 : else
326 : {
327 62357 : clas_final = hTcxEnc->clas_final_old;
328 : }
329 :
330 : /* Prevent the usage of HQ_CORE on noisy-speech or inactive */
331 979795 : if ( ( st->mdct_sw_enable == MODE2 || st->element_mode > EVS_MONO ) && ( st->flag_noisy_speech_snr == 1 || st->vad_flag == 0 ) && clas_final == HQ_CORE )
332 : {
333 766 : clas_final = TCX_20_CORE;
334 : }
335 :
336 : /* Restrict usage of HQ_core to supported operating range */
337 : /* EVS: brate == st->total_brate */
338 : /* IVAS: brate is the nominal bitrate while st->total_brate may fluctuate. This sets a hard limit for HQ at HQ_16k40 */
339 979795 : if ( st->total_brate <= HQ_16k40 || brate < HQ_16k40 || st->bwidth == NB || brate > IVAS_48k )
340 : {
341 404592 : clas_final = TCX_20_CORE;
342 : }
343 :
344 : /* Memory update */
345 979795 : hTcxEnc->clas_sec_old = clas_sec;
346 979795 : hTcxEnc->clas_final_old = clas_final;
347 979795 : hTcxEnc->last_gain1 = gain1;
348 979795 : hTcxEnc->last_gain2 = gain2;
349 :
350 979795 : return clas_final;
351 : }
352 :
353 : /*--------------------------------------------------------------------------*
354 : * MDCT_classifier_reset()
355 : *
356 : * reset MDCT classifier memories
357 : *--------------------------------------------------------------------------*/
358 :
359 111819 : void MDCT_classifier_reset(
360 : TCX_ENC_HANDLE hTcxEnc /* i/o: TCX Encoder Handle */
361 : )
362 : {
363 111819 : hTcxEnc->clas_sec_old = 1.0f;
364 111819 : hTcxEnc->clas_final_old = 1;
365 111819 : hTcxEnc->last_gain1 = 0.0f;
366 111819 : hTcxEnc->last_gain2 = 0.0f;
367 :
368 111819 : return;
369 : }
|