Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include "cnst.h"
43 : #include "prot.h"
44 : #include "wmc_auto.h"
45 : #include <assert.h>
46 :
47 :
/*--------------------------------------------------------------------------*
 * Local constants
 *--------------------------------------------------------------------------*/

/* Instantaneous per-frame class values that are fed into the decision smoother */
#define TCX_LOCAL 1
#define HQ_LOCAL 3

/* Decision smoother: weight of the previous smoothed class and the two hysteresis thresholds */
#define SMOOTH_FILT_COEFF 0.8f
#define THRESH_DOWN 1.1f /* smoothed class below this value -> TCX_20_CORE */
#define THRESH_UP 1.6f   /* smoothed class rising above this value -> HQ_CORE */

/* Energy-buffer band layout used when the input sampling rate is 16 kHz */
#define GAIN2_START_WB 6  /* first band of the gain2 window; also the width of the gain1/gain2/gain3 windows */
#define GAIN3_START_WB 12 /* first band of the gain3 window */
#define GAIN4_START_WB 9  /* first band and width of the gain4 window */
#define H1_START_WB 17    /* first band of the H1 peak/sum window */
#define H2_START_WB 14    /* first band of the H2 peak/sum window */
#define H_LENGTH_WB 3     /* number of bands in the H1 and H2 windows */

/* Energy-buffer band layout used when the input sampling rate is 32 kHz or 48 kHz */
#define GAIN2_START_SWB 8  /* first band of the gain2 window; also the width of the gain1/gain2/gain3 windows */
#define GAIN3_START_SWB 16 /* first band of the gain3 window */
#define GAIN4_START_SWB 12 /* first band and width of the gain4 window */
#define H1_START_SWB 25    /* first band of the H1 peak/sum window */
#define H2_START_SWB 20    /* first band of the H2 peak/sum window */
#define H_LENGTH_SWB 5     /* number of bands in the H1 and H2 windows */
70 :
71 :
72 : /*-------------------------------------------------------------------*
73 : * mdct_classifier()
74 : *
75 : * MDCT signal classifier for HQ_CORE/TCX_20_CORE
76 : *-------------------------------------------------------------------*/
77 :
78 : /*! r: HQ_CORE/TCX_20_CORE decision */
79 1117581 : int16_t mdct_classifier(
80 : Encoder_State *st, /* i/o: Encoder state variable */
81 : const float *fft_buff, /* i : FFT spectrum from fft_rel */
82 : const float enerBuffer[], /* i : energy buffer */
83 : const int32_t brate /* i : current brate, IVAS: nominal bitrate, EVS: st->total_brate */
84 : )
85 : {
86 : int16_t c;
87 : float X[129];
88 : int16_t k;
89 : float y[2];
90 : float nf;
91 : float pe;
92 : int16_t np;
93 : float max_cand;
94 : int16_t max_i;
95 : float p_energy;
96 : float n_energy;
97 : int16_t d_acc;
98 : int16_t pos_last;
99 : float clas_sec;
100 : int16_t clas_final;
101 : int16_t i;
102 : float gain1, gain2, gain3, gain11, gain4;
103 : float peak_l, peak_h, avrg_l, avrg_h, peak_H1, avrg_H1, peak_H2, avrg_H2;
104 : int16_t condition1, condition2;
105 : int16_t condition3, condition4;
106 : int16_t gain2_start, gain3_start, gain4_start, H1_start, H2_start, H_length;
107 1117581 : TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc;
108 :
109 1117581 : if ( st->input_Fs == 32000 || st->input_Fs == 48000 )
110 : {
111 1004982 : gain2_start = GAIN2_START_SWB;
112 1004982 : gain3_start = GAIN3_START_SWB;
113 1004982 : gain4_start = GAIN4_START_SWB;
114 1004982 : H1_start = H1_START_SWB;
115 1004982 : H2_start = H2_START_SWB;
116 1004982 : H_length = H_LENGTH_SWB;
117 : }
118 112599 : else if ( st->input_Fs == 16000 )
119 : {
120 112599 : gain2_start = GAIN2_START_WB;
121 112599 : gain3_start = GAIN3_START_WB;
122 112599 : gain4_start = GAIN4_START_WB;
123 112599 : H1_start = H1_START_WB;
124 112599 : H2_start = H2_START_WB;
125 112599 : H_length = H_LENGTH_WB;
126 : }
127 : else
128 : {
129 0 : assert( !"Unknown sampling frequency in MDCT_classifier" );
130 : H1_start = -1; /* to avoid compilation warning */
131 : H2_start = -1; /* to avoid compilation warning */
132 : H_length = -1; /* to avoid compilation warning */
133 : gain2_start = -1; /* to avoid compilation warning */
134 : gain3_start = -1; /* to avoid compilation warning */
135 : gain4_start = -1; /* to avoid compilation warning */
136 : }
137 :
138 1117581 : if ( st->element_mode != IVAS_CPE_DFT )
139 : {
140 110331008 : for ( k = 0; k < 127; k++ )
141 : {
142 109469047 : X[1 + k] = fft_buff[1 + k] * fft_buff[1 + k] + fft_buff[255 - k] * fft_buff[255 - k];
143 : }
144 :
145 2585883 : for ( k = 0; k < 2; k++ )
146 : {
147 1723922 : y[k] = fft_buff[k << 7] * fft_buff[k << 7];
148 : }
149 :
150 2585883 : for ( k = 0; k < 2; k++ )
151 : {
152 1723922 : X[k << 7] = y[k];
153 : }
154 : }
155 : else
156 : {
157 : float norm_val;
158 :
159 255620 : norm_val = ( L_FFT * L_FFT ) / 4.f;
160 32974980 : for ( k = 0; k < 128; k++ )
161 : {
162 32719360 : X[k + 1] = st->Bin_E_old[k] * norm_val;
163 : }
164 255620 : X[0] = X[1];
165 : }
166 :
167 1117581 : nf = X[0];
168 1117581 : pe = X[0];
169 1117581 : np = 0;
170 1117581 : max_cand = -1.0F;
171 1117581 : max_i = 0;
172 1117581 : p_energy = 0.0F;
173 1117581 : n_energy = 0.0F;
174 1117581 : d_acc = 0;
175 1117581 : pos_last = -1;
176 :
177 144167949 : for ( k = 0; k < 128; k++ )
178 : {
179 143050368 : if ( X[k + 1] > nf )
180 : {
181 70034121 : nf = 0.9578F * nf + 0.0422F * X[k + 1];
182 : }
183 : else
184 : {
185 73016247 : nf = 0.6472F * nf + 0.3528F * X[k + 1];
186 : }
187 :
188 143050368 : if ( X[k + 1] > pe )
189 : {
190 25344656 : pe = 0.42237F * pe + 0.57763F * X[k + 1];
191 : }
192 : else
193 : {
194 117705712 : pe = 0.80285F * pe + 0.19715F * X[k + 1];
195 : }
196 :
197 143050368 : if ( X[k + 1] > pe * 0.64F )
198 : {
199 40193091 : if ( X[k + 1] > max_cand )
200 : {
201 32347369 : max_cand = X[k + 1];
202 32347369 : max_i = (int16_t) ( 2 + k );
203 : }
204 : }
205 : else
206 : {
207 102857277 : if ( max_i > 0 )
208 : {
209 19496849 : if ( np > 0 )
210 : {
211 18379360 : d_acc = (int16_t) ( (int16_t) ( d_acc + max_i ) - pos_last );
212 : }
213 19496849 : np++;
214 19496849 : pos_last = max_i;
215 : }
216 :
217 102857277 : max_cand = -1.0F;
218 102857277 : max_i = 0;
219 : }
220 :
221 143050368 : p_energy += pe * pe;
222 143050368 : n_energy += nf * nf;
223 : }
224 :
225 1117581 : if ( np > 1 )
226 : {
227 1108119 : nf = (float) d_acc / ( (float) np - 1.0F );
228 : }
229 : else
230 : {
231 9462 : nf = 0.0F;
232 : }
233 1117581 : gain1 = 0.0f;
234 1117581 : gain2 = 0.0f;
235 1117581 : gain3 = 0.0f;
236 :
237 9833031 : for ( i = 0; i < gain2_start; i++ )
238 : {
239 8715450 : gain1 += enerBuffer[i] / gain2_start;
240 8715450 : gain2 += enerBuffer[gain2_start + i] / gain2_start;
241 8715450 : gain3 += enerBuffer[gain3_start + i] / gain2_start;
242 : }
243 :
244 1117581 : gain11 = gain2_start * ( gain1 - enerBuffer[0] / gain2_start ) / ( gain2_start - 1 );
245 1117581 : gain4 = 0.0f;
246 14190756 : for ( i = 0; i < gain4_start; i++ )
247 : {
248 13073175 : gain4 += enerBuffer[gain4_start + i] / gain4_start;
249 : }
250 :
251 1117581 : peak_H1 = enerBuffer[H1_start];
252 1117581 : avrg_H1 = enerBuffer[H1_start];
253 5362707 : for ( i = 1; i < H_length; i++ )
254 : {
255 4245126 : if ( enerBuffer[H1_start + i] > peak_H1 )
256 : {
257 1129964 : peak_H1 = enerBuffer[H1_start + i];
258 : }
259 4245126 : avrg_H1 += enerBuffer[H1_start + i];
260 : }
261 :
262 1117581 : peak_H2 = enerBuffer[H2_start];
263 1117581 : avrg_H2 = enerBuffer[H2_start];
264 5362707 : for ( i = 1; i < H_length; i++ )
265 : {
266 4245126 : if ( enerBuffer[H2_start + i] > peak_H2 )
267 : {
268 998639 : peak_H2 = enerBuffer[H2_start + i];
269 : }
270 4245126 : avrg_H2 += enerBuffer[H2_start + i];
271 : }
272 :
273 1117581 : peak_l = 0.0f;
274 1117581 : avrg_l = EPSILON;
275 1117581 : peak_h = 0.0f;
276 1117581 : avrg_h = EPSILON;
277 36880173 : for ( i = 0; i < 32; i++ )
278 : {
279 35762592 : avrg_l += X[20 + i];
280 35762592 : avrg_h += X[96 + i];
281 35762592 : if ( X[20 + i] > peak_l )
282 : {
283 4884496 : peak_l = X[20 + i];
284 : }
285 35762592 : if ( X[96 + i] > peak_h )
286 : {
287 4635304 : peak_h = X[96 + i];
288 : }
289 : }
290 :
291 1117581 : condition1 = nf > 12.0F;
292 1117581 : condition2 = p_energy - n_energy * 147.87276f > 0;
293 3219039 : condition3 = gain3 > 1.2f * gain2 ||
294 2083760 : ( gain3 >= 0.8f * gain2 && 5 * peak_H1 > 2.0f * avrg_H1 ) ||
295 966179 : ( 2.6f * peak_l * avrg_h < peak_h * avrg_l || peak_l * avrg_h > 2.6f * peak_h * avrg_l );
296 50230 : condition4 = ( gain4 > 0.8f * gain11 && 2.56f * peak_l * avrg_h > peak_h * avrg_l && peak_l * avrg_h < 5.12f * peak_h * avrg_l ) ||
297 1069935 : ( gain4 > 0.3f * gain11 && 32 * peak_h < 1.5f * avrg_h && 5 * peak_H2 < 1.5f * avrg_H2 ) ||
298 2235162 : ( 2.56f * peak_l * avrg_h < peak_h * avrg_l && 32 * peak_h > 1.5f * avrg_h ) || ( peak_l * avrg_h > 2.56f * peak_h * avrg_l && 32 * peak_h < 1.5f * avrg_h );
299 :
300 1117581 : if ( ( ( brate >= HQ_MDCTCLASS_CROSSOVER_BRATE ) && st->input_Fs > 16000 && ( ( !condition1 && condition2 ) || ( condition1 && !condition2 ) || condition3 ) ) || ( ( ( brate < HQ_MDCTCLASS_CROSSOVER_BRATE ) || st->input_Fs == 16000 ) && condition4 ) )
301 : {
302 91470 : c = HQ_LOCAL;
303 : }
304 : else
305 : {
306 1026111 : c = TCX_LOCAL;
307 : }
308 :
309 : /* Smooth decision from instantaneous decision*/
310 1117581 : clas_sec = ( SMOOTH_FILT_COEFF * hTcxEnc->clas_sec_old ) + ( ( 1 - SMOOTH_FILT_COEFF ) * c );
311 :
312 : /* Do thresholding with hysteresis */
313 1117581 : if ( ( hTcxEnc->clas_final_old == HQ_CORE || hTcxEnc->clas_final_old == TCX_20_CORE ) && ( ( hTcxEnc->last_gain1 > 0.5f * gain1 && hTcxEnc->last_gain1 < 2.0f * gain1 ) && ( hTcxEnc->last_gain2 > 0.5f * gain2 && hTcxEnc->last_gain2 < 2.0f * gain2 ) ) )
314 : {
315 710043 : clas_final = hTcxEnc->clas_final_old;
316 : }
317 407538 : else if ( clas_sec > hTcxEnc->clas_sec_old && clas_sec > THRESH_UP ) /* Going up? */
318 : {
319 19807 : clas_final = HQ_CORE;
320 : }
321 387731 : else if ( clas_sec < THRESH_DOWN ) /* Going down */
322 : {
323 312698 : clas_final = TCX_20_CORE;
324 : }
325 : else
326 : {
327 75033 : clas_final = hTcxEnc->clas_final_old;
328 : }
329 :
330 : /* Prevent the usage of HQ_CORE on noisy-speech or inactive */
331 1117581 : if ( ( st->mdct_sw_enable == MODE2 || st->element_mode > EVS_MONO ) && ( st->flag_noisy_speech_snr == 1 || st->vad_flag == 0 ) && clas_final == HQ_CORE )
332 : {
333 948 : clas_final = TCX_20_CORE;
334 : }
335 :
336 : /* Restrict usage of HQ_core to supported operating range */
337 : /* EVS: brate == st->total_brate */
338 : /* IVAS: brate is the nominal bitrate while st->total_brate may fluctuate. This sets a hard limit for HQ at HQ_16k40 */
339 1117581 : if ( st->total_brate <= HQ_16k40 || brate < HQ_16k40 || st->bwidth == NB || brate > IVAS_48k )
340 : {
341 472332 : clas_final = TCX_20_CORE;
342 : }
343 :
344 : /* Memory update */
345 1117581 : hTcxEnc->clas_sec_old = clas_sec;
346 1117581 : hTcxEnc->clas_final_old = clas_final;
347 1117581 : hTcxEnc->last_gain1 = gain1;
348 1117581 : hTcxEnc->last_gain2 = gain2;
349 :
350 1117581 : return clas_final;
351 : }
352 :
353 : /*--------------------------------------------------------------------------*
354 : * MDCT_classifier_reset()
355 : *
356 : * reset MDCT classifier memories
357 : *--------------------------------------------------------------------------*/
358 :
359 129627 : void MDCT_classifier_reset(
360 : TCX_ENC_HANDLE hTcxEnc /* i/o: TCX Encoder Handle */
361 : )
362 : {
363 129627 : hTcxEnc->clas_sec_old = 1.0f;
364 129627 : hTcxEnc->clas_final_old = 1;
365 129627 : hTcxEnc->last_gain1 = 0.0f;
366 129627 : hTcxEnc->last_gain2 = 0.0f;
367 :
368 129627 : return;
369 : }
|