Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include <math.h>
43 : #include "cnst.h"
44 : #include "rom_enc.h"
45 : #include "rom_com.h"
46 : #include "prot.h"
47 : #include "wmc_auto.h"
48 : #include <assert.h>
49 : #include "ivas_cnst.h"
50 :
51 : /*-------------------------------------------------------------------*
52 : * Local prototypes
53 : *-------------------------------------------------------------------*/
54 :
55 : static void find_enr( const float data[], float band[], float *ptE, float *Etot, const int16_t min_band, const int16_t max_band, float *Bin_E, const int16_t bin_freq, float *band_ener );
56 :
57 : static void find_enr_dft( CPE_ENC_HANDLE hCPE, const int32_t input_Fs, float DFT_past_DMX[], float band[], float *ptE, float *Etot, const int16_t min_band, const int16_t max_band, float *Bin_E, float *band_ener );
58 :
59 : /*-------------------------------------------------------------------*
60 : * analy_sp()
61 : *
62 : * Spectral analysis
63 : *-------------------------------------------------------------------*/
64 :
65 1217770 : void analy_sp(
66 : const int16_t element_mode, /* i : element mode */
67 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
68 : const int32_t input_Fs, /* i : input sampling rate */
69 : float *speech, /* i : speech buffer */
70 : float *Bin_E, /* o : per bin log energy spectrum */
71 : float *Bin_E_old, /* o : per bin log energy spectrum for mid-frame */
72 : float *fr_bands, /* o : per band energy spectrum (2 analyses) */
73 : float lf_E[], /* o : per bin E for first VOIC_BINS bins (without DC) */
74 : float *Etot, /* o : total input energy */
75 : const int16_t min_band, /* i : minimum critical band */
76 : const int16_t max_band, /* i : maximum critical band */
77 : float *band_ener, /* o : energy in critical frequency bands without minimum noise floor E_MIN */
78 : float *PS, /* o : Per bin energy spectrum */
79 : float *fft_buff /* o : FFT coefficients */
80 : )
81 : {
82 : int16_t i_subfr, i;
83 : float *pt_bands, *pt_fft, *pt;
84 : const float *pt1;
85 :
86 : /*-----------------------------------------------------------------*
87 : * Perform two spectral analyses
88 : * Find energy per critical frequency band and total energy in dB
89 : *-----------------------------------------------------------------*/
90 :
91 1217770 : pt_bands = fr_bands;
92 1217770 : pt_fft = fft_buff;
93 1217770 : *Etot = 0.0f;
94 :
95 1217770 : if ( element_mode != IVAS_CPE_DFT )
96 : {
97 3474273 : for ( i_subfr = 0; i_subfr <= 1; i_subfr++ )
98 : {
99 : /* set pointer to the beginning of the signal for spectral analysis */
100 2316182 : if ( i_subfr == 0 )
101 : {
102 : /* set the pointer for first analysis window */
103 1158091 : pt = speech + 3 * ( L_SUBFR / 2 ) - L_FFT / 2;
104 : }
105 : else
106 : {
107 : /* set the pointer for second analysis window */
108 1158091 : pt = speech + 7 * ( L_SUBFR / 2 ) - L_FFT / 2;
109 : }
110 :
111 : /* 1st half of the window */
112 2316182 : pt1 = sqrt_han_window;
113 298787478 : for ( i = 0; i < L_FFT / 2; i++ )
114 : {
115 296471296 : pt_fft[i] = *pt++ * *pt1++;
116 : }
117 : /* 2nd half of the window */
118 298787478 : for ( i = L_FFT / 2; i < L_FFT; i++ )
119 : {
120 296471296 : pt_fft[i] = *pt++ * *pt1--;
121 : }
122 :
123 : /* compute the spectrum */
124 2316182 : fft_rel( pt_fft, L_FFT, LOG2_L_FFT );
125 :
126 : /* find energy per critical band */
127 2316182 : find_enr( pt_fft, pt_bands, lf_E + i_subfr * VOIC_BINS, Etot, min_band, max_band, Bin_E + i_subfr * ( L_FFT / 2 ), (int16_t) BIN, band_ener + i_subfr * NB_BANDS );
128 :
129 2316182 : pt_bands += NB_BANDS;
130 2316182 : pt_fft += L_FFT;
131 : }
132 : }
133 : else
134 : {
135 : /* find energy per critical band */
136 59679 : find_enr_dft( hCPE, input_Fs, hCPE->hStereoDft->DFT[0], pt_bands, lf_E, Etot, min_band, max_band, Bin_E, band_ener );
137 59679 : mvr2r( lf_E, lf_E + VOIC_BINS, VOIC_BINS );
138 59679 : mvr2r( Bin_E, Bin_E + ( L_FFT / 2 ), L_FFT / 2 );
139 59679 : mvr2r( band_ener, band_ener + NB_BANDS, NB_BANDS );
140 59679 : mvr2r( pt_bands, pt_bands + NB_BANDS, NB_BANDS );
141 59679 : *Etot *= 2.f;
142 : }
143 :
144 : /* Average total log energy over both half-frames */
145 1217770 : *Etot = 10.0f * (float) log10( 0.5f * *Etot );
146 :
147 : /* Per-bin log-energy spectrum */
148 1217770 : Bin_E[L_FFT / 2 - 1] = Bin_E[L_FFT / 2 - 2];
149 1217770 : Bin_E[L_FFT - 1] = Bin_E[L_FFT - 2];
150 :
151 157092330 : for ( i = 0; i < L_FFT / 2; i++ )
152 : {
153 155874560 : Bin_E_old[i] = Bin_E[i];
154 155874560 : PS[i] = ( Bin_E[i] + 1e-5f + Bin_E[i + L_FFT / 2] + 1e-5f ) / 2.0f;
155 155874560 : Bin_E[i] = (float) ( 10.0f * log( PS[i] ) );
156 : }
157 :
158 1217770 : return;
159 : }
160 :
161 : /*------------------------------------------------------------------------*
162 : * find_enr()
163 : *
164 : * find input signal energy for each critical band and first 74 LF bins
165 : * The energy is normalized by the number of frequency bins in a channel
166 : *------------------------------------------------------------------------*/
167 :
168 2316182 : static void find_enr(
169 : const float data[], /* i : fft result, for the format see fft_rel.c */
170 : float band[], /* o : per band energy */
171 : float *ptE, /* o : per bin energy for low frequencies */
172 : float *Etot, /* i/o: total energy */
173 : const int16_t min_band, /* i : minimum critical band */
174 : const int16_t max_band, /* i : maximum critical band */
175 : float *Bin_E, /* o : Per bin energy */
176 : const int16_t bin_freq, /* i : Number of frequency bins */
177 : float *band_ener /* o : per band energy without E_MIN */
178 : )
179 : {
180 : int16_t i, cnt;
181 : float freq, tmp;
182 : const float *ptR, *ptI;
183 : int16_t voic_band;
184 : float norm_val;
185 :
186 2316182 : norm_val = 4.0f / ( L_FFT * L_FFT );
187 :
188 2316182 : voic_band = VOIC_BAND_8k;
189 2316182 : if ( bin_freq == 50 )
190 : {
191 2316182 : voic_band = VOIC_BAND;
192 : }
193 :
194 2316182 : ptR = &data[1]; /* first real */
195 2316182 : ptI = &data[L_FFT - 1]; /* first imaginary */
196 :
197 : /* for low frequency bins, save per bin energy for the use in find_tilt() */
198 2316182 : freq = bin_freq;
199 41691276 : for ( i = 0; i < voic_band; i++ ) /* up to maximum allowed voiced critical band */
200 : {
201 39375094 : band[i] = 0.0f;
202 39375094 : cnt = 0;
203 210772562 : while ( freq <= crit_bands[i] )
204 : {
205 171397468 : *ptE = *ptR * *ptR + *ptI * *ptI; /* energy */
206 171397468 : *ptE *= norm_val; /* normalization - corresponds to FFT normalization by 2/L_FFT */
207 171397468 : *Bin_E++ = *ptE;
208 171397468 : band[i] += *ptE++;
209 171397468 : ptR++;
210 171397468 : ptI--;
211 :
212 171397468 : freq += bin_freq;
213 171397468 : cnt++;
214 : }
215 :
216 39375094 : band[i] *= inv_tbl[cnt]; /* normalization per frequency bin */
217 :
218 39375094 : band_ener[i] = band[i]; /* per band energy without E_MIN */
219 :
220 39375094 : if ( band[i] < E_MIN )
221 : {
222 4262681 : band[i] = E_MIN;
223 : }
224 : }
225 :
226 : /* continue computing the energy per critical band for higher frequencies */
227 2316182 : if ( bin_freq == 50 )
228 : {
229 9264728 : for ( i = voic_band; i < NB_BANDS; i++ )
230 : {
231 6948546 : band[i] = 0.0f;
232 6948546 : cnt = 0;
233 129706192 : while ( freq <= crit_bands[i] )
234 : {
235 122757646 : *Bin_E = *ptR * *ptR + *ptI * *ptI;
236 122757646 : *Bin_E *= norm_val;
237 122757646 : band[i] += *Bin_E;
238 122757646 : Bin_E++;
239 122757646 : ptR++;
240 122757646 : ptI--;
241 :
242 122757646 : freq += bin_freq;
243 122757646 : cnt++;
244 : }
245 :
246 6948546 : band[i] *= inv_tbl[cnt]; /* normalization per frequency bin */
247 :
248 6948546 : band_ener[i] = band[i]; /* per band energy without E_MIN */
249 :
250 6948546 : if ( band[i] < E_MIN )
251 : {
252 791067 : band[i] = E_MIN;
253 : }
254 : }
255 : }
256 :
257 : /* find the total log energy */
258 2316182 : tmp = *Etot;
259 48608902 : for ( i = min_band; i <= max_band; i++ )
260 : {
261 46292720 : tmp += band[i];
262 : }
263 :
264 2316182 : *Etot = tmp;
265 :
266 2316182 : return;
267 : }
268 :
269 : /*------------------------------------------------------------------------*
270 : * find_enr_dft()
271 : *
272 : * find input signal energy for each critical band using the DFT buffers
273 : *------------------------------------------------------------------------*/
274 :
275 59679 : static void find_enr_dft(
276 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder structure */
277 : const int32_t input_Fs, /* i : input sampling rate */
278 : float DFT_past_DMX[],
279 : float band[], /* o : per band energy */
280 : float *ptE, /* o : per bin energy for low frequencies */
281 : float *Etot, /* i/o: total energy */
282 : const int16_t min_band, /* i : minimum critical band */
283 : const int16_t max_band, /* i : maximum critical band */
284 : float *Bin_E, /* o : Per bin energy */
285 : float *band_ener /* o : per band energy without E_MIN */
286 : )
287 : {
288 : int16_t i, cnt;
289 : float freq, tmp;
290 : const float *ptR, *ptI;
291 : float norm_val;
292 59679 : int16_t bin_cnt = 0;
293 :
294 : float c, s;
295 : /* One window - 40ms*12.8kHz = 512 samples */
296 59679 : float c_1 = cosf( PI2 / STEREO_DFT_N_12k8_ENC );
297 59679 : float s_1 = sinf( PI2 / STEREO_DFT_N_12k8_ENC );
298 59679 : float g_1 = 1.f + 0.68f * 0.68f;
299 59679 : float g_2 = 2 * 0.68f;
300 : float g;
301 : float scaleWin;
302 :
303 : float BinE[STEREO_DFT_N_12k8_ENC / 2]; /* NB_BANDS = 20 (= 6350Hz) = highest band available for SR 12.8 -> bin_cnt = 158 */
304 59679 : float bin_freq = input_Fs / (float) hCPE->hStereoDft->NFFT; /* adaptive frequency bin width */
305 :
306 59679 : scaleWin = 1 / ( 2 * hCPE->hStereoDft->win_ana_energy );
307 59679 : scaleWin *= (float) BIN / bin_freq;
308 59679 : norm_val = scaleWin * 4.0f / ( hCPE->hStereoDft->NFFT * hCPE->hStereoDft->NFFT );
309 :
310 59679 : ptR = &DFT_past_DMX[2]; /* first real */
311 59679 : ptI = &DFT_past_DMX[3]; /* first imaginary */
312 59679 : c = c_1;
313 59679 : s = s_1;
314 :
315 : /* for low frequency bins, save per bin energy for the use in find_tilt() */
316 59679 : freq = bin_freq;
317 1193580 : for ( i = 0; i < NB_BANDS - 1; i++ ) /* up to maximum allowed voiced critical band */
318 : {
319 1133901 : band[i] = 0.0f;
320 1133901 : cnt = 0;
321 :
322 : /* bins up to crit_band 17 (<= 3700 Hz):
323 : * bin_cnt old (bin_width 50 Hz): 74 (74 * FRAMES_PER_SEC = 3700)
324 : * bin_cnt new (bin_width 40 Hz): 92 (92 * 40 = 3680)
325 : */
326 :
327 13785849 : while ( freq <= crit_bands[i] )
328 : {
329 12651948 : BinE[bin_cnt] = *ptR * *ptR + *ptI * *ptI; /* energy */
330 12651948 : g = g_1 - g_2 * c;
331 12651948 : tmp = c * c_1 - s * s_1;
332 12651948 : s = s * c_1 + c * s_1;
333 12651948 : c = tmp;
334 12651948 : BinE[bin_cnt] *= norm_val * g; /* normalization - corresponds to FFT normalization by 2/L_FFT */
335 12651948 : band[i] += BinE[bin_cnt];
336 12651948 : ptR += 2;
337 12651948 : ptI += 2;
338 :
339 12651948 : freq += bin_freq;
340 12651948 : cnt++;
341 12651948 : bin_cnt++;
342 : }
343 :
344 1133901 : band[i] *= inv_tbl[cnt]; /* normalization per frequency bin */
345 :
346 1133901 : band_ener[i] = band[i]; /* per band energy without E_MIN */
347 :
348 1133901 : if ( band[i] < E_MIN )
349 : {
350 67804 : band[i] = E_MIN;
351 : }
352 : }
353 :
354 : /* continue computing the energy per critical band for higher frequencies */
355 :
356 : /* old version, FFT 256 @ SR12.8 (-> bin_width = 50 Hz):
357 : NB_BANDS = 20 (= 6350Hz) = highest band available for SR 12.8 -> bin_cnt = 127 = L_FFT/2-1*/
358 :
359 : /* new version: DFT (1200/800/400) @ input SR (48/32/16) (-> bin_width = 40 Hz):
360 : *
361 : */
362 : /* NB_BANDS = 20 (= 6350Hz) = highest band available for SR 12.8 -> bin_cnt = 158 */
363 : /* NB_BANDS = 21 (= 7700Hz) = highest band available for SR 16 -> bin_cnt = 192 */
364 : /* NB_BANDS = 24 (= 15500Hz) = highest band available for SR 32 -> bin_cnt = 387 */
365 : /* NB_BANDS = 24 (= 15500Hz) = highest band available for SR 48 -> bin_cnt = 387 */
366 :
367 119358 : for ( ; i < NB_BANDS; i++ )
368 : {
369 59679 : band[i] = 0.0f;
370 59679 : cnt = 0;
371 2625876 : while ( freq < 6399.0f )
372 : {
373 2566197 : BinE[bin_cnt] = *ptR * *ptR + *ptI * *ptI;
374 2566197 : g = g_1 - g_2 * c;
375 2566197 : BinE[bin_cnt] *= norm_val * g;
376 2566197 : band[i] += BinE[bin_cnt];
377 2566197 : ptR += 2;
378 2566197 : ptI += 2;
379 :
380 2566197 : freq += bin_freq;
381 2566197 : cnt++;
382 2566197 : bin_cnt++;
383 : }
384 :
385 59679 : band[i] *= inv_tbl[cnt]; /* normalization per frequency bin */
386 :
387 59679 : band_ener[i] = band[i]; /* per band energy without E_MIN */
388 :
389 59679 : if ( band[i] < E_MIN )
390 : {
391 3053 : band[i] = E_MIN;
392 : }
393 : }
394 :
395 : /* put bin energies from BinE into Bin_E[L_FFT/2-1] (interpolate 40 Hz bin values to fit into 50 Hz bins) */
396 : /* Last value of Bin_E is handled outside this function*/
397 59679 : assert( bin_cnt == ( STEREO_DFT_N_12k8_ENC / 2 - 1 ) );
398 59679 : BinE[STEREO_DFT_N_12k8_ENC / 2 - 1] = BinE[STEREO_DFT_N_12k8_ENC / 2 - 2];
399 59679 : lerp( BinE, Bin_E, L_FFT / 2, STEREO_DFT_N_12k8_ENC / 2 );
400 59679 : mvr2r( Bin_E, ptE, VOIC_BINS );
401 :
402 :
403 : /* find the total log energy */
404 59679 : tmp = *Etot;
405 1253259 : for ( i = min_band; i <= max_band; i++ )
406 : {
407 1193580 : tmp += band[i];
408 : }
409 :
410 59679 : *Etot = tmp;
411 :
412 59679 : return;
413 : }
|