Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include <math.h>
43 : #include "cnst.h"
44 : #include "prot.h"
45 : #include "rom_com.h"
46 : #include "rom_enc.h"
47 : #include "wmc_auto.h"
48 : #include "ivas_prot.h"
49 : #include "ivas_rom_enc.h"
50 :
51 : /*-------------------------------------------------------------------*
52 : * Local constants
53 : * NOTE(review): CLDFB_NO_CHANNELS_HB is not referenced in the code visible in this file; swb_pre_proc() uses the literal 20 for the width of its flipped CLDFB buffers, which is presumably what this constant is meant to name - TODO confirm
54 : *-------------------------------------------------------------------*/
55 :
56 : #define CLDFB_NO_CHANNELS_HB 20
57 :
58 :
59 : /*-------------------------------------------------------------------*
60 : * wb_pre_proc()
61 : *
62 : * - Resampling of input signal when input signal sampling rate
63 : * is above 16kHz
64 : * - Common WB TBE and WB BWE pre-processing
65 : *-------------------------------------------------------------------*/
66 :
67 43722 : void wb_pre_proc(
68 : Encoder_State *st, /* i/o: encoder state structure */
69 : const int16_t last_element_mode, /* i : last element mode */
70 : const float *new_inp_resamp16k, /* i : original input signal */
71 : float *hb_speech /* o : HB target signal (6-8kHz) at 16kHz */
72 : )
73 : {
74 : int16_t Sample_Delay_WB_BWE, ramp_flag;
75 : TD_BWE_ENC_HANDLE hBWE_TD;
76 : FD_BWE_ENC_HANDLE hBWE_FD;
77 : float decim_state1[( 2 * ALLPASSSECTIONS_STEEP + 1 )], decim_state2[( 2 * ALLPASSSECTIONS_STEEP + 1 )];
78 : float old_input[NS2SA( 16000, DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS ) + STEREO_DFT_OVL_16k + L_FRAME16k];
79 : float *highband_new_speech, highband_old_speech[( L_LOOK_12k8 + L_SUBFR + L_FRAME ) * 5 / 16];
80 43722 : int16_t fSwitchFromIO = 0;
81 : int16_t ppp_mode;
82 :
83 43722 : hBWE_TD = st->hBWE_TD;
84 43722 : hBWE_FD = st->hBWE_FD;
85 :
86 43722 : if ( st->Opt_SC_VBR )
87 : {
88 0 : ppp_mode = st->hSC_VBR->ppp_mode;
89 : }
90 : else
91 : {
92 43722 : ppp_mode = 0;
93 : }
94 :
95 43722 : if ( ( st->last_total_brate == ACELP_6k60 ) ||
96 43717 : ( st->last_total_brate == ACELP_8k85 ) ||
97 43716 : ( st->last_total_brate == ACELP_12k65 ) ||
98 43669 : ( st->last_total_brate == ACELP_14k25 ) ||
99 43569 : ( st->last_total_brate == ACELP_15k85 ) ||
100 43365 : ( st->last_total_brate == ACELP_18k25 ) ||
101 43354 : ( st->last_total_brate == ACELP_19k85 ) ||
102 43352 : ( st->last_total_brate == ACELP_23k05 ) ||
103 43249 : ( st->last_total_brate == ACELP_23k85 ) )
104 : {
105 546 : fSwitchFromIO = 1;
106 : }
107 :
108 43722 : set_f( old_input, 0, NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ) + STEREO_DFT_OVL_16k + L_FRAME16k );
109 :
110 43722 : if ( st->extl == WB_BWE || st->extl == WB_TBE || st->igf )
111 : {
112 20980 : ramp_flag = 0;
113 20980 : if ( ( st->last_extl != WB_TBE && st->last_extl != WB_BWE && !st->igf ) || ( st->igf && fSwitchFromIO ) )
114 : {
115 603 : ramp_flag = 1;
116 : }
117 :
118 20980 : if ( !ppp_mode )
119 : {
120 20980 : if ( st->element_mode == IVAS_CPE_DFT )
121 : {
122 5033 : Sample_Delay_WB_BWE = NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS );
123 :
124 5033 : if ( last_element_mode == IVAS_CPE_TD )
125 : {
126 1 : set_f( hBWE_TD->decim_state1, 0, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
127 1 : set_f( hBWE_TD->decim_state2, 0, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
128 1 : ramp_flag = 1;
129 : }
130 :
131 : /*Get past signal*/
132 5033 : mvr2r( hBWE_FD->old_wtda_swb + L_FRAME16k - STEREO_DFT_OVL_16k, old_input, STEREO_DFT_OVL_16k );
133 5033 : mvr2r( hBWE_FD->old_input_wb, old_input + STEREO_DFT_OVL_16k, Sample_Delay_WB_BWE );
134 :
135 : /*Get new signal*/
136 5033 : mvr2r( new_inp_resamp16k, &old_input[Sample_Delay_WB_BWE + STEREO_DFT_OVL_16k], L_FRAME16k );
137 :
138 : /*compute hb_speech on delayed input*/
139 5033 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
140 :
141 : /*Update memory*/
142 5033 : mvr2r( hb_speech, hBWE_TD->old_speech_wb + ( L_SUBFR * 5 / 16 ), STEREO_DFT_OVL_16k / 4 );
143 5033 : mvr2r( hb_speech + STEREO_DFT_OVL_16k / 4, hb_speech, ( L_FRAME16k - STEREO_DFT_OVL_16k ) / 4 );
144 :
145 : /*rest without memory update*/
146 5033 : mvr2r( hBWE_TD->decim_state1, decim_state1, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
147 5033 : mvr2r( hBWE_TD->decim_state2, decim_state2, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
148 :
149 5033 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE + L_FRAME16k, hb_speech + ( L_FRAME16k - STEREO_DFT_OVL_16k ) / 4, STEREO_DFT_OVL_16k, decim_state1, decim_state2, 0 );
150 : }
151 15947 : else if ( st->element_mode == IVAS_CPE_TD )
152 : {
153 1199 : int16_t l_recalc_16k = L_MEM_RECALC_16K + L_FILT16k + 1; /* Note: "+1" is used because L_FILT16k is not divisible by 4 */
154 1199 : int16_t l_recalc_4k = ( L_MEM_RECALC_16K + L_FILT16k + 1 ) / 4;
155 1199 : Sample_Delay_WB_BWE = NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS );
156 :
157 1199 : if ( last_element_mode == IVAS_CPE_DFT )
158 : {
159 17 : set_f( hBWE_TD->decim_state1, 0, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
160 17 : set_f( hBWE_TD->decim_state2, 0, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
161 17 : ramp_flag = 1;
162 : }
163 :
164 : /*Get past signal*/
165 1199 : mvr2r( hBWE_FD->old_wtda_swb + L_FRAME16k - l_recalc_16k, old_input, l_recalc_16k );
166 1199 : mvr2r( hBWE_FD->old_input_wb, old_input + l_recalc_16k, Sample_Delay_WB_BWE );
167 :
168 1199 : old_input[Sample_Delay_WB_BWE] = hBWE_FD->mem_old_wtda_swb;
169 :
170 : /*Get new signal*/
171 1199 : mvr2r( new_inp_resamp16k, old_input + Sample_Delay_WB_BWE + l_recalc_16k, L_FRAME16k );
172 :
173 : /*compute hb_speech on delayed input*/
174 1199 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
175 :
176 : /*Update memory*/
177 1199 : mvr2r( hb_speech, hBWE_TD->old_speech_wb + ( ( L_LOOK_12k8 + L_SUBFR ) * 5 / 16 ) - l_recalc_4k, l_recalc_4k );
178 1199 : mvr2r( hb_speech + l_recalc_4k, hb_speech, ( L_FRAME16k / 4 ) - l_recalc_4k );
179 :
180 : /*rest without memory update*/
181 1199 : mvr2r( hBWE_TD->decim_state1, decim_state1, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
182 1199 : mvr2r( hBWE_TD->decim_state2, decim_state2, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
183 :
184 1199 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE + L_FRAME16k, hb_speech + ( L_FRAME16k / 4 ) - l_recalc_4k, l_recalc_16k, decim_state1, decim_state2, 0 );
185 : }
186 14748 : else if ( st->element_mode == IVAS_SCE )
187 : {
188 14748 : if ( st->input_Fs == 16000 )
189 : {
190 0 : flip_spectrum_and_decimby4( new_inp_resamp16k, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
191 : }
192 : else
193 : {
194 14748 : int16_t l_recalc_16k = L_FILT16k + 1; /* Note: "+1" is used because L_FILT16k is not divisible by 4 */
195 14748 : int16_t l_recalc_4k = ( L_FILT16k + 1 ) / 4;
196 :
197 14748 : Sample_Delay_WB_BWE = NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS );
198 :
199 : /*Get past signal*/
200 14748 : mvr2r( hBWE_FD->old_wtda_swb + L_FRAME16k - l_recalc_16k, old_input, l_recalc_16k );
201 14748 : mvr2r( hBWE_FD->old_input_wb, old_input + l_recalc_16k, Sample_Delay_WB_BWE );
202 :
203 : /*Get new signal*/
204 14748 : mvr2r( new_inp_resamp16k, old_input + Sample_Delay_WB_BWE + l_recalc_16k, L_FRAME16k );
205 :
206 : /*compute hb_speech on delayed input*/
207 14748 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
208 :
209 : /* update hBWE_TD->old_speech_wb memory */
210 14748 : mvr2r( hb_speech, hBWE_TD->old_speech_wb + ( ( L_LOOK_12k8 + L_SUBFR ) * 5 / 16 ) - l_recalc_4k, l_recalc_4k );
211 14748 : mvr2r( hb_speech + l_recalc_4k, hb_speech, ( L_FRAME16k / 4 ) - l_recalc_4k );
212 :
213 : /*rest without memory update*/
214 14748 : mvr2r( hBWE_TD->decim_state1, decim_state1, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
215 14748 : mvr2r( hBWE_TD->decim_state2, decim_state2, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
216 :
217 14748 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE + L_FRAME16k, hb_speech + ( L_FRAME16k / 4 ) - l_recalc_4k, l_recalc_16k, decim_state1, decim_state2, 0 );
218 : }
219 : }
220 : else
221 : {
222 0 : flip_spectrum_and_decimby4( new_inp_resamp16k, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
223 : }
224 :
225 20980 : if ( st->extl != WB_TBE )
226 : {
227 : /* Update the previous wideband speech buffer in case of a WB_BWE frame - this code is in wb_tbe_enc */
228 13739 : Sample_Delay_WB_BWE = ( L_LOOK_12k8 + L_SUBFR ) * 5 / 16;
229 :
230 13739 : highband_new_speech = highband_old_speech + Sample_Delay_WB_BWE;
231 :
232 13739 : mvr2r( hBWE_TD->old_speech_wb, highband_old_speech, Sample_Delay_WB_BWE );
233 13739 : mvr2r( hb_speech, highband_new_speech, L_FRAME16k / 4 );
234 13739 : mvr2r( highband_old_speech + L_FRAME16k / 4, hBWE_TD->old_speech_wb, Sample_Delay_WB_BWE );
235 : }
236 : }
237 : }
238 : else
239 : {
240 22742 : set_f( hBWE_TD->decim_state1, 0.0f, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
241 22742 : set_f( hBWE_TD->decim_state2, 0.0f, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
242 22742 : set_f( hBWE_TD->old_speech_wb, 0.0f, ( L_LOOK_12k8 + L_SUBFR ) * 5 / 16 );
243 : }
244 :
245 43722 : if ( hBWE_FD != NULL )
246 : {
247 43722 : hBWE_FD->mem_old_wtda_swb = new_inp_resamp16k[L_FRAME16k - L_MEM_RECALC_16K - L_FILT16k - 1];
248 : }
249 :
250 : /* st->old_input_wb and st->old_wtda_wb must be updated each frame, or there are often some clicks during WB TBE <-> WB BWE switching */
251 43722 : if ( ( st->extl != WB_BWE || ( st->extl == WB_BWE && st->extl_brate == 0 ) ) && !ppp_mode )
252 : {
253 39622 : Sample_Delay_WB_BWE = NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS );
254 :
255 39622 : mvr2r( new_inp_resamp16k, &old_input[Sample_Delay_WB_BWE], L_FRAME16k );
256 39622 : mvr2r( hBWE_FD->old_input_wb, old_input, Sample_Delay_WB_BWE );
257 39622 : mvr2r( new_inp_resamp16k + L_FRAME16k - Sample_Delay_WB_BWE, hBWE_FD->old_input_wb, Sample_Delay_WB_BWE );
258 39622 : if ( ( st->extl != SWB_BWE ) && ( st->extl != FB_BWE ) )
259 : {
260 39622 : mvr2r( old_input, hBWE_FD->old_wtda_swb, L_FRAME16k );
261 : }
262 : }
263 :
264 43722 : return;
265 : }
266 :
267 :
268 : /*-------------------------------------------------------------------*
269 : * swb_pre_proc()
270 : *
271 : * - Calculate the 6 to 14 kHz (or 7.5 - 15.5 kHz) SHB target signal
272 : * for SWB TBE or SWB BWE coding
273 : * - Common SWB TBE and SWB BWE pre-processing
274 : *-------------------------------------------------------------------*/
275 :
276 399091 : void swb_pre_proc(
277 : Encoder_State *st, /* i/o: encoder state structure */
278 : float *new_swb_speech, /* o : original input signal at 32kHz */
279 : float *shb_speech, /* o : SHB target signal (6-14kHz) at 16kHz */
280 : float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : real buffer */
281 : float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : imag buffer */
282 : CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */
283 : )
284 : {
285 : int16_t Sample_Delay_SWB_BWE, inner_frame, delay;
286 : TD_BWE_ENC_HANDLE hBWE_TD;
287 : FD_BWE_ENC_HANDLE hBWE_FD;
288 : int32_t inner_Fs, input_Fs;
289 : float old_input[NS2SA( 48000, DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS ) + L_FRAME48k];
290 : float spchTmp[L_FRAME32k], spchTmp2[L_FRAME32k];
291 : int16_t i, j, L_resamp;
292 : int16_t startB, endB;
293 : float *realBufferFlipped[CLDFB_NO_COL_MAX];
294 : float *imagBufferFlipped[CLDFB_NO_COL_MAX];
295 : float realBufferTmp[CLDFB_NO_COL_MAX][20];
296 : float imagBufferTmp[CLDFB_NO_COL_MAX][20];
297 : int16_t ts, nB, uB;
298 : float sign, lbEner, v, t, regression;
299 : const float *thr, *regV;
300 : int16_t Sample_Delay_SWB_BWE32k, lMemRecalc32k, dft_ovl32k;
301 :
302 399091 : lMemRecalc32k = NS2SA( 32000, L_MEM_RECALC_NS );
303 :
304 : /* initialization */
305 399091 : hBWE_TD = st->hBWE_TD;
306 399091 : hBWE_FD = st->hBWE_FD;
307 399091 : input_Fs = st->input_Fs;
308 :
309 6784547 : for ( j = 0; j < CLDFB_NO_COL_MAX; j++ )
310 : {
311 6385456 : set_f( realBufferTmp[j], 0, 20 );
312 6385456 : set_f( imagBufferTmp[j], 0, 20 );
313 6385456 : realBufferFlipped[j] = realBufferTmp[j];
314 6385456 : imagBufferFlipped[j] = imagBufferTmp[j];
315 : }
316 :
317 399091 : set_f( old_input, 0.0f, NS2SA( 48000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ) + L_FRAME48k );
318 :
319 399091 : if ( input_Fs == 32000 )
320 : {
321 58507 : if ( st->element_mode > EVS_MONO )
322 : {
323 57457 : Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS );
324 57457 : if ( st->L_frame == L_FRAME16k )
325 : {
326 32484 : Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS );
327 : }
328 :
329 57457 : mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
330 57457 : mvr2r( st->input - L_FRAME32k, hBWE_FD->old_fdbwe_speech, L_FRAME32k );
331 :
332 57457 : if ( st->element_mode == IVAS_CPE_TD && st->bwidth >= SWB )
333 : {
334 3947 : mvr2r( st->input - hCPE->hStereoTCA->lMemRecalc, hBWE_FD->old_wtda_swb + L_FRAME32k - ( hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE ), hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE );
335 : }
336 53510 : else if ( st->element_mode == IVAS_CPE_DFT && st->bwidth >= SWB )
337 : {
338 26896 : mvr2r( st->input - hCPE->hStereoDft->dft_ovl, hBWE_FD->old_wtda_swb + L_FRAME32k - ( hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ), hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE );
339 : }
340 : }
341 :
342 58507 : mvr2r( st->input, new_swb_speech, L_FRAME32k );
343 :
344 58507 : if ( st->last_extl != SWB_BWE && st->last_extl != FB_BWE && st->extl != SWB_BWE_HIGHRATE )
345 : {
346 57541 : Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS );
347 57541 : if ( st->element_mode > EVS_MONO && st->L_frame == L_FRAME16k )
348 : {
349 32363 : Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS + DELAY_FIR_RESAMPL_NS );
350 : }
351 57541 : if ( st->element_mode > EVS_MONO )
352 : {
353 56497 : Sample_Delay_SWB_BWE -= NS2SA( 32000, DELAY_FIR_RESAMPL_NS );
354 : }
355 :
356 57541 : mvr2r( hBWE_FD->old_fdbwe_speech, &old_input[Sample_Delay_SWB_BWE], L_FRAME32k );
357 :
358 57541 : set_f( old_input, 0, Sample_Delay_SWB_BWE );
359 57541 : mvr2r( hBWE_FD->old_fdbwe_speech + L_FRAME32k - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
360 57541 : if ( st->extl != WB_BWE )
361 : {
362 57505 : mvr2r( old_input, hBWE_FD->old_wtda_swb, L_FRAME32k );
363 : }
364 : }
365 :
366 58507 : if ( st->extl != SWB_BWE && st->extl != FB_BWE )
367 : {
368 57532 : mvr2r( st->input, hBWE_FD->old_fdbwe_speech, L_FRAME32k );
369 : }
370 : }
371 : else /* 48 kHz */
372 : {
373 :
374 340584 : Sample_Delay_SWB_BWE32k = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS );
375 340584 : Sample_Delay_SWB_BWE = NS2SA( 48000, DELAY_FD_BWE_ENC_12k8_NS );
376 340584 : if ( st->L_frame == L_FRAME16k )
377 : {
378 121156 : Sample_Delay_SWB_BWE32k = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS );
379 121156 : Sample_Delay_SWB_BWE = NS2SA( 48000, DELAY_FD_BWE_ENC_16k_NS );
380 : }
381 :
382 340584 : dft_ovl32k = 0;
383 340584 : if ( st->element_mode == IVAS_CPE_DFT )
384 : {
385 20882 : dft_ovl32k = (int16_t) ( STEREO_DFT_OVL_MAX * 32000 / 48000 );
386 : }
387 :
388 340584 : if ( st->codec_mode == MODE1 )
389 : {
390 339609 : if ( st->element_mode > EVS_MONO )
391 : {
392 :
393 338534 : if ( st->element_mode == IVAS_CPE_TD )
394 : {
395 : }
396 336497 : else if ( st->bwidth == FB )
397 : {
398 210242 : mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
399 : }
400 :
401 338534 : mvr2r( st->input - L_FRAME48k, hBWE_FD->old_fdbwe_speech, L_FRAME48k );
402 :
403 338534 : if ( st->element_mode == IVAS_CPE_TD && st->bwidth >= SWB )
404 : {
405 1920 : if ( st->bwidth == SWB )
406 : {
407 : /* buffers hBWE_FD->old_input[] and hBWE_FD->old_wtda_swb[] need to be at 32 kHz (inner) sampling rate */
408 :
409 430 : decimate_2_over_3_allpass( st->input - hCPE->hStereoTCA->lMemRecalc, hCPE->hStereoTCA->lMemRecalc, spchTmp, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
410 :
411 430 : mvr2r( spchTmp, hBWE_FD->old_wtda_swb + L_FRAME32k - ( lMemRecalc32k - Sample_Delay_SWB_BWE32k ), lMemRecalc32k - Sample_Delay_SWB_BWE32k );
412 430 : mvr2r( spchTmp + lMemRecalc32k - Sample_Delay_SWB_BWE32k, hBWE_FD->old_input, Sample_Delay_SWB_BWE32k );
413 : }
414 : else /* FB_BWE */
415 : {
416 1490 : mvr2r( st->input - hCPE->hStereoTCA->lMemRecalc, hBWE_FD->old_wtda_swb + L_FRAME48k - ( hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE ), hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE );
417 1490 : mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
418 : }
419 : }
420 336614 : else if ( st->element_mode == IVAS_CPE_DFT && st->bwidth >= SWB )
421 : {
422 19868 : if ( st->bwidth == SWB )
423 : {
424 10051 : lerp( st->input - hCPE->hStereoDft->dft_ovl, spchTmp, dft_ovl32k - Sample_Delay_SWB_BWE32k, hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE );
425 :
426 10051 : mvr2r( spchTmp, hBWE_FD->old_wtda_swb + L_FRAME32k - ( dft_ovl32k - Sample_Delay_SWB_BWE32k ), dft_ovl32k - Sample_Delay_SWB_BWE32k );
427 : }
428 : else
429 : {
430 9817 : mvr2r( st->input - hCPE->hStereoDft->dft_ovl, hBWE_FD->old_wtda_swb + L_FRAME48k - ( hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ), hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE );
431 : }
432 : }
433 : }
434 :
435 339609 : if ( ( st->extl != SWB_BWE && st->extl != FB_BWE && st->core == ACELP_CORE ) || ( st->element_mode == IVAS_CPE_DFT && st->core != ACELP_CORE ) /*resampling not needed for MDCT cores*/ )
436 : {
437 : /* move the resampling out of the TDBWE path as new_swb_speech is not needed for TDBWE. */
438 116176 : mvr2r( st->input, hBWE_FD->old_fdbwe_speech, L_FRAME48k );
439 : }
440 : else
441 : {
442 223433 : if ( st->last_extl != SWB_BWE && st->last_extl != FB_BWE )
443 : {
444 : /* resample 48 kHz to 32kHz */
445 : #ifdef NONBE_1244_FIX_SWB_BWE_MEMORY
446 213478 : if ( ( st->last_bwidth == FB && st->element_mode == EVS_MONO ) || ( st->bwidth == FB && st->element_mode > EVS_MONO ) ) // note: once EVS i CR fixed, the condition will simplify to "if ( st->bwidth == FB )" only
447 : #else
448 : if ( st->last_bwidth == FB )
449 : #endif
450 : {
451 157915 : inner_frame = L_FRAME48k;
452 157915 : inner_Fs = 48000;
453 157915 : mvr2r( hBWE_FD->old_fdbwe_speech, new_swb_speech, L_FRAME48k );
454 : }
455 : else
456 : {
457 55563 : inner_frame = L_FRAME32k;
458 55563 : inner_Fs = 32000;
459 :
460 55563 : if ( st->element_mode != IVAS_CPE_DFT )
461 : {
462 55337 : decimate_2_over_3_allpass( hBWE_FD->old_fdbwe_speech, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
463 : }
464 : else
465 : {
466 226 : lerp( hBWE_FD->old_fdbwe_speech, new_swb_speech, inner_frame, L_FRAME48k );
467 : }
468 :
469 55563 : if ( st->element_mode == IVAS_CPE_DFT && st->idchan == 0 )
470 : {
471 63506 : for ( i = 0; i < STEREO_DFT_OVL_32k; i++ )
472 : {
473 63280 : hCPE->hStereoDft->output_mem_dmx_32k[i] = new_swb_speech[inner_frame - STEREO_DFT_OVL_32k + i] * hCPE->hStereoDft->win_32k[STEREO_DFT_OVL_32k - 1 - i];
474 : }
475 : }
476 : }
477 :
478 213478 : Sample_Delay_SWB_BWE = NS2SA( inner_Fs, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS );
479 213478 : if ( st->element_mode > EVS_MONO && st->L_frame == L_FRAME16k )
480 : {
481 60470 : Sample_Delay_SWB_BWE = NS2SA( inner_Fs, DELAY_FD_BWE_ENC_16k_NS + DELAY_FIR_RESAMPL_NS );
482 : }
483 213478 : if ( st->element_mode > EVS_MONO )
484 : {
485 213072 : Sample_Delay_SWB_BWE -= NS2SA( inner_Fs, DELAY_FIR_RESAMPL_NS );
486 : }
487 :
488 213478 : mvr2r( new_swb_speech, &old_input[Sample_Delay_SWB_BWE], inner_frame );
489 213478 : set_f( old_input, 0, Sample_Delay_SWB_BWE );
490 213478 : mvr2r( new_swb_speech + inner_frame - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
491 213478 : mvr2r( old_input, hBWE_FD->old_wtda_swb, inner_frame );
492 : }
493 :
494 : /* resample 48 kHz to 32kHz */
495 223433 : if ( st->bwidth == FB )
496 : {
497 162494 : mvr2r( st->input, new_swb_speech, L_FRAME48k );
498 : }
499 : else
500 : {
501 60939 : if ( st->element_mode == IVAS_CPE_TD )
502 : {
503 : float dec_2_over_3_mem_tmp[L_FILT_2OVER3], dec_2_over_3_mem_lp_tmp[L_FILT_2OVER3_LP];
504 :
505 19 : decimate_2_over_3_allpass( st->input, L_FRAME48k - hCPE->hStereoTCA->lMemRecalc, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
506 :
507 19 : mvr2r( hBWE_TD->dec_2_over_3_mem, dec_2_over_3_mem_tmp, L_FILT_2OVER3 );
508 19 : mvr2r( hBWE_TD->dec_2_over_3_mem_lp, dec_2_over_3_mem_lp_tmp, L_FILT_2OVER3_LP );
509 :
510 19 : decimate_2_over_3_allpass( st->input + L_FRAME48k - hCPE->hStereoTCA->lMemRecalc, hCPE->hStereoTCA->lMemRecalc, new_swb_speech + L_FRAME32k - lMemRecalc32k, dec_2_over_3_mem_tmp, dec_2_over_3_mem_lp_tmp );
511 : }
512 60920 : else if ( st->element_mode != IVAS_CPE_DFT )
513 : {
514 59350 : decimate_2_over_3_allpass( st->input, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
515 : }
516 : else /* IVAS_CPE_DFT */
517 : {
518 1570 : stereo_dft_enc_synthesize( hCPE->hStereoDft, new_swb_speech, st->idchan, input_Fs, 32000, 0 );
519 :
520 1570 : mvr2r( new_swb_speech - Sample_Delay_SWB_BWE32k, hBWE_FD->old_input, Sample_Delay_SWB_BWE32k );
521 : }
522 : }
523 : }
524 : }
525 : else
526 : {
527 : /* resample 48 kHz to 32kHz */
528 975 : if ( st->bwidth == FB )
529 : {
530 0 : mvr2r( st->input, new_swb_speech, L_FRAME48k );
531 : }
532 : else
533 : {
534 975 : decimate_2_over_3_allpass( st->input, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
535 : }
536 : }
537 : }
538 :
539 399091 : if ( ( st->core == ACELP_CORE && st->extl != SWB_BWE_HIGHRATE && st->extl != FB_BWE_HIGHRATE ) ||
540 238920 : ( ( st->total_brate == ACELP_9k60 || st->rf_mode ) && st->bwidth == SWB && st->element_mode == EVS_MONO ) )
541 160171 : {
542 160171 : float CldfbHB = 0;
543 :
544 160171 : if ( st->element_mode == IVAS_CPE_DFT )
545 : {
546 35725 : CldfbHB = stereo_dft_enc_synthesize( hCPE->hStereoDft, old_input + STEREO_DFT_OVL_16k, st->idchan, input_Fs, 16000, st->L_frame );
547 :
548 : /* delay corresponding to CLDFB delay */
549 35725 : mvr2r( old_input + STEREO_DFT_OVL_16k - 20, shb_speech, L_FRAME16k );
550 35725 : mvr2r( old_input, hBWE_TD->old_speech_shb + L_LOOK_16k + L_SUBFR16k - ( STEREO_DFT_OVL_16k - 20 ), STEREO_DFT_OVL_16k - 20 );
551 35725 : mvr2r( old_input, hCPE->hStereoICBWE->mem_shb_speech_ref, STEREO_DFT_OVL_16k - 20 );
552 :
553 35725 : if ( CldfbHB <= 0 )
554 : {
555 1079 : CldfbHB = 1.0f;
556 : }
557 35725 : hBWE_TD->cldfbHBLT = 0.9f * hBWE_TD->cldfbHBLT + 0.1f * ( 0.221462f /*=1/log10(32768)*/ * ( log10f( CldfbHB ) - 1.0f ) );
558 :
559 35725 : lbEner = 0.05f * (float) sqrt( hCPE->hStereoDft->lbEner );
560 35725 : hCPE->hStereoICBWE->icbweRefEner = 0.05f * (float) sqrt( hCPE->hStereoDft->icbweRefEner );
561 35725 : lbEner = 0.05f * (float) sqrt( hCPE->hStereoDft->lbEner );
562 35725 : thr = icbwe_thr_DFT;
563 35725 : regV = icbwe_regressionValuesDFT;
564 : }
565 : else
566 : {
567 124446 : if ( st->L_frame == L_FRAME )
568 : {
569 68402 : startB = 34;
570 68402 : endB = 14;
571 1162834 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
572 : {
573 22983072 : for ( nB = startB, uB = 0; nB > endB; nB--, uB++ )
574 : {
575 21888640 : sign = ( ts % 2 ) ? 1.0f : -1.0f;
576 21888640 : realBufferFlipped[ts][uB] = -sign * realBuffer[ts][nB];
577 21888640 : imagBufferFlipped[ts][uB] = sign * imagBuffer[ts][nB];
578 : }
579 : }
580 : }
581 : else
582 : {
583 56044 : startB = 39;
584 56044 : endB = 19;
585 952748 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
586 : {
587 18830784 : for ( nB = startB, uB = 0; nB > endB; nB--, uB++ )
588 : {
589 17934080 : realBufferFlipped[ts][uB] = -realBuffer[ts][nB];
590 17934080 : imagBufferFlipped[ts][uB] = imagBuffer[ts][nB];
591 : }
592 : }
593 : }
594 :
595 1368906 : for ( nB = 0; nB < 10; nB++ )
596 : {
597 21155820 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
598 : {
599 19911360 : CldfbHB += ( realBufferFlipped[ts][nB] * realBufferFlipped[ts][nB] + imagBufferFlipped[ts][nB] * imagBufferFlipped[ts][nB] );
600 : }
601 : }
602 124446 : if ( CldfbHB <= 0 )
603 : {
604 9 : CldfbHB = 1.0f;
605 : }
606 124446 : hBWE_TD->cldfbHBLT = 0.9f * hBWE_TD->cldfbHBLT + 0.1f * ( 0.221462f /*=1/log10(32768)*/ * ( log10f( CldfbHB ) - 1.0f ) );
607 :
608 124446 : if ( st->element_mode >= IVAS_CPE_DFT && hCPE->hStereoICBWE != NULL )
609 : {
610 35 : hCPE->hStereoICBWE->icbweRefEner = EPSILON;
611 735 : for ( nB = 20; nB < 40; nB++ )
612 : {
613 11900 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
614 : {
615 11200 : hCPE->hStereoICBWE->icbweRefEner += ( realBuffer[ts][nB] * realBuffer[ts][nB] + imagBuffer[ts][nB] * imagBuffer[ts][nB] );
616 : }
617 : }
618 35 : hCPE->hStereoICBWE->icbweRefEner = 0.05f * sqrtf( hCPE->hStereoICBWE->icbweRefEner );
619 : }
620 :
621 124446 : lbEner = EPSILON;
622 2613366 : for ( nB = 0; nB < 20; nB++ )
623 : {
624 42311640 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
625 : {
626 39822720 : lbEner += ( realBuffer[ts][nB] * realBuffer[ts][nB] + imagBuffer[ts][nB] * imagBuffer[ts][nB] );
627 : }
628 : }
629 124446 : lbEner = 0.05f * sqrtf( lbEner );
630 124446 : thr = icbwe_thr_TDM;
631 124446 : regV = icbwe_regressionValuesTDM;
632 :
633 124446 : cldfbSynthesis( realBufferFlipped, imagBufferFlipped, shb_speech, -1, st->cldfbSynTd );
634 : }
635 :
636 160171 : if ( st->element_mode >= IVAS_CPE_DFT && hCPE->hStereoICBWE != NULL )
637 : {
638 35760 : hCPE->hStereoICBWE->MSFlag = 0; /* Init the multi-source flag */
639 35760 : v = 0.3333f * sum_f( st->voicing, 3 );
640 35760 : t = log10f( ( hCPE->hStereoICBWE->icbweRefEner + 1e-6f ) / ( lbEner + 1e-6f ) );
641 :
642 : /* Three Level Decision Tree to calculate a regression value first */
643 35760 : if ( t < thr[0] ) /* level 1 */
644 : {
645 26551 : if ( t < thr[1] ) /* level 2 */
646 : {
647 23236 : regression = ( v < thr[3] ) ? regV[0] : regV[1]; /* level 3 */
648 : }
649 : else
650 : {
651 3315 : regression = ( v < thr[4] ) ? regV[2] : regV[3]; /* level 3 */
652 : }
653 : }
654 : else
655 : {
656 9209 : if ( t < thr[2] ) /* level 2 */
657 : {
658 2366 : regression = ( v < thr[5] ) ? regV[4] : regV[5]; /* level 3 */
659 : }
660 : else
661 : {
662 6843 : regression = ( v < thr[6] ) ? regV[6] : regV[7]; /* level 3 */
663 : }
664 : }
665 :
666 : /* Convert the regression to a hard decision (classification) */
667 35760 : if ( regression > 0.79f && !( st->bwidth < SWB || hCPE->hCoreCoder[0]->vad_flag == 0 ) )
668 : {
669 5796 : hCPE->hStereoICBWE->MSFlag = 1;
670 : }
671 : }
672 :
673 160171 : if ( st->extl != WB_TBE && st->extl != SWB_TBE && st->extl != FB_TBE )
674 : {
675 : /* Update the previous superwideband speech buffer in case of a SWB_BWE frame - this code is in swb_tbe_enc */
676 39514 : delay = L_LOOK_16k + L_SUBFR16k;
677 39514 : mvr2r( shb_speech + L_FRAME16k - delay, hBWE_TD->old_speech_shb, delay );
678 : }
679 : }
680 : else
681 : {
682 238920 : if ( ( st->bwidth == FB || st->core == ACELP_CORE ) && ( st->element_mode == EVS_MONO ) )
683 : {
684 669 : InitSWBencBufferStates( st->hBWE_TD, shb_speech );
685 : }
686 : else
687 : {
688 238251 : if ( st->element_mode == IVAS_CPE_DFT )
689 : {
690 12399 : if ( st->L_frame == L_FRAME )
691 : {
692 3571 : L_resamp = 560; /* 6.4 kHz core -> 6 - 14 kHz SHB target. 20 ms is 560 samples in 28 kHz sample rate */
693 : }
694 : else
695 : {
696 8828 : L_resamp = 620; /* 8 kHz core -> 7.5 - 15.5 kHz SHB target. 20 ms is 620 samples in 31 kHz sample rate */
697 : }
698 :
699 : /* Dirty downsampling to match Nyquist to upper frequency limit of target */
700 12399 : lerp( st->input, new_swb_speech, L_resamp, (int16_t) ( input_Fs / 50 ) );
701 :
702 : /* flip the spectrum */
703 12399 : mvr2r( new_swb_speech, spchTmp, L_resamp );
704 3748959 : for ( i = 0; i < L_resamp; i = i + 2 )
705 : {
706 3736560 : spchTmp[i] = -spchTmp[i];
707 : }
708 :
709 : /* Dirty upsampling to match Nyquist/2 to lower frequency limit of target (reversed spectrum)*/
710 12399 : lerp( spchTmp, spchTmp2, L_FRAME32k, L_resamp );
711 12399 : mvr2r( spchTmp2, spchTmp, L_FRAME32k );
712 : }
713 : else
714 : {
715 : /* flip the spectrum */
716 225852 : mvr2r( new_swb_speech, spchTmp, L_FRAME32k );
717 :
718 72498492 : for ( i = 0; i < L_FRAME32k; i = i + 2 )
719 : {
720 72272640 : spchTmp[i] = -spchTmp[i];
721 : }
722 : }
723 :
724 238251 : Decimate_allpass_steep( spchTmp, hBWE_TD->state_ana_filt_shb, L_FRAME32k, shb_speech );
725 :
726 238251 : mvr2r( shb_speech + L_FRAME16k - ( L_LOOK_16k + L_SUBFR16k ), hBWE_TD->old_speech_shb, L_LOOK_16k + L_SUBFR16k );
727 :
728 : /*Compute the past overlap for potential next iDFTs SHB*/
729 238251 : if ( st->element_mode == IVAS_CPE_DFT )
730 : {
731 1748259 : for ( i = 0; i < STEREO_DFT_OVL_16k; i++ )
732 : {
733 1735860 : hCPE->hStereoDft->output_mem_dmx_16k_shb[i] = shb_speech[20 + i] * hCPE->hStereoDft->win_ana_16k[STEREO_DFT_OVL_16k - 1 - i] * hCPE->hStereoDft->win_ana_16k[STEREO_DFT_OVL_16k - 1 - i];
734 : }
735 : }
736 : }
737 :
738 238920 : if ( st->element_mode != IVAS_CPE_DFT )
739 : {
740 : /* Reset CLDFB synthesis buffer */
741 226521 : set_f( st->cldfbSynTd->cldfb_state, 0.0f, st->cldfbSynTd->p_filter_length );
742 : }
743 : else
744 : {
745 12399 : hCPE->hStereoDft->flip_sign = -hCPE->hStereoDft->flip_sign; /* Make sure sign is updated even if DFT SHB target is not generated */
746 : }
747 : }
748 :
749 : /* Memory reset to compensate for 0.9375 ms offset when transitioning from IO to SWB */
750 : /* When switching from n >1 to n = 1, we keep the enc/dec delay as 8.75/3.25 and below code not needed;
751 : only when n = 1 start, it will be 9.6875/2.3125 in that case this reset is needed for IO->BWE.*/
752 399091 : if ( st->last_extl == -1 && st->element_mode == EVS_MONO )
753 : {
754 439 : delay = NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS );
755 19684 : for ( i = 0; i < delay; i++ )
756 : {
757 19245 : shb_speech[i] = (float) i * ( 0.03f * shb_speech[2 * delay - 1 - i] );
758 : }
759 : }
760 :
761 399091 : return;
762 : }
|