Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include <math.h>
43 : #include "cnst.h"
44 : #include "prot.h"
45 : #include "rom_com.h"
46 : #include "rom_enc.h"
47 : #include "wmc_auto.h"
48 : #include "ivas_prot.h"
49 : #include "ivas_rom_enc.h"
50 :
51 : /*-------------------------------------------------------------------*
52 : * Local constants
53 : *
54 : *-------------------------------------------------------------------*/
55 :
56 : #define CLDFB_NO_CHANNELS_HB 20
57 :
58 :
59 : /*-------------------------------------------------------------------*
60 : * wb_pre_proc()
61 : *
62 : * - Resampling of input signal when input signal sampling rate
63 : * is above 16kHz
64 : * - Common WB TBE and WB BWE pre-processing
65 : *-------------------------------------------------------------------*/
66 :
67 817356 : void wb_pre_proc(
68 : Encoder_State *st, /* i/o: encoder state structure */
69 : const int16_t last_element_mode, /* i : last element mode */
70 : const float *new_inp_resamp16k, /* i : original input signal */
71 : float *hb_speech /* o : HB target signal (6-8kHz) at 16kHz */
72 : )
73 : {
74 : int16_t Sample_Delay_WB_BWE, ramp_flag;
75 : TD_BWE_ENC_HANDLE hBWE_TD;
76 : FD_BWE_ENC_HANDLE hBWE_FD;
77 : float decim_state1[( 2 * ALLPASSSECTIONS_STEEP + 1 )], decim_state2[( 2 * ALLPASSSECTIONS_STEEP + 1 )];
78 : float old_input[NS2SA( 16000, DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS ) + STEREO_DFT_OVL_16k + L_FRAME16k];
79 : float *highband_new_speech, highband_old_speech[( L_LOOK_12k8 + L_SUBFR + L_FRAME ) * 5 / 16];
80 817356 : int16_t fSwitchFromIO = 0;
81 : int16_t ppp_mode;
82 :
83 817356 : hBWE_TD = st->hBWE_TD;
84 817356 : hBWE_FD = st->hBWE_FD;
85 :
86 817356 : if ( st->Opt_SC_VBR )
87 : {
88 1330 : ppp_mode = st->hSC_VBR->ppp_mode;
89 : }
90 : else
91 : {
92 816026 : ppp_mode = 0;
93 : }
94 :
95 817356 : if ( ( st->last_total_brate == ACELP_6k60 ) ||
96 817087 : ( st->last_total_brate == ACELP_8k85 ) ||
97 817066 : ( st->last_total_brate == ACELP_12k65 ) ||
98 815972 : ( st->last_total_brate == ACELP_14k25 ) ||
99 815119 : ( st->last_total_brate == ACELP_15k85 ) ||
100 814051 : ( st->last_total_brate == ACELP_18k25 ) ||
101 813969 : ( st->last_total_brate == ACELP_19k85 ) ||
102 813839 : ( st->last_total_brate == ACELP_23k05 ) ||
103 811407 : ( st->last_total_brate == ACELP_23k85 ) )
104 : {
105 9076 : fSwitchFromIO = 1;
106 : }
107 :
108 817356 : set_f( old_input, 0, NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ) + STEREO_DFT_OVL_16k + L_FRAME16k );
109 :
110 817356 : if ( st->extl == WB_BWE || st->extl == WB_TBE || st->igf )
111 : {
112 410747 : ramp_flag = 0;
113 410747 : if ( ( st->last_extl != WB_TBE && st->last_extl != WB_BWE && !st->igf ) || ( st->igf && fSwitchFromIO ) )
114 : {
115 8306 : ramp_flag = 1;
116 : }
117 :
118 410747 : if ( !ppp_mode )
119 : {
120 410747 : if ( st->element_mode == IVAS_CPE_DFT )
121 : {
122 85365 : Sample_Delay_WB_BWE = NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS );
123 :
124 85365 : if ( last_element_mode == IVAS_CPE_TD )
125 : {
126 3 : set_f( hBWE_TD->decim_state1, 0, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
127 3 : set_f( hBWE_TD->decim_state2, 0, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
128 3 : ramp_flag = 1;
129 : }
130 :
131 : /*Get past signal*/
132 85365 : mvr2r( hBWE_FD->old_wtda_swb + L_FRAME16k - STEREO_DFT_OVL_16k, old_input, STEREO_DFT_OVL_16k );
133 85365 : mvr2r( hBWE_FD->old_input_wb, old_input + STEREO_DFT_OVL_16k, Sample_Delay_WB_BWE );
134 :
135 : /*Get new signal*/
136 85365 : mvr2r( new_inp_resamp16k, &old_input[Sample_Delay_WB_BWE + STEREO_DFT_OVL_16k], L_FRAME16k );
137 :
138 : /*compute hb_speech on delayed input*/
139 85365 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
140 :
141 : /*Update memory*/
142 85365 : mvr2r( hb_speech, hBWE_TD->old_speech_wb + ( L_SUBFR * 5 / 16 ), STEREO_DFT_OVL_16k / 4 );
143 85365 : mvr2r( hb_speech + STEREO_DFT_OVL_16k / 4, hb_speech, ( L_FRAME16k - STEREO_DFT_OVL_16k ) / 4 );
144 :
145 : /*rest without memory update*/
146 85365 : mvr2r( hBWE_TD->decim_state1, decim_state1, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
147 85365 : mvr2r( hBWE_TD->decim_state2, decim_state2, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
148 :
149 85365 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE + L_FRAME16k, hb_speech + ( L_FRAME16k - STEREO_DFT_OVL_16k ) / 4, STEREO_DFT_OVL_16k, decim_state1, decim_state2, 0 );
150 : }
151 325382 : else if ( st->element_mode == IVAS_CPE_TD )
152 : {
153 3374 : int16_t l_recalc_16k = L_MEM_RECALC_16K + L_FILT16k + 1; /* Note: "+1" is used because L_FILT16k is not divisible by 4 */
154 3374 : int16_t l_recalc_4k = ( L_MEM_RECALC_16K + L_FILT16k + 1 ) / 4;
155 3374 : Sample_Delay_WB_BWE = NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS );
156 :
157 3374 : if ( last_element_mode == IVAS_CPE_DFT )
158 : {
159 52 : set_f( hBWE_TD->decim_state1, 0, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
160 52 : set_f( hBWE_TD->decim_state2, 0, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
161 52 : ramp_flag = 1;
162 : }
163 :
164 : /*Get past signal*/
165 3374 : mvr2r( hBWE_FD->old_wtda_swb + L_FRAME16k - l_recalc_16k, old_input, l_recalc_16k );
166 3374 : mvr2r( hBWE_FD->old_input_wb, old_input + l_recalc_16k, Sample_Delay_WB_BWE );
167 :
168 3374 : old_input[Sample_Delay_WB_BWE] = hBWE_FD->mem_old_wtda_swb;
169 :
170 : /*Get new signal*/
171 3374 : mvr2r( new_inp_resamp16k, old_input + Sample_Delay_WB_BWE + l_recalc_16k, L_FRAME16k );
172 :
173 : /*compute hb_speech on delayed input*/
174 3374 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
175 :
176 : /*Update memory*/
177 3374 : mvr2r( hb_speech, hBWE_TD->old_speech_wb + ( ( L_LOOK_12k8 + L_SUBFR ) * 5 / 16 ) - l_recalc_4k, l_recalc_4k );
178 3374 : mvr2r( hb_speech + l_recalc_4k, hb_speech, ( L_FRAME16k / 4 ) - l_recalc_4k );
179 :
180 : /*rest without memory update*/
181 3374 : mvr2r( hBWE_TD->decim_state1, decim_state1, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
182 3374 : mvr2r( hBWE_TD->decim_state2, decim_state2, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
183 :
184 3374 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE + L_FRAME16k, hb_speech + ( L_FRAME16k / 4 ) - l_recalc_4k, l_recalc_16k, decim_state1, decim_state2, 0 );
185 : }
186 322008 : else if ( st->element_mode == IVAS_SCE )
187 : {
188 311805 : if ( st->input_Fs == 16000 )
189 : {
190 8559 : flip_spectrum_and_decimby4( new_inp_resamp16k, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
191 : }
192 : else
193 : {
194 303246 : int16_t l_recalc_16k = L_FILT16k + 1; /* Note: "+1" is used because L_FILT16k is not divisible by 4 */
195 303246 : int16_t l_recalc_4k = ( L_FILT16k + 1 ) / 4;
196 :
197 303246 : Sample_Delay_WB_BWE = NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS );
198 :
199 : /*Get past signal*/
200 303246 : mvr2r( hBWE_FD->old_wtda_swb + L_FRAME16k - l_recalc_16k, old_input, l_recalc_16k );
201 303246 : mvr2r( hBWE_FD->old_input_wb, old_input + l_recalc_16k, Sample_Delay_WB_BWE );
202 :
203 : /*Get new signal*/
204 303246 : mvr2r( new_inp_resamp16k, old_input + Sample_Delay_WB_BWE + l_recalc_16k, L_FRAME16k );
205 :
206 : /*compute hb_speech on delayed input*/
207 303246 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
208 :
209 : /* update hBWE_TD->old_speech_wb memory */
210 303246 : mvr2r( hb_speech, hBWE_TD->old_speech_wb + ( ( L_LOOK_12k8 + L_SUBFR ) * 5 / 16 ) - l_recalc_4k, l_recalc_4k );
211 303246 : mvr2r( hb_speech + l_recalc_4k, hb_speech, ( L_FRAME16k / 4 ) - l_recalc_4k );
212 :
213 : /*rest without memory update*/
214 303246 : mvr2r( hBWE_TD->decim_state1, decim_state1, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
215 303246 : mvr2r( hBWE_TD->decim_state2, decim_state2, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
216 :
217 303246 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE + L_FRAME16k, hb_speech + ( L_FRAME16k / 4 ) - l_recalc_4k, l_recalc_16k, decim_state1, decim_state2, 0 );
218 : }
219 : }
220 : else
221 : {
222 10203 : flip_spectrum_and_decimby4( new_inp_resamp16k, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
223 : }
224 :
225 410747 : if ( st->extl != WB_TBE )
226 : {
227 : /* Update the previous wideband speech buffer in case of a WB_BWE frame - this code is in wb_tbe_enc */
228 335669 : Sample_Delay_WB_BWE = ( L_LOOK_12k8 + L_SUBFR ) * 5 / 16;
229 :
230 335669 : highband_new_speech = highband_old_speech + Sample_Delay_WB_BWE;
231 :
232 335669 : mvr2r( hBWE_TD->old_speech_wb, highband_old_speech, Sample_Delay_WB_BWE );
233 335669 : mvr2r( hb_speech, highband_new_speech, L_FRAME16k / 4 );
234 335669 : mvr2r( highband_old_speech + L_FRAME16k / 4, hBWE_TD->old_speech_wb, Sample_Delay_WB_BWE );
235 : }
236 : }
237 : }
238 : else
239 : {
240 406609 : set_f( hBWE_TD->decim_state1, 0.0f, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
241 406609 : set_f( hBWE_TD->decim_state2, 0.0f, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
242 406609 : set_f( hBWE_TD->old_speech_wb, 0.0f, ( L_LOOK_12k8 + L_SUBFR ) * 5 / 16 );
243 : }
244 :
245 817356 : if ( hBWE_FD != NULL )
246 : {
247 817356 : hBWE_FD->mem_old_wtda_swb = new_inp_resamp16k[L_FRAME16k - L_MEM_RECALC_16K - L_FILT16k - 1];
248 : }
249 :
250 : /* st->old_input_wb and st->old_wtda_wb must be updated each frame, or there are often some clicks during WB TBE <-> WB BWE switching */
251 817356 : if ( ( st->extl != WB_BWE || ( st->extl == WB_BWE && st->extl_brate == 0 ) ) && !ppp_mode )
252 : {
253 789509 : Sample_Delay_WB_BWE = NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS );
254 :
255 789509 : mvr2r( new_inp_resamp16k, &old_input[Sample_Delay_WB_BWE], L_FRAME16k );
256 789509 : mvr2r( hBWE_FD->old_input_wb, old_input, Sample_Delay_WB_BWE );
257 789509 : mvr2r( new_inp_resamp16k + L_FRAME16k - Sample_Delay_WB_BWE, hBWE_FD->old_input_wb, Sample_Delay_WB_BWE );
258 789509 : if ( ( st->extl != SWB_BWE ) && ( st->extl != FB_BWE ) )
259 : {
260 789509 : mvr2r( old_input, hBWE_FD->old_wtda_swb, L_FRAME16k );
261 : }
262 : }
263 :
264 817356 : return;
265 : }
266 :
267 :
268 : /*-------------------------------------------------------------------*
269 : * swb_pre_proc()
270 : *
271 : * - Calculate the 6 to 14 kHz (or 7.5 - 15.5 kHz) SHB target signal
272 : * for SWB TBE or SWB BWE coding
273 : * - Common SWB TBE and SWB BWE pre-processing
274 : *-------------------------------------------------------------------*/
275 :
276 4186726 : void swb_pre_proc(
277 : Encoder_State *st, /* i/o: encoder state structure */
278 : float *new_swb_speech, /* o : original input signal at 32kHz */
279 : float *shb_speech, /* o : SHB target signal (6-14kHz) at 16kHz */
280 : float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : real buffer */
281 : float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : imag buffer */
282 : CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */
283 : )
284 : {
285 : int16_t Sample_Delay_SWB_BWE, inner_frame, delay;
286 : TD_BWE_ENC_HANDLE hBWE_TD;
287 : FD_BWE_ENC_HANDLE hBWE_FD;
288 : int32_t inner_Fs, input_Fs;
289 : float old_input[NS2SA( 48000, DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS ) + L_FRAME48k];
290 : float spchTmp[L_FRAME32k], spchTmp2[L_FRAME32k];
291 : int16_t i, j, L_resamp;
292 : int16_t startB, endB;
293 : float *realBufferFlipped[CLDFB_NO_COL_MAX];
294 : float *imagBufferFlipped[CLDFB_NO_COL_MAX];
295 : float realBufferTmp[CLDFB_NO_COL_MAX][20];
296 : float imagBufferTmp[CLDFB_NO_COL_MAX][20];
297 : int16_t ts, nB, uB;
298 : float sign, lbEner, v, t, regression;
299 : const float *thr, *regV;
300 : int16_t Sample_Delay_SWB_BWE32k, lMemRecalc32k, dft_ovl32k;
301 :
302 4186726 : lMemRecalc32k = NS2SA( 32000, L_MEM_RECALC_NS );
303 :
304 : /* initialization */
305 4186726 : hBWE_TD = st->hBWE_TD;
306 4186726 : hBWE_FD = st->hBWE_FD;
307 4186726 : input_Fs = st->input_Fs;
308 :
309 71174342 : for ( j = 0; j < CLDFB_NO_COL_MAX; j++ )
310 : {
311 66987616 : set_f( realBufferTmp[j], 0, 20 );
312 66987616 : set_f( imagBufferTmp[j], 0, 20 );
313 66987616 : realBufferFlipped[j] = realBufferTmp[j];
314 66987616 : imagBufferFlipped[j] = imagBufferTmp[j];
315 : }
316 :
317 4186726 : set_f( old_input, 0.0f, NS2SA( 48000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ) + L_FRAME48k );
318 :
319 4186726 : if ( input_Fs == 32000 )
320 : {
321 828554 : if ( st->element_mode > EVS_MONO )
322 : {
323 793257 : Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS );
324 793257 : if ( st->L_frame == L_FRAME16k )
325 : {
326 486649 : Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS );
327 : }
328 :
329 793257 : mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
330 793257 : mvr2r( st->input - L_FRAME32k, hBWE_FD->old_fdbwe_speech, L_FRAME32k );
331 :
332 793257 : if ( st->element_mode == IVAS_CPE_TD && st->bwidth >= SWB )
333 : {
334 13908 : mvr2r( st->input - hCPE->hStereoTCA->lMemRecalc, hBWE_FD->old_wtda_swb + L_FRAME32k - ( hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE ), hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE );
335 : }
336 779349 : else if ( st->element_mode == IVAS_CPE_DFT && st->bwidth >= SWB )
337 : {
338 287344 : mvr2r( st->input - hCPE->hStereoDft->dft_ovl, hBWE_FD->old_wtda_swb + L_FRAME32k - ( hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ), hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE );
339 : }
340 : }
341 :
342 828554 : mvr2r( st->input, new_swb_speech, L_FRAME32k );
343 :
344 828554 : if ( st->last_extl != SWB_BWE && st->last_extl != FB_BWE && st->extl != SWB_BWE_HIGHRATE )
345 : {
346 808662 : Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS );
347 808662 : if ( st->element_mode > EVS_MONO && st->L_frame == L_FRAME16k )
348 : {
349 482745 : Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS + DELAY_FIR_RESAMPL_NS );
350 : }
351 808662 : if ( st->element_mode > EVS_MONO )
352 : {
353 775816 : Sample_Delay_SWB_BWE -= NS2SA( 32000, DELAY_FIR_RESAMPL_NS );
354 : }
355 :
356 808662 : mvr2r( hBWE_FD->old_fdbwe_speech, &old_input[Sample_Delay_SWB_BWE], L_FRAME32k );
357 :
358 808662 : set_f( old_input, 0, Sample_Delay_SWB_BWE );
359 808662 : mvr2r( hBWE_FD->old_fdbwe_speech + L_FRAME32k - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
360 808662 : if ( st->extl != WB_BWE )
361 : {
362 805559 : mvr2r( old_input, hBWE_FD->old_wtda_swb, L_FRAME32k );
363 : }
364 : }
365 :
366 828554 : if ( st->extl != SWB_BWE && st->extl != FB_BWE )
367 : {
368 809286 : mvr2r( st->input, hBWE_FD->old_fdbwe_speech, L_FRAME32k );
369 : }
370 : }
371 : else /* 48 kHz */
372 : {
373 :
374 3358172 : Sample_Delay_SWB_BWE32k = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS );
375 3358172 : Sample_Delay_SWB_BWE = NS2SA( 48000, DELAY_FD_BWE_ENC_12k8_NS );
376 3358172 : if ( st->L_frame == L_FRAME16k )
377 : {
378 1297036 : Sample_Delay_SWB_BWE32k = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS );
379 1297036 : Sample_Delay_SWB_BWE = NS2SA( 48000, DELAY_FD_BWE_ENC_16k_NS );
380 : }
381 :
382 3358172 : dft_ovl32k = 0;
383 3358172 : if ( st->element_mode == IVAS_CPE_DFT )
384 : {
385 354053 : dft_ovl32k = (int16_t) ( STEREO_DFT_OVL_MAX * 32000 / 48000 );
386 : }
387 :
388 3358172 : if ( st->codec_mode == MODE1 )
389 : {
390 3341553 : if ( st->element_mode > EVS_MONO )
391 : {
392 :
393 3326171 : if ( st->element_mode == IVAS_CPE_TD )
394 : {
395 : }
396 3289165 : else if ( st->bwidth == FB )
397 : {
398 1928451 : mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
399 : }
400 :
401 3326171 : mvr2r( st->input - L_FRAME48k, hBWE_FD->old_fdbwe_speech, L_FRAME48k );
402 :
403 3326171 : if ( st->element_mode == IVAS_CPE_TD && st->bwidth >= SWB )
404 : {
405 36535 : if ( st->bwidth == SWB )
406 : {
407 : /* buffers hBWE_FD->old_input[] and hBWE_FD->old_wtda_swb[] need to be at 32 kHz (inner) sampling rate */
408 :
409 5872 : decimate_2_over_3_allpass( st->input - hCPE->hStereoTCA->lMemRecalc, hCPE->hStereoTCA->lMemRecalc, spchTmp, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
410 :
411 5872 : mvr2r( spchTmp, hBWE_FD->old_wtda_swb + L_FRAME32k - ( lMemRecalc32k - Sample_Delay_SWB_BWE32k ), lMemRecalc32k - Sample_Delay_SWB_BWE32k );
412 5872 : mvr2r( spchTmp + lMemRecalc32k - Sample_Delay_SWB_BWE32k, hBWE_FD->old_input, Sample_Delay_SWB_BWE32k );
413 : }
414 : else /* FB_BWE */
415 : {
416 30663 : mvr2r( st->input - hCPE->hStereoTCA->lMemRecalc, hBWE_FD->old_wtda_swb + L_FRAME48k - ( hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE ), hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE );
417 30663 : mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
418 : }
419 : }
420 3289636 : else if ( st->element_mode == IVAS_CPE_DFT && st->bwidth >= SWB )
421 : {
422 335396 : if ( st->bwidth == SWB )
423 : {
424 175261 : lerp( st->input - hCPE->hStereoDft->dft_ovl, spchTmp, dft_ovl32k - Sample_Delay_SWB_BWE32k, hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE );
425 :
426 175261 : mvr2r( spchTmp, hBWE_FD->old_wtda_swb + L_FRAME32k - ( dft_ovl32k - Sample_Delay_SWB_BWE32k ), dft_ovl32k - Sample_Delay_SWB_BWE32k );
427 : }
428 : else
429 : {
430 160135 : mvr2r( st->input - hCPE->hStereoDft->dft_ovl, hBWE_FD->old_wtda_swb + L_FRAME48k - ( hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ), hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE );
431 : }
432 : }
433 : }
434 :
435 3341553 : if ( ( st->extl != SWB_BWE && st->extl != FB_BWE && st->core == ACELP_CORE ) || ( st->element_mode == IVAS_CPE_DFT && st->core != ACELP_CORE ) /*resampling not needed for MDCT cores*/ )
436 : {
437 : /* move the resampling out of the TDBWE path as new_swb_speech is not needed for TDBWE. */
438 1260738 : mvr2r( st->input, hBWE_FD->old_fdbwe_speech, L_FRAME48k );
439 : }
440 : else
441 : {
442 2080815 : if ( st->last_extl != SWB_BWE && st->last_extl != FB_BWE )
443 : {
444 : /* resample 48 kHz to 32kHz */
445 : #ifdef NONBE_1244_FIX_SWB_BWE_MEMORY
446 2013801 : if ( ( st->last_bwidth == FB && st->element_mode == EVS_MONO ) || ( st->bwidth == FB && st->element_mode > EVS_MONO ) ) // note: once EVS i CR fixed, the condition will simplify to "if ( st->bwidth == FB )" only
447 : #else
448 : if ( st->last_bwidth == FB )
449 : #endif
450 : {
451 1493699 : inner_frame = L_FRAME48k;
452 1493699 : inner_Fs = 48000;
453 1493699 : mvr2r( hBWE_FD->old_fdbwe_speech, new_swb_speech, L_FRAME48k );
454 : }
455 : else
456 : {
457 520102 : inner_frame = L_FRAME32k;
458 520102 : inner_Fs = 32000;
459 :
460 520102 : if ( st->element_mode != IVAS_CPE_DFT )
461 : {
462 515093 : decimate_2_over_3_allpass( hBWE_FD->old_fdbwe_speech, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
463 : }
464 : else
465 : {
466 5009 : lerp( hBWE_FD->old_fdbwe_speech, new_swb_speech, inner_frame, L_FRAME48k );
467 : }
468 :
469 520102 : if ( st->element_mode == IVAS_CPE_DFT && st->idchan == 0 )
470 : {
471 1407529 : for ( i = 0; i < STEREO_DFT_OVL_32k; i++ )
472 : {
473 1402520 : hCPE->hStereoDft->output_mem_dmx_32k[i] = new_swb_speech[inner_frame - STEREO_DFT_OVL_32k + i] * hCPE->hStereoDft->win_32k[STEREO_DFT_OVL_32k - 1 - i];
474 : }
475 : }
476 : }
477 :
478 2013801 : Sample_Delay_SWB_BWE = NS2SA( inner_Fs, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS );
479 2013801 : if ( st->element_mode > EVS_MONO && st->L_frame == L_FRAME16k )
480 : {
481 720152 : Sample_Delay_SWB_BWE = NS2SA( inner_Fs, DELAY_FD_BWE_ENC_16k_NS + DELAY_FIR_RESAMPL_NS );
482 : }
483 2013801 : if ( st->element_mode > EVS_MONO )
484 : {
485 2008287 : Sample_Delay_SWB_BWE -= NS2SA( inner_Fs, DELAY_FIR_RESAMPL_NS );
486 : }
487 :
488 2013801 : mvr2r( new_swb_speech, &old_input[Sample_Delay_SWB_BWE], inner_frame );
489 2013801 : set_f( old_input, 0, Sample_Delay_SWB_BWE );
490 2013801 : mvr2r( new_swb_speech + inner_frame - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
491 2013801 : mvr2r( old_input, hBWE_FD->old_wtda_swb, inner_frame );
492 : }
493 :
494 : /* resample 48 kHz to 32kHz */
495 2080815 : if ( st->bwidth == FB )
496 : {
497 1511319 : mvr2r( st->input, new_swb_speech, L_FRAME48k );
498 : }
499 : else
500 : {
501 569496 : if ( st->element_mode == IVAS_CPE_TD )
502 : {
503 : float dec_2_over_3_mem_tmp[L_FILT_2OVER3], dec_2_over_3_mem_lp_tmp[L_FILT_2OVER3_LP];
504 :
505 353 : decimate_2_over_3_allpass( st->input, L_FRAME48k - hCPE->hStereoTCA->lMemRecalc, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
506 :
507 353 : mvr2r( hBWE_TD->dec_2_over_3_mem, dec_2_over_3_mem_tmp, L_FILT_2OVER3 );
508 353 : mvr2r( hBWE_TD->dec_2_over_3_mem_lp, dec_2_over_3_mem_lp_tmp, L_FILT_2OVER3_LP );
509 :
510 353 : decimate_2_over_3_allpass( st->input + L_FRAME48k - hCPE->hStereoTCA->lMemRecalc, hCPE->hStereoTCA->lMemRecalc, new_swb_speech + L_FRAME32k - lMemRecalc32k, dec_2_over_3_mem_tmp, dec_2_over_3_mem_lp_tmp );
511 : }
512 569143 : else if ( st->element_mode != IVAS_CPE_DFT )
513 : {
514 543900 : decimate_2_over_3_allpass( st->input, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
515 : }
516 : else /* IVAS_CPE_DFT */
517 : {
518 25243 : stereo_dft_enc_synthesize( hCPE->hStereoDft, new_swb_speech, st->idchan, input_Fs, 32000, 0 );
519 :
520 25243 : mvr2r( new_swb_speech - Sample_Delay_SWB_BWE32k, hBWE_FD->old_input, Sample_Delay_SWB_BWE32k );
521 : }
522 : }
523 : }
524 : }
525 : else
526 : {
527 : /* resample 48 kHz to 32kHz */
528 16619 : if ( st->bwidth == FB )
529 : {
530 3155 : mvr2r( st->input, new_swb_speech, L_FRAME48k );
531 : }
532 : else
533 : {
534 13464 : decimate_2_over_3_allpass( st->input, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
535 : }
536 : }
537 : }
538 :
539 4186726 : if ( ( st->core == ACELP_CORE && st->extl != SWB_BWE_HIGHRATE && st->extl != FB_BWE_HIGHRATE ) ||
540 2634961 : ( ( st->total_brate == ACELP_9k60 || st->rf_mode ) && st->bwidth == SWB && st->element_mode == EVS_MONO ) )
541 1552454 : {
542 1552454 : float CldfbHB = 0;
543 :
544 1552454 : if ( st->element_mode == IVAS_CPE_DFT )
545 : {
546 359739 : CldfbHB = stereo_dft_enc_synthesize( hCPE->hStereoDft, old_input + STEREO_DFT_OVL_16k, st->idchan, input_Fs, 16000, st->L_frame );
547 :
548 : /* delay corresponding to CLDFB delay */
549 359739 : mvr2r( old_input + STEREO_DFT_OVL_16k - 20, shb_speech, L_FRAME16k );
550 359739 : mvr2r( old_input, hBWE_TD->old_speech_shb + L_LOOK_16k + L_SUBFR16k - ( STEREO_DFT_OVL_16k - 20 ), STEREO_DFT_OVL_16k - 20 );
551 359739 : mvr2r( old_input, hCPE->hStereoICBWE->mem_shb_speech_ref, STEREO_DFT_OVL_16k - 20 );
552 :
553 359739 : if ( CldfbHB <= 0 )
554 : {
555 14607 : CldfbHB = 1.0f;
556 : }
557 359739 : hBWE_TD->cldfbHBLT = 0.9f * hBWE_TD->cldfbHBLT + 0.1f * ( 0.221462f /*=1/log10(32768)*/ * ( log10f( CldfbHB ) - 1.0f ) );
558 :
559 359739 : lbEner = 0.05f * (float) sqrt( hCPE->hStereoDft->lbEner );
560 359739 : hCPE->hStereoICBWE->icbweRefEner = 0.05f * (float) sqrt( hCPE->hStereoDft->icbweRefEner );
561 359739 : lbEner = 0.05f * (float) sqrt( hCPE->hStereoDft->lbEner );
562 359739 : thr = icbwe_thr_DFT;
563 359739 : regV = icbwe_regressionValuesDFT;
564 : }
565 : else
566 : {
567 1192715 : if ( st->L_frame == L_FRAME )
568 : {
569 729386 : startB = 34;
570 729386 : endB = 14;
571 12399562 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
572 : {
573 245073696 : for ( nB = startB, uB = 0; nB > endB; nB--, uB++ )
574 : {
575 233403520 : sign = ( ts % 2 ) ? 1.0f : -1.0f;
576 233403520 : realBufferFlipped[ts][uB] = -sign * realBuffer[ts][nB];
577 233403520 : imagBufferFlipped[ts][uB] = sign * imagBuffer[ts][nB];
578 : }
579 : }
580 : }
581 : else
582 : {
583 463329 : startB = 39;
584 463329 : endB = 19;
585 7876593 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
586 : {
587 155678544 : for ( nB = startB, uB = 0; nB > endB; nB--, uB++ )
588 : {
589 148265280 : realBufferFlipped[ts][uB] = -realBuffer[ts][nB];
590 148265280 : imagBufferFlipped[ts][uB] = imagBuffer[ts][nB];
591 : }
592 : }
593 : }
594 :
595 13119865 : for ( nB = 0; nB < 10; nB++ )
596 : {
597 202761550 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
598 : {
599 190834400 : CldfbHB += ( realBufferFlipped[ts][nB] * realBufferFlipped[ts][nB] + imagBufferFlipped[ts][nB] * imagBufferFlipped[ts][nB] );
600 : }
601 : }
602 1192715 : if ( CldfbHB <= 0 )
603 : {
604 19096 : CldfbHB = 1.0f;
605 : }
606 1192715 : hBWE_TD->cldfbHBLT = 0.9f * hBWE_TD->cldfbHBLT + 0.1f * ( 0.221462f /*=1/log10(32768)*/ * ( log10f( CldfbHB ) - 1.0f ) );
607 :
608 1192715 : if ( st->element_mode >= IVAS_CPE_DFT && hCPE->hStereoICBWE != NULL )
609 : {
610 1553 : hCPE->hStereoICBWE->icbweRefEner = EPSILON;
611 32613 : for ( nB = 20; nB < 40; nB++ )
612 : {
613 528020 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
614 : {
615 496960 : hCPE->hStereoICBWE->icbweRefEner += ( realBuffer[ts][nB] * realBuffer[ts][nB] + imagBuffer[ts][nB] * imagBuffer[ts][nB] );
616 : }
617 : }
618 1553 : hCPE->hStereoICBWE->icbweRefEner = 0.05f * sqrtf( hCPE->hStereoICBWE->icbweRefEner );
619 : }
620 :
621 1192715 : lbEner = EPSILON;
622 25047015 : for ( nB = 0; nB < 20; nB++ )
623 : {
624 405523100 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
625 : {
626 381668800 : lbEner += ( realBuffer[ts][nB] * realBuffer[ts][nB] + imagBuffer[ts][nB] * imagBuffer[ts][nB] );
627 : }
628 : }
629 1192715 : lbEner = 0.05f * sqrtf( lbEner );
630 1192715 : thr = icbwe_thr_TDM;
631 1192715 : regV = icbwe_regressionValuesTDM;
632 :
633 1192715 : cldfbSynthesis( realBufferFlipped, imagBufferFlipped, shb_speech, -1, st->cldfbSynTd );
634 : }
635 :
636 1552454 : if ( st->element_mode >= IVAS_CPE_DFT && hCPE->hStereoICBWE != NULL )
637 : {
638 361292 : hCPE->hStereoICBWE->MSFlag = 0; /* Init the multi-source flag */
639 361292 : v = 0.3333f * sum_f( st->voicing, 3 );
640 361292 : t = log10f( ( hCPE->hStereoICBWE->icbweRefEner + 1e-6f ) / ( lbEner + 1e-6f ) );
641 :
642 : /* Three Level Decision Tree to calculate a regression value first */
643 361292 : if ( t < thr[0] ) /* level 1 */
644 : {
645 258074 : if ( t < thr[1] ) /* level 2 */
646 : {
647 225330 : regression = ( v < thr[3] ) ? regV[0] : regV[1]; /* level 3 */
648 : }
649 : else
650 : {
651 32744 : regression = ( v < thr[4] ) ? regV[2] : regV[3]; /* level 3 */
652 : }
653 : }
654 : else
655 : {
656 103218 : if ( t < thr[2] ) /* level 2 */
657 : {
658 15852 : regression = ( v < thr[5] ) ? regV[4] : regV[5]; /* level 3 */
659 : }
660 : else
661 : {
662 87366 : regression = ( v < thr[6] ) ? regV[6] : regV[7]; /* level 3 */
663 : }
664 : }
665 :
666 : /* Convert the regression to a hard decision (classification) */
667 361292 : if ( regression > 0.79f && !( st->bwidth < SWB || hCPE->hCoreCoder[0]->vad_flag == 0 ) )
668 : {
669 74222 : hCPE->hStereoICBWE->MSFlag = 1;
670 : }
671 : }
672 :
673 1552454 : if ( st->extl != WB_TBE && st->extl != SWB_TBE && st->extl != FB_TBE )
674 : {
675 : /* Update the previous superwideband speech buffer in case of a SWB_BWE frame - this code is in swb_tbe_enc */
676 463808 : delay = L_LOOK_16k + L_SUBFR16k;
677 463808 : mvr2r( shb_speech + L_FRAME16k - delay, hBWE_TD->old_speech_shb, delay );
678 : }
679 : }
680 : else
681 : {
682 2634272 : if ( ( st->bwidth == FB || st->core == ACELP_CORE ) && ( st->element_mode == EVS_MONO ) )
683 : {
684 12643 : InitSWBencBufferStates( st->hBWE_TD, shb_speech );
685 : }
686 : else
687 : {
688 2621629 : if ( st->element_mode == IVAS_CPE_DFT )
689 : {
690 299976 : if ( st->L_frame == L_FRAME )
691 : {
692 97181 : L_resamp = 560; /* 6.4 kHz core -> 6 - 14 kHz SHB target. 20 ms is 560 samples in 28 kHz sample rate */
693 : }
694 : else
695 : {
696 202795 : L_resamp = 620; /* 8 kHz core -> 7.5 - 15.5 kHz SHB target. 20 ms is 620 samples in 31 kHz sample rate */
697 : }
698 :
699 : /* Dirty downsampling to match Nyquist to upper frequency limit of target */
700 299976 : lerp( st->input, new_swb_speech, L_resamp, (int16_t) ( input_Fs / 50 ) );
701 :
702 : /* flip the spectrum */
703 299976 : mvr2r( new_swb_speech, spchTmp, L_resamp );
704 90377106 : for ( i = 0; i < L_resamp; i = i + 2 )
705 : {
706 90077130 : spchTmp[i] = -spchTmp[i];
707 : }
708 :
709 : /* Dirty upsampling to match Nyquist/2 to lower frequency limit of target (reversed spectrum)*/
710 299976 : lerp( spchTmp, spchTmp2, L_FRAME32k, L_resamp );
711 299976 : mvr2r( spchTmp2, spchTmp, L_FRAME32k );
712 : }
713 : else
714 : {
715 : /* flip the spectrum */
716 2321653 : mvr2r( new_swb_speech, spchTmp, L_FRAME32k );
717 :
718 745250613 : for ( i = 0; i < L_FRAME32k; i = i + 2 )
719 : {
720 742928960 : spchTmp[i] = -spchTmp[i];
721 : }
722 : }
723 :
724 2621629 : Decimate_allpass_steep( spchTmp, hBWE_TD->state_ana_filt_shb, L_FRAME32k, shb_speech );
725 :
726 2621629 : mvr2r( shb_speech + L_FRAME16k - ( L_LOOK_16k + L_SUBFR16k ), hBWE_TD->old_speech_shb, L_LOOK_16k + L_SUBFR16k );
727 :
728 : /*Compute the past overlap for potential next iDFTs SHB*/
729 2621629 : if ( st->element_mode == IVAS_CPE_DFT )
730 : {
731 42296616 : for ( i = 0; i < STEREO_DFT_OVL_16k; i++ )
732 : {
733 41996640 : hCPE->hStereoDft->output_mem_dmx_16k_shb[i] = shb_speech[20 + i] * hCPE->hStereoDft->win_ana_16k[STEREO_DFT_OVL_16k - 1 - i] * hCPE->hStereoDft->win_ana_16k[STEREO_DFT_OVL_16k - 1 - i];
734 : }
735 : }
736 : }
737 :
738 2634272 : if ( st->element_mode != IVAS_CPE_DFT )
739 : {
740 : /* Reset CLDFB synthesis buffer */
741 2334296 : set_f( st->cldfbSynTd->cldfb_state, 0.0f, st->cldfbSynTd->p_filter_length );
742 : }
743 : else
744 : {
745 299976 : hCPE->hStereoDft->flip_sign = -hCPE->hStereoDft->flip_sign; /* Make sure sign is updated even if DFT SHB target is not generated */
746 : }
747 : }
748 :
749 : /* Memory reset to compensate for 0.9375 ms offset when transitioning from IO to SWB */
750 : /* When switching from n >1 to n = 1, we keep the enc/dec delay as 8.75/3.25 and below code not needed;
751 : only when n = 1 start, it will be 9.6875/2.3125 in that case this reset is needed for IO->BWE.*/
752 4186726 : if ( st->last_extl == -1 && st->element_mode == EVS_MONO )
753 : {
754 12249 : delay = NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS );
755 475419 : for ( i = 0; i < delay; i++ )
756 : {
757 463170 : shb_speech[i] = (float) i * ( 0.03f * shb_speech[2 * delay - 1 - i] );
758 : }
759 : }
760 :
761 4186726 : return;
762 : }
|