Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include <math.h>
43 : #include "cnst.h"
44 : #include "prot.h"
45 : #include "rom_com.h"
46 : #include "rom_enc.h"
47 : #include "wmc_auto.h"
48 : #include "ivas_prot.h"
49 : #include "ivas_rom_enc.h"
50 :
51 : /*-------------------------------------------------------------------*
52 : * Local constants
53 : *
54 : *-------------------------------------------------------------------*/
55 :
56 : #define CLDFB_NO_CHANNELS_HB 20
57 :
58 :
59 : /*-------------------------------------------------------------------*
60 : * wb_pre_proc()
61 : *
62 : * - Resampling of input signal when input signal sampling rate
63 : * is above 16kHz
64 : * - Common WB TBE and WB BWE pre-processing
65 : *-------------------------------------------------------------------*/
66 :
67 95169 : void wb_pre_proc(
68 : Encoder_State *st, /* i/o: encoder state structure */
69 : const int16_t last_element_mode, /* i : last element mode */
70 : const float *new_inp_resamp16k, /* i : original input signal */
71 : float *hb_speech /* o : HB target signal (6-8kHz) at 16kHz */
72 : )
73 : {
74 : int16_t Sample_Delay_WB_BWE, ramp_flag;
75 : TD_BWE_ENC_HANDLE hBWE_TD;
76 : FD_BWE_ENC_HANDLE hBWE_FD;
77 : float decim_state1[( 2 * ALLPASSSECTIONS_STEEP + 1 )], decim_state2[( 2 * ALLPASSSECTIONS_STEEP + 1 )];
78 : float old_input[NS2SA( 16000, DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS ) + STEREO_DFT_OVL_16k + L_FRAME16k];
79 : float *highband_new_speech, highband_old_speech[( L_LOOK_12k8 + L_SUBFR + L_FRAME ) * 5 / 16];
80 95169 : int16_t fSwitchFromIO = 0;
81 : int16_t ppp_mode;
82 :
83 95169 : hBWE_TD = st->hBWE_TD;
84 95169 : hBWE_FD = st->hBWE_FD;
85 :
86 95169 : if ( st->Opt_SC_VBR )
87 : {
88 140 : ppp_mode = st->hSC_VBR->ppp_mode;
89 : }
90 : else
91 : {
92 95029 : ppp_mode = 0;
93 : }
94 :
95 95169 : if ( ( st->last_total_brate == ACELP_6k60 ) ||
96 95152 : ( st->last_total_brate == ACELP_8k85 ) ||
97 95136 : ( st->last_total_brate == ACELP_12k65 ) ||
98 94917 : ( st->last_total_brate == ACELP_14k25 ) ||
99 94797 : ( st->last_total_brate == ACELP_15k85 ) ||
100 94435 : ( st->last_total_brate == ACELP_18k25 ) ||
101 94403 : ( st->last_total_brate == ACELP_19k85 ) ||
102 94363 : ( st->last_total_brate == ACELP_23k05 ) ||
103 94196 : ( st->last_total_brate == ACELP_23k85 ) )
104 : {
105 1168 : fSwitchFromIO = 1;
106 : }
107 :
108 95169 : set_f( old_input, 0, NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ) + STEREO_DFT_OVL_16k + L_FRAME16k );
109 :
110 95169 : if ( st->extl == WB_BWE || st->extl == WB_TBE || st->igf )
111 : {
112 37073 : ramp_flag = 0;
113 37073 : if ( ( st->last_extl != WB_TBE && st->last_extl != WB_BWE && !st->igf ) || ( st->igf && fSwitchFromIO ) )
114 : {
115 1571 : ramp_flag = 1;
116 : }
117 :
118 37073 : if ( !ppp_mode )
119 : {
120 37073 : if ( st->element_mode == IVAS_CPE_DFT )
121 : {
122 7173 : Sample_Delay_WB_BWE = NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS );
123 :
124 7173 : if ( last_element_mode == IVAS_CPE_TD )
125 : {
126 1 : set_f( hBWE_TD->decim_state1, 0, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
127 1 : set_f( hBWE_TD->decim_state2, 0, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
128 1 : ramp_flag = 1;
129 : }
130 :
131 : /*Get past signal*/
132 7173 : mvr2r( hBWE_FD->old_wtda_swb + L_FRAME16k - STEREO_DFT_OVL_16k, old_input, STEREO_DFT_OVL_16k );
133 7173 : mvr2r( hBWE_FD->old_input_wb, old_input + STEREO_DFT_OVL_16k, Sample_Delay_WB_BWE );
134 :
135 : /*Get new signal*/
136 7173 : mvr2r( new_inp_resamp16k, &old_input[Sample_Delay_WB_BWE + STEREO_DFT_OVL_16k], L_FRAME16k );
137 :
138 : /*compute hb_speech on delayed input*/
139 7173 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
140 :
141 : /*Update memory*/
142 7173 : mvr2r( hb_speech, hBWE_TD->old_speech_wb + ( L_SUBFR * 5 / 16 ), STEREO_DFT_OVL_16k / 4 );
143 7173 : mvr2r( hb_speech + STEREO_DFT_OVL_16k / 4, hb_speech, ( L_FRAME16k - STEREO_DFT_OVL_16k ) / 4 );
144 :
145 : /*rest without memory update*/
146 7173 : mvr2r( hBWE_TD->decim_state1, decim_state1, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
147 7173 : mvr2r( hBWE_TD->decim_state2, decim_state2, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
148 :
149 7173 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE + L_FRAME16k, hb_speech + ( L_FRAME16k - STEREO_DFT_OVL_16k ) / 4, STEREO_DFT_OVL_16k, decim_state1, decim_state2, 0 );
150 : }
151 29900 : else if ( st->element_mode == IVAS_CPE_TD )
152 : {
153 1247 : int16_t l_recalc_16k = L_MEM_RECALC_16K + L_FILT16k + 1; /* Note: "+1" is used because L_FILT16k is not divisible by 4 */
154 1247 : int16_t l_recalc_4k = ( L_MEM_RECALC_16K + L_FILT16k + 1 ) / 4;
155 1247 : Sample_Delay_WB_BWE = NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS );
156 :
157 1247 : if ( last_element_mode == IVAS_CPE_DFT )
158 : {
159 21 : set_f( hBWE_TD->decim_state1, 0, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
160 21 : set_f( hBWE_TD->decim_state2, 0, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
161 21 : ramp_flag = 1;
162 : }
163 :
164 : /*Get past signal*/
165 1247 : mvr2r( hBWE_FD->old_wtda_swb + L_FRAME16k - l_recalc_16k, old_input, l_recalc_16k );
166 1247 : mvr2r( hBWE_FD->old_input_wb, old_input + l_recalc_16k, Sample_Delay_WB_BWE );
167 :
168 1247 : old_input[Sample_Delay_WB_BWE] = hBWE_FD->mem_old_wtda_swb;
169 :
170 : /*Get new signal*/
171 1247 : mvr2r( new_inp_resamp16k, old_input + Sample_Delay_WB_BWE + l_recalc_16k, L_FRAME16k );
172 :
173 : /*compute hb_speech on delayed input*/
174 1247 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
175 :
176 : /*Update memory*/
177 1247 : mvr2r( hb_speech, hBWE_TD->old_speech_wb + ( ( L_LOOK_12k8 + L_SUBFR ) * 5 / 16 ) - l_recalc_4k, l_recalc_4k );
178 1247 : mvr2r( hb_speech + l_recalc_4k, hb_speech, ( L_FRAME16k / 4 ) - l_recalc_4k );
179 :
180 : /*rest without memory update*/
181 1247 : mvr2r( hBWE_TD->decim_state1, decim_state1, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
182 1247 : mvr2r( hBWE_TD->decim_state2, decim_state2, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
183 :
184 1247 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE + L_FRAME16k, hb_speech + ( L_FRAME16k / 4 ) - l_recalc_4k, l_recalc_16k, decim_state1, decim_state2, 0 );
185 : }
186 28653 : else if ( st->element_mode == IVAS_SCE )
187 : {
188 26653 : if ( st->input_Fs == 16000 )
189 : {
190 8559 : flip_spectrum_and_decimby4( new_inp_resamp16k, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
191 : }
192 : else
193 : {
194 18094 : int16_t l_recalc_16k = L_FILT16k + 1; /* Note: "+1" is used because L_FILT16k is not divisible by 4 */
195 18094 : int16_t l_recalc_4k = ( L_FILT16k + 1 ) / 4;
196 :
197 18094 : Sample_Delay_WB_BWE = NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS );
198 :
199 : /*Get past signal*/
200 18094 : mvr2r( hBWE_FD->old_wtda_swb + L_FRAME16k - l_recalc_16k, old_input, l_recalc_16k );
201 18094 : mvr2r( hBWE_FD->old_input_wb, old_input + l_recalc_16k, Sample_Delay_WB_BWE );
202 :
203 : /*Get new signal*/
204 18094 : mvr2r( new_inp_resamp16k, old_input + Sample_Delay_WB_BWE + l_recalc_16k, L_FRAME16k );
205 :
206 : /*compute hb_speech on delayed input*/
207 18094 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
208 :
209 : /* update hBWE_TD->old_speech_wb memory */
210 18094 : mvr2r( hb_speech, hBWE_TD->old_speech_wb + ( ( L_LOOK_12k8 + L_SUBFR ) * 5 / 16 ) - l_recalc_4k, l_recalc_4k );
211 18094 : mvr2r( hb_speech + l_recalc_4k, hb_speech, ( L_FRAME16k / 4 ) - l_recalc_4k );
212 :
213 : /*rest without memory update*/
214 18094 : mvr2r( hBWE_TD->decim_state1, decim_state1, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
215 18094 : mvr2r( hBWE_TD->decim_state2, decim_state2, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
216 :
217 18094 : flip_spectrum_and_decimby4( old_input + Sample_Delay_WB_BWE + L_FRAME16k, hb_speech + ( L_FRAME16k / 4 ) - l_recalc_4k, l_recalc_16k, decim_state1, decim_state2, 0 );
218 : }
219 : }
220 : else
221 : {
222 2000 : flip_spectrum_and_decimby4( new_inp_resamp16k, hb_speech, L_FRAME16k, hBWE_TD->decim_state1, hBWE_TD->decim_state2, ramp_flag );
223 : }
224 :
225 37073 : if ( st->extl != WB_TBE )
226 : {
227 : /* Update the previous wideband speech buffer in case of a WB_BWE frame - this code is in wb_tbe_enc */
228 20035 : Sample_Delay_WB_BWE = ( L_LOOK_12k8 + L_SUBFR ) * 5 / 16;
229 :
230 20035 : highband_new_speech = highband_old_speech + Sample_Delay_WB_BWE;
231 :
232 20035 : mvr2r( hBWE_TD->old_speech_wb, highband_old_speech, Sample_Delay_WB_BWE );
233 20035 : mvr2r( hb_speech, highband_new_speech, L_FRAME16k / 4 );
234 20035 : mvr2r( highband_old_speech + L_FRAME16k / 4, hBWE_TD->old_speech_wb, Sample_Delay_WB_BWE );
235 : }
236 : }
237 : }
238 : else
239 : {
240 58096 : set_f( hBWE_TD->decim_state1, 0.0f, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
241 58096 : set_f( hBWE_TD->decim_state2, 0.0f, ( 2 * ALLPASSSECTIONS_STEEP + 1 ) );
242 58096 : set_f( hBWE_TD->old_speech_wb, 0.0f, ( L_LOOK_12k8 + L_SUBFR ) * 5 / 16 );
243 : }
244 :
245 95169 : if ( hBWE_FD != NULL )
246 : {
247 95169 : hBWE_FD->mem_old_wtda_swb = new_inp_resamp16k[L_FRAME16k - L_MEM_RECALC_16K - L_FILT16k - 1];
248 : }
249 :
250 : /* st->old_input_wb and st->old_wtda_wb must be updated each frame, or there are often some clicks during WB TBE <-> WB BWE switching */
251 95169 : if ( ( st->extl != WB_BWE || ( st->extl == WB_BWE && st->extl_brate == 0 ) ) && !ppp_mode )
252 : {
253 89566 : Sample_Delay_WB_BWE = NS2SA( 16000, DELAY_FD_BWE_ENC_12k8_NS );
254 :
255 89566 : mvr2r( new_inp_resamp16k, &old_input[Sample_Delay_WB_BWE], L_FRAME16k );
256 89566 : mvr2r( hBWE_FD->old_input_wb, old_input, Sample_Delay_WB_BWE );
257 89566 : mvr2r( new_inp_resamp16k + L_FRAME16k - Sample_Delay_WB_BWE, hBWE_FD->old_input_wb, Sample_Delay_WB_BWE );
258 89566 : if ( ( st->extl != SWB_BWE ) && ( st->extl != FB_BWE ) )
259 : {
260 89566 : mvr2r( old_input, hBWE_FD->old_wtda_swb, L_FRAME16k );
261 : }
262 : }
263 :
264 95169 : return;
265 : }
266 :
267 :
268 : /*-------------------------------------------------------------------*
269 : * swb_pre_proc()
270 : *
271 : * - Calculate the 6 to 14 kHz (or 7.5 - 15.5 kHz) SHB target signal
272 : * for SWB TBE or SWB BWE coding
273 : * - Common SWB TBE and SWB BWE pre-processing
274 : *-------------------------------------------------------------------*/
275 :
276 819207 : void swb_pre_proc(
277 : Encoder_State *st, /* i/o: encoder state structure */
278 : float *new_swb_speech, /* o : original input signal at 32kHz */
279 : float *shb_speech, /* o : SHB target signal (6-14kHz) at 16kHz */
280 : float realBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : real buffer */
281 : float imagBuffer[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX], /* i : imag buffer */
282 : CPE_ENC_HANDLE hCPE /* i/o: CPE encoder structure */
283 : )
284 : {
285 : int16_t Sample_Delay_SWB_BWE, inner_frame, delay;
286 : TD_BWE_ENC_HANDLE hBWE_TD;
287 : FD_BWE_ENC_HANDLE hBWE_FD;
288 : int32_t inner_Fs, input_Fs;
289 : float old_input[NS2SA( 48000, DELAY_FD_BWE_ENC_NS + DELAY_FIR_RESAMPL_NS ) + L_FRAME48k];
290 : float spchTmp[L_FRAME32k], spchTmp2[L_FRAME32k];
291 : int16_t i, j, L_resamp;
292 : int16_t startB, endB;
293 : float *realBufferFlipped[CLDFB_NO_COL_MAX];
294 : float *imagBufferFlipped[CLDFB_NO_COL_MAX];
295 : float realBufferTmp[CLDFB_NO_COL_MAX][20];
296 : float imagBufferTmp[CLDFB_NO_COL_MAX][20];
297 : int16_t ts, nB, uB;
298 : float sign, lbEner, v, t, regression;
299 : const float *thr, *regV;
300 : int16_t Sample_Delay_SWB_BWE32k, lMemRecalc32k, dft_ovl32k;
301 :
302 819207 : lMemRecalc32k = NS2SA( 32000, L_MEM_RECALC_NS );
303 :
304 : /* initialization */
305 819207 : hBWE_TD = st->hBWE_TD;
306 819207 : hBWE_FD = st->hBWE_FD;
307 819207 : input_Fs = st->input_Fs;
308 :
309 13926519 : for ( j = 0; j < CLDFB_NO_COL_MAX; j++ )
310 : {
311 13107312 : set_f( realBufferTmp[j], 0, 20 );
312 13107312 : set_f( imagBufferTmp[j], 0, 20 );
313 13107312 : realBufferFlipped[j] = realBufferTmp[j];
314 13107312 : imagBufferFlipped[j] = imagBufferTmp[j];
315 : }
316 :
317 819207 : set_f( old_input, 0.0f, NS2SA( 48000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS ) + L_FRAME48k );
318 :
319 819207 : if ( input_Fs == 32000 )
320 : {
321 106727 : if ( st->element_mode > EVS_MONO )
322 : {
323 102157 : Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS );
324 102157 : if ( st->L_frame == L_FRAME16k )
325 : {
326 49136 : Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS );
327 : }
328 :
329 102157 : mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
330 102157 : mvr2r( st->input - L_FRAME32k, hBWE_FD->old_fdbwe_speech, L_FRAME32k );
331 :
332 102157 : if ( st->element_mode == IVAS_CPE_TD && st->bwidth >= SWB )
333 : {
334 4016 : mvr2r( st->input - hCPE->hStereoTCA->lMemRecalc, hBWE_FD->old_wtda_swb + L_FRAME32k - ( hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE ), hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE );
335 : }
336 98141 : else if ( st->element_mode == IVAS_CPE_DFT && st->bwidth >= SWB )
337 : {
338 30357 : mvr2r( st->input - hCPE->hStereoDft->dft_ovl, hBWE_FD->old_wtda_swb + L_FRAME32k - ( hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ), hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE );
339 : }
340 : }
341 :
342 106727 : mvr2r( st->input, new_swb_speech, L_FRAME32k );
343 :
344 106727 : if ( st->last_extl != SWB_BWE && st->last_extl != FB_BWE && st->extl != SWB_BWE_HIGHRATE )
345 : {
346 104742 : Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS );
347 104742 : if ( st->element_mode > EVS_MONO && st->L_frame == L_FRAME16k )
348 : {
349 48987 : Sample_Delay_SWB_BWE = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS + DELAY_FIR_RESAMPL_NS );
350 : }
351 104742 : if ( st->element_mode > EVS_MONO )
352 : {
353 100498 : Sample_Delay_SWB_BWE -= NS2SA( 32000, DELAY_FIR_RESAMPL_NS );
354 : }
355 :
356 104742 : mvr2r( hBWE_FD->old_fdbwe_speech, &old_input[Sample_Delay_SWB_BWE], L_FRAME32k );
357 :
358 104742 : set_f( old_input, 0, Sample_Delay_SWB_BWE );
359 104742 : mvr2r( hBWE_FD->old_fdbwe_speech + L_FRAME32k - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
360 104742 : if ( st->extl != WB_BWE )
361 : {
362 104476 : mvr2r( old_input, hBWE_FD->old_wtda_swb, L_FRAME32k );
363 : }
364 : }
365 :
366 106727 : if ( st->extl != SWB_BWE && st->extl != FB_BWE )
367 : {
368 105039 : mvr2r( st->input, hBWE_FD->old_fdbwe_speech, L_FRAME32k );
369 : }
370 : }
371 : else /* 48 kHz */
372 : {
373 :
374 712480 : Sample_Delay_SWB_BWE32k = NS2SA( 32000, DELAY_FD_BWE_ENC_12k8_NS );
375 712480 : Sample_Delay_SWB_BWE = NS2SA( 48000, DELAY_FD_BWE_ENC_12k8_NS );
376 712480 : if ( st->L_frame == L_FRAME16k )
377 : {
378 293257 : Sample_Delay_SWB_BWE32k = NS2SA( 32000, DELAY_FD_BWE_ENC_16k_NS );
379 293257 : Sample_Delay_SWB_BWE = NS2SA( 48000, DELAY_FD_BWE_ENC_16k_NS );
380 : }
381 :
382 712480 : dft_ovl32k = 0;
383 712480 : if ( st->element_mode == IVAS_CPE_DFT )
384 : {
385 89059 : dft_ovl32k = (int16_t) ( STEREO_DFT_OVL_MAX * 32000 / 48000 );
386 : }
387 :
388 712480 : if ( st->codec_mode == MODE1 )
389 : {
390 709090 : if ( st->element_mode > EVS_MONO )
391 : {
392 :
393 706780 : if ( st->element_mode == IVAS_CPE_TD )
394 : {
395 : }
396 703330 : else if ( st->bwidth == FB )
397 : {
398 432065 : mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
399 : }
400 :
401 706780 : mvr2r( st->input - L_FRAME48k, hBWE_FD->old_fdbwe_speech, L_FRAME48k );
402 :
403 706780 : if ( st->element_mode == IVAS_CPE_TD && st->bwidth >= SWB )
404 : {
405 3333 : if ( st->bwidth == SWB )
406 : {
407 : /* buffers hBWE_FD->old_input[] and hBWE_FD->old_wtda_swb[] need to be at 32 kHz (inner) sampling rate */
408 :
409 558 : decimate_2_over_3_allpass( st->input - hCPE->hStereoTCA->lMemRecalc, hCPE->hStereoTCA->lMemRecalc, spchTmp, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
410 :
411 558 : mvr2r( spchTmp, hBWE_FD->old_wtda_swb + L_FRAME32k - ( lMemRecalc32k - Sample_Delay_SWB_BWE32k ), lMemRecalc32k - Sample_Delay_SWB_BWE32k );
412 558 : mvr2r( spchTmp + lMemRecalc32k - Sample_Delay_SWB_BWE32k, hBWE_FD->old_input, Sample_Delay_SWB_BWE32k );
413 : }
414 : else /* FB_BWE */
415 : {
416 2775 : mvr2r( st->input - hCPE->hStereoTCA->lMemRecalc, hBWE_FD->old_wtda_swb + L_FRAME48k - ( hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE ), hCPE->hStereoTCA->lMemRecalc - Sample_Delay_SWB_BWE );
417 2775 : mvr2r( st->input - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
418 : }
419 : }
420 703447 : else if ( st->element_mode == IVAS_CPE_DFT && st->bwidth >= SWB )
421 : {
422 87825 : if ( st->bwidth == SWB )
423 : {
424 65045 : lerp( st->input - hCPE->hStereoDft->dft_ovl, spchTmp, dft_ovl32k - Sample_Delay_SWB_BWE32k, hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE );
425 :
426 65045 : mvr2r( spchTmp, hBWE_FD->old_wtda_swb + L_FRAME32k - ( dft_ovl32k - Sample_Delay_SWB_BWE32k ), dft_ovl32k - Sample_Delay_SWB_BWE32k );
427 : }
428 : else
429 : {
430 22780 : mvr2r( st->input - hCPE->hStereoDft->dft_ovl, hBWE_FD->old_wtda_swb + L_FRAME48k - ( hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE ), hCPE->hStereoDft->dft_ovl - Sample_Delay_SWB_BWE );
431 : }
432 : }
433 : }
434 :
435 709090 : if ( ( st->extl != SWB_BWE && st->extl != FB_BWE && st->core == ACELP_CORE ) || ( st->element_mode == IVAS_CPE_DFT && st->core != ACELP_CORE ) /*resampling not needed for MDCT cores*/ )
436 : {
437 : /* move the resampling out of the TDBWE path as new_swb_speech is not needed for TDBWE. */
438 263689 : mvr2r( st->input, hBWE_FD->old_fdbwe_speech, L_FRAME48k );
439 : }
440 : else
441 : {
442 445401 : if ( st->last_extl != SWB_BWE && st->last_extl != FB_BWE )
443 : {
444 : /* resample 48 kHz to 32kHz */
445 419531 : if ( ( st->last_bwidth == FB && st->element_mode == EVS_MONO ) || ( st->bwidth == FB && st->element_mode > EVS_MONO ) ) // note: once EVS i CR fixed, the condition will simplify to "if ( st->bwidth == FB )" only
446 : {
447 318773 : inner_frame = L_FRAME48k;
448 318773 : inner_Fs = 48000;
449 318773 : mvr2r( hBWE_FD->old_fdbwe_speech, new_swb_speech, L_FRAME48k );
450 : }
451 : else
452 : {
453 100758 : inner_frame = L_FRAME32k;
454 100758 : inner_Fs = 32000;
455 :
456 100758 : if ( st->element_mode != IVAS_CPE_DFT )
457 : {
458 99322 : decimate_2_over_3_allpass( hBWE_FD->old_fdbwe_speech, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
459 : }
460 : else
461 : {
462 1436 : lerp( hBWE_FD->old_fdbwe_speech, new_swb_speech, inner_frame, L_FRAME48k );
463 : }
464 :
465 100758 : if ( st->element_mode == IVAS_CPE_DFT && st->idchan == 0 )
466 : {
467 403516 : for ( i = 0; i < STEREO_DFT_OVL_32k; i++ )
468 : {
469 402080 : hCPE->hStereoDft->output_mem_dmx_32k[i] = new_swb_speech[inner_frame - STEREO_DFT_OVL_32k + i] * hCPE->hStereoDft->win_32k[STEREO_DFT_OVL_32k - 1 - i];
470 : }
471 : }
472 : }
473 :
474 419531 : Sample_Delay_SWB_BWE = NS2SA( inner_Fs, DELAY_FD_BWE_ENC_12k8_NS + DELAY_FIR_RESAMPL_NS );
475 419531 : if ( st->element_mode > EVS_MONO && st->L_frame == L_FRAME16k )
476 : {
477 149607 : Sample_Delay_SWB_BWE = NS2SA( inner_Fs, DELAY_FD_BWE_ENC_16k_NS + DELAY_FIR_RESAMPL_NS );
478 : }
479 419531 : if ( st->element_mode > EVS_MONO )
480 : {
481 418982 : Sample_Delay_SWB_BWE -= NS2SA( inner_Fs, DELAY_FIR_RESAMPL_NS );
482 : }
483 :
484 419531 : mvr2r( new_swb_speech, &old_input[Sample_Delay_SWB_BWE], inner_frame );
485 419531 : set_f( old_input, 0, Sample_Delay_SWB_BWE );
486 419531 : mvr2r( new_swb_speech + inner_frame - Sample_Delay_SWB_BWE, hBWE_FD->old_input, Sample_Delay_SWB_BWE );
487 419531 : mvr2r( old_input, hBWE_FD->old_wtda_swb, inner_frame );
488 : }
489 :
490 : /* resample 48 kHz to 32kHz */
491 445401 : if ( st->bwidth == FB )
492 : {
493 328541 : mvr2r( st->input, new_swb_speech, L_FRAME48k );
494 : }
495 : else
496 : {
497 116860 : if ( st->element_mode == IVAS_CPE_TD )
498 : {
499 : float dec_2_over_3_mem_tmp[L_FILT_2OVER3], dec_2_over_3_mem_lp_tmp[L_FILT_2OVER3_LP];
500 :
501 19 : decimate_2_over_3_allpass( st->input, L_FRAME48k - hCPE->hStereoTCA->lMemRecalc, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
502 :
503 19 : mvr2r( hBWE_TD->dec_2_over_3_mem, dec_2_over_3_mem_tmp, L_FILT_2OVER3 );
504 19 : mvr2r( hBWE_TD->dec_2_over_3_mem_lp, dec_2_over_3_mem_lp_tmp, L_FILT_2OVER3_LP );
505 :
506 19 : decimate_2_over_3_allpass( st->input + L_FRAME48k - hCPE->hStereoTCA->lMemRecalc, hCPE->hStereoTCA->lMemRecalc, new_swb_speech + L_FRAME32k - lMemRecalc32k, dec_2_over_3_mem_tmp, dec_2_over_3_mem_lp_tmp );
507 : }
508 116841 : else if ( st->element_mode != IVAS_CPE_DFT )
509 : {
510 106824 : decimate_2_over_3_allpass( st->input, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
511 : }
512 : else /* IVAS_CPE_DFT */
513 : {
514 10017 : stereo_dft_enc_synthesize( hCPE->hStereoDft, new_swb_speech, st->idchan, input_Fs, 32000, 0 );
515 :
516 10017 : mvr2r( new_swb_speech - Sample_Delay_SWB_BWE32k, hBWE_FD->old_input, Sample_Delay_SWB_BWE32k );
517 : }
518 : }
519 : }
520 : }
521 : else
522 : {
523 : /* resample 48 kHz to 32kHz */
524 3390 : if ( st->bwidth == FB )
525 : {
526 1132 : mvr2r( st->input, new_swb_speech, L_FRAME48k );
527 : }
528 : else
529 : {
530 2258 : decimate_2_over_3_allpass( st->input, L_FRAME48k, new_swb_speech, hBWE_TD->dec_2_over_3_mem, hBWE_TD->dec_2_over_3_mem_lp );
531 : }
532 : }
533 : }
534 :
535 819207 : if ( ( st->core == ACELP_CORE && st->extl != SWB_BWE_HIGHRATE && st->extl != FB_BWE_HIGHRATE ) ||
536 507578 : ( ( st->total_brate == ACELP_9k60 || st->rf_mode ) && st->bwidth == SWB && st->element_mode == EVS_MONO ) )
537 311789 : {
538 311789 : float CldfbHB = 0;
539 :
540 311789 : if ( st->element_mode == IVAS_CPE_DFT )
541 : {
542 77323 : CldfbHB = stereo_dft_enc_synthesize( hCPE->hStereoDft, old_input + STEREO_DFT_OVL_16k, st->idchan, input_Fs, 16000, st->L_frame );
543 :
544 : /* delay corresponding to CLDFB delay */
545 77323 : mvr2r( old_input + STEREO_DFT_OVL_16k - 20, shb_speech, L_FRAME16k );
546 77323 : mvr2r( old_input, hBWE_TD->old_speech_shb + L_LOOK_16k + L_SUBFR16k - ( STEREO_DFT_OVL_16k - 20 ), STEREO_DFT_OVL_16k - 20 );
547 77323 : mvr2r( old_input, hCPE->hStereoICBWE->mem_shb_speech_ref, STEREO_DFT_OVL_16k - 20 );
548 :
549 77323 : if ( CldfbHB <= 0 )
550 : {
551 1329 : CldfbHB = 1.0f;
552 : }
553 77323 : hBWE_TD->cldfbHBLT = 0.9f * hBWE_TD->cldfbHBLT + 0.1f * ( 0.221462f /*=1/log10(32768)*/ * ( log10f( CldfbHB ) - 1.0f ) );
554 :
555 77323 : lbEner = 0.05f * (float) sqrt( hCPE->hStereoDft->lbEner );
556 77323 : hCPE->hStereoICBWE->icbweRefEner = 0.05f * (float) sqrt( hCPE->hStereoDft->icbweRefEner );
557 77323 : lbEner = 0.05f * (float) sqrt( hCPE->hStereoDft->lbEner );
558 77323 : thr = icbwe_thr_DFT;
559 77323 : regV = icbwe_regressionValuesDFT;
560 : }
561 : else
562 : {
563 234466 : if ( st->L_frame == L_FRAME )
564 : {
565 115307 : startB = 34;
566 115307 : endB = 14;
567 1960219 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
568 : {
569 38743152 : for ( nB = startB, uB = 0; nB > endB; nB--, uB++ )
570 : {
571 36898240 : sign = ( ts % 2 ) ? 1.0f : -1.0f;
572 36898240 : realBufferFlipped[ts][uB] = -sign * realBuffer[ts][nB];
573 36898240 : imagBufferFlipped[ts][uB] = sign * imagBuffer[ts][nB];
574 : }
575 : }
576 : }
577 : else
578 : {
579 119159 : startB = 39;
580 119159 : endB = 19;
581 2025703 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
582 : {
583 40037424 : for ( nB = startB, uB = 0; nB > endB; nB--, uB++ )
584 : {
585 38130880 : realBufferFlipped[ts][uB] = -realBuffer[ts][nB];
586 38130880 : imagBufferFlipped[ts][uB] = imagBuffer[ts][nB];
587 : }
588 : }
589 : }
590 :
591 2579126 : for ( nB = 0; nB < 10; nB++ )
592 : {
593 39859220 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
594 : {
595 37514560 : CldfbHB += ( realBufferFlipped[ts][nB] * realBufferFlipped[ts][nB] + imagBufferFlipped[ts][nB] * imagBufferFlipped[ts][nB] );
596 : }
597 : }
598 234466 : if ( CldfbHB <= 0 )
599 : {
600 410 : CldfbHB = 1.0f;
601 : }
602 234466 : hBWE_TD->cldfbHBLT = 0.9f * hBWE_TD->cldfbHBLT + 0.1f * ( 0.221462f /*=1/log10(32768)*/ * ( log10f( CldfbHB ) - 1.0f ) );
603 :
604 234466 : if ( st->element_mode >= IVAS_CPE_DFT && hCPE->hStereoICBWE != NULL )
605 : {
606 84 : hCPE->hStereoICBWE->icbweRefEner = EPSILON;
607 1764 : for ( nB = 20; nB < 40; nB++ )
608 : {
609 28560 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
610 : {
611 26880 : hCPE->hStereoICBWE->icbweRefEner += ( realBuffer[ts][nB] * realBuffer[ts][nB] + imagBuffer[ts][nB] * imagBuffer[ts][nB] );
612 : }
613 : }
614 84 : hCPE->hStereoICBWE->icbweRefEner = 0.05f * sqrtf( hCPE->hStereoICBWE->icbweRefEner );
615 : }
616 :
617 234466 : lbEner = EPSILON;
618 4923786 : for ( nB = 0; nB < 20; nB++ )
619 : {
620 79718440 : for ( ts = 0; ts < CLDFB_NO_COL_MAX; ts++ )
621 : {
622 75029120 : lbEner += ( realBuffer[ts][nB] * realBuffer[ts][nB] + imagBuffer[ts][nB] * imagBuffer[ts][nB] );
623 : }
624 : }
625 234466 : lbEner = 0.05f * sqrtf( lbEner );
626 234466 : thr = icbwe_thr_TDM;
627 234466 : regV = icbwe_regressionValuesTDM;
628 :
629 234466 : cldfbSynthesis( realBufferFlipped, imagBufferFlipped, shb_speech, -1, st->cldfbSynTd );
630 : }
631 :
632 311789 : if ( st->element_mode >= IVAS_CPE_DFT && hCPE->hStereoICBWE != NULL )
633 : {
634 77407 : hCPE->hStereoICBWE->MSFlag = 0; /* Init the multi-source flag */
635 77407 : v = 0.3333f * sum_f( st->voicing, 3 );
636 77407 : t = log10f( ( hCPE->hStereoICBWE->icbweRefEner + 1e-6f ) / ( lbEner + 1e-6f ) );
637 :
638 : /* Three Level Decision Tree to calculate a regression value first */
639 77407 : if ( t < thr[0] ) /* level 1 */
640 : {
641 58084 : if ( t < thr[1] ) /* level 2 */
642 : {
643 51125 : regression = ( v < thr[3] ) ? regV[0] : regV[1]; /* level 3 */
644 : }
645 : else
646 : {
647 6959 : regression = ( v < thr[4] ) ? regV[2] : regV[3]; /* level 3 */
648 : }
649 : }
650 : else
651 : {
652 19323 : if ( t < thr[2] ) /* level 2 */
653 : {
654 3932 : regression = ( v < thr[5] ) ? regV[4] : regV[5]; /* level 3 */
655 : }
656 : else
657 : {
658 15391 : regression = ( v < thr[6] ) ? regV[6] : regV[7]; /* level 3 */
659 : }
660 : }
661 :
662 : /* Convert the regression to a hard decision (classification) */
663 77407 : if ( regression > 0.79f && !( st->bwidth < SWB || hCPE->hCoreCoder[0]->vad_flag == 0 ) )
664 : {
665 15128 : hCPE->hStereoICBWE->MSFlag = 1;
666 : }
667 : }
668 :
669 311789 : if ( st->extl != WB_TBE && st->extl != SWB_TBE && st->extl != FB_TBE )
670 : {
671 : /* Update the previous superwideband speech buffer in case of a SWB_BWE frame - this code is in swb_tbe_enc */
672 62153 : delay = L_LOOK_16k + L_SUBFR16k;
673 62153 : mvr2r( shb_speech + L_FRAME16k - delay, hBWE_TD->old_speech_shb, delay );
674 : }
675 : }
676 : else
677 : {
678 507418 : if ( ( st->bwidth == FB || st->core == ACELP_CORE ) && ( st->element_mode == EVS_MONO ) )
679 : {
680 2121 : InitSWBencBufferStates( st->hBWE_TD, shb_speech );
681 : }
682 : else
683 : {
684 505297 : if ( st->element_mode == IVAS_CPE_DFT )
685 : {
686 42439 : if ( st->L_frame == L_FRAME )
687 : {
688 16674 : L_resamp = 560; /* 6.4 kHz core -> 6 - 14 kHz SHB target. 20 ms is 560 samples in 28 kHz sample rate */
689 : }
690 : else
691 : {
692 25765 : L_resamp = 620; /* 8 kHz core -> 7.5 - 15.5 kHz SHB target. 20 ms is 620 samples in 31 kHz sample rate */
693 : }
694 :
695 : /* Dirty downsampling to match Nyquist to upper frequency limit of target */
696 42439 : lerp( st->input, new_swb_speech, L_resamp, (int16_t) ( input_Fs / 50 ) );
697 :
698 : /* flip the spectrum */
699 42439 : mvr2r( new_swb_speech, spchTmp, L_resamp );
700 12698309 : for ( i = 0; i < L_resamp; i = i + 2 )
701 : {
702 12655870 : spchTmp[i] = -spchTmp[i];
703 : }
704 :
705 : /* Dirty upsampling to match Nyquist/2 to lower frequency limit of target (reversed spectrum)*/
706 42439 : lerp( spchTmp, spchTmp2, L_FRAME32k, L_resamp );
707 42439 : mvr2r( spchTmp2, spchTmp, L_FRAME32k );
708 : }
709 : else
710 : {
711 : /* flip the spectrum */
712 462858 : mvr2r( new_swb_speech, spchTmp, L_FRAME32k );
713 :
714 148577418 : for ( i = 0; i < L_FRAME32k; i = i + 2 )
715 : {
716 148114560 : spchTmp[i] = -spchTmp[i];
717 : }
718 : }
719 :
720 505297 : Decimate_allpass_steep( spchTmp, hBWE_TD->state_ana_filt_shb, L_FRAME32k, shb_speech );
721 :
722 505297 : mvr2r( shb_speech + L_FRAME16k - ( L_LOOK_16k + L_SUBFR16k ), hBWE_TD->old_speech_shb, L_LOOK_16k + L_SUBFR16k );
723 :
724 : /*Compute the past overlap for potential next iDFTs SHB*/
725 505297 : if ( st->element_mode == IVAS_CPE_DFT )
726 : {
727 5983899 : for ( i = 0; i < STEREO_DFT_OVL_16k; i++ )
728 : {
729 5941460 : hCPE->hStereoDft->output_mem_dmx_16k_shb[i] = shb_speech[20 + i] * hCPE->hStereoDft->win_ana_16k[STEREO_DFT_OVL_16k - 1 - i] * hCPE->hStereoDft->win_ana_16k[STEREO_DFT_OVL_16k - 1 - i];
730 : }
731 : }
732 : }
733 :
734 507418 : if ( st->element_mode != IVAS_CPE_DFT )
735 : {
736 : /* Reset CLDFB synthesis buffer */
737 464979 : set_f( st->cldfbSynTd->cldfb_state, 0.0f, st->cldfbSynTd->p_filter_length );
738 : }
739 : else
740 : {
741 42439 : hCPE->hStereoDft->flip_sign = -hCPE->hStereoDft->flip_sign; /* Make sure sign is updated even if DFT SHB target is not generated */
742 : }
743 : }
744 :
745 : /* Memory reset to compensate for 0.9375 ms offset when transitioning from IO to SWB */
746 : /* When switching from n >1 to n = 1, we keep the enc/dec delay as 8.75/3.25 and below code not needed;
747 : only when n = 1 start, it will be 9.6875/2.3125 in that case this reset is needed for IO->BWE.*/
748 819207 : if ( st->last_extl == -1 && st->element_mode == EVS_MONO )
749 : {
750 1374 : delay = NS2SA( input_Fs, DELAY_FIR_RESAMPL_NS );
751 51144 : for ( i = 0; i < delay; i++ )
752 : {
753 49770 : shb_speech[i] = (float) i * ( 0.03f * shb_speech[2 * delay - 1 - i] );
754 : }
755 : }
756 :
757 819207 : return;
758 : }
|