Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <assert.h>
34 : #include <stdint.h>
35 : #include "options.h"
36 : #include <math.h>
37 : #include "cnst.h"
38 : #include "prot.h"
39 : #include "rom_com.h"
40 : #include "ivas_prot.h"
41 : #include "ivas_rom_com.h"
42 : #include "ivas_stat_enc.h"
43 : #include "ivas_cnst.h"
44 : #ifdef DEBUGGING
45 : #include "debug.h"
46 : #endif
47 : #ifdef DEBUG_PLOT
48 : #include "deb_out.h"
49 : #endif
50 : #include "wmc_auto.h"
51 :
52 :
53 : /*-------------------------------------------------------------------*
54 : * Local constants
55 : *-------------------------------------------------------------------*/
56 :
57 : #define STEREO_DFT_CHANNEL_EXTR_LPC_ORDER 10 /* LPC order passed to autocorr(), lev_dur(), fir() and syn_filt() in the channel extrapolation */
58 : #define STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT 320 /* number of samples used for the LPC analysis and the residual in the channel extrapolation */
59 :
60 :
61 : /*---------------------------------------------------------------
62 : * stereo_td_get_td_itd()
63 : * Convert the ITD into an integer sample shift at the input sampling
64 : * rate together with the matching integer shift at 32 kHz
65 : * ---------------------------------------------------------------*/
66 :
67 35442 : static void stereo_td_get_td_itd(
68 : int16_t *td_itd, /* o : td_itd in samples at sampling frequency */
69 : int16_t *td_itd_32, /* o : td_itd in samples at 32kHz */
70 : const float itd, /* i : itd in samples at sampling frequency */
71 : const int32_t input_Fs /* i : sampling frequency */
72 : )
73 : {
74 : /* *td_itd is the closest integer to itd that transforms into an integer value *
75 : * under the transform x -> (32000/fs) x. */
76 : int16_t d;
77 :
78 35442 : if ( input_Fs == 32000 )
79 : {
80 12815 : *td_itd_32 = *td_itd = (int16_t) itd; /* both outputs are the same value; the cast truncates any fractional part */
81 : }
82 : else
83 : {
84 22627 : assert( ( input_Fs % 16000 ) == 0 && "sampling frequency should be divisible by 16000" );
85 22627 : d = (int16_t) ( input_Fs / 16000 ); /* input sampling rate expressed in multiples of 16 kHz */
86 22627 : *td_itd_32 = 2 * (int16_t) floorf( itd / d + 0.5f ); /* round itd / d to the nearest integer, then double it to get the 32 kHz value (always even) */
87 22627 : *td_itd = ( ( *td_itd_32 ) / 2 ) * d; /* scale the rounded value back to the input sampling rate */
88 : }
89 :
90 35442 : return;
91 : }
92 :
93 : /*---------------------------------------------------------------
94 : * stereo_td_channel_extrapolate()
95 : * Replace the last itd_shift samples of the shifted lagging channel by an
96 : * LPC-based extrapolation mixed with the gain-scaled leading channel
97 : * ---------------------------------------------------------------*/
98 :
99 15692 : static void stereo_td_channel_extrapolate(
100 : Encoder_State **sts, /* i : encoder states; element_mode, pitch[0] and input are read */
101 : const int16_t dft_ovl, /* i : overlap size, used as memory length when sts[0] is in DFT stereo mode */
102 : const float shift_mem[], /* i : shifted memory (past part) of the lagging channel */
103 : float shift_input[], /* i/o: shifted current frame of the lagging channel, tail is filled on output */
104 : const int16_t input_frame, /* i : input frame length */
105 : const int16_t itd_shift, /* i : number of samples at the end of the frame to extrapolate */
106 : const int16_t lagging_channel, /* i : index of the lagging channel; the other index is taken as leading channel */
107 : float *input_mem[CPE_CHANNELS] /* i/o: input buffer memory */
108 : )
109 : {
110 : int16_t leading_channel, i, size_ovl, pred_ovlp;
111 : float g, nsr, g_lpc, dot_lead_lag, dot_lead_lead, dot_lag_lag;
112 : float window[STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT], mem_zero[STEREO_DFT_CHANNEL_EXTR_LPC_ORDER];
113 : float residual[STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT + ( STEREO_DFT_ITD_MAX * L_FRAME48k ) / L_FRAME32k];
114 : float r[STEREO_DFT_CHANNEL_EXTR_LPC_ORDER + 1];
115 : float A[STEREO_DFT_CHANNEL_EXTR_LPC_ORDER + 1];
116 : float shift_combined[L_FRAME48k + L_FRAME48k]; /* memory part followed by current frame of the lagging channel */
117 : int16_t flag;
118 : int16_t pitch_lag;
119 : int16_t res_shift;
120 : int16_t pitch0;
121 :
122 15692 : set_f( shift_combined, 0.0f, L_FRAME48k + L_FRAME48k );
123 15692 : set_f( residual, 0.0f, STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT + ( STEREO_DFT_ITD_MAX * input_frame ) / L_FRAME32k );
124 :
125 15692 : leading_channel = ( lagging_channel + 1 ) % 2; /* the other one of the two channels */
126 15692 : if ( sts[0]->element_mode == IVAS_CPE_DFT )
127 : {
128 12135 : size_ovl = dft_ovl; /* memory part is dft_ovl samples long */
129 12135 : pitch0 = sts[0]->pitch[0];
130 : }
131 : else
132 : {
133 3557 : size_ovl = input_frame; /* memory part is one full frame long */
134 3557 : pitch0 = sts[lagging_channel]->pitch[0];
135 : }
136 :
137 15692 : pred_ovlp = input_frame / 10; /* length of the linear cross-fade done at the end of this function */
138 :
139 : /*get pitch lag from previous frame */
140 15692 : pitch_lag = (int16_t) ( pitch0 * ( (float) input_frame / L_FRAME ) ); /* pitch0 scaled by input_frame / L_FRAME */
141 :
142 : /* compute the parameters g, nsr and g_lpc */
143 15692 : dot_lead_lag = EPSILON;
144 15692 : dot_lead_lead = EPSILON; /* accumulators start at EPSILON rather than 0; two of them are used as divisors below */
145 15692 : dot_lag_lag = EPSILON;
146 5883352 : for ( i = 0; i < size_ovl; i++ ) /* memory part */
147 : {
148 5867660 : shift_combined[i] = shift_mem[i];
149 5867660 : dot_lead_lag += input_mem[leading_channel][i] * shift_mem[i];
150 5867660 : dot_lead_lead += input_mem[leading_channel][i] * input_mem[leading_channel][i];
151 5867660 : dot_lag_lag += shift_mem[i] * shift_mem[i];
152 : }
153 10379512 : for ( i = 0; i < input_frame - itd_shift; i++ ) /* current frame, excluding the last itd_shift samples */
154 : {
155 10363820 : shift_combined[i + size_ovl] = shift_input[i];
156 10363820 : dot_lead_lag += sts[leading_channel]->input[i] * shift_input[i];
157 10363820 : dot_lead_lead += sts[leading_channel]->input[i] * sts[leading_channel]->input[i];
158 10363820 : dot_lag_lag += shift_input[i] * shift_input[i];
159 : }
160 :
161 15692 : g = dot_lead_lag / dot_lead_lead; /* cross energy over leading-channel energy */
162 15692 : nsr = 1 - ( ( dot_lead_lag * dot_lead_lag ) / ( dot_lead_lead * dot_lag_lag ) ); /* one minus the squared normalized cross-correlation */
163 :
164 15692 : g = check_bounds( g, -1, 1.5f ); /* presumably limits g to [-1, 1.5] - check_bounds() is defined elsewhere, TODO confirm */
165 15692 : nsr = check_bounds( nsr, 0.0f, 1 ); /* presumably limits nsr to [0, 1], which keeps the sqrtf() argument non-negative - TODO confirm */
166 15692 : g_lpc = sqrtf( nsr );
167 :
168 : /* rectangular window */
169 15692 : set_f( window, 1.0f, STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT );
170 15692 : set_zero( mem_zero, STEREO_DFT_CHANNEL_EXTR_LPC_ORDER );
171 :
172 : /* get the LPC filter */
173 15692 : autocorr( shift_combined + input_frame + size_ovl - itd_shift - STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT, r, STEREO_DFT_CHANNEL_EXTR_LPC_ORDER, STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT, window, 0, 0, 0 ); /* input is the last STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT samples copied into shift_combined above */
174 :
175 : /* Ensure R[0] isn't zero when entering Levinson-Durbin */
176 15692 : r[0] = max( r[0], 1.0e-8f );
177 188304 : for ( i = 0; i <= STEREO_DFT_CHANNEL_EXTR_LPC_ORDER; i++ )
178 : {
179 172612 : r[i] = r[i] * wac_swb[i]; /* weight each lag with the wac_swb table (presumably a lag window - table is defined elsewhere) */
180 : }
181 :
182 : /* Add a small offset to R[0] after the weighting, again so that R[0] isn't zero when entering Levinson-Durbin */
183 15692 : r[0] += 1.0e-8f;
184 :
185 15692 : flag = lev_dur( A, r, STEREO_DFT_CHANNEL_EXTR_LPC_ORDER, NULL );
186 :
187 15692 : if ( flag == 1 ) /* NOTE(review): lev_dur() returning 1 presumably signals a failed/unstable filter - confirm against lev_dur() */
188 : {
189 0 : g_lpc = 0.0f; /* no LPC contribution: the tail becomes g times the leading channel only */
190 : }
191 : else
192 : {
193 : /* get the residual */
194 15692 : fir( shift_combined + input_frame + size_ovl - itd_shift - STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT, A, residual, mem_zero, STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT, STEREO_DFT_CHANNEL_EXTR_LPC_ORDER, 0 );
195 :
196 : /* extend the residual */
197 : /* to prevent out of bound reading */
198 :
199 15692 : res_shift = ( pitch_lag < PIT_MAX ? pitch_lag : itd_shift ); /* repetition distance: the scaled pitch lag if below PIT_MAX, otherwise itd_shift */
200 :
201 657632 : for ( i = STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT; i < STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT + itd_shift; i++ )
202 : {
203 641940 : residual[i] = residual[i - res_shift]; /* repeat the residual from res_shift samples earlier */
204 : }
205 :
206 : /* perform synthesis */
207 15692 : syn_filt( A, STEREO_DFT_CHANNEL_EXTR_LPC_ORDER, residual, shift_combined + input_frame + size_ovl - STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT - itd_shift, STEREO_DFT_CHANNEL_EXTR_LPC_VEC_LIMIT + itd_shift, mem_zero, 0 ); /* output covers the analysed segment plus itd_shift further samples of shift_combined */
208 : }
209 :
210 15692 : mvr2r( shift_combined + size_ovl, shift_input, input_frame ); /* copy the current-frame part back into shift_input */
211 :
212 : /* use this synthesis part to replace the zeros in the lagging channel */
213 657632 : for ( i = input_frame - itd_shift; i < input_frame; i++ )
214 : {
215 641940 : shift_input[i] = ( g_lpc * shift_input[i] ) + ( g * sts[leading_channel]->input[i] );
216 : }
217 :
218 : /* smooth transition (currently done by blending over linearly, could be replaced by something more elaborate.) */
219 1116268 : for ( i = input_frame - itd_shift - pred_ovlp; i < input_frame - itd_shift; i++ )
220 : {
221 1100576 : shift_input[i] = ( i - input_frame + itd_shift + pred_ovlp ) * ( ( g_lpc * shift_input[i] ) + ( g * sts[leading_channel]->input[i] ) ) / pred_ovlp + ( input_frame - itd_shift - i ) * shift_input[i] / pred_ovlp; /* weight of the mixed signal rises linearly from 0, weight of the existing sample falls linearly from 1 */
222 : }
223 :
224 15692 : return;
225 : }
226 :
227 :
228 : /*---------------------------------------------------------------
229 : * stereo_td_itd()
230 : * Update the TD ITD parameters and, for hybrid ITD processing, time-shift
231 : * the lagging channel by the TD ITD and extrapolate the missing samples
232 : * ---------------------------------------------------------------*/
233 :
234 74369 : void stereo_td_itd(
235 : ITD_DATA *hITD, /* i/o: ITD data structure */
236 : float input_mem_itd[CPE_CHANNELS][STEREO_DFT_OVL_MAX], /* o : ITD memory (only used in DFT Stereo) */
237 : const int16_t hybrid_itd_flag, /* i : flag for hybrid TD/FD ITD processing */
238 : #ifdef DEBUG_MODE_DFT
239 : const int16_t itd_mode, /* i : main ITD processing flag */
240 : #endif
241 : const int16_t dft_ovl, /* i : size of DFT overlap */
242 : Encoder_State **sts, /* i/o: Encoder state structure */
243 : const int16_t input_frame, /* i : input frame length */
244 : float *input_mem[CPE_CHANNELS] /* i/o: input buffer memory */
245 : )
246 : {
247 : int16_t i, ch, n;
248 : int16_t size_ovl, k_offset;
249 : int16_t shift[2];
250 : int16_t itd, itd_max;
251 : float shift_input[L_FRAME48k];
252 : float shift_mem[L_FRAME48k];
253 : float *mdct_mem[CPE_CHANNELS]; /* NOTE(review): only initialised below when element_mode == IVAS_CPE_MDCT, but read in every non-DFT branch - assumes the mode is either DFT or MDCT, confirm against callers */
254 :
255 74369 : k_offset = STEREO_DFT_OFFSET;
256 74369 : set_f( shift_input, 0.0f, input_frame );
257 74369 : if ( sts[0]->element_mode == IVAS_CPE_MDCT )
258 : {
259 44070 : for ( n = 0; n < CPE_CHANNELS; n++ )
260 : {
261 29380 : mdct_mem[n] = sts[n]->old_input_signal; /* in MDCT stereo the old input signal serves as the past-signal memory */
262 : }
263 : }
264 74369 : if ( sts[0]->element_mode == IVAS_CPE_DFT )
265 : {
266 : /* Update the parameters */
267 119358 : for ( i = 0; i < k_offset; i++ ) /* move each entry one position towards index 0 */
268 : {
269 59679 : hITD->deltaItd[i] = hITD->deltaItd[i + 1];
270 59679 : hITD->td_itd[i] = hITD->td_itd[i + 1];
271 59679 : hITD->td_itd_32k[i] = hITD->td_itd_32k[i + 1];
272 : }
273 : }
274 : /*reset TD ITDs in case of hybrid itd_max change - turn hybrid ITD off*/
275 74369 : if ( hITD->hybrid_itd_max == -1 && hybrid_itd_flag == 0 )
276 : {
277 21 : hITD->td_itd[k_offset] = 0;
278 21 : hITD->td_itd_32k[k_offset] = 0;
279 : }
280 74369 : if ( hybrid_itd_flag == 0 )
281 : {
282 38927 : return; /* hybrid ITD processing is off: no time shift is applied */
283 : }
284 35442 : stereo_td_get_td_itd( &( hITD->td_itd[k_offset] ), &( hITD->td_itd_32k[k_offset] ), hITD->itd[k_offset], sts[0]->input_Fs ); /* derive the integer TD ITD of the current frame from hITD->itd */
285 :
286 : /* initializations*/
287 : #ifdef DEBUG_MODE_DFT
288 : if ( itd_mode )
289 : #endif
290 : {
291 35442 : size_ovl = dft_ovl;
292 :
293 35442 : itd_max = ( STEREO_DFT_ITD_MAX * input_frame ) / L_FRAME32k; /* STEREO_DFT_ITD_MAX scaled by input_frame / L_FRAME32k */
294 :
295 35442 : assert( ( fabsf( hITD->itd[k_offset] ) <= itd_max ) && "ITD value is too high!" );
296 35442 : assert( ( fabsf( hITD->itd[k_offset - 1] ) <= itd_max ) && "ITD value is too high!" );
297 :
298 35442 : itd = hITD->td_itd[k_offset];
299 :
300 : /*Time shift with current ITD*/
301 35442 : if ( itd < 0 ) /* negative ITD: channel 0 is the one that gets shifted */
302 : {
303 10429 : shift[1] = 0;
304 10429 : shift[0] = -itd;
305 10429 : ch = 0;
306 : }
307 : else /* zero or positive ITD: channel 1 is the one that gets shifted */
308 : {
309 25013 : shift[1] = itd;
310 25013 : shift[0] = 0;
311 25013 : ch = 1;
312 : }
313 :
314 : /* extrapolate lagging channel */
315 35442 : if ( shift[ch] > 0 ) /* nothing to do for a zero shift */
316 : {
317 15692 : if ( sts[0]->element_mode == IVAS_CPE_DFT )
318 : {
319 : /* store last part of signal before extrapolation */
320 36405 : for ( n = 0; n < CPE_CHANNELS; n++ )
321 : {
322 24270 : mvr2r( sts[n]->input + input_frame - dft_ovl, input_mem_itd[n], dft_ovl );
323 : }
324 :
325 : /*shift past part*/
326 12135 : mvr2r( input_mem[ch] + shift[ch], shift_mem, size_ovl - shift[ch] ); /* drop the first shift[ch] samples of the memory */
327 12135 : mvr2r( sts[ch]->input, shift_mem + size_ovl - shift[ch], shift[ch] ); /* fill the end of the memory with the first samples of the current frame */
328 : }
329 : else
330 : {
331 : /*shift past part*/
332 3557 : mvr2r( mdct_mem[ch] + shift[ch], shift_mem, input_frame - shift[ch] ); /* same as the DFT case, with a memory of one full frame */
333 3557 : mvr2r( sts[ch]->input, shift_mem + input_frame - shift[ch], shift[ch] );
334 : }
335 : /*shift current part*/
336 15692 : mvr2r( sts[ch]->input + shift[ch], shift_input, input_frame - shift[ch] ); /* the last shift[ch] samples of shift_input keep the zeros set at function entry */
337 15692 : if ( sts[0]->element_mode == IVAS_CPE_DFT )
338 : {
339 : /*Extrapolate current frame*/
340 12135 : stereo_td_channel_extrapolate( sts, dft_ovl, shift_mem, shift_input, input_frame, shift[ch], ch, input_mem );
341 : }
342 : else
343 : {
344 : /*Extrapolate current frame*/
345 3557 : stereo_td_channel_extrapolate( sts, 0, shift_mem, shift_input, input_frame, shift[ch], ch, mdct_mem ); /* dft_ovl argument is 0 here; the callee uses input_frame as memory length in non-DFT mode */
346 : }
347 :
348 : /* write back the extrapolated signal into sts[ch]->input */
349 15692 : mvr2r( shift_input, sts[ch]->input, input_frame );
350 15692 : if ( sts[0]->element_mode == IVAS_CPE_DFT )
351 : {
352 12135 : mvr2r( shift_mem, input_mem[ch], size_ovl ); /* the shifted memory replaces the stored memory of channel ch */
353 : }
354 : else
355 : {
356 3557 : mvr2r( shift_mem, mdct_mem[ch], input_frame );
357 : }
358 : }
359 : }
360 :
361 : #ifdef DEBUG_MODE_DFT
362 : {
363 : int16_t tmp[L_FRAME48k];
364 :
365 : for ( i = 0; i < input_frame; i++ )
366 : {
367 : tmp[i] = (int16_t) ( sts[0]->input[i] + 0.5f );
368 : }
369 :
370 : dbgwrite( tmp, sizeof( int16_t ), input_frame, 1, "./res/td_shifted_signal_ch0.pcm" );
371 :
372 : for ( i = 0; i < input_frame; i++ )
373 : {
374 : tmp[i] = (int16_t) ( sts[1]->input[i] + 0.5f );
375 : }
376 :
377 : dbgwrite( tmp, sizeof( int16_t ), input_frame, 1, "./res/td_shifted_signal_ch1.pcm" );
378 : }
379 : #endif
380 35442 : return;
381 : }
382 :
383 :
384 : /*---------------------------------------------------------------
385 : * stereo_td_itd_mdct_stereo()
386 : *
387 : * Time-domain ITD in MDCT stereo
388 : * ---------------------------------------------------------------*/
389 :
390 357385 : void stereo_td_itd_mdct_stereo(
391 : CPE_ENC_HANDLE hCPE, /* i/o: CPE encoder handle */
392 : const int16_t vad_flag_dtx[], /* i : VAD dtx flags */
393 : const int16_t vad_hover_flag[], /* i : VAD hangover flags */
394 : const int16_t input_frame /* i : frame length */
395 : )
396 : {
397 : int16_t i;
398 : float bin_nrgL[STEREO_DFT_N_32k_ENC];
399 : float bin_nrgR[STEREO_DFT_N_32k_ENC];
400 : float DFT[CPE_CHANNELS][STEREO_DFT_N_MAX_ENC];
401 : STEREO_MDCT_ENC_DATA_HANDLE hStereoMdct;
402 : int16_t dft_ovl;
403 :
404 357385 : if ( hCPE->hStereoMdct != NULL && hCPE->hStereoMdct->hItd != NULL ) /* ITD analysis only when both the MDCT stereo handle and its ITD handle exist */
405 : {
406 14690 : hStereoMdct = hCPE->hStereoMdct;
407 :
408 14690 : hCPE->hStereoMdct->hItd->td_itd_32k[STEREO_DFT_OFFSET] = 0; /* NOTE(review): this reset happens before the shift loop below, so the loop copies the zero into index STEREO_DFT_OFFSET - 1 as well - confirm this is intended */
409 14690 : hStereoMdct->hItd->td_itd[STEREO_DFT_OFFSET] = 0;
410 :
411 : /* Update the parameters */
412 29380 : for ( i = 0; i < STEREO_DFT_OFFSET; i++ ) /* move each entry one position towards index 0 */
413 : {
414 14690 : hStereoMdct->hItd->deltaItd[i] = hStereoMdct->hItd->deltaItd[i + 1];
415 14690 : hStereoMdct->hItd->td_itd[i] = hStereoMdct->hItd->td_itd[i + 1];
416 14690 : hStereoMdct->hItd->td_itd_32k[i] = hStereoMdct->hItd->td_itd_32k[i + 1];
417 : }
418 :
419 14690 : stereo_dft_enc_analyze( hCPE->hCoreCoder, CPE_CHANNELS, input_frame, NULL, hStereoMdct, DFT, hCPE->input_mem ); /* fills DFT[][], which is handed to the ITD computation below */
420 :
421 : /*call ITD function*/
422 14690 : stereo_dft_enc_compute_itd( hCPE, DFT[0], DFT[1], STEREO_DFT_OFFSET, input_frame, vad_flag_dtx, vad_hover_flag, bin_nrgL, bin_nrgR );
423 :
424 : /* Time Domain ITD compensation using extrapolation */
425 : #ifdef DEBUG_MODE_DFT
426 : stereo_td_itd( hStereoMdct->hItd, NULL, 1, 1, hStereoMdct->hDft_ana->dft_ovl, hCPE->hCoreCoder, input_frame, hCPE->input_mem );
427 : #else
428 14690 : stereo_td_itd( hStereoMdct->hItd, NULL, 1, hStereoMdct->hDft_ana->dft_ovl, hCPE->hCoreCoder, input_frame, hCPE->input_mem ); /* input_mem_itd is NULL: stereo_td_itd() only writes it in DFT stereo mode; hybrid_itd_flag is fixed to 1 */
429 : #endif
430 : }
431 342695 : else if ( hCPE->input_mem[0] != NULL ) /* no ITD handle, but an input memory exists: only refresh that memory */
432 : {
433 20942 : dft_ovl = STEREO_DFT_OVL_MAX * input_frame / L_FRAME48k; /* STEREO_DFT_OVL_MAX scaled by input_frame / L_FRAME48k */
434 :
435 62826 : for ( i = 0; i < CPE_CHANNELS; i++ )
436 : {
437 41884 : mvr2r( hCPE->hCoreCoder[i]->input + input_frame - dft_ovl, hCPE->input_mem[i], dft_ovl ); /* store the last dft_ovl samples of each channel's input */
438 : }
439 : }
440 :
441 357385 : return;
442 : }
|