Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <assert.h>
34 : #include <stdint.h>
35 : #include "options.h"
36 : #include <math.h>
37 : #include "prot.h"
38 : #include "cnst.h"
39 : #include "stat_com.h"
40 : #include "ivas_prot.h"
41 : #include "ivas_stat_dec.h"
42 : #ifdef DEBUGGING
43 : #include "debug.h"
44 : #endif
45 : #include "wmc_auto.h"
46 :
47 :
48 : /*-------------------------------------------------------------------------
49 : * Local function prototypes
50 : *-------------------------------------------------------------------------*/
51 :
52 : static void apply_dmx_weights( CPE_DEC_HANDLE hCPE, float *x[CPE_CHANNELS][NB_DIV], int16_t transform_type_left[NB_DIV], int16_t transform_type_right[NB_DIV] );
53 :
54 : static void run_min_stats( Decoder_State **sts, float *x[CPE_CHANNELS][NB_DIV] );
55 :
56 :
/*-------------------------------------------------------------------*
 * convert_coeffs_to_higher_res()
 *
 * Convert MDCT coefficients to higher frequency resolution
 * by applying high- and lowpass filters to subdivide bins.
 *
 * Two subframe spectra of len bins each are merged into one spectrum
 * of 2 * len bins. Input bin i yields the output pair
 *   ( 0.5 * ( in2[i] + in1[i] ), 0.5 * ( in2[i] - in1[i] ) )
 * for even i and the same pair in swapped order for odd i.
 * Bins are processed two at a time, so len is expected to be even.
 *
 * Aliasing:
 *  - in1 == out (in-place) is supported as long as in2 does not
 *    overlap out[0 .. 2 * len - 1]. The bins are then processed from
 *    the top down, so that a write never hits an in1 bin that is
 *    still to be read.
 *  - If out is distinct from in1, the bins are processed from the
 *    bottom up; in2 may then be the upper half of out (out + len),
 *    because every in2 bin is read before the write position
 *    reaches it.
 *-------------------------------------------------------------------*/

void convert_coeffs_to_higher_res(
    const float *in1, /* i  : first subframe input        */
    const float *in2, /* i  : second subframe input       */
    float *out,       /* o  : converted output            */
    const int16_t len /* i  : length of subframes         */
)
{
    int16_t i;
    float a0, a1, b0, b1;

    if ( in1 == out )
    {
        /* in-place: output bins 2*i .. 2*i+3 overwrite in1 bins with index >= 2*i,
           which are only free once all higher input pairs have been consumed
           -> run backwards and fetch all four inputs before the first write */
        for ( i = (int16_t) ( len - 2 ); i >= 0; i -= 2 )
        {
            a0 = in1[i];
            a1 = in1[i + 1];
            b0 = in2[i];
            b1 = in2[i + 1];

            out[2 * i] = 0.5f * ( b0 + a0 );
            out[2 * i + 1] = 0.5f * ( b0 - a0 );
            out[2 * i + 2] = 0.5f * ( b1 - a1 );
            out[2 * i + 3] = 0.5f * ( b1 + a1 );
        }
    }
    else
    {
        for ( i = 0; i < len; i += 2 )
        {
            out[2 * i] = 0.5f * ( in2[i] + in1[i] );
            out[2 * i + 1] = 0.5f * ( in2[i] - in1[i] );
            out[2 * i + 2] = 0.5f * ( in2[i + 1] - in1[i + 1] );
            out[2 * i + 3] = 0.5f * ( in2[i + 1] + in1[i + 1] );
        }
    }

    return;
}
101 :
102 :
103 : /*-------------------------------------------------------------------*
104 : * stereo_mdct_dec_stereo()
105 : *
106 : * decode core and MDCT stereo information
107 : *-------------------------------------------------------------------*/
108 :
109 240156 : static void stereo_mdct_dec_stereo(
110 : CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */
111 : int16_t ms_mask[2][MAX_SFB] /* o : bandwise MS mask */
112 : )
113 : {
114 : int16_t availableBits;
115 : Decoder_State **sts;
116 :
117 240156 : sts = hCPE->hCoreCoder;
118 :
119 240156 : parse_stereo_from_bitstream( hCPE->hStereoMdct, hCPE->hCoreCoder, 0, hCPE->hStereoMdct->isSBAStereoMode, hCPE->hCoreCoder[0], ms_mask );
120 :
121 : /*Split available bits between channels */
122 240156 : availableBits = sts[0]->bits_frame_channel + sts[1]->bits_frame_channel - sts[0]->next_bit_pos - sts[0]->core * ( NF_GAIN_BITS + SMDCT_MINIMUM_ARITH_BITS ) - sts[1]->core * ( NF_GAIN_BITS + SMDCT_MINIMUM_ARITH_BITS );
123 :
124 240156 : splitAvailableBits( availableBits, hCPE->hStereoMdct->split_ratio, hCPE->hStereoMdct->isSBAStereoMode, &sts[0]->bits_frame_channel, &sts[1]->bits_frame_channel );
125 :
126 240156 : sts[0]->bits_frame_channel += sts[0]->core * SMDCT_MINIMUM_ARITH_BITS;
127 240156 : sts[1]->bits_frame_channel += sts[1]->core * SMDCT_MINIMUM_ARITH_BITS;
128 :
129 240156 : sts[1]->bit_stream = &sts[0]->bit_stream[sts[0]->next_bit_pos + sts[0]->bits_frame_channel + sts[0]->core * NF_GAIN_BITS];
130 :
131 240156 : return;
132 : }
133 :
134 :
135 : /*-------------------------------------------------------------------*
136 : * stereo_mdct_core_dec()
137 : *
138 : * MDCT stereo core/stereo decoder
 *
 * Processing order as implemented below:
 *  1. per-channel (re-)initialization and set-up of the work pointers
 *     x[][] (into signal_out_tmp) and x_0[][] (scratch)
 *  2. good frame: read side bits, IGF bits and stereo parameters;
 *     bad frame: reuse the stored MS mask and adapt the stereo modes
 *  3. ivas_mdct_core_invQ() and per-(sub)frame init_tcx_info()
 *  4. IGF decoding (stereo or mono/dual mono)
 *  5. good frame: pass spectra to the tonal MDCT concealment, store MS mask
 *  6. stereo_decoder_tcx(), ivas_mdct_core_tns_ns(), optional ParamMC
 *     conversion, run_min_stats(), apply_dmx_weights() for mono output
 *  7. ivas_mdct_core_reconstruct() and copy to the caller's buffers
139 : *--------------------------------------------------------------------*/
140 :
141 247263 : void stereo_mdct_core_dec(
142 : Decoder_Struct *st_ivas, /* i/o: IVAS decoder structure */
143 : CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */
144 : float *signal_out[CPE_CHANNELS], /* o : synthesis @internal_FS */
145 : float signal_outFB[CPE_CHANNELS][L_FRAME48k] /* o : synthesis @output_FS */
146 : )
147 : {
148 : int16_t k, ch, nChannels;
149 : Decoder_State *st, **sts;
150 :
151 : /* bitstream */
152 : int16_t param[CPE_CHANNELS][DEC_NPRM_DIV * NB_DIV];
153 : int16_t param_lpc[CPE_CHANNELS][NPRM_LPC_NEW];
154 :
155 : float Aq[CPE_CHANNELS][( NB_SUBFR16k + 1 ) * ( M + 1 )];
156 : float *x[CPE_CHANNELS][NB_DIV];
157 :
158 : /*needed to allocate N_MAX to prevent stereo switching crash */
159 : #ifndef FIX_1320_STACK_CPE_DECODER
160 : float x_0_buf[CPE_CHANNELS][N_MAX];
161 : #endif
162 : float *x_0[CPE_CHANNELS][NB_DIV];
163 :
164 : /* Concealment */
165 : int16_t bfi;
166 :
167 : /* Framing */
168 : int16_t L_frame[CPE_CHANNELS], L_frameTCX[CPE_CHANNELS], nSubframes[CPE_CHANNELS];
169 :
170 : /* TCX */
171 : int16_t fUseTns[CPE_CHANNELS][NB_DIV];
172 : STnsData tnsData[CPE_CHANNELS][NB_DIV];
173 : int16_t tcx_offset[CPE_CHANNELS];
174 : int16_t tcx_offsetFB[CPE_CHANNELS];
175 : int16_t left_rect[CPE_CHANNELS];
176 : int16_t L_spec[CPE_CHANNELS];
177 :
178 : /* stereo */
179 : int16_t ms_mask[NB_DIV][MAX_SFB];
180 :
181 : int16_t p_param[CPE_CHANNELS][NB_DIV];
182 : int16_t nTnsBitsTCX10[CPE_CHANNELS][NB_DIV];
183 :
184 : float signal_outFB_tmp[CPE_CHANNELS][L_FRAME_PLUS];
185 : float signal_out_tmp[CPE_CHANNELS][L_FRAME_PLUS];
186 247263 : push_wmops( "stereo_mdct_core_dec" );
187 :
188 : /*--------------------------------------------------------------------------------*
189 : * Initializations
190 : *--------------------------------------------------------------------------------*/
191 :
192 247263 : sts = hCPE->hCoreCoder;
193 247263 : st = NULL;
194 :
195 247263 : nChannels = CPE_CHANNELS;
/* the bad frame indicator of the first channel is used for the whole element */
196 247263 : bfi = sts[0]->bfi;
197 :
198 741789 : for ( ch = 0; ch < nChannels; ch++ )
199 : {
200 : /* Initialization or re-configuration of Stereo TCX */
201 494526 : stereo_tcx_init_dec( sts[ch], 0, hCPE->last_element_mode );
202 494526 : sts[ch]->hTcxDec->tnsActive[0] = sts[ch]->hTcxDec->tnsActive[1] = 0;
203 494526 : sts[ch]->enablePlcWaveadjust = 0;
/* the spectrum pointers x[ch][0..1] address the two halves of the zeroed signal_out_tmp[ch] */
204 494526 : set_zero( signal_out_tmp[ch], L_FRAME_PLUS );
205 494526 : x[ch][0] = &signal_out_tmp[ch][0];
206 494526 : x[ch][1] = &signal_out_tmp[ch][0] + L_FRAME_PLUS / 2;
207 :
/* x_0[ch][0..1] address either signal_outFB_tmp[ch] (fix active) or the dedicated x_0_buf[ch];
   the second pointer is placed L_FRAME48k / 2 samples after the first in both variants */
208 : #ifdef FIX_1320_STACK_CPE_DECODER
209 494526 : set_zero( signal_outFB_tmp[ch], N_MAX ); /* length of N_MAX is needed to prevent stereo switching crash -> reuse buffer signal_outFB_tmp[][] */
210 494526 : x_0[ch][0] = &signal_outFB_tmp[ch][0];
211 494526 : x_0[ch][1] = &signal_outFB_tmp[ch][0] + L_FRAME48k / 2;
212 : #else
213 : set_zero( x_0_buf[ch], N_MAX );
214 : x_0[ch][0] = &x_0_buf[ch][0];
215 : x_0[ch][1] = &x_0_buf[ch][0] + L_FRAME48k / 2;
216 : #endif
217 494526 : nTnsBitsTCX10[ch][0] = 0;
218 494526 : nTnsBitsTCX10[ch][1] = 0;
219 :
220 494526 : set_s( param_lpc[ch], 0, NPRM_LPC_NEW );
221 : }
222 247263 : set_s( ms_mask[0], 0, MAX_SFB );
223 247263 : set_s( ms_mask[1], 0, MAX_SFB );
224 :
225 247263 : initMdctStereoDecData( hCPE->hStereoMdct, sts[0]->igf, sts[0]->hIGFDec->igfData.igfInfo.grid, hCPE->element_brate, sts[0]->bwidth );
/* SBA stereo mode: SBA or combined SBA/ISM format with exactly two transport channels */
226 247263 : hCPE->hStereoMdct->isSBAStereoMode = ( ( st_ivas->ivas_format == SBA_FORMAT || st_ivas->ivas_format == SBA_ISM_FORMAT ) && ( st_ivas->nchan_transport == 2 ) );
/* good frame: parse side bits, IGF bits (only if IGF is active on the first channel) and the stereo information */
227 247263 : if ( !bfi )
228 : {
229 240156 : ivas_mdct_dec_side_bits_frame_channel( hCPE, param_lpc, p_param, hCPE->hCoreCoder[0], nTnsBitsTCX10, param, 0, 0 );
230 :
231 240156 : if ( sts[0]->igf )
232 : {
233 573435 : for ( ch = 0; ch < nChannels; ch++ )
234 : {
235 382290 : st = sts[ch];
236 382290 : mdct_read_IGF_bits( st, sts[0] );
237 : }
238 : }
239 :
240 240156 : stereo_mdct_dec_stereo( hCPE, ms_mask );
241 : }
242 : else
243 : {
/* bad frame: nothing is parsed; take over the MS mask stored at the end of the last good frame (see below) */
244 7107 : mvs2s( hCPE->hStereoMdct->prev_ms_mask[0], ms_mask[0], MAX_SFB );
245 7107 : mvs2s( hCPE->hStereoMdct->prev_ms_mask[1], ms_mask[1], MAX_SFB );
246 :
/* no channel is TCX10: force dual mono for index 1;
   both channels are TCX10: copy the stereo parameters of index 1 to index 0;
   mixed case: stereo parameters are left untouched */
247 7107 : if ( sts[0]->core != TCX_10_CORE && sts[1]->core != TCX_10_CORE )
248 : {
249 7017 : hCPE->hStereoMdct->mdct_stereo_mode[1] = SMDCT_DUAL_MONO;
250 7017 : hCPE->hStereoMdct->IGFStereoMode[1] = SMDCT_DUAL_MONO;
251 : }
252 90 : else if ( sts[0]->core == TCX_10_CORE && sts[1]->core == TCX_10_CORE )
253 : {
254 78 : hCPE->hStereoMdct->global_ild[0] = hCPE->hStereoMdct->global_ild[1];
255 78 : hCPE->hStereoMdct->mdct_stereo_mode[0] = hCPE->hStereoMdct->mdct_stereo_mode[1];
256 78 : hCPE->hStereoMdct->IGFStereoMode[0] = hCPE->hStereoMdct->IGFStereoMode[1];
257 : }
258 : }
259 :
260 247263 : ivas_mdct_core_invQ( hCPE, nTnsBitsTCX10, p_param, param_lpc, param, fUseTns, tnsData, x_0, x, Aq, ms_mask, 0 );
261 :
/* framing info per channel: TCX10 frames consist of NB_DIV subframes, all other cores of one;
   the per-channel outputs are overwritten in every subframe iteration, so the values of the last subframe remain */
262 741789 : for ( ch = 0; ch < nChannels; ch++ )
263 : {
264 494526 : nSubframes[ch] = ( sts[ch]->core == TCX_10_CORE ) ? NB_DIV : 1;
265 1001373 : for ( k = 0; k < nSubframes[ch]; k++ )
266 : {
267 506847 : L_spec[ch] = sts[ch]->hTcxCfg->tcx_coded_lines / nSubframes[ch];
268 :
269 506847 : init_tcx_info( sts[ch], sts[ch]->L_frame / nSubframes[ch], sts[ch]->hTcxDec->L_frameTCX / nSubframes[ch], k, bfi, &tcx_offset[ch], &tcx_offsetFB[ch], &L_frame[ch], &L_frameTCX[ch], &left_rect[ch], &L_spec[ch] );
270 : }
271 : }
272 :
273 : /* IGF decoding */
274 247263 : if ( sts[0]->igf || sts[1]->igf )
275 : {
/* different cores in the two channels: always decode IGF per channel */
276 195645 : if ( sts[0]->core != sts[1]->core )
277 : {
278 5571 : for ( ch = 0; ch < nChannels; ch++ )
279 : {
280 3714 : st = sts[ch];
/* in a bad frame, channels with ACELP_CORE are skipped */
281 3714 : if ( !bfi || ( bfi && st->core != ACELP_CORE ) )
282 : {
283 8136 : for ( k = 0; k < nSubframes[ch]; k++ )
284 : {
285 : /* mono or dual mono IGF decoding */
286 4806 : decoder_tcx_IGF_mono( st, x[ch][k], L_frame[ch], left_rect[ch], bfi, k );
287 : }
288 : }
289 : }
290 : }
/* same core in both channels: nothing to do if that core is ACELP_CORE */
291 193788 : else if ( sts[0]->core != ACELP_CORE )
292 : {
293 192900 : assert( nSubframes[0] == nSubframes[1] );
294 :
295 390057 : for ( k = 0; k < nSubframes[0]; k++ )
296 : {
/* stereo IGF only if core or IGF stereo mode of this subframe is not dual mono and SBA stereo mode is off */
297 197157 : if ( ( hCPE->hStereoMdct->IGFStereoMode[k] != SMDCT_DUAL_MONO || hCPE->hStereoMdct->mdct_stereo_mode[k] != SMDCT_DUAL_MONO ) && !hCPE->hStereoMdct->isSBAStereoMode )
298 : {
299 83565 : assert( ( sts[0]->core == sts[1]->core ) || ( hCPE->hStereoMdct->mdct_stereo_mode[0] == SMDCT_DUAL_MONO ) );
300 :
301 : /* stereo IGF decoding */
302 83565 : decoder_tcx_IGF_stereo( sts, hCPE->hStereoMdct, ms_mask, x, L_frame[0], left_rect[0], k, bfi, 0 /* MCT_flag */ );
303 : }
304 : else
305 : {
306 340776 : for ( ch = 0; ch < nChannels; ch++ )
307 : {
308 227184 : st = sts[ch];
309 : /* mono or dual mono IGF decoding */
310 227184 : decoder_tcx_IGF_mono( st, x[ch][k], L_frame[ch], left_rect[ch], bfi, k );
311 : }
312 : }
313 : }
314 : }
315 : }
316 :
317 : /*--------------------------------------------------------------------------------*
318 : * Stereo processing
319 : *--------------------------------------------------------------------------------*/
320 :
/* good frame only: feed the tonal MDCT concealment and remember the MS mask for a later bad frame */
321 247263 : if ( !bfi )
322 : {
323 720468 : for ( ch = 0; ch < CPE_CHANNELS; ch++ )
324 : {
325 480312 : st = sts[ch];
326 972795 : for ( k = 0; k < nSubframes[ch]; k++ )
327 : {
328 : float sns_int_scf[FDNS_NPTS];
329 :
330 492483 : sns_interpolate_scalefactors( &sns_int_scf[0], &Aq[ch][k * M], DEC );
331 :
/* only the spectrum of the last subframe is passed to TonalMDCTConceal_SaveFreqSignal() */
332 492483 : if ( st->hTonalMDCTConc != NULL && ( ( k + 1 ) == nSubframes[ch] ) )
333 : {
334 480312 : TonalMDCTConceal_SaveFreqSignal( st->hTonalMDCTConc, x[ch][k], L_frameTCX[ch], L_frame[ch], &sns_int_scf[0], get_igf_startline( st, L_frame[ch], L_frameTCX[ch] ) );
335 : }
336 : }
337 :
/* bfi is 0 in this branch, so the last two arguments are always 0 here */
/* NOTE(review): hTonalMDCTConc is NULL-checked before the SaveFreqSignal call above but passed unchecked here -
   confirm that TonalMDCTConceal_UpdateState() tolerates NULL or that the handle is always allocated */
338 480312 : TonalMDCTConceal_UpdateState( st->hTonalMDCTConc, L_frameTCX[ch], ( st->hTcxDec->tcxltp_last_gain_unmodified > 0 ) ? st->old_fpitch : 0, bfi, bfi && st->tonal_mdct_plc_active );
339 : }
340 :
341 240156 : mvs2s( ms_mask[0], hCPE->hStereoMdct->prev_ms_mask[0], MAX_SFB );
342 240156 : mvs2s( ms_mask[1], hCPE->hStereoMdct->prev_ms_mask[1], MAX_SFB );
343 : }
344 :
/* stereo_decoder_tcx() is skipped in SBA stereo mode and in bad frames in which both channels are ACELP_CORE */
345 247263 : if ( ( !bfi || !( sts[0]->core == ACELP_CORE && sts[1]->core == ACELP_CORE ) ) && !hCPE->hStereoMdct->isSBAStereoMode )
346 : {
347 : #ifdef DEBUGGING
348 : assert( ( sts[0]->core == sts[1]->core ) || ( ( hCPE->hStereoMdct->mdct_stereo_mode[0] == SMDCT_DUAL_MONO ) && ( hCPE->hStereoMdct->mdct_stereo_mode[1] == SMDCT_DUAL_MONO ) ) );
349 : #endif
350 127926 : stereo_decoder_tcx( hCPE->hStereoMdct, ms_mask, x_0[1], x[0], x[1], &hCPE->hStereoMdct->mdct_stereo_mode[0], sts[0]->core, sts[1]->core, sts[0]->igf, L_frameTCX[0], L_frameTCX[1], 0, sts[0]->last_core, sts[1]->last_core, 0 );
351 : }
352 :
353 247263 : ivas_mdct_core_tns_ns( hCPE, fUseTns, tnsData, x, Aq, 0 );
354 :
/* ParamMC renderer with mono or stereo output: LS setup conversion is applied to the MDCT spectra */
355 247263 : if ( st_ivas->renderer_type == RENDERER_MC_PARAMMC && ( st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_MONO || st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_STEREO ) )
356 : {
357 0 : ivas_ls_setup_conversion_process_mdct_param_mc( st_ivas, x );
358 : }
359 :
360 247263 : run_min_stats( sts, x );
361 :
/* mono output: weight the spectra for the later downmix, except in bad frames in which a channel is ACELP_CORE */
362 247263 : if ( hCPE->nchan_out == 1 && ( !bfi || ( bfi && sts[0]->core != ACELP_CORE && sts[1]->core != ACELP_CORE ) ) )
363 : {
364 17598 : apply_dmx_weights( hCPE, x, sts[0]->transform_type, sts[1]->transform_type );
365 : }
366 :
367 247263 : ivas_mdct_core_reconstruct( hCPE, x, signal_outFB_tmp, fUseTns, 0 );
368 :
/* copy the results from the local work buffers to the caller's buffers;
   presumably ivas_mdct_core_reconstruct() leaves the internal-rate synthesis in x[][], i.e. in signal_out_tmp - confirm.
   L_FRAME48k samples are copied to signal_out regardless of the internal frame length */
369 247263 : mvr2r( signal_out_tmp[0], signal_out[0], L_FRAME48k );
370 247263 : mvr2r( signal_out_tmp[1], signal_out[1], L_FRAME48k );
371 :
372 247263 : mvr2r( signal_outFB_tmp[0], signal_outFB[0], hCPE->hCoreCoder[0]->hTcxDec->L_frameTCX );
373 247263 : mvr2r( signal_outFB_tmp[1], signal_outFB[1], hCPE->hCoreCoder[1]->hTcxDec->L_frameTCX );
374 :
375 247263 : pop_wmops();
376 247263 : return;
377 : }
378 :
379 :
380 : /*-------------------------------------------------------------------*
381 : * apply_dmx_weights()
382 : *
383 : * apply bandwise weighting for later dmx in case of mono output
384 : *--------------------------------------------------------------------*/
385 :
386 17598 : static void apply_dmx_weights(
387 : CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */
388 : float *x[CPE_CHANNELS][NB_DIV], /* i/o: MDCT Spectrum */
389 : int16_t transform_type_left[NB_DIV], /* i : indicate TCX5 for left ch */
390 : int16_t transform_type_right[NB_DIV] /* i : indicate TCX5 for right ch */
391 : )
392 : {
393 : int16_t b, k, l, i, ch;
394 : int16_t nsub, nsub2[2], nChannels;
395 : int16_t transform_type[2][2];
396 : int16_t numCoeffs[2], frameSize;
397 : int16_t tcx_10_only, w_idx;
398 : int16_t start, stop, start_tcx5, stop_tcx5;
399 17598 : STEREO_MDCT_BAND_PARAMETERS *sfbConf = NULL;
400 : float mdst[CPE_CHANNELS][NB_DIV][L_FRAME48k];
401 : float convertRes[CPE_CHANNELS][L_FRAME48k];
402 : float *sig[CPE_CHANNELS][NB_DIV], *pTmp[CPE_CHANNELS][NB_DIV];
403 :
404 17598 : nChannels = CPE_CHANNELS;
405 17598 : frameSize = hCPE->hStereoMdct->stbParamsTCX20.sfbOffset[hCPE->hStereoMdct->stbParamsTCX20.sfbCnt];
406 :
407 17598 : transform_type[0][0] = transform_type_left[0];
408 17598 : transform_type[0][1] = transform_type_left[1];
409 17598 : transform_type[1][0] = transform_type_right[0];
410 17598 : transform_type[1][1] = transform_type_right[1];
411 :
412 : /* set overall frequency resolution of (sub)frame to maximum of (sub)frame, requires conversion if both channels are not the same */
413 17598 : if ( transform_type[0][0] == TCX_20 || transform_type[1][0] == TCX_20 )
414 : {
415 : /* use TCX20 band config for TCX20 in both channels and mixed frames */
416 17247 : sfbConf = &hCPE->hStereoMdct->stbParamsTCX20;
417 17247 : nsub = nsub2[0] = nsub2[1] = 1; /* overall TCX 20 */
418 17247 : tcx_10_only = 0;
419 : }
420 : else
421 : {
422 : /* use TCX10 band config only if none of the channels is TCX20 */
423 351 : sfbConf = &hCPE->hStereoMdct->stbParamsTCX10;
424 351 : nsub = 2;
425 : /* set resolution per subframe, subdivide again if subframe is TCX5 in both channels */
426 351 : nsub2[0] = ( transform_type[0][0] == TCX_5 && transform_type[1][0] == TCX_5 ) ? 2 : 1;
427 351 : nsub2[1] = ( transform_type[0][1] == TCX_5 && transform_type[1][1] == TCX_5 ) ? 2 : 1;
428 351 : tcx_10_only = 1;
429 : }
430 :
431 : /* for subframes with only TCX5 in both channels number of coefficients is only half (in 2 quarterframes) */
432 17598 : numCoeffs[0] = ( nsub2[0] == 1 ) ? sfbConf->sfbOffset[sfbConf->sfbCnt] : sfbConf->sfbOffset[sfbConf->sfbCnt] / 2;
433 17598 : numCoeffs[1] = ( nsub2[1] == 1 ) ? sfbConf->sfbOffset[sfbConf->sfbCnt] : sfbConf->sfbOffset[sfbConf->sfbCnt] / 2;
434 :
435 : /* initially, set pointers to input; if conversion occurs in (sub)frame, set to convertRes */
436 17598 : sig[0][0] = pTmp[0][0] = x[0][0];
437 17598 : sig[0][1] = pTmp[0][1] = x[0][1];
438 17598 : sig[1][0] = pTmp[1][0] = x[1][0];
439 17598 : sig[1][1] = pTmp[1][1] = x[1][1];
440 :
441 : /* convert (sub)frames to higher frequency resolution */
442 52794 : for ( ch = 0; ch < nChannels; ch++ )
443 : {
444 105588 : for ( k = 0; k < NB_DIV; k++ )
445 : {
446 70392 : if ( transform_type[ch][k] == TCX_5 && nsub2[k] == 1 )
447 : {
448 : /* subframe is TCX5, but TCX10 or TCX20 in other channel -> convert channel with TCX5 to TCX10 resolution */
449 174 : pTmp[ch][k] = sig[ch][k] = convertRes[ch] + k * frameSize / 2;
450 174 : convert_coeffs_to_higher_res( x[ch][k], x[ch][k] + frameSize / 4, pTmp[ch][k], frameSize / 4 );
451 : }
452 : }
453 :
454 35196 : if ( transform_type[ch][0] != TCX_20 && nsub == 1 )
455 : {
456 : /* TCX20 and TCX10 in same frame -> convert channel with TCX10 to TCX20 resolution */
457 156 : sig[ch][0] = convertRes[ch];
458 156 : convert_coeffs_to_higher_res( pTmp[ch][0], pTmp[ch][1], sig[ch][0], frameSize / 2 );
459 : }
460 : }
461 :
462 : /* MDST estimate */
463 52794 : for ( ch = 0; ch < nChannels; ch++ )
464 : {
465 71094 : for ( k = 0; k < nsub; k++ )
466 : {
467 72564 : for ( l = 0; l < nsub2[k]; l++ )
468 : {
469 36666 : mdst[ch][k][l * numCoeffs[k]] = mdst[ch][k][( l + 1 ) * numCoeffs[k] - 1] = 0.f;
470 24295494 : for ( i = l * numCoeffs[k] + 1; i < ( l + 1 ) * numCoeffs[k] - 1; i++ )
471 : {
472 24258828 : mdst[ch][k][i] = sig[ch][k][i + 1] - sig[ch][k][i - 1];
473 : }
474 : }
475 : }
476 : }
477 :
478 : /* compute and apply bandwise weigths for active downmix (similar to DFT Stereo) */
479 17598 : stop_tcx5 = 0;
480 694749 : for ( b = 0; b < sfbConf->sfbCnt; b++ )
481 : {
482 : float w[CPE_CHANNELS][4];
483 :
484 1363080 : for ( k = 0; k < nsub; k++ )
485 : {
486 1381383 : for ( l = 0; l < nsub2[k]; l++ )
487 : {
488 695454 : float sum_nrg_L = EPSILON, sum_nrg_R = EPSILON;
489 695454 : float dot_prod_real = EPSILON, dot_prod_imag = EPSILON;
490 : float sum_nrg_Mid, sum_abs, dot_prod_abs;
491 :
492 695454 : start = l * numCoeffs[k] + sfbConf->sfbOffset[b] / nsub2[k];
493 695454 : stop = l * numCoeffs[k] + sfbConf->sfbOffset[b + 1] / nsub2[k];
494 :
495 : /* compute band energies and cross correlation */
496 12861534 : for ( i = start; i < stop; i++ )
497 : {
498 12166080 : sum_nrg_L += sig[0][k][i] * sig[0][k][i] + mdst[0][k][i] * mdst[0][k][i];
499 12166080 : sum_nrg_R += sig[1][k][i] * sig[1][k][i] + mdst[1][k][i] * mdst[1][k][i];
500 12166080 : dot_prod_real += sig[0][k][i] * sig[1][k][i] + mdst[0][k][i] * mdst[1][k][i];
501 12166080 : dot_prod_imag += mdst[0][k][i] * sig[1][k][i] - sig[0][k][i] * mdst[1][k][i];
502 : }
503 695454 : sum_nrg_Mid = max( 0.f, sum_nrg_L + sum_nrg_R + 2.f * dot_prod_real );
504 695454 : sum_abs = sqrtf( sum_nrg_L ) + sqrtf( sum_nrg_R ) + EPSILON;
505 695454 : dot_prod_abs = sqrtf( dot_prod_real * dot_prod_real + dot_prod_imag * dot_prod_imag );
506 :
507 : /* calculate weights */
508 695454 : if ( hCPE->hStereoMdct->reverse_dmx == 0 )
509 : {
510 419403 : w[1][2 * k + l] = sqrtf( 0.5f * ( sum_nrg_L + sum_nrg_R ) + dot_prod_abs ) / sum_abs;
511 419403 : w[0][2 * k + l] = w[1][2 * k + l] + sqrtf( 2.f ) * ( 1.f - sqrtf( sum_nrg_Mid ) / sum_abs );
512 : }
513 : else
514 : {
515 276051 : w[0][2 * k + l] = sqrtf( 0.5f * ( sum_nrg_L + sum_nrg_R ) + dot_prod_abs ) / sum_abs;
516 276051 : w[1][2 * k + l] = w[0][2 * k + l] + sqrtf( 2.f ) * ( 1.f - sqrtf( sum_nrg_Mid ) / sum_abs );
517 : }
518 : }
519 : }
520 :
521 : /* apply weights to channels with their original frequency resolutions */
522 2031453 : for ( ch = 0; ch < CPE_CHANNELS; ch++ )
523 : {
524 1354302 : if ( transform_type[ch][0] == TCX_20 )
525 : {
526 25081140 : for ( i = sfbConf->sfbOffset[b]; i < sfbConf->sfbOffset[b + 1]; i++ )
527 : {
528 23750400 : x[ch][0][i] *= w[ch][0];
529 : }
530 : }
531 : else
532 : {
533 23562 : start = sfbConf->sfbOffset[b];
534 23562 : stop = sfbConf->sfbOffset[b + 1];
535 23562 : if ( !tcx_10_only ) /* TCX20 band config is used */
536 : {
537 6006 : start /= 2;
538 6006 : stop /= 2;
539 : }
540 :
541 70686 : for ( k = 0; k < NB_DIV; k++ )
542 : {
543 47124 : w_idx = ( nsub == 1 ) ? 0 : 2 * k;
544 47124 : if ( transform_type[ch][k] == TCX_10 )
545 : {
546 287127 : for ( i = start; i < stop; i++ )
547 : {
548 265680 : x[ch][k][i] *= w[ch][w_idx];
549 : }
550 : }
551 : else /* TCX_5 */
552 : {
553 25677 : start_tcx5 = stop_tcx5;
554 25677 : stop_tcx5 = ( stop + 1 ) / 2;
555 :
556 111717 : for ( i = start_tcx5; i < stop_tcx5; i++ )
557 : {
558 86040 : x[ch][k][i] *= w[ch][w_idx];
559 : }
560 :
561 25677 : if ( nsub2[k] == 2 )
562 : {
563 19050 : w_idx++;
564 : }
565 :
566 111717 : for ( i = start_tcx5; i < stop_tcx5; i++ )
567 : {
568 86040 : x[ch][k][i + ( frameSize >> 2 )] *= w[ch][w_idx];
569 : }
570 : }
571 : }
572 : }
573 : }
574 : }
575 :
576 17598 : return;
577 : }
578 :
579 :
580 : /*-------------------------------------------------------------------*
581 : * run_min_stats()
582 : *
583 : * run the minimum statistics noise estimation algorithm
584 : * directly on the MDCT spectrum
585 : *--------------------------------------------------------------------*/
586 :
587 247263 : static void run_min_stats(
588 : Decoder_State **sts,
589 : float *x[CPE_CHANNELS][NB_DIV] /* i/o: MDCT Spectrum */
590 : )
591 : {
592 : int16_t ch, will_estimate_noise_on_channel[CPE_CHANNELS], save_VAD[CPE_CHANNELS];
593 : float power_spec[L_FRAME16k];
594 : float *spec_in;
595 :
596 : /* Check if the minimum statistics would run on the respective channels. They are run on inactive TCX20 channels */
597 247263 : will_estimate_noise_on_channel[0] = sts[0]->core == TCX_20_CORE && !sts[0]->VAD;
598 247263 : will_estimate_noise_on_channel[1] = sts[1]->core == TCX_20_CORE && !sts[1]->VAD;
599 :
600 247263 : save_VAD[0] = sts[0]->VAD;
601 247263 : save_VAD[1] = sts[1]->VAD;
602 :
603 : /* The first loop calculates the power spectra needed in the minimum statistics (MS) noise estimation. This is only needed if the MS
604 : would run at all on at least one of the channels. If they run on both channels, we need to calculate two distinct power spectra
605 : for the two different channels. If they would only run on one of the channels, the VAD of the other one is patched so that the MS will
606 : still run. This other channel then uses the power spectrum of the other channel to run the MS. This is done to keep continuity and synchronicity
607 : between the two noise levels and silently assumes that the background noise is somehow diffuse and at leas partly shared between the channels */
608 741789 : for ( ch = 0; ch < CPE_CHANNELS; ch++ )
609 : {
610 : Decoder_State *st;
611 494526 : st = sts[ch];
612 :
613 494526 : if ( !sts[0]->bfi && ( will_estimate_noise_on_channel[0] || will_estimate_noise_on_channel[1] ) )
614 : {
615 : /* if noise estimation is expected to run on this channel, compute power spectrum from it,
616 : otherwise, use other channel's signal */
617 18372 : if ( will_estimate_noise_on_channel[ch] )
618 : {
619 10938 : spec_in = &x[ch][0][0];
620 : }
621 : else
622 : {
623 7434 : spec_in = &x[( ch + 1 ) % 2][0][0];
624 : /* patch VAD to zero so that estimation runs, will later be restored */
625 7434 : st->VAD = 0;
626 : }
627 :
628 : /* Compute power spectrum twice if estimation will run on both channels. If only on one channel, it is
629 : computed only once (for ch == 0) and not again in the second run sive the outcome will be the same anyway */
630 18372 : if ( ( will_estimate_noise_on_channel[0] == will_estimate_noise_on_channel[1] ) || ch == 0 )
631 : {
632 : float power_spec_scale_fac;
633 :
634 : /* calculate power spectrum from MDCT coefficients and estimated MDST coeffs */
635 10938 : power_spec_scale_fac = 1.f / (float) ( L_FRAME16k * L_FRAME16k );
636 10938 : power_spec[0] = spec_in[0] * spec_in[0] * power_spec_scale_fac;
637 10938 : power_spec[L_FRAME16k - 1] = spec_in[L_FRAME16k - 1] * spec_in[L_FRAME16k - 1] * power_spec_scale_fac;
638 3489222 : for ( int16_t i = 1; i < L_FRAME16k - 1; i++ )
639 : {
640 : float mdst;
641 3478284 : mdst = spec_in[i + 1] - spec_in[i - 1];
642 3478284 : power_spec[i] = power_spec_scale_fac * ( spec_in[i] * spec_in[i] + mdst * mdst );
643 : }
644 : }
645 : }
646 :
647 494526 : if ( st->core == TCX_20_CORE )
648 : {
649 478914 : noisy_speech_detection( st->hFdCngDec, save_VAD[ch] && st->m_frame_type == ACTIVE_FRAME, x[ch][0] );
650 478914 : st->hFdCngDec->hFdCngCom->likelihood_noisy_speech = 0.99f * st->hFdCngDec->hFdCngCom->likelihood_noisy_speech + 0.01f * (float) st->hFdCngDec->hFdCngCom->flag_noisy_speech;
651 478914 : st->lp_noise = st->hFdCngDec->lp_noise;
652 : }
653 :
654 494526 : if ( will_estimate_noise_on_channel[0] || will_estimate_noise_on_channel[1] || st->bfi )
655 : {
656 32586 : ApplyFdCng( NULL, st->bfi ? NULL : power_spec, NULL, NULL, st, st->bfi, 0 );
657 : }
658 :
659 : /* restore VAD (see above) */
660 494526 : st->VAD = save_VAD[ch];
661 : }
662 :
663 247263 : return;
664 : }
|