Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <assert.h>
34 : #include <stdint.h>
35 : #include "options.h"
36 : #include <math.h>
37 : #include "prot.h"
38 : #include "cnst.h"
39 : #include "stat_com.h"
40 : #include "ivas_prot.h"
41 : #include "ivas_stat_dec.h"
42 : #ifdef DEBUGGING
43 : #include "debug.h"
44 : #endif
45 : #include "wmc_auto.h"
46 :
47 :
48 : /*-------------------------------------------------------------------------
49 : * Local function prototypes
50 : *-------------------------------------------------------------------------*/
51 :
52 : static void apply_dmx_weights( CPE_DEC_HANDLE hCPE, float *x[CPE_CHANNELS][NB_DIV], int16_t transform_type_left[NB_DIV], int16_t transform_type_right[NB_DIV] );
53 :
54 : static void run_min_stats( Decoder_State **sts, float *x[CPE_CHANNELS][NB_DIV] );
55 :
56 :
/*-------------------------------------------------------------------*
 * convert_coeffs_to_higher_res()
 *
 * convert MDCT coefficients to higher frequency resolution
 * by applying high- and lowpass filters to subdivide bins
 *
 * Every input bin i of the two subframes yields two output bins: the
 * half sum and the half difference of in2[i] and in1[i]. Even input
 * bins store the sum first, odd input bins store the difference first.
 * Bins are consumed in pairs (i, i + 1) and 2 * len values are written
 * to out, so len is expected to be even.
 *
 * out may be the same pointer as in1. In that case every pass loads its
 * four inputs before it stores its four outputs, so the stores of a
 * pass (out[0] and out[1] coincide with in1[0] and in1[1] for i == 0)
 * cannot corrupt the inputs of that same pass.
 *
 * NOTE(review): with out == in1 and len > 2 the stores of earlier
 * passes still overwrite in1 bins that later passes read (out[2i..2i+3]
 * covers in1[2i..2i+3]). A result that is independent of buffer overlap
 * needs scratch storage; confirm what the callers expect before
 * changing this.
 *-------------------------------------------------------------------*/

void convert_coeffs_to_higher_res(
    const float *in1, /* i  : first subframe input        */
    const float *in2, /* i  : second subframe input       */
    float *out,       /* o  : converted output            */
    const int16_t len /* i  : length of subframes         */
)
{
    int16_t i;
    float sum0, dif0, sum1, dif1;

    if ( in1 == out )
    {
        for ( i = 0; i < len; i += 2 )
        {
            /* evaluate both input bins of this pass before the first store */
            sum0 = 0.5f * ( in2[i] + in1[i] );
            dif0 = 0.5f * ( in2[i] - in1[i] );
            dif1 = 0.5f * ( in2[i + 1] - in1[i + 1] );
            sum1 = 0.5f * ( in2[i + 1] + in1[i + 1] );

            out[2 * i] = sum0;
            out[2 * i + 1] = dif0;
            out[2 * i + 2] = dif1;
            out[2 * i + 3] = sum1;
        }
    }
    else
    {
        for ( i = 0; i < len; i += 2 )
        {
            out[2 * i] = 0.5f * ( in2[i] + in1[i] );
            out[2 * i + 1] = 0.5f * ( in2[i] - in1[i] );
            out[2 * i + 2] = 0.5f * ( in2[i + 1] - in1[i + 1] );
            out[2 * i + 3] = 0.5f * ( in2[i + 1] + in1[i + 1] );
        }
    }

    return;
}
101 :
102 :
103 : /*-------------------------------------------------------------------*
104 : * stereo_mdct_dec_stereo()
105 : *
106 : * decode core and MDCT stereo information
107 : *-------------------------------------------------------------------*/
108 :
109 240156 : static void stereo_mdct_dec_stereo(
110 : CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */
111 : int16_t ms_mask[2][MAX_SFB] /* o : bandwise MS mask */
112 : )
113 : {
114 : int16_t availableBits;
115 : Decoder_State **sts;
116 :
117 240156 : sts = hCPE->hCoreCoder;
118 :
119 240156 : parse_stereo_from_bitstream( hCPE->hStereoMdct, hCPE->hCoreCoder, 0, hCPE->hStereoMdct->isSBAStereoMode, hCPE->hCoreCoder[0], ms_mask );
120 :
121 : /*Split available bits between channels */
122 240156 : availableBits = sts[0]->bits_frame_channel + sts[1]->bits_frame_channel - sts[0]->next_bit_pos - sts[0]->core * ( NF_GAIN_BITS + SMDCT_MINIMUM_ARITH_BITS ) - sts[1]->core * ( NF_GAIN_BITS + SMDCT_MINIMUM_ARITH_BITS );
123 :
124 240156 : splitAvailableBits( availableBits, hCPE->hStereoMdct->split_ratio, hCPE->hStereoMdct->isSBAStereoMode, &sts[0]->bits_frame_channel, &sts[1]->bits_frame_channel );
125 :
126 240156 : sts[0]->bits_frame_channel += sts[0]->core * SMDCT_MINIMUM_ARITH_BITS;
127 240156 : sts[1]->bits_frame_channel += sts[1]->core * SMDCT_MINIMUM_ARITH_BITS;
128 :
129 240156 : sts[1]->bit_stream = &sts[0]->bit_stream[sts[0]->next_bit_pos + sts[0]->bits_frame_channel + sts[0]->core * NF_GAIN_BITS];
130 :
131 240156 : return;
132 : }
133 :
134 :
135 : /*-------------------------------------------------------------------*
136 : * stereo_mdct_core_dec()
137 : *
138 : * MDCT stereo core/stereo decoder
139 : *--------------------------------------------------------------------*/
140 :
141 247263 : void stereo_mdct_core_dec(
142 : Decoder_Struct *st_ivas, /* i/o: IVAS decoder structure */
143 : CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */
144 : float *signal_out[CPE_CHANNELS], /* o : synthesis @internal_FS */
145 : float signal_outFB[CPE_CHANNELS][L_FRAME48k] /* o : synthesis @output_FS */
146 : )
147 : {
148 : int16_t k, ch, nChannels;
149 : Decoder_State *st, **sts;
150 :
151 : /* bitstream */
152 : int16_t param[CPE_CHANNELS][DEC_NPRM_DIV * NB_DIV];
153 : int16_t param_lpc[CPE_CHANNELS][NPRM_LPC_NEW];
154 :
155 : float Aq[CPE_CHANNELS][( NB_SUBFR16k + 1 ) * ( M + 1 )];
156 : float *x[CPE_CHANNELS][NB_DIV];
157 :
158 : float *x_0[CPE_CHANNELS][NB_DIV];
159 :
160 : /* Concealment */
161 : int16_t bfi;
162 :
163 : /* Framing */
164 : int16_t L_frame[CPE_CHANNELS], L_frameTCX[CPE_CHANNELS], nSubframes[CPE_CHANNELS];
165 :
166 : /* TCX */
167 : int16_t fUseTns[CPE_CHANNELS][NB_DIV];
168 : STnsData tnsData[CPE_CHANNELS][NB_DIV];
169 : int16_t tcx_offset[CPE_CHANNELS];
170 : int16_t tcx_offsetFB[CPE_CHANNELS];
171 : int16_t left_rect[CPE_CHANNELS];
172 : int16_t L_spec[CPE_CHANNELS];
173 :
174 : /* stereo */
175 : int16_t ms_mask[NB_DIV][MAX_SFB];
176 :
177 : int16_t p_param[CPE_CHANNELS][NB_DIV];
178 : int16_t nTnsBitsTCX10[CPE_CHANNELS][NB_DIV];
179 :
180 : float signal_outFB_tmp[CPE_CHANNELS][L_FRAME_PLUS];
181 : float signal_out_tmp[CPE_CHANNELS][L_FRAME_PLUS];
182 247263 : push_wmops( "stereo_mdct_core_dec" );
183 :
184 : /*--------------------------------------------------------------------------------*
185 : * Initializations
186 : *--------------------------------------------------------------------------------*/
187 :
188 247263 : sts = hCPE->hCoreCoder;
189 247263 : st = NULL;
190 :
191 247263 : nChannels = CPE_CHANNELS;
192 247263 : bfi = sts[0]->bfi;
193 :
194 741789 : for ( ch = 0; ch < nChannels; ch++ )
195 : {
196 : /* Initialization or re-configuration of Stereo TCX */
197 494526 : stereo_tcx_init_dec( sts[ch], 0, hCPE->last_element_mode );
198 494526 : sts[ch]->hTcxDec->tnsActive[0] = sts[ch]->hTcxDec->tnsActive[1] = 0;
199 494526 : sts[ch]->enablePlcWaveadjust = 0;
200 494526 : set_zero( signal_out_tmp[ch], L_FRAME_PLUS );
201 494526 : x[ch][0] = &signal_out_tmp[ch][0];
202 494526 : x[ch][1] = &signal_out_tmp[ch][0] + L_FRAME_PLUS / 2;
203 :
204 494526 : set_zero( signal_outFB_tmp[ch], N_MAX ); /* length of N_MAX is needed to prevent stereo switching crash -> reuse buffer signal_outFB_tmp[][] */
205 494526 : x_0[ch][0] = &signal_outFB_tmp[ch][0];
206 494526 : x_0[ch][1] = &signal_outFB_tmp[ch][0] + L_FRAME48k / 2;
207 494526 : nTnsBitsTCX10[ch][0] = 0;
208 494526 : nTnsBitsTCX10[ch][1] = 0;
209 :
210 494526 : set_s( param_lpc[ch], 0, NPRM_LPC_NEW );
211 : }
212 247263 : set_s( ms_mask[0], 0, MAX_SFB );
213 247263 : set_s( ms_mask[1], 0, MAX_SFB );
214 :
215 247263 : initMdctStereoDecData( hCPE->hStereoMdct, sts[0]->igf, sts[0]->hIGFDec->igfData.igfInfo.grid, hCPE->element_brate, sts[0]->bwidth );
216 247263 : hCPE->hStereoMdct->isSBAStereoMode = ( ( st_ivas->ivas_format == SBA_FORMAT || st_ivas->ivas_format == SBA_ISM_FORMAT ) && ( st_ivas->nchan_transport == 2 ) );
217 247263 : if ( !bfi )
218 : {
219 240156 : ivas_mdct_dec_side_bits_frame_channel( hCPE, param_lpc, p_param, hCPE->hCoreCoder[0], nTnsBitsTCX10, param, 0, 0 );
220 :
221 240156 : if ( sts[0]->igf )
222 : {
223 573435 : for ( ch = 0; ch < nChannels; ch++ )
224 : {
225 382290 : st = sts[ch];
226 382290 : mdct_read_IGF_bits( st, sts[0] );
227 : }
228 : }
229 :
230 240156 : stereo_mdct_dec_stereo( hCPE, ms_mask );
231 : }
232 : else
233 : {
234 7107 : mvs2s( hCPE->hStereoMdct->prev_ms_mask[0], ms_mask[0], MAX_SFB );
235 7107 : mvs2s( hCPE->hStereoMdct->prev_ms_mask[1], ms_mask[1], MAX_SFB );
236 :
237 7107 : if ( sts[0]->core != TCX_10_CORE && sts[1]->core != TCX_10_CORE )
238 : {
239 7017 : hCPE->hStereoMdct->mdct_stereo_mode[1] = SMDCT_DUAL_MONO;
240 7017 : hCPE->hStereoMdct->IGFStereoMode[1] = SMDCT_DUAL_MONO;
241 : }
242 90 : else if ( sts[0]->core == TCX_10_CORE && sts[1]->core == TCX_10_CORE )
243 : {
244 78 : hCPE->hStereoMdct->global_ild[0] = hCPE->hStereoMdct->global_ild[1];
245 78 : hCPE->hStereoMdct->mdct_stereo_mode[0] = hCPE->hStereoMdct->mdct_stereo_mode[1];
246 78 : hCPE->hStereoMdct->IGFStereoMode[0] = hCPE->hStereoMdct->IGFStereoMode[1];
247 : }
248 : }
249 :
250 247263 : ivas_mdct_core_invQ( hCPE, nTnsBitsTCX10, p_param, param_lpc, param, fUseTns, tnsData, x_0, x, Aq, ms_mask, 0 );
251 :
252 741789 : for ( ch = 0; ch < nChannels; ch++ )
253 : {
254 494526 : nSubframes[ch] = ( sts[ch]->core == TCX_10_CORE ) ? NB_DIV : 1;
255 1001373 : for ( k = 0; k < nSubframes[ch]; k++ )
256 : {
257 506847 : L_spec[ch] = sts[ch]->hTcxCfg->tcx_coded_lines / nSubframes[ch];
258 :
259 506847 : init_tcx_info( sts[ch], sts[ch]->L_frame / nSubframes[ch], sts[ch]->hTcxDec->L_frameTCX / nSubframes[ch], k, bfi, &tcx_offset[ch], &tcx_offsetFB[ch], &L_frame[ch], &L_frameTCX[ch], &left_rect[ch], &L_spec[ch] );
260 : }
261 : }
262 :
263 : /* IGF decoding */
264 247263 : if ( sts[0]->igf || sts[1]->igf )
265 : {
266 195645 : if ( sts[0]->core != sts[1]->core )
267 : {
268 5571 : for ( ch = 0; ch < nChannels; ch++ )
269 : {
270 3714 : st = sts[ch];
271 3714 : if ( !bfi || ( bfi && st->core != ACELP_CORE ) )
272 : {
273 8136 : for ( k = 0; k < nSubframes[ch]; k++ )
274 : {
275 : /* mono or dual mono IGF decoding */
276 4806 : decoder_tcx_IGF_mono( st, x[ch][k], L_frame[ch], left_rect[ch], bfi, k );
277 : }
278 : }
279 : }
280 : }
281 193788 : else if ( sts[0]->core != ACELP_CORE )
282 : {
283 192900 : assert( nSubframes[0] == nSubframes[1] );
284 :
285 390057 : for ( k = 0; k < nSubframes[0]; k++ )
286 : {
287 197157 : if ( ( hCPE->hStereoMdct->IGFStereoMode[k] != SMDCT_DUAL_MONO || hCPE->hStereoMdct->mdct_stereo_mode[k] != SMDCT_DUAL_MONO ) && !hCPE->hStereoMdct->isSBAStereoMode )
288 : {
289 83565 : assert( ( sts[0]->core == sts[1]->core ) || ( hCPE->hStereoMdct->mdct_stereo_mode[0] == SMDCT_DUAL_MONO ) );
290 :
291 : /* stereo IGF decoding */
292 83565 : decoder_tcx_IGF_stereo( sts, hCPE->hStereoMdct, ms_mask, x, L_frame[0], left_rect[0], k, bfi, 0 /* MCT_flag */ );
293 : }
294 : else
295 : {
296 340776 : for ( ch = 0; ch < nChannels; ch++ )
297 : {
298 227184 : st = sts[ch];
299 : /* mono or dual mono IGF decoding */
300 227184 : decoder_tcx_IGF_mono( st, x[ch][k], L_frame[ch], left_rect[ch], bfi, k );
301 : }
302 : }
303 : }
304 : }
305 : }
306 :
307 : /*--------------------------------------------------------------------------------*
308 : * Stereo processing
309 : *--------------------------------------------------------------------------------*/
310 :
311 247263 : if ( !bfi )
312 : {
313 720468 : for ( ch = 0; ch < CPE_CHANNELS; ch++ )
314 : {
315 480312 : st = sts[ch];
316 972795 : for ( k = 0; k < nSubframes[ch]; k++ )
317 : {
318 : float sns_int_scf[FDNS_NPTS];
319 :
320 492483 : sns_interpolate_scalefactors( &sns_int_scf[0], &Aq[ch][k * M], DEC );
321 :
322 492483 : if ( st->hTonalMDCTConc != NULL && ( ( k + 1 ) == nSubframes[ch] ) )
323 : {
324 480312 : TonalMDCTConceal_SaveFreqSignal( st->hTonalMDCTConc, x[ch][k], L_frameTCX[ch], L_frame[ch], &sns_int_scf[0], get_igf_startline( st, L_frame[ch], L_frameTCX[ch] ) );
325 : }
326 : }
327 :
328 480312 : TonalMDCTConceal_UpdateState( st->hTonalMDCTConc, L_frameTCX[ch], ( st->hTcxDec->tcxltp_last_gain_unmodified > 0 ) ? st->old_fpitch : 0, bfi, bfi && st->tonal_mdct_plc_active );
329 : }
330 :
331 240156 : mvs2s( ms_mask[0], hCPE->hStereoMdct->prev_ms_mask[0], MAX_SFB );
332 240156 : mvs2s( ms_mask[1], hCPE->hStereoMdct->prev_ms_mask[1], MAX_SFB );
333 : }
334 :
335 247263 : if ( ( !bfi || !( sts[0]->core == ACELP_CORE && sts[1]->core == ACELP_CORE ) ) && !hCPE->hStereoMdct->isSBAStereoMode )
336 : {
337 : #ifdef DEBUGGING
338 : assert( ( sts[0]->core == sts[1]->core ) || ( ( hCPE->hStereoMdct->mdct_stereo_mode[0] == SMDCT_DUAL_MONO ) && ( hCPE->hStereoMdct->mdct_stereo_mode[1] == SMDCT_DUAL_MONO ) ) );
339 : #endif
340 127926 : stereo_decoder_tcx( hCPE->hStereoMdct, ms_mask, x_0[1], x[0], x[1], &hCPE->hStereoMdct->mdct_stereo_mode[0], sts[0]->core, sts[1]->core, sts[0]->igf, L_frameTCX[0], L_frameTCX[1], 0, sts[0]->last_core, sts[1]->last_core, 0 );
341 : }
342 :
343 247263 : ivas_mdct_core_tns_ns( hCPE, fUseTns, tnsData, x, Aq, 0 );
344 :
345 247263 : if ( st_ivas->renderer_type == RENDERER_MC_PARAMMC && ( st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_MONO || st_ivas->hDecoderConfig->output_config == IVAS_AUDIO_CONFIG_STEREO ) )
346 : {
347 0 : ivas_ls_setup_conversion_process_mdct_param_mc( st_ivas, x );
348 : }
349 :
350 247263 : run_min_stats( sts, x );
351 :
352 247263 : if ( hCPE->nchan_out == 1 && ( !bfi || ( bfi && sts[0]->core != ACELP_CORE && sts[1]->core != ACELP_CORE ) ) )
353 : {
354 17598 : apply_dmx_weights( hCPE, x, sts[0]->transform_type, sts[1]->transform_type );
355 : }
356 :
357 247263 : ivas_mdct_core_reconstruct( hCPE, x, signal_outFB_tmp, fUseTns, 0 );
358 :
359 247263 : mvr2r( signal_out_tmp[0], signal_out[0], L_FRAME48k );
360 247263 : mvr2r( signal_out_tmp[1], signal_out[1], L_FRAME48k );
361 :
362 247263 : mvr2r( signal_outFB_tmp[0], signal_outFB[0], hCPE->hCoreCoder[0]->hTcxDec->L_frameTCX );
363 247263 : mvr2r( signal_outFB_tmp[1], signal_outFB[1], hCPE->hCoreCoder[1]->hTcxDec->L_frameTCX );
364 :
365 247263 : pop_wmops();
366 247263 : return;
367 : }
368 :
369 :
370 : /*-------------------------------------------------------------------*
371 : * apply_dmx_weights()
372 : *
373 : * apply bandwise weighting for later dmx in case of mono output
374 : *--------------------------------------------------------------------*/
375 :
376 17598 : static void apply_dmx_weights(
377 : CPE_DEC_HANDLE hCPE, /* i/o: CPE decoder structure */
378 : float *x[CPE_CHANNELS][NB_DIV], /* i/o: MDCT Spectrum */
379 : int16_t transform_type_left[NB_DIV], /* i : indicate TCX5 for left ch */
380 : int16_t transform_type_right[NB_DIV] /* i : indicate TCX5 for right ch */
381 : )
382 : {
383 : int16_t b, k, l, i, ch;
384 : int16_t nsub, nsub2[2], nChannels;
385 : int16_t transform_type[2][2];
386 : int16_t numCoeffs[2], frameSize;
387 : int16_t tcx_10_only, w_idx;
388 : int16_t start, stop, start_tcx5, stop_tcx5;
389 17598 : STEREO_MDCT_BAND_PARAMETERS *sfbConf = NULL;
390 : float mdst[CPE_CHANNELS][NB_DIV][L_FRAME48k];
391 : float convertRes[CPE_CHANNELS][L_FRAME48k];
392 : float *sig[CPE_CHANNELS][NB_DIV], *pTmp[CPE_CHANNELS][NB_DIV];
393 :
394 17598 : nChannels = CPE_CHANNELS;
395 17598 : frameSize = hCPE->hStereoMdct->stbParamsTCX20.sfbOffset[hCPE->hStereoMdct->stbParamsTCX20.sfbCnt];
396 :
397 17598 : transform_type[0][0] = transform_type_left[0];
398 17598 : transform_type[0][1] = transform_type_left[1];
399 17598 : transform_type[1][0] = transform_type_right[0];
400 17598 : transform_type[1][1] = transform_type_right[1];
401 :
402 : /* set overall frequency resolution of (sub)frame to maximum of (sub)frame, requires conversion if both channels are not the same */
403 17598 : if ( transform_type[0][0] == TCX_20 || transform_type[1][0] == TCX_20 )
404 : {
405 : /* use TCX20 band config for TCX20 in both channels and mixed frames */
406 17247 : sfbConf = &hCPE->hStereoMdct->stbParamsTCX20;
407 17247 : nsub = nsub2[0] = nsub2[1] = 1; /* overall TCX 20 */
408 17247 : tcx_10_only = 0;
409 : }
410 : else
411 : {
412 : /* use TCX10 band config only if none of the channels is TCX20 */
413 351 : sfbConf = &hCPE->hStereoMdct->stbParamsTCX10;
414 351 : nsub = 2;
415 : /* set resolution per subframe, subdivide again if subframe is TCX5 in both channels */
416 351 : nsub2[0] = ( transform_type[0][0] == TCX_5 && transform_type[1][0] == TCX_5 ) ? 2 : 1;
417 351 : nsub2[1] = ( transform_type[0][1] == TCX_5 && transform_type[1][1] == TCX_5 ) ? 2 : 1;
418 351 : tcx_10_only = 1;
419 : }
420 :
421 : /* for subframes with only TCX5 in both channels number of coefficients is only half (in 2 quarterframes) */
422 17598 : numCoeffs[0] = ( nsub2[0] == 1 ) ? sfbConf->sfbOffset[sfbConf->sfbCnt] : sfbConf->sfbOffset[sfbConf->sfbCnt] / 2;
423 17598 : numCoeffs[1] = ( nsub2[1] == 1 ) ? sfbConf->sfbOffset[sfbConf->sfbCnt] : sfbConf->sfbOffset[sfbConf->sfbCnt] / 2;
424 :
425 : /* initially, set pointers to input; if conversion occurs in (sub)frame, set to convertRes */
426 17598 : sig[0][0] = pTmp[0][0] = x[0][0];
427 17598 : sig[0][1] = pTmp[0][1] = x[0][1];
428 17598 : sig[1][0] = pTmp[1][0] = x[1][0];
429 17598 : sig[1][1] = pTmp[1][1] = x[1][1];
430 :
431 : /* convert (sub)frames to higher frequency resolution */
432 52794 : for ( ch = 0; ch < nChannels; ch++ )
433 : {
434 105588 : for ( k = 0; k < NB_DIV; k++ )
435 : {
436 70392 : if ( transform_type[ch][k] == TCX_5 && nsub2[k] == 1 )
437 : {
438 : /* subframe is TCX5, but TCX10 or TCX20 in other channel -> convert channel with TCX5 to TCX10 resolution */
439 174 : pTmp[ch][k] = sig[ch][k] = convertRes[ch] + k * frameSize / 2;
440 174 : convert_coeffs_to_higher_res( x[ch][k], x[ch][k] + frameSize / 4, pTmp[ch][k], frameSize / 4 );
441 : }
442 : }
443 :
444 35196 : if ( transform_type[ch][0] != TCX_20 && nsub == 1 )
445 : {
446 : /* TCX20 and TCX10 in same frame -> convert channel with TCX10 to TCX20 resolution */
447 156 : sig[ch][0] = convertRes[ch];
448 156 : convert_coeffs_to_higher_res( pTmp[ch][0], pTmp[ch][1], sig[ch][0], frameSize / 2 );
449 : }
450 : }
451 :
452 : /* MDST estimate */
453 52794 : for ( ch = 0; ch < nChannels; ch++ )
454 : {
455 71094 : for ( k = 0; k < nsub; k++ )
456 : {
457 72564 : for ( l = 0; l < nsub2[k]; l++ )
458 : {
459 36666 : mdst[ch][k][l * numCoeffs[k]] = mdst[ch][k][( l + 1 ) * numCoeffs[k] - 1] = 0.f;
460 24295494 : for ( i = l * numCoeffs[k] + 1; i < ( l + 1 ) * numCoeffs[k] - 1; i++ )
461 : {
462 24258828 : mdst[ch][k][i] = sig[ch][k][i + 1] - sig[ch][k][i - 1];
463 : }
464 : }
465 : }
466 : }
467 :
468 : /* compute and apply bandwise weigths for active downmix (similar to DFT Stereo) */
469 17598 : stop_tcx5 = 0;
470 694749 : for ( b = 0; b < sfbConf->sfbCnt; b++ )
471 : {
472 : float w[CPE_CHANNELS][4];
473 :
474 1363080 : for ( k = 0; k < nsub; k++ )
475 : {
476 1381383 : for ( l = 0; l < nsub2[k]; l++ )
477 : {
478 695454 : float sum_nrg_L = EPSILON, sum_nrg_R = EPSILON;
479 695454 : float dot_prod_real = EPSILON, dot_prod_imag = EPSILON;
480 : float sum_nrg_Mid, sum_abs, dot_prod_abs;
481 :
482 695454 : start = l * numCoeffs[k] + sfbConf->sfbOffset[b] / nsub2[k];
483 695454 : stop = l * numCoeffs[k] + sfbConf->sfbOffset[b + 1] / nsub2[k];
484 :
485 : /* compute band energies and cross correlation */
486 12861534 : for ( i = start; i < stop; i++ )
487 : {
488 12166080 : sum_nrg_L += sig[0][k][i] * sig[0][k][i] + mdst[0][k][i] * mdst[0][k][i];
489 12166080 : sum_nrg_R += sig[1][k][i] * sig[1][k][i] + mdst[1][k][i] * mdst[1][k][i];
490 12166080 : dot_prod_real += sig[0][k][i] * sig[1][k][i] + mdst[0][k][i] * mdst[1][k][i];
491 12166080 : dot_prod_imag += mdst[0][k][i] * sig[1][k][i] - sig[0][k][i] * mdst[1][k][i];
492 : }
493 695454 : sum_nrg_Mid = max( 0.f, sum_nrg_L + sum_nrg_R + 2.f * dot_prod_real );
494 695454 : sum_abs = sqrtf( sum_nrg_L ) + sqrtf( sum_nrg_R ) + EPSILON;
495 695454 : dot_prod_abs = sqrtf( dot_prod_real * dot_prod_real + dot_prod_imag * dot_prod_imag );
496 :
497 : /* calculate weights */
498 695454 : if ( hCPE->hStereoMdct->reverse_dmx == 0 )
499 : {
500 419403 : w[1][2 * k + l] = sqrtf( 0.5f * ( sum_nrg_L + sum_nrg_R ) + dot_prod_abs ) / sum_abs;
501 419403 : w[0][2 * k + l] = w[1][2 * k + l] + sqrtf( 2.f ) * ( 1.f - sqrtf( sum_nrg_Mid ) / sum_abs );
502 : }
503 : else
504 : {
505 276051 : w[0][2 * k + l] = sqrtf( 0.5f * ( sum_nrg_L + sum_nrg_R ) + dot_prod_abs ) / sum_abs;
506 276051 : w[1][2 * k + l] = w[0][2 * k + l] + sqrtf( 2.f ) * ( 1.f - sqrtf( sum_nrg_Mid ) / sum_abs );
507 : }
508 : }
509 : }
510 :
511 : /* apply weights to channels with their original frequency resolutions */
512 2031453 : for ( ch = 0; ch < CPE_CHANNELS; ch++ )
513 : {
514 1354302 : if ( transform_type[ch][0] == TCX_20 )
515 : {
516 25081140 : for ( i = sfbConf->sfbOffset[b]; i < sfbConf->sfbOffset[b + 1]; i++ )
517 : {
518 23750400 : x[ch][0][i] *= w[ch][0];
519 : }
520 : }
521 : else
522 : {
523 23562 : start = sfbConf->sfbOffset[b];
524 23562 : stop = sfbConf->sfbOffset[b + 1];
525 23562 : if ( !tcx_10_only ) /* TCX20 band config is used */
526 : {
527 6006 : start /= 2;
528 6006 : stop /= 2;
529 : }
530 :
531 70686 : for ( k = 0; k < NB_DIV; k++ )
532 : {
533 47124 : w_idx = ( nsub == 1 ) ? 0 : 2 * k;
534 47124 : if ( transform_type[ch][k] == TCX_10 )
535 : {
536 287127 : for ( i = start; i < stop; i++ )
537 : {
538 265680 : x[ch][k][i] *= w[ch][w_idx];
539 : }
540 : }
541 : else /* TCX_5 */
542 : {
543 25677 : start_tcx5 = stop_tcx5;
544 25677 : stop_tcx5 = ( stop + 1 ) / 2;
545 :
546 111717 : for ( i = start_tcx5; i < stop_tcx5; i++ )
547 : {
548 86040 : x[ch][k][i] *= w[ch][w_idx];
549 : }
550 :
551 25677 : if ( nsub2[k] == 2 )
552 : {
553 19050 : w_idx++;
554 : }
555 :
556 111717 : for ( i = start_tcx5; i < stop_tcx5; i++ )
557 : {
558 86040 : x[ch][k][i + ( frameSize >> 2 )] *= w[ch][w_idx];
559 : }
560 : }
561 : }
562 : }
563 : }
564 : }
565 :
566 17598 : return;
567 : }
568 :
569 :
570 : /*-------------------------------------------------------------------*
571 : * run_min_stats()
572 : *
573 : * run the minimum statistics noise estimation algorithm
574 : * directly on the MDCT spectrum
575 : *--------------------------------------------------------------------*/
576 :
577 247263 : static void run_min_stats(
578 : Decoder_State **sts,
579 : float *x[CPE_CHANNELS][NB_DIV] /* i/o: MDCT Spectrum */
580 : )
581 : {
582 : int16_t ch, will_estimate_noise_on_channel[CPE_CHANNELS], save_VAD[CPE_CHANNELS];
583 : float power_spec[L_FRAME16k];
584 : float *spec_in;
585 :
586 : /* Check if the minimum statistics would run on the respective channels. They are run on inactive TCX20 channels */
587 247263 : will_estimate_noise_on_channel[0] = sts[0]->core == TCX_20_CORE && !sts[0]->VAD;
588 247263 : will_estimate_noise_on_channel[1] = sts[1]->core == TCX_20_CORE && !sts[1]->VAD;
589 :
590 247263 : save_VAD[0] = sts[0]->VAD;
591 247263 : save_VAD[1] = sts[1]->VAD;
592 :
593 : /* The first loop calculates the power spectra needed in the minimum statistics (MS) noise estimation. This is only needed if the MS
594 : would run at all on at least one of the channels. If they run on both channels, we need to calculate two distinct power spectra
595 : for the two different channels. If they would only run on one of the channels, the VAD of the other one is patched so that the MS will
596 : still run. This other channel then uses the power spectrum of the other channel to run the MS. This is done to keep continuity and synchronicity
597 : between the two noise levels and silently assumes that the background noise is somehow diffuse and at leas partly shared between the channels */
598 741789 : for ( ch = 0; ch < CPE_CHANNELS; ch++ )
599 : {
600 : Decoder_State *st;
601 494526 : st = sts[ch];
602 :
603 494526 : if ( !sts[0]->bfi && ( will_estimate_noise_on_channel[0] || will_estimate_noise_on_channel[1] ) )
604 : {
605 : /* if noise estimation is expected to run on this channel, compute power spectrum from it,
606 : otherwise, use other channel's signal */
607 18372 : if ( will_estimate_noise_on_channel[ch] )
608 : {
609 10938 : spec_in = &x[ch][0][0];
610 : }
611 : else
612 : {
613 7434 : spec_in = &x[( ch + 1 ) % 2][0][0];
614 : /* patch VAD to zero so that estimation runs, will later be restored */
615 7434 : st->VAD = 0;
616 : }
617 :
618 : /* Compute power spectrum twice if estimation will run on both channels. If only on one channel, it is
619 : computed only once (for ch == 0) and not again in the second run sive the outcome will be the same anyway */
620 18372 : if ( ( will_estimate_noise_on_channel[0] == will_estimate_noise_on_channel[1] ) || ch == 0 )
621 : {
622 : float power_spec_scale_fac;
623 :
624 : /* calculate power spectrum from MDCT coefficients and estimated MDST coeffs */
625 10938 : power_spec_scale_fac = 1.f / (float) ( L_FRAME16k * L_FRAME16k );
626 10938 : power_spec[0] = spec_in[0] * spec_in[0] * power_spec_scale_fac;
627 10938 : power_spec[L_FRAME16k - 1] = spec_in[L_FRAME16k - 1] * spec_in[L_FRAME16k - 1] * power_spec_scale_fac;
628 3489222 : for ( int16_t i = 1; i < L_FRAME16k - 1; i++ )
629 : {
630 : float mdst;
631 3478284 : mdst = spec_in[i + 1] - spec_in[i - 1];
632 3478284 : power_spec[i] = power_spec_scale_fac * ( spec_in[i] * spec_in[i] + mdst * mdst );
633 : }
634 : }
635 : }
636 :
637 494526 : if ( st->core == TCX_20_CORE )
638 : {
639 478914 : noisy_speech_detection( st->hFdCngDec, save_VAD[ch] && st->m_frame_type == ACTIVE_FRAME, x[ch][0] );
640 478914 : st->hFdCngDec->hFdCngCom->likelihood_noisy_speech = 0.99f * st->hFdCngDec->hFdCngCom->likelihood_noisy_speech + 0.01f * (float) st->hFdCngDec->hFdCngCom->flag_noisy_speech;
641 478914 : st->lp_noise = st->hFdCngDec->lp_noise;
642 : }
643 :
644 494526 : if ( will_estimate_noise_on_channel[0] || will_estimate_noise_on_channel[1] || st->bfi )
645 : {
646 32586 : ApplyFdCng( NULL, st->bfi ? NULL : power_spec, NULL, NULL, st, st->bfi, 0 );
647 : }
648 :
649 : /* restore VAD (see above) */
650 494526 : st->VAD = save_VAD[ch];
651 : }
652 :
653 247263 : return;
654 : }
|