Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <assert.h>
34 : #include <stdint.h>
35 : #include "options.h"
36 : #ifdef DEBUGGING
37 : #include "debug.h"
38 : #endif
39 : #include <math.h>
40 : #include "cnst.h"
41 : #include "prot.h"
42 : #include "rom_com.h"
43 : #include "basop_proto_func.h"
44 : #include "wmc_auto.h"
45 : #include "ivas_prot.h"
46 :
47 : /*-------------------------------------------------------------------*
48 : * stereo_tcx_init_enc()
49 : *
50 : * Initialize stereo TCX encoder
51 : *-------------------------------------------------------------------*/
52 :
53 2619322 : void stereo_tcx_init_enc(
54 : Encoder_State *st /* i/o: encoder state structure */
55 : )
56 : {
57 : int16_t prev_IsTNSAllowed;
58 2619322 : assert( st->core_brate != SID_2k40 && st->core_brate != FRAME_NO_DATA );
59 :
60 : /* Get the raw coder type from signal analysis*/
61 2619322 : st->coder_type = st->coder_type_raw;
62 2619322 : if ( !st->localVAD )
63 : {
64 410384 : st->coder_type = INACTIVE;
65 : }
66 2208938 : else if ( st->coder_type > GENERIC )
67 : {
68 0 : st->coder_type = GENERIC;
69 : }
70 :
71 2619322 : if ( st->tcxonly )
72 : {
73 2354208 : st->coder_type = GENERIC;
74 : }
75 :
76 2619322 : st->hTcxCfg->coder_type = st->coder_type;
77 2619322 : if ( !st->tcxonly && !st->localVAD && st->hTcxCfg->coder_type == GENERIC )
78 : {
79 0 : st->hTcxCfg->coder_type = UNVOICED;
80 : }
81 :
82 : /*sampling rate*/
83 2619322 : st->sr_core = getCoreSamplerateMode2( st->element_mode, st->bits_frame_nominal * FRAMES_PER_SEC, st->bwidth, st->flag_ACELP16k, st->rf_mode, st->is_ism_format );
84 2619322 : st->fscale = sr2fscale( st->sr_core );
85 :
86 : /*frame size*/
87 2619322 : st->L_frame = (int16_t) ( st->sr_core / FRAMES_PER_SEC );
88 2619322 : st->hTcxEnc->L_frameTCX = (int16_t) ( st->input_Fs / FRAMES_PER_SEC );
89 :
90 2619322 : if ( ( st->L_frame == L_FRAME16k && ( st->bits_frame_nominal * FRAMES_PER_SEC ) <= MAX_ACELP_BRATE ) || ( st->tcxonly && ( st->sr_core == INT_FS_16k || st->sr_core == INT_FS_16k ) ) )
91 : {
92 344952 : st->nb_subfr = NB_SUBFR16k;
93 : }
94 : else
95 : {
96 2274370 : st->nb_subfr = NB_SUBFR;
97 : }
98 :
99 : /*TCX tools*/
100 2619322 : st->hTcxCfg->ctx_hm = getCtxHm( st->element_mode, st->bits_frame_nominal * FRAMES_PER_SEC, st->rf_mode );
101 2619322 : st->hTcxCfg->resq = getResq( st->bits_frame_nominal * FRAMES_PER_SEC );
102 2619322 : st->hTcxEnc->tcx_lpc_shaped_ari = getTcxLpcShapedAri( st->bits_frame_nominal * FRAMES_PER_SEC, st->rf_mode, st->element_mode );
103 2619322 : st->igf = getIgfPresent( st->element_mode, st->bits_frame_nominal * FRAMES_PER_SEC, st->bwidth, st->rf_mode );
104 2619322 : prev_IsTNSAllowed = st->hTcxCfg->fIsTNSAllowed;
105 2619322 : if ( st->element_mode != EVS_MONO )
106 : {
107 2619322 : st->hTcxCfg->fIsTNSAllowed = getTnsAllowed( st->bits_frame_nominal * FRAMES_PER_SEC, st->igf, st->element_mode );
108 : }
109 2619322 : if ( !prev_IsTNSAllowed && st->hTcxCfg->fIsTNSAllowed && st->element_mode == IVAS_CPE_DFT ) /* may happen in unified stereo when switching stereo technologies */
110 : {
111 2 : InitTnsConfigs( st->bwidth, st->hTcxCfg->tcx_coded_lines, st->hTcxCfg->tnsConfig, st->hIGFEnc->infoStopFrequency, st->bits_frame_nominal * FRAMES_PER_SEC, st->element_mode, 0 );
112 :
113 2 : SetAllowTnsOnWhite( st->hTcxCfg->tnsConfig, 0 );
114 : }
115 2619322 : st->core_brate = st->total_brate;
116 :
117 2619322 : return;
118 : }
119 :
120 :
121 : /*-------------------------------------------------------------------*
122 : * stereo_tcx_core_enc()
123 : *
124 : * Stereo TCX encoder
125 : *-------------------------------------------------------------------*/
126 :
127 469572 : void stereo_tcx_core_enc(
128 : Encoder_State *st, /* i/o: encoder state structure */
129 : const float new_samples_12k8[], /* i : buffer of input signal @12.8 kHz */
130 : const float new_samples_16k[], /* i : buffer of input signal @16 kHz */
131 : const float Aw[], /* i : weighted A(z) unquant. for subframes*/
132 : float lsp_new[], /* i : LSPs at the end of the frame */
133 : float lsp_mid[], /* i : LSPs in the middle of the frame */
134 : float pitch_buf[NB_SUBFR16k], /* o : floating pitch for each subframe */
135 : const int16_t last_element_mode, /* i : last element mode */
136 : const int16_t vad_hover_flag /* i : VAD hangover flag */
137 : )
138 : {
139 : TCX_ENC_HANDLE hTcxEnc;
140 : int16_t i, n;
141 :
142 : /*size and windowing*/
143 : const float *p_new_samples;
144 : int16_t n_subframes;
145 : int16_t last_core_orig;
146 :
147 : /*Bits*/
148 : int16_t nbits_start, total_nbbits, nbits_header;
149 : int16_t target_bits[2], bitsAvailable;
150 : int16_t nbits_lpc[2];
151 : int16_t tnsSize[2]; /* number of tns parameters put into prm */
152 : int16_t tnsBits[2]; /* number of tns bits in the frame */
153 : int16_t ltpBits;
154 :
155 : /*Parameters*/
156 : int16_t param_lpc[NPRM_LPC_NEW];
157 : int16_t param_core[2 * NPRM_DIV];
158 : int16_t bits_param_lpc[10], no_param_lpc;
159 :
160 : /*LPC*/
161 : float lsf_q[M], lsp_q[M], lsp[M], lsf[M];
162 : float lspmid_q[M];
163 : float A_q[M + 1];
164 : float gainlpc[2][FDNS_NPTS];
165 : float lsp_tcx_q[M], lsf_tcx_q[M];
166 : int16_t tcx_lpc_cdk;
167 : Word16 A_q_ind[M + 1]; /*for LPC-based AC*/
168 : Word16 lspq_ind[M]; /*for LPC-based AC*/
169 :
170 : /*TCX-LTP*/
171 : int16_t T_op[3];
172 :
173 : /*HM*/
174 : int16_t indexBuffer[2 * ( ( N_MAX / 2 ) + 1 )];
175 : CONTEXT_HM_CONFIG hm_cfg[2];
176 :
177 : /* bitstream */
178 469572 : BSTR_ENC_HANDLE hBstr = st->hBstr;
179 :
180 : #ifdef DEBUG_MODE_TCX
181 : static FILE *pF = NULL;
182 : if ( pF == NULL )
183 : pF = fopen( "./res/stereo_tcx_enc_ind.txt", "w" );
184 : #endif
185 :
186 469572 : push_wmops( "stereo_tcx_core_enc" );
187 :
188 : /*Sanity check*/
189 469572 : assert( st->mdct_sw == MODE1 && "MDCT switching should be in TCX MODE 1\n" );
190 469572 : assert( st->rf_mode == 0 && "Channel aware not supported! " );
191 :
192 469572 : no_param_lpc = 0;
193 469572 : n_subframes = 1;
194 :
195 469572 : hTcxEnc = st->hTcxEnc;
196 :
197 : /*--------------------------------------------------------------*
198 : * Configuration of TCX
199 : *---------------------------------------------------------------*/
200 :
201 469572 : stereo_tcx_init_enc( st );
202 :
203 : /*--------------------------------------------------------------*
204 : * Initialization
205 : *---------------------------------------------------------------*/
206 :
207 : /* Subtract the bits of common header */
208 469572 : st->bits_frame_core = (int16_t) ( st->total_brate / FRAMES_PER_SEC ) - hBstr->nb_bits_tot;
209 :
210 : /*Get Bits of TCX header*/
211 469572 : nbits_header = 3; /* Coder types (2) + last_core for bfi (1) */
212 :
213 469572 : if ( st->tcxonly )
214 : {
215 : /* TCX20/10 flag */
216 204458 : nbits_header++;
217 : }
218 :
219 : /* bits for TCX overlap mode (1 bit: full, 2 bits: half or no overlap) */
220 469572 : nbits_header += ( st->hTcxCfg->tcx_curr_overlap_mode == HALF_OVERLAP || st->hTcxCfg->tcx_curr_overlap_mode == MIN_OVERLAP ) ? 2 : 1;
221 :
222 469572 : hm_cfg[0].indexBuffer = &indexBuffer[0];
223 469572 : hm_cfg[1].indexBuffer = &indexBuffer[N_MAX / 2 + 1];
224 :
225 469572 : set_s( tnsSize, 0, 2 );
226 469572 : set_s( tnsBits, 0, 2 );
227 469572 : set_s( nbits_lpc, 0, 2 );
228 469572 : ltpBits = 0;
229 :
230 1878288 : for ( i = 0; i < 3; i++ )
231 : {
232 1408716 : T_op[i] = st->pitch[i];
233 :
234 : /* check minimum pitch for quantization */
235 1408716 : if ( T_op[i] < PIT_MIN_SHORTER )
236 : {
237 184831 : T_op[i] *= 2;
238 : }
239 :
240 : /* convert pitch values to core sampling-rate */
241 1408716 : if ( st->L_frame != L_FRAME )
242 : {
243 1122312 : T_op[i] = (int16_t) ( T_op[i] * (float) st->L_frame / (float) L_FRAME + 0.5f );
244 : }
245 : }
246 :
247 469572 : if ( st->L_frame == L_FRAME )
248 : {
249 95468 : p_new_samples = new_samples_12k8;
250 : }
251 : else
252 : {
253 374104 : p_new_samples = new_samples_16k;
254 : }
255 :
256 : /*--------------------------------------------------------------*
257 : * TCX20/TCX10 switching decision
258 : *---------------------------------------------------------------*/
259 :
260 469572 : if ( hTcxEnc->tcxMode == TCX_10 )
261 : {
262 3579 : st->core = TCX_10_CORE;
263 3579 : n_subframes = 2;
264 3579 : nbits_header += ( st->hTcxCfg->tcx_last_overlap_mode == HALF_OVERLAP || st->hTcxCfg->tcx_last_overlap_mode == MIN_OVERLAP ) ? 2 : 1;
265 : }
266 465993 : else if ( hTcxEnc->tcxMode == TCX_20 )
267 : {
268 465993 : st->core = TCX_20_CORE;
269 465993 : n_subframes = 1;
270 : }
271 :
272 : #ifdef DEBUG_MODE_TCX
273 : fprintf( pF, "== stereo Chan %d - Nominal Bits %d - Allocated Bits %d ==\n", st->idchan, st->bits_frame_nominal, (int16_t) ( st->total_brate / FRAMES_PER_SEC ) );
274 : fprintf( pF, "stereo Common Header: %d bits\n", hBstr->nb_bits_tot );
275 : #endif
276 469572 : nbits_start = hBstr->nb_bits_tot;
277 :
278 : /*--------------------------------------------------------------------------------*
279 : * Write TCX signaling
280 : *--------------------------------------------------------------------------------*/
281 :
282 : /* TCX20/TCX10 and coder type */
283 469572 : writeTCXMode( st, hBstr, 0, /* MCT_flag */ &nbits_start );
284 :
285 : /* write last_core for error concealment */
286 469572 : push_next_indice( hBstr, ( st->last_core != ACELP_CORE || st->core == TCX_10_CORE ), 1 );
287 :
288 : /* write TCX overlap mode (1 bit: full, 2 bits: half or no overlap) */
289 469572 : writeTCXWindowing( hBstr, st->hTcxCfg->tcx_curr_overlap_mode );
290 469572 : if ( st->core == TCX_10_CORE )
291 : {
292 3579 : writeTCXWindowing( hBstr, st->hTcxCfg->tcx_last_overlap_mode );
293 : }
294 :
295 469572 : assert( nbits_header == ( hBstr->nb_bits_tot - nbits_start ) );
296 : #ifdef DEBUG_MODE_TCX
297 : fprintf( pF, "\t TCX Header: %d bits: %d %d %d %d\n", hBstr->nb_bits_tot - nbits_start, st->tcxonly, st->core, st->tcxonly ? st->clas : st->hTcxCfg->coder_type, st->hTcxCfg->tcx_curr_overlap_mode );
298 : #endif
299 :
300 : /*--------------------------------------------------------------*
301 : * Core Signal Analysis: MDCT, TNS, LPC analysis
302 : *---------------------------------------------------------------*/
303 :
304 469572 : core_signal_analysis_high_bitrate( p_new_samples, T_op, lsp_new, lsp_mid, st, NULL, tnsSize, tnsBits, param_core, <pBits, NULL, st->L_frame, hTcxEnc->L_frameTCX, last_element_mode, vad_hover_flag );
305 :
306 469572 : bitsAvailable = st->bits_frame_core - nbits_header;
307 469572 : if ( st->igf )
308 : {
309 423839 : bitsAvailable -= st->hIGFEnc->infoTotalBitsWritten;
310 : }
311 :
312 : /*--------------------------------------------------------------*
313 : * LPC Envelope Quantization and FDNS
314 : *---------------------------------------------------------------*/
315 :
316 469572 : if ( !st->enableTcxLpc )
317 : {
318 433294 : if ( st->envWeighted )
319 : {
320 : /* Unweight the envelope */
321 0 : E_LPC_lsp_unweight( st->lsp_old, st->lsp_old, st->lsf_old, 1.0f / st->gamma );
322 0 : st->envWeighted = 0;
323 : }
324 :
325 433294 : lpc_quantization( st, lsp_new, lsp_mid, lsp_q, lsf_q, lspmid_q, AUDIO, 0, /*No acelp->no need to compute any mid-LPC*/
326 : param_lpc, nbits_lpc, bits_param_lpc, &no_param_lpc );
327 :
328 : /*--------------------------------------------------------------*
329 : * Rate switching
330 : *--------------------------------------------------------------*/
331 :
332 433294 : if ( st->rate_switching_reset )
333 : {
334 2010 : mvr2r( lsp_q, st->lsp_old, M );
335 2010 : mvr2r( lsf_q, st->lsf_old, M );
336 : }
337 : }
338 :
339 469572 : st->acelp_cfg.midLpc = 0;
340 469572 : last_core_orig = st->last_core;
341 942723 : for ( n = 0; n < n_subframes; n++ )
342 : {
343 : /* Get the envelope */
344 473151 : if ( st->enableTcxLpc )
345 : {
346 36278 : tcx_lpc_cdk = tcxlpc_get_cdk( st->hTcxCfg->coder_type );
347 :
348 : /* Get the envelope corresponding to the current frame */
349 36278 : E_LPC_int_lpc_tcx( st->lspold_enc, lsp_new, A_q );
350 :
351 : /* Weight the envelope */
352 36278 : weight_a( A_q, A_q, st->gamma, M );
353 :
354 : /* Convert to lsp and lsf */
355 36278 : a2lsp_stab( A_q, lsp, lsp_new );
356 36278 : lsp2lsf( lsp, lsf, M, INT_FS_12k8 );
357 :
358 : /* Quantize */
359 36278 : Q_lsf_tcxlpc( lsf, lsf_tcx_q, lspq_ind, param_lpc, st->narrowBand, tcx_lpc_cdk, st->mem_MA, st->hTcxCfg->coder_type, st->Bin_E );
360 :
361 : /* Account for consumed bits */
362 36278 : nbits_lpc[0] = TCXLPC_NUMBITS;
363 36278 : if ( param_lpc[0] )
364 : {
365 6399 : nbits_lpc[0] += TCXLPC_IND_NUMBITS;
366 : }
367 :
368 : /* Convert quantized lsf to lsp and A */
369 36278 : lsf2lsp( lsf_tcx_q, lsp_tcx_q, M, INT_FS_12k8 );
370 36278 : lsp2a_stab( lsp_tcx_q, A_q, M );
371 : }
372 436873 : else if ( !st->tcxonly )
373 : {
374 228836 : E_LPC_int_lpc_tcx( st->lsp_old, lsp_q, A_q );
375 : }
376 208037 : else if ( n + 2 == n_subframes ) /* First TCX10/5 subframe */
377 : {
378 3579 : lsp2a_stab( lspmid_q, A_q, M );
379 : }
380 : else
381 : {
382 204458 : lsp2a_stab( lsp_q, A_q, M );
383 : }
384 :
385 473151 : if ( hTcxEnc->tcx_lpc_shaped_ari )
386 : {
387 36278 : basop_E_LPC_f_lsp_a_conversion( lspq_ind, A_q_ind, M );
388 : }
389 :
390 473151 : bitsAvailable -= nbits_lpc[n];
391 :
392 : /* Shape spectrum */
393 473151 : ShapeSpectrum( st->hTcxCfg, A_q, gainlpc[n], st->L_frame / n_subframes, st->hTcxCfg->tcx_coded_lines / n_subframes, hTcxEnc->spectrum[n], hTcxEnc->fUseTns[n], st, NULL );
394 :
395 473151 : st->last_core = st->core;
396 : }
397 :
398 469572 : st->last_core = last_core_orig;
399 :
400 : /*--------------------------------------------------------------------------------*
401 : * Write LPC parameters
402 : *--------------------------------------------------------------------------------*/
403 :
404 469572 : writeLPCparam( st, hBstr, param_lpc, bits_param_lpc, no_param_lpc, &total_nbbits );
405 :
406 469572 : assert( total_nbbits == ( nbits_lpc[0] + nbits_lpc[1] ) );
407 : #ifdef DEBUG_MODE_TCX
408 : fprintf( pF, "\t TCX LPC: %d bits\n", total_nbbits );
409 : #endif
410 :
411 : /*--------------------------------------------------------------*
412 : * Run TCX10/20 Core
413 : *---------------------------------------------------------------*/
414 :
415 469572 : hTcxEnc->measuredBwRatio = 1.f;
416 :
417 942723 : for ( n = 0; n < n_subframes; n++ )
418 : {
419 473151 : target_bits[n] = ( bitsAvailable + ( n_subframes - 1 ) - n ) / n_subframes - tnsBits[n];
420 :
421 473151 : if ( st->enablePlcWaveadjust && ( n == n_subframes - 1 ) )
422 : {
423 0 : target_bits[n] -= 1;
424 : }
425 473151 : if ( n == 0 )
426 : {
427 469572 : target_bits[n] -= ltpBits;
428 : }
429 :
430 : /* Run TCX20/10 encoder */
431 473151 : QuantizeSpectrum( st, A_q, A_q_ind, gainlpc[n], st->synth + n * st->L_frame / n_subframes, target_bits[n], tnsSize[n], param_core + n * NPRM_DIV, n, &hm_cfg[n], vad_hover_flag );
432 : }
433 :
434 : /* Update tcx overlap mode */
435 469572 : st->hTcxCfg->tcx_last_overlap_mode = st->hTcxCfg->tcx_curr_overlap_mode;
436 :
437 469572 : coder_tcx_post( st, A_q, Aw );
438 :
439 469572 : if ( st->enableTcxLpc )
440 : {
441 36278 : E_LPC_lsp_unweight( lsp_tcx_q, lsp_q, lsf_q, 1.0f / st->gamma ); /* Update lsf_q for encoderSideLossSimulation() */
442 : }
443 :
444 : /* Update lsp/lsf memory */
445 469572 : if ( st->enableTcxLpc && st->core != ACELP_CORE )
446 : {
447 : /* Update lsf / lsp memory */
448 36278 : mvr2r( lsf_tcx_q, st->lsf_old, M );
449 36278 : mvr2r( lsp_tcx_q, st->lsp_old, M );
450 36278 : st->envWeighted = 1;
451 :
452 : /* Update ACELP quantizer state */
453 36278 : lsf_update_memory( st->narrowBand, st->lsf_old, st->mem_MA, st->mem_MA );
454 36278 : st->pstreaklen = 0;
455 36278 : st->streaklimit = 1.0f;
456 : /* check resonance for pitch clipping algorithm */
457 36278 : gp_clip_test_lsf( st->element_mode, st->core_brate, st->lsf_old, st->clip_var, 0 );
458 36278 : mvr2r( st->lsf_old, st->mem_AR, M );
459 : }
460 : else
461 : {
462 433294 : mvr2r( lsf_q, st->lsf_old, M );
463 433294 : mvr2r( lsp_q, st->lsp_old, M );
464 : }
465 :
466 469572 : if ( st->Opt_DTX_ON && !st->tcxonly && st->hTdCngEnc != NULL )
467 : {
468 : /* update CNG parameters in active frames */
469 13291 : if ( st->bwidth == NB && st->enableTcxLpc && st->core != ACELP_CORE )
470 0 : {
471 : float buf[L_LP], res[L_FRAME], A[M + 1], r[M + 1], tmp, lsptmp[M];
472 :
473 0 : assert( st->L_frame == L_FRAME );
474 :
475 0 : mvr2r( st->synth + L_FRAME - L_LP, buf, L_LP );
476 0 : tmp = st->synth[L_FRAME - L_LP - 1];
477 0 : preemph( buf, st->preemph_fac, L_LP, &tmp );
478 0 : autocorr( buf, r, M, L_LP, LP_assym_window, 0, 0, 0 );
479 0 : lag_wind( r, M, INT_FS_12k8, LAGW_WEAK );
480 0 : lev_dur( A, r, M, NULL );
481 0 : a2lsp_stab( A, lsptmp, lsp_new );
482 :
483 0 : residu( A, M, buf + L_LP - L_FRAME, res, L_FRAME );
484 :
485 0 : cng_params_upd( lsptmp, res, st->L_frame, &st->hTdCngEnc->ho_circ_ptr, st->hTdCngEnc->ho_ener_circ, &st->hTdCngEnc->ho_circ_size, st->hTdCngEnc->ho_lsp_circ, ENC, st->hTdCngEnc->ho_env_circ, &st->hTdCngEnc->cng_buf_cnt, st->hTdCngEnc->cng_exc2_buf, st->hTdCngEnc->cng_brate_buf, st->hDtxEnc->last_active_brate, st->element_mode, st->hFdCngEnc->hFdCngCom->CngBandwidth );
486 : }
487 : else
488 : {
489 13291 : cng_params_upd( lsp_new, st->hLPDmem->old_exc + L_EXC_MEM - st->L_frame, st->L_frame, &st->hTdCngEnc->ho_circ_ptr, st->hTdCngEnc->ho_ener_circ, &st->hTdCngEnc->ho_circ_size, st->hTdCngEnc->ho_lsp_circ, ENC, st->hTdCngEnc->ho_env_circ, &st->hTdCngEnc->cng_buf_cnt, st->hTdCngEnc->cng_exc2_buf, st->hTdCngEnc->cng_brate_buf, st->hDtxEnc->last_active_brate, st->element_mode, st->hFdCngEnc->hFdCngCom->CngBandwidth );
490 : }
491 :
492 13291 : if ( st->L_frame == L_FRAME )
493 : {
494 : /* store LSPs@16k, potentially to be used in CNG@16k */
495 4184 : mvr2r( st->lsp_old16k, &( st->hTdCngEnc->ho_lsp_circ2[( st->hTdCngEnc->ho_circ_ptr ) * M] ), M );
496 : }
497 :
498 : /* Set 16k LSP flag for CNG buffer */
499 13291 : st->hTdCngEnc->ho_16k_lsp[st->hTdCngEnc->ho_circ_ptr] = ( st->L_frame == L_FRAME ? 0 : 1 );
500 :
501 : /* efficient DTX hangover control */
502 13291 : if ( st->hTdCngEnc->burst_ho_cnt > 1 )
503 : {
504 728 : dtx_hangover_control( st, lsp_new );
505 : }
506 : }
507 :
508 : #ifdef DEBUG_MODE_TCX
509 : {
510 : int16_t tmp[L_FRAME48k];
511 :
512 : for ( i = 0; i < st->L_frame; i++ )
513 : {
514 : tmp[i] = (int16_t) ( st->synth[i] + 0.5f );
515 : }
516 : dbgwrite( tmp, sizeof( int16_t ), st->L_frame, 1, "./res/stereo_tcx_enc_synth.pcm" );
517 :
518 : for ( i = 0; i < st->L_frame; i++ )
519 : {
520 : tmp[i] = (int16_t) ( st->speech_enc[i] + 0.5f );
521 : }
522 : dbgwrite( tmp, sizeof( int16_t ), st->L_frame, 1, "./res/stereo_tcx_enc_inLB.pcm" );
523 :
524 : for ( i = 0; i < hTcxEnc->L_frameTCX; i++ )
525 : {
526 : tmp[i] = (int16_t) ( st->hTcxEnc->speech_TCX[i] + 0.5f );
527 : }
528 : dbgwrite( tmp, sizeof( int16_t ), hTcxEnc->L_frameTCX, 1, "./res/stereo_tcx_enc_inFB.pcm" );
529 : }
530 : #endif
531 :
532 : /*--------------------------------------------------------------------------------*
533 : * Encode TCX20/10 parameters
534 : *--------------------------------------------------------------------------------*/
535 :
536 469572 : writeTCXparam( st, hBstr, hm_cfg, param_core, nbits_header, nbits_start, nbits_lpc[0] + nbits_lpc[1], NULL, NULL, NULL, -1 );
537 :
538 469572 : total_nbbits = hBstr->nb_bits_tot - nbits_start;
539 :
540 : #ifdef DEBUG_MODE_TCX
541 : {
542 : static FILE *sP = NULL;
543 :
544 : if ( sP == NULL )
545 : sP = fopen( "./res/stereo_tcx_core_enc_swicthes.txt", "w" );
546 :
547 : fprintf( sP, "frame:%d\t mdct_sw=%d\t rf_mode=%d tcxonly=%d\t tcxMode=%d\t core=%d\t, enableTcxLpc=%d\t igf=%d\t envWeighted=%d\t lpcQuantization=%d\t enablePlcWaveadjust=%d\t tcxltp=%d\t fIsTNSAllowed=%d\t tcx_lpc_shaped_ari=%d\t ctx_hm=%d\t \n", frame, st->mdct_sw, st->rf_mode, st->tcxonly, hTcxEnc->tcxMode, st->core, st->enableTcxLpc, st->igf, st->envWeighted, st->lpcQuantization, st->enablePlcWaveadjust, hTcxEnc->tcxltp, st->hTcxCfg->fIsTNSAllowed, hTcxEnc->tcx_lpc_shaped_ari, st->hTcxCfg->ctx_hm );
548 : }
549 : #endif
550 :
551 469572 : if ( param_core[1 + NOISE_FILL_RANGES] != 0 )
552 : {
553 219056 : set_f( pitch_buf, hTcxEnc->tcxltp_pitch_int + (float) hTcxEnc->tcxltp_pitch_fr / (float) st->pit_res_max, NB_SUBFR16k );
554 : }
555 : else
556 : {
557 250516 : set_f( pitch_buf, L_SUBFR, NB_SUBFR16k );
558 : }
559 :
560 469572 : pop_wmops();
561 469572 : return;
562 : }
563 :
564 :
565 : /*-------------------------------------------------------------------*
566 : * ivas_acelp_tcx20_switching()
567 : *
568 : * Open-loop ACELP/TCX20 core decision
569 : *-------------------------------------------------------------------*/
570 :
571 : /*! r: S/M decision (0=speech or noise,1=unclear,2=music) */
572 632675 : int16_t ivas_acelp_tcx20_switching(
573 : Encoder_State *st, /* i/o: encoder state structure */
574 : const float *inp, /* i : new input signal */
575 : const float *wsp, /* i : input weighted signal */
576 : const float non_staX, /* i : unbound non-stationarity for sp/mu clas.*/
577 : const float *pitch_fr, /* i : fraction pitch values */
578 : const float *voicing_fr, /* i : fractional voicing values */
579 : const float currFlatness, /* i : flatness */
580 : const float lsp_mid[M], /* i : LSPs at the middle of the frame */
581 : const float stab_fac, /* i : LP filter stability */
582 : float *res_cod_SNR_M,
583 : const int16_t flag_16k_smc /* i : flag to compute parameters with 16kHz core */
584 : )
585 : {
586 632675 : TCX_ENC_HANDLE hTcxEnc = st->hTcxEnc;
587 : int16_t i, j;
588 : float A_q_tcx[NB_SUBFR16k * ( M + 1 )];
589 : float dsnr, snr_tcx, snr_acelp;
590 : int16_t iter;
591 : float xn_buf[L_MDCT_OVLP_MAX + L_FRAME_PLUS + L_MDCT_OVLP_MAX];
592 : float window[L_LOOK_16k];
593 : float Ap[M + 1];
594 : float gainlpc[FDNS_NPTS];
595 : float en[N_MAX / 4];
596 : float sqGain, ener, tmp, fac, offset;
597 : int16_t L_frame, L_frame_tmp, L_loop;
598 : int16_t overlap;
599 : int16_t tcx_offset;
600 : float *x;
601 : float target;
602 : int16_t T0;
603 : float gain, noise, scale;
604 : float *pt_ener_sfr, ener_sfr[NB_SUBFR16k];
605 : float pitch_fr_local[4], voicing_fr_local[4];
606 : int16_t smc_dec_ol;
607 : float y[N_MAX];
608 :
609 : /* Initialization */
610 632675 : L_frame = flag_16k_smc ? st->L_frame : L_FRAME;
611 632675 : L_frame_tmp = L_frame;
612 :
613 632675 : x = hTcxEnc->spectrum_long;
614 :
615 : /* Check minimum pitch for quantization */
616 3163375 : for ( i = 0; i < 4; i++ )
617 : {
618 2530700 : pitch_fr_local[i] = pitch_fr[i];
619 2530700 : voicing_fr_local[i] = voicing_fr[i];
620 : }
621 :
622 632675 : lsp2a_stab( lsp_mid, A_q_tcx, M );
623 :
624 : /*--------------------------------------------------------------*
625 : * Estimate TCX SNR
626 : *---------------------------------------------------------------*/
627 :
628 632675 : target = 850.f;
629 632675 : if ( flag_16k_smc )
630 : {
631 31189 : tcx_offset = st->hTcxCfg->tcx_offset;
632 :
633 31189 : if ( st->last_core == ACELP_CORE )
634 : {
635 19598 : L_frame += tcx_offset;
636 :
637 19598 : if ( st->hTcxCfg->lfacNext < 0 )
638 : {
639 19598 : L_frame -= st->hTcxCfg->lfacNext;
640 19598 : tcx_offset = st->hTcxCfg->lfacNext;
641 : }
642 : else
643 : {
644 0 : tcx_offset = 0;
645 : }
646 : }
647 :
648 31189 : overlap = st->hTcxCfg->tcx_mdct_window_delay;
649 31189 : mvr2r( st->hTcxCfg->tcx_mdct_window, window, L_LOOK_16k );
650 : }
651 : else
652 : {
653 601486 : overlap = L_LOOK_12k8;
654 601486 : tcx_offset = ( overlap >> 1 );
655 601486 : mdct_window_sine( window, INT_FS_12k8, overlap, FULL_OVERLAP, st->element_mode );
656 :
657 601486 : if ( st->last_core == ACELP_CORE )
658 : {
659 165577 : L_frame += L_frame / 4;
660 165577 : tcx_offset -= L_frame / 4;
661 : }
662 : }
663 632675 : mvr2r( inp - ( overlap >> 1 ) + tcx_offset, xn_buf, L_frame + overlap );
664 :
665 632675 : if ( st->last_core == ACELP_CORE )
666 : {
667 185175 : if ( ( tcx_offset < 0 && flag_16k_smc ) || !flag_16k_smc )
668 : {
669 185175 : set_f( xn_buf, 0.0f, overlap >> 1 );
670 : }
671 : }
672 : else
673 : {
674 50892048 : for ( i = 0; i < overlap; i++ )
675 : {
676 50444548 : xn_buf[i] *= window[i];
677 : }
678 : }
679 :
680 72365567 : for ( i = 0; i < overlap; i++ )
681 : {
682 71732892 : xn_buf[L_frame + i] *= window[overlap - 1 - i];
683 : }
684 :
685 632675 : TCX_MDCT( xn_buf, x, overlap, L_frame - overlap, overlap, st->element_mode );
686 :
687 176758339 : for ( i = 0; i < L_frame; i++ )
688 : {
689 176125664 : x[i] *= (float) L_frame * inv_sqrt( 2 * NORM_MDCT_FACTOR );
690 176125664 : y[i] = x[i];
691 : }
692 :
693 632675 : weight_a( A_q_tcx, Ap, GAMMA1, M );
694 :
695 632675 : lpc2mdct( Ap, M, gainlpc, FDNS_NPTS, 0 );
696 :
697 632675 : mdct_preShaping( x, L_frame, gainlpc );
698 :
699 44664091 : for ( i = 0; i < L_frame; i += 4 )
700 : {
701 44031416 : ener = 0.01f + x[i] * x[i] + x[i + 1] * x[i + 1] + x[i + 2] * x[i + 2] + x[i + 3] * x[i + 3];
702 44031416 : en[i / 4] = 9.0f + 10.0f * log10f( ener );
703 : }
704 :
705 632675 : fac = 128.0f;
706 632675 : offset = fac;
707 :
708 6959425 : for ( iter = 0; iter < 10; iter++ )
709 : {
710 6326750 : fac *= 0.5f;
711 6326750 : offset -= fac;
712 6326750 : ener = 0.0f;
713 :
714 405047807 : for ( i = 0; i < L_frame / 4; i++ )
715 : {
716 401787598 : tmp = en[i] - offset;
717 :
718 401787598 : if ( tmp > 3.0f )
719 : {
720 330025616 : ener += tmp;
721 : }
722 :
723 401787598 : if ( ener > target )
724 : {
725 3066541 : offset += fac;
726 3066541 : break;
727 : }
728 : }
729 : }
730 :
731 632675 : if ( offset <= 32.f )
732 : {
733 47404 : offset = -128.f;
734 : }
735 :
736 632675 : sqGain = powf( 10.0f, offset / 20.0f );
737 632675 : ener = sqGain * sqGain / 12.f * sqrtf( 2.f ) / (float) L_frame;
738 :
739 632675 : if ( !flag_16k_smc )
740 : {
741 : const int16_t *bands;
742 601486 : const int16_t bands_20[8] = { 0, 5, 9, 19, 34, 51, 81, 111 };
743 601486 : const int16_t bands_25[8] = { 0, 4, 7, 15, 28, 40, 65, 89 };
744 : float nrg_s, nrg_n;
745 :
746 : /*Approximate SNR of TCX*/
747 601486 : set_f( x, sqrtf( ener ), L_frame );
748 601486 : mdct_noiseShaping( x, L_frame, gainlpc, FDNS_NPTS );
749 :
750 601486 : if ( st->last_core != ACELP_CORE )
751 : {
752 : /*25Hz resolution*/
753 435909 : bands = bands_25;
754 : }
755 : else
756 : {
757 : /*20Hz resolution*/
758 165577 : bands = bands_20;
759 : }
760 :
761 4811888 : for ( iter = 0; iter < 7; iter++ )
762 : {
763 4210402 : nrg_s = 1e-6f;
764 4210402 : nrg_n = 1e-6f;
765 61385350 : for ( i = bands[iter]; i < bands[iter + 1]; i++ )
766 : {
767 57174948 : nrg_s += y[i] * y[i];
768 57174948 : nrg_n += x[i] * x[i];
769 : }
770 4210402 : res_cod_SNR_M[iter] = nrg_s / nrg_n;
771 : }
772 : }
773 :
774 632675 : snr_tcx = 0.0f;
775 632675 : pt_ener_sfr = ener_sfr;
776 :
777 632675 : L_loop = flag_16k_smc ? L_frame_tmp : L_frame;
778 :
779 3360141 : for ( i = 0; i < L_loop; i += L_SUBFR )
780 : {
781 2727466 : *pt_ener_sfr = sum2_f( wsp + i, L_SUBFR ) + 1e-6f;
782 2727466 : snr_tcx += log10f( *pt_ener_sfr / ( ener * L_SUBFR ) );
783 2727466 : pt_ener_sfr++;
784 : }
785 632675 : snr_tcx *= ( (float) ( 10 * L_SUBFR ) ) / (float) L_loop;
786 :
787 :
788 : /*--------------------------------------------------------------*
789 : * Estimate ACELP SNR
790 : *---------------------------------------------------------------*/
791 :
792 632675 : if ( flag_16k_smc )
793 : {
794 31189 : scale = 0.092f;
795 : }
796 : else
797 : {
798 601486 : scale = 0.059f;
799 : }
800 :
801 632675 : snr_acelp = 0.0f;
802 632675 : fac = flag_16k_smc ? (float) st->sr_core / (float) INT_FS_12k8 : 1.0f;
803 632675 : L_loop = flag_16k_smc ? L_frame_tmp : L_FRAME;
804 :
805 632675 : pt_ener_sfr = ener_sfr;
806 3194564 : for ( i = 0; i < L_loop; i += L_SUBFR )
807 : {
808 2561889 : T0 = (int16_t) ( ( fac * pitch_fr_local[(int16_t) ( (float) ( i / L_SUBFR ) / fac + 0.5f )] ) + 0.5f );
809 2561889 : gain = get_gain( wsp + i, wsp + i - T0, L_SUBFR, NULL );
810 2561889 : noise = 1e-6f;
811 166522785 : for ( j = 0; j < L_SUBFR; j++ )
812 : {
813 163960896 : tmp = wsp[i + j] - gain * wsp[i + j - T0];
814 163960896 : noise += tmp * tmp;
815 : }
816 :
817 2561889 : noise *= scale;
818 2561889 : snr_acelp += log10f( *pt_ener_sfr / noise );
819 2561889 : pt_ener_sfr++;
820 : }
821 :
822 632675 : snr_acelp *= ( (float) ( 10 * L_SUBFR ) ) / (float) L_loop;
823 :
824 :
825 : /*--------------------------------------------------------------*
826 : * Switching Decision
827 : *---------------------------------------------------------------*/
828 :
829 632675 : dsnr = 0.0f;
830 : /* hysteresis for very small SNR differences between ACELP and TCX */
831 :
832 : /* try to use TCX instead of ACELP on temporally stationary frames */
833 632675 : if ( ( snr_acelp > snr_tcx ) && ( snr_acelp < snr_tcx + 2.0f ) &&
834 93137 : ( st->prevTempFlatness + currFlatness < 3.25f || stab_fac == 1.0f ||
835 27149 : ( !flag_16k_smc && st->sp_aud_decision0 > 0 && st->prevTempFlatness + currFlatness < 20.f ) ) &&
836 68777 : ( st->Nb_ACELP_frames <= 6 ) )
837 : {
838 30470 : dsnr = -2.0f;
839 : }
840 :
841 : /* try to use ACELP instead of TCX on transient and "buzzy" frames */
842 632675 : if ( ( snr_acelp < snr_tcx ) &&
843 437290 : ( snr_acelp > snr_tcx - 2.0f ) &&
844 119133 : ( st->prevTempFlatness + currFlatness > 3.25f ) &&
845 41914 : ( st->Nb_ACELP_frames >= 6 ) )
846 : {
847 3557 : dsnr = 2.0f;
848 : }
849 632675 : if ( ( !flag_16k_smc ) && ( offset < 74.0f ) && ( non_staX > 5.0f ) && ( snr_acelp >= snr_tcx - 4 ) && st->Nb_ACELP_frames >= 1 && ( ( ( st->hSpMusClas->lps > st->hSpMusClas->lpm ) && mean( voicing_fr_local, 4 ) >= 0.3f ) || ( st->Nb_ACELP_frames >= 6 && ( st->hSpMusClas->lps > st->hSpMusClas->lpm - 1.5f ) ) ) && ( st->sp_aud_decision0 == 0 ) && st->vad_flag )
850 : {
851 : /* Fine tuned across various databases based on various metrics to detect TCX frames in speech.*/
852 61030 : dsnr = 4.0f;
853 : }
854 :
855 632675 : if ( st->flag_noisy_speech_snr )
856 : {
857 :
858 155075 : if ( st->vad_flag || st->Opt_DTX_ON )
859 : {
860 120471 : dsnr += 2.f;
861 : }
862 : else
863 : {
864 34604 : dsnr -= 2.f;
865 : }
866 : }
867 :
868 : /* Select ACELP or TCX */
869 632675 : if ( ( snr_acelp + dsnr > snr_tcx ) && ( st->sp_aud_decision0 == 0 || st->prevTempFlatness + currFlatness > 3.25f ) )
870 : {
871 187177 : smc_dec_ol = 0;
872 : }
873 : else
874 : {
875 445498 : smc_dec_ol = 2;
876 : }
877 :
878 : #ifdef DEBUGGING
879 : if ( st->force == FORCE_SPEECH )
880 : {
881 : /* enforce ACELP */
882 : smc_dec_ol = 0;
883 : }
884 : else if ( st->force == FORCE_MUSIC )
885 : {
886 : /* enforce TCX */
887 : smc_dec_ol = 2;
888 : }
889 : #endif
890 :
891 632675 : st->prevTempFlatness = currFlatness;
892 :
893 632675 : return smc_dec_ol;
894 : }
|