Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <stdint.h>
34 : #include <math.h>
35 : #include "options.h"
36 : #include "ivas_cnst.h"
37 : #include "prot.h"
38 : #include "ivas_prot.h"
39 : #ifdef DEBUGGING
40 : #include "debug.h"
41 : #endif
42 : #include "wmc_auto.h"
43 :
44 :
45 : /*-----------------------------------------------------------------------*
46 : * Local constants
47 : *-----------------------------------------------------------------------*/
48 :
49 : #define MD_MAX_DIFF_AZIMUTH 10
50 : #define MD_MAX_DIFF_ELEVATION 10
51 :
52 :
53 : /*-------------------------------------------------------------------*
54 : * ivas_ism_dtx_open()
55 : *
56 : * Open ISM DTX handle
57 : *-------------------------------------------------------------------*/
58 :
59 14 : ivas_error ivas_ism_dtx_open(
60 : Encoder_Struct *st_ivas /* i/o: IVAS encoder structure */
61 : )
62 : {
63 : ivas_error error;
64 : ISM_DTX_HANDLE hISMDTX;
65 : int16_t i;
66 :
67 14 : error = IVAS_ERR_OK;
68 :
69 : /* Assign memory to DirAC handle */
70 14 : if ( ( hISMDTX = (ISM_DTX_HANDLE) malloc( sizeof( ISM_DTX_DATA ) ) ) == NULL )
71 : {
72 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for ISM DTX Handle \n" ) );
73 : }
74 :
75 14 : hISMDTX->dtx_flag = 0;
76 14 : hISMDTX->sce_id_dtx = 0;
77 14 : hISMDTX->cnt_SID_ISM = -1;
78 :
79 70 : for ( i = 0; i < MAX_NUM_OBJECTS; i++ )
80 : {
81 56 : set_f( hISMDTX->long_term_energy_stereo_dmx_enc[i], 0.0f, PARAM_ISM_HYS_BUF_SIZE );
82 : }
83 :
84 14 : set_f( hISMDTX->coh, 0.0f, MAX_NUM_OBJECTS );
85 :
86 14 : st_ivas->hISMDTX = hISMDTX;
87 :
88 14 : return error;
89 : }
90 :
91 :
92 : /*-------------------------------------------------------------------*
93 : * ivas_ism_get_dtx_enc()
94 : *
95 : * Analysis and decision about DTX in ISM format
96 : *-------------------------------------------------------------------*/
97 :
98 : /*! r: indication of DTX frame */
99 14826 : int16_t ivas_ism_dtx_enc(
100 : ISM_DTX_HANDLE hISMDTX, /* i/o: ISM DTX handle */
101 : SCE_ENC_HANDLE hSCE[MAX_SCE], /* i/o: SCE encoder structure */
102 : const int32_t ivas_total_brate, /* i : IVAS total bitrate */
103 : const int16_t nchan_ism, /* i : number of objects */
104 : const int16_t nchan_transport, /* i : number of transport channels */
105 : int16_t vad_flag[MAX_NUM_OBJECTS], /* i : VAD flag */
106 : ISM_METADATA_HANDLE hIsmMeta[], /* i/o: ISM metadata handles */
107 : int16_t md_diff_flag[], /* o : metadata differential flag */
108 : int16_t *sid_flag /* o : indication of SID frame */
109 : )
110 : {
111 : int16_t ch, dtx_flag;
112 : int16_t nBits, nBits_MD_max;
113 : int16_t nBits_azimuth, nBits_elevation, nBits_coh, nBits_sce_id;
114 : float lp_noise[MAX_NUM_OBJECTS], lp_noise_variation, lp_noise_mean;
115 : float lp_noise_max;
116 : float tmp1, tmp2;
117 :
118 : /* initialization */
119 52942 : for ( ch = 0; ch < nchan_transport; ch++ )
120 : {
121 38116 : hSCE[ch]->hCoreCoder[0]->low_rate_mode = 0;
122 : }
123 :
124 : /*------------------------------------------------------------------*
125 : * compute global ISM DTX flag
126 : *-----------------------------------------------------------------*/
127 :
128 : /* compute global ISM based on localVAD */
129 14826 : dtx_flag = 1;
130 52942 : for ( ch = 0; ch < nchan_transport; ch++ )
131 : {
132 38116 : dtx_flag &= !vad_flag[ch];
133 : }
134 :
135 : /* compute global ISM based on long-term background noise */
136 : /* one of the channels is active -> no DTX */
137 52942 : for ( ch = 0; ch < nchan_transport; ch++ )
138 : {
139 38116 : lp_noise[ch] = hSCE[ch]->hCoreCoder[0]->lp_noise;
140 : }
141 :
142 14826 : lp_noise_variation = var( lp_noise, nchan_transport );
143 14826 : lp_noise_mean = mean( lp_noise, nchan_transport );
144 :
145 14826 : if ( lp_noise_mean > 50 || ( lp_noise_mean > 25 && lp_noise_variation > 32 ) )
146 : {
147 134 : dtx_flag = 0;
148 : }
149 :
150 :
151 : /* default DTX is applied at lower bitrates; otherwise DTX is applied only in silence */
152 14826 : maximum( lp_noise, nchan_transport, &lp_noise_max );
153 :
154 14826 : if ( !( ( nchan_ism == 1 && ivas_total_brate <= IVAS_24k4 ) ||
155 13476 : ( nchan_ism == 2 && ivas_total_brate <= IVAS_48k ) ||
156 11276 : ( nchan_ism == 3 && ivas_total_brate <= IVAS_80k ) ||
157 7000 : ( nchan_ism == 4 && ivas_total_brate <= IVAS_96k ) ||
158 4916 : lp_noise_max < DTX_THR ) )
159 : {
160 3960 : dtx_flag = 0;
161 : }
162 :
163 : /*------------------------------------------------------------------*
164 : * Reset the bitstream
165 : *-----------------------------------------------------------------*/
166 :
167 14826 : if ( dtx_flag )
168 : {
169 : /* reset the bitstream (IVAS format signaling was already written) */
170 2483 : reset_indices_enc( hSCE[0]->hCoreCoder[0]->hBstr, hSCE[0]->hCoreCoder[0]->hBstr->nb_ind_tot );
171 : }
172 :
173 : /*------------------------------------------------------------------*
174 : * decide about SID metadata to be sent or not (per object)
175 : * estimate the MD bit-budget consumption
176 : *-----------------------------------------------------------------*/
177 :
178 14826 : if ( dtx_flag )
179 : {
180 2483 : ivas_get_ism_sid_quan_bitbudget( nchan_ism, &nBits_azimuth, &nBits_elevation, &tmp1, &tmp2, &nBits_coh, &nBits_sce_id );
181 :
182 2483 : nBits = 0;
183 9772 : for ( ch = 0; ch < nchan_ism; ch++ )
184 : {
185 : /* check difference between current and last metadata */
186 7289 : md_diff_flag[ch] = 0;
187 7289 : if ( fabsf( hIsmMeta[ch]->azimuth - hIsmMeta[ch]->last_azimuth ) > MD_MAX_DIFF_AZIMUTH )
188 : {
189 3575 : md_diff_flag[ch] = 1;
190 : }
191 :
192 7289 : if ( fabsf( hIsmMeta[ch]->elevation - hIsmMeta[ch]->last_elevation ) > MD_MAX_DIFF_ELEVATION )
193 : {
194 1634 : md_diff_flag[ch] = 1;
195 : }
196 :
197 : /* estimate SID metadata bit-budget */
198 7289 : nBits++; /* number of objects */
199 7289 : nBits++; /* SID metadata flag */
200 7289 : if ( md_diff_flag[ch] == 1 )
201 : {
202 3958 : nBits += nBits_azimuth;
203 3958 : nBits += nBits_elevation;
204 : }
205 : }
206 :
207 : /* calculate maximum available MD bit-budget */
208 2483 : nBits_MD_max = ( IVAS_SID_5k2 - SID_2k40 ) / FRAMES_PER_SEC;
209 2483 : nBits_MD_max -= SID_FORMAT_NBITS;
210 2483 : if ( nchan_transport > 1 )
211 : {
212 2116 : nBits_MD_max -= nBits_sce_id;
213 : }
214 :
215 6455 : for ( ch = 0; ch < nchan_transport - 1; ch++ )
216 : {
217 3972 : nBits_MD_max -= nBits_coh; /* coherence */
218 : }
219 :
220 2483 : if ( nchan_ism > 3 )
221 : {
222 1345 : nBits_MD_max--; /* ism_mode flag */
223 : }
224 :
225 : /* too many metadata bits -> switch to active coding */
226 2483 : if ( nBits > nBits_MD_max )
227 : {
228 330 : dtx_flag = 0;
229 : }
230 : }
231 :
232 : /*------------------------------------------------------------------*
233 : * set core_brate for all channels
234 : * get 'sid_flag' value
235 : *-----------------------------------------------------------------*/
236 :
237 14826 : *sid_flag = 0;
238 :
239 14826 : if ( !dtx_flag )
240 : {
241 : /* at least one of the channels is active -> no DTX */
242 45506 : for ( ch = 0; ch < nchan_transport; ch++ )
243 : {
244 32833 : hSCE[ch]->hCoreCoder[0]->core_brate = -1;
245 32833 : set_bw( IVAS_SCE, hSCE[ch]->element_brate, hSCE[ch]->hCoreCoder[0], MODE1 );
246 : }
247 :
248 12673 : hISMDTX->cnt_SID_ISM = -1;
249 :
250 : /* IVAS format signaling was erased in dtx() */
251 12673 : if ( hSCE[0]->hCoreCoder[0]->hBstr->nb_bits_tot == 0 )
252 : {
253 : /* replicate ivas_write_format() */
254 1883 : int16_t ind = 2;
255 1883 : nBits = IVAS_FORMAT_SIGNALING_NBITS;
256 1883 : if ( ivas_total_brate >= IVAS_24k4 )
257 : {
258 1883 : ind = 4;
259 1883 : nBits = IVAS_FORMAT_SIGNALING_NBITS_EXTENDED;
260 : }
261 :
262 1883 : push_indice( hSCE[0]->hCoreCoder[0]->hBstr, IND_IVAS_FORMAT, ind, nBits );
263 : }
264 : }
265 : else /* ism_dtx_flag == 1 */
266 : {
267 7436 : for ( ch = 0; ch < nchan_transport; ch++ )
268 : {
269 5283 : hSCE[ch]->hCoreCoder[0]->cng_type = FD_CNG;
270 : }
271 :
272 : /* * update the global SID counter */
273 2153 : hISMDTX->cnt_SID_ISM++;
274 2153 : if ( hISMDTX->cnt_SID_ISM >= hSCE[0]->hCoreCoder[0]->hDtxEnc->max_SID )
275 : {
276 : /* adaptive SID update interval */
277 145 : hSCE[0]->hCoreCoder[0]->hDtxEnc->max_SID = hSCE[0]->hCoreCoder[0]->hDtxEnc->interval_SID;
278 145 : hISMDTX->cnt_SID_ISM = 0;
279 : }
280 :
281 : /* encode SID in one channel only */
282 7436 : for ( ch = 0; ch < nchan_transport; ch++ )
283 : {
284 5283 : hSCE[ch]->hCoreCoder[0]->core_brate = FRAME_NO_DATA;
285 : }
286 :
287 2153 : if ( hISMDTX->cnt_SID_ISM == 0 )
288 : {
289 555 : hSCE[hISMDTX->sce_id_dtx]->hCoreCoder[0]->core_brate = SID_2k40;
290 555 : *sid_flag = 1;
291 : }
292 : }
293 :
294 14826 : if ( dtx_flag == 1 && *sid_flag == 0 )
295 : {
296 1598 : set_s( md_diff_flag, 0, nchan_transport );
297 : }
298 :
299 14826 : return dtx_flag;
300 : }
301 :
302 : /*-------------------------------------------------------------------*
303 : * ivas_ism_get_sce_id_dtx()
304 : *
305 : *
306 : *-------------------------------------------------------------------*/
307 :
308 14826 : void ivas_ism_get_sce_id_dtx(
309 : ISM_DTX_HANDLE hISMDTX, /* i/o: ISM DTX handle */
310 : SCE_ENC_HANDLE hSCE[MAX_SCE], /* i/o: SCE encoder structure */
311 : const int16_t nchan_transport, /* i : number of transport channels */
312 : const int16_t input_frame /* i : input frame length per channel */
313 : )
314 : {
315 : float tmp_energy[MAX_NUM_OBJECTS];
316 : int16_t i, j;
317 :
318 14826 : if ( nchan_transport == 1 )
319 : {
320 3026 : hISMDTX->sce_id_dtx = 0;
321 :
322 3026 : return;
323 : }
324 :
325 : /* Initialize*/
326 11800 : set_f( tmp_energy, 0.0f, MAX_NUM_OBJECTS );
327 :
328 : /* compute long term energy parameter */
329 46890 : for ( j = 0; j < nchan_transport; j++ )
330 : {
331 350900 : for ( i = 0; i < ( PARAM_ISM_HYS_BUF_SIZE - 1 ); i++ )
332 : {
333 315810 : hISMDTX->long_term_energy_stereo_dmx_enc[j][i] = hISMDTX->long_term_energy_stereo_dmx_enc[j][i + 1];
334 : }
335 :
336 35090 : hISMDTX->long_term_energy_stereo_dmx_enc[j][PARAM_ISM_HYS_BUF_SIZE - 1] = sum2_f( hSCE[j]->hCoreCoder[0]->input, input_frame );
337 :
338 35090 : tmp_energy[j] = sum_f( hISMDTX->long_term_energy_stereo_dmx_enc[j], PARAM_ISM_HYS_BUF_SIZE );
339 : }
340 :
341 : /* determine the sce_id */
342 11800 : hISMDTX->sce_id_dtx = 0;
343 35090 : for ( j = 1; j < nchan_transport; j++ )
344 : {
345 23290 : if ( tmp_energy[j] > tmp_energy[hISMDTX->sce_id_dtx] )
346 : {
347 12118 : hISMDTX->sce_id_dtx = j;
348 : }
349 : }
350 :
351 11800 : return;
352 : }
353 :
354 :
355 : /*-------------------------------------------------------------------*
356 : * ivas_ism_coh_estim_dtx_enc()
357 : *
358 : *
359 : *-------------------------------------------------------------------*/
360 :
361 555 : void ivas_ism_coh_estim_dtx_enc(
362 : ISM_DTX_HANDLE hISMDTX, /* i/o: ISM DTX handle */
363 : SCE_ENC_HANDLE hSCE[MAX_SCE], /* i/o: SCE encoder structure */
364 : const int16_t nchan_transport, /* i : number of transport channels */
365 : const int16_t input_frame /* i : input frame length */
366 :
367 : )
368 : {
369 : Encoder_State *st, *st_id0;
370 : int16_t sce_id, i;
371 : float acorr_ene[MAX_NUM_OBJECTS], xcorr_ene;
372 :
373 555 : if ( nchan_transport == 1 )
374 : {
375 55 : hISMDTX->coh[0] = 0.f;
376 55 : return;
377 : }
378 :
379 : /* Compute Coherence */
380 500 : acorr_ene[hISMDTX->sce_id_dtx] = 0.0f;
381 500 : st_id0 = hSCE[hISMDTX->sce_id_dtx]->hCoreCoder[0];
382 :
383 480500 : for ( i = 0; i < input_frame; i++ )
384 : {
385 480000 : acorr_ene[hISMDTX->sce_id_dtx] += st_id0->input[i] * st_id0->input[i];
386 : }
387 :
388 2064 : for ( sce_id = 0; sce_id < nchan_transport; sce_id++ )
389 : {
390 1564 : if ( sce_id == hISMDTX->sce_id_dtx )
391 : {
392 500 : hISMDTX->coh[sce_id] = 1.0f;
393 500 : continue;
394 : }
395 :
396 1064 : st = hSCE[sce_id]->hCoreCoder[0];
397 :
398 1064 : acorr_ene[sce_id] = 0.0f;
399 1064 : xcorr_ene = 0.0f;
400 :
401 1022504 : for ( i = 0; i < input_frame; i++ )
402 : {
403 1021440 : acorr_ene[sce_id] += st->input[i] * st->input[i];
404 1021440 : xcorr_ene += st_id0->input[i] * st->input[i];
405 : }
406 :
407 1064 : hISMDTX->coh[sce_id] = fabsf( xcorr_ene ) / ( sqrtf( ( acorr_ene[hISMDTX->sce_id_dtx] * acorr_ene[sce_id] ) + EPSILON ) );
408 :
409 : /* ensure value of coherence is between [0,1] */
410 1064 : hISMDTX->coh[sce_id] = check_bounds( hISMDTX->coh[sce_id], 0.0f, 1.0f );
411 : }
412 :
413 500 : return;
414 : }
|