Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include "options.h"
34 : #include <stdlib.h>
35 : #include "ivas_cnst.h"
36 : #include "ivas_prot_rend.h"
37 : #include "ivas_prot.h"
38 : #include "prot.h"
39 : #include "ivas_stat_rend.h"
40 : #include "ivas_rom_com.h"
41 : #ifdef DEBUGGING
42 : #include "debug.h"
43 : #endif
44 : #include "wmc_auto.h"
45 :
46 :
47 : /*-------------------------------------------------------------------------
48 : * Local function prototypes
49 : *------------------------------------------------------------------------*/
50 :
51 : static void ivas_dirac_param_est_ana( DIRAC_ANA_HANDLE hDirAC, float data_f[][L_FRAME48k], float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS], const int16_t input_frame );
52 :
53 : static void ivas_dirac_dmx( float data_in_f[][L_FRAME48k], const int16_t input_frame, const int16_t nchan_transport );
54 :
55 :
56 : /*--------------------------------------------------------------------------*
57 : * ivas_dirac_ana_open()
58 : *
59 : * Allocate and initialize DIRAC handle
60 : *--------------------------------------------------------------------------*/
61 :
62 1 : ivas_error ivas_dirac_ana_open(
63 : DIRAC_ANA_HANDLE *hDirACPtr, /* i/o: DIRAC data handle pointer */
64 : int32_t input_Fs /* i : Sampling frequency */
65 : )
66 : {
67 : int16_t i, j;
68 : DIRAC_ANA_HANDLE hDirAC;
69 : int16_t numAnalysisChannels;
70 : int16_t maxBin;
71 : ivas_error error;
72 :
73 1 : error = IVAS_ERR_OK;
74 :
75 1 : if ( ( hDirAC = (DIRAC_ANA_HANDLE) malloc( sizeof( DIRAC_ANA_DATA ) ) ) == NULL )
76 : {
77 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for DIRAC\n" ) );
78 : }
79 :
80 1 : numAnalysisChannels = FOA_CHANNELS;
81 :
82 : /* Determine the number of bands */
83 1 : hDirAC->nbands = MASA_FREQUENCY_BANDS;
84 :
85 : /* Determine band grouping */
86 1 : mvs2s( MASA_band_grouping_24, hDirAC->band_grouping, 24 + 1 );
87 :
88 1 : maxBin = (int16_t) ( input_Fs * INV_CLDFB_BANDWIDTH + 0.5f );
89 24 : for ( i = 1; i < hDirAC->nbands + 1; i++ )
90 : {
91 24 : if ( hDirAC->band_grouping[i] >= maxBin )
92 : {
93 1 : hDirAC->band_grouping[i] = maxBin;
94 1 : hDirAC->nbands = i;
95 1 : break;
96 : }
97 : }
98 :
99 : /* Determine block grouping */
100 1 : mvs2s( DirAC_block_grouping, hDirAC->block_grouping, MAX_PARAM_SPATIAL_SUBFRAMES + 1 );
101 :
102 : /* open/initialize CLDFB */
103 1 : hDirAC->num_Cldfb_instances = numAnalysisChannels;
104 5 : for ( i = 0; i < hDirAC->num_Cldfb_instances; i++ )
105 : {
106 4 : if ( ( error = openCldfb( &( hDirAC->cldfbAnaEnc[i] ), CLDFB_ANALYSIS, input_Fs, CLDFB_PROTOTYPE_5_00MS ) ) != IVAS_ERR_OK )
107 : {
108 0 : return error;
109 : }
110 : }
111 :
112 : /* intensity 3-dim */
113 4 : for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
114 : {
115 3 : if ( ( hDirAC->direction_vector_m[i] = (float **) malloc( MAX_PARAM_SPATIAL_SUBFRAMES * sizeof( float * ) ) ) == NULL )
116 : {
117 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA decoder\n" ) );
118 : }
119 :
120 15 : for ( j = 0; j < MAX_PARAM_SPATIAL_SUBFRAMES; j++ )
121 : {
122 12 : if ( ( hDirAC->direction_vector_m[i][j] = (float *) malloc( MASA_FREQUENCY_BANDS * sizeof( float ) ) ) == NULL )
123 : {
124 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA decoder\n" ) );
125 : }
126 12 : set_zero( hDirAC->direction_vector_m[i][j], MASA_FREQUENCY_BANDS );
127 : }
128 : }
129 :
130 4 : for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
131 : {
132 99 : for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ )
133 : {
134 96 : if ( ( hDirAC->buffer_intensity_real[i][j] = (float *) malloc( MASA_FREQUENCY_BANDS * sizeof( float ) ) ) == NULL )
135 : {
136 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA decoder\n" ) );
137 : }
138 96 : set_zero( hDirAC->buffer_intensity_real[i][j], MASA_FREQUENCY_BANDS );
139 : }
140 : }
141 :
142 1 : set_zero( hDirAC->buffer_energy, DIRAC_NO_COL_AVG_DIFF * MASA_FREQUENCY_BANDS );
143 :
144 1 : hDirAC->index_buffer_intensity = 0;
145 :
146 1 : if ( ( hDirAC->hMasaOut = (MASA_DECODER_EXT_OUT_META_HANDLE) malloc( sizeof( MASA_DECODER_EXT_OUT_META ) ) ) == NULL )
147 : {
148 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA decoder\n" ) );
149 : }
150 :
151 1 : if ( ( hDirAC->sph_grid16 = (SPHERICAL_GRID_DATA *) malloc( sizeof( SPHERICAL_GRID_DATA ) ) ) == NULL )
152 : {
153 0 : return ( IVAS_ERROR( IVAS_ERR_FAILED_ALLOC, "Can not allocate memory for MASA decoder\n" ) );
154 : }
155 1 : generate_gridEq( hDirAC->sph_grid16 );
156 :
157 1 : ( *hDirACPtr ) = hDirAC;
158 :
159 1 : return error;
160 : }
161 :
162 :
163 : /*--------------------------------------------------------------------------*
164 : * ivas_dirac_ana_close()
165 : *
166 : * Close DIRAC handle
167 : *--------------------------------------------------------------------------*/
168 :
169 666 : void ivas_dirac_ana_close(
170 : DIRAC_ANA_HANDLE( *hDirAC ) /* i/o: analysis DIRAC handle */
171 : )
172 : {
173 : int16_t i, j;
174 :
175 666 : if ( hDirAC == NULL || *hDirAC == NULL )
176 : {
177 665 : return;
178 : }
179 :
180 5 : for ( i = 0; i < ( *hDirAC )->num_Cldfb_instances; i++ )
181 : {
182 4 : deleteCldfb( &( ( *hDirAC )->cldfbAnaEnc[i] ) );
183 : }
184 :
185 4 : for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
186 : {
187 15 : for ( j = 0; j < MAX_PARAM_SPATIAL_SUBFRAMES; j++ )
188 : {
189 12 : free( ( *hDirAC )->direction_vector_m[i][j] );
190 12 : ( *hDirAC )->direction_vector_m[i][j] = NULL;
191 : }
192 :
193 99 : for ( j = 0; j < DIRAC_NO_COL_AVG_DIFF; j++ )
194 : {
195 96 : free( ( *hDirAC )->buffer_intensity_real[i][j] );
196 96 : ( *hDirAC )->buffer_intensity_real[i][j] = NULL;
197 : }
198 :
199 3 : free( ( *hDirAC )->direction_vector_m[i] );
200 3 : ( *hDirAC )->direction_vector_m[i] = NULL;
201 : }
202 :
203 1 : free( ( *hDirAC )->hMasaOut );
204 1 : ( *hDirAC )->hMasaOut = NULL;
205 1 : free( ( *hDirAC )->sph_grid16 );
206 1 : ( *hDirAC )->sph_grid16 = NULL;
207 :
208 1 : free( ( *hDirAC ) );
209 1 : ( *hDirAC ) = NULL;
210 :
211 1 : return;
212 : }
213 :
214 :
215 : /*--------------------------------------------------------------------------*
216 : * ivas_dirac_ana()
217 : *
218 : * DIRAC analysis function
219 : *--------------------------------------------------------------------------*/
220 :
221 151 : void ivas_dirac_ana(
222 : DIRAC_ANA_HANDLE hDirAC, /* i/o: DIRAC analysis handle */
223 : float data_in_f[][L_FRAME48k], /* i/o: Input / transport audio signals */
224 : const int16_t input_frame, /* i : Input frame size */
225 : const int16_t nchan_transport /* i : Number of transport channels */
226 : )
227 : {
228 : float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
229 : float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
230 : float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
231 : float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
232 : float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS];
233 :
234 : /* Estimate MASA parameters from the SBA signals */
235 151 : ivas_dirac_param_est_ana( hDirAC, data_in_f, elevation_m_values, azimuth_m_values, energyRatio, spreadCoherence, surroundingCoherence, input_frame );
236 :
237 : /* Add zeros to higher bands in case of lower sampling rates */
238 151 : if ( hDirAC->nbands < MASA_FREQUENCY_BANDS )
239 : {
240 0 : ivas_masa_zero_high_bands( hDirAC->nbands, elevation_m_values, azimuth_m_values, energyRatio, spreadCoherence, surroundingCoherence );
241 : }
242 :
243 : /* Create MASA metadata buffer from the estimated values */
244 151 : ivas_create_masa_out_meta( hDirAC->hMasaOut, hDirAC->sph_grid16, nchan_transport, elevation_m_values, azimuth_m_values, energyRatio, spreadCoherence, surroundingCoherence );
245 :
246 : /* Downmix */
247 151 : ivas_dirac_dmx( data_in_f, input_frame, nchan_transport );
248 :
249 151 : return;
250 : }
251 :
252 :
253 : /*--------------------------------------------------------------------------*
254 : * Local functions
255 : *--------------------------------------------------------------------------*/
256 :
257 : /* Estimate MASA parameters from the SBA signals */
258 151 : static void ivas_dirac_param_est_ana(
259 : DIRAC_ANA_HANDLE hDirAC,
260 : float data_f[][L_FRAME48k],
261 : float elevation_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],
262 : float azimuth_m_values[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],
263 : float energyRatio[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],
264 : float spreadCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],
265 : float surroundingCoherence[MAX_PARAM_SPATIAL_SUBFRAMES][MASA_FREQUENCY_BANDS],
266 : const int16_t input_frame )
267 : {
268 : float reference_power[CLDFB_NO_COL_MAX][CLDFB_NO_CHANNELS_MAX];
269 : int16_t ts, i, d, j;
270 : int16_t num_freq_bands, index;
271 : float dir_v[DIRAC_NUM_DIMS];
272 : int16_t l_ts;
273 : float Foa_RealBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX];
274 : float Foa_ImagBuffer[FOA_CHANNELS][CLDFB_NO_CHANNELS_MAX];
275 : float intensity_real[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS];
276 : float direction_vector[DIRAC_NUM_DIMS][MASA_FREQUENCY_BANDS];
277 : float diffuseness_vector[MASA_FREQUENCY_BANDS];
278 : float diffuseness_m[MASA_FREQUENCY_BANDS];
279 :
280 : int16_t band_m_idx, block_m_idx;
281 : float renormalization_factor_diff[MASA_FREQUENCY_BANDS];
282 : float norm_tmp;
283 : int16_t mrange[2];
284 : int16_t brange[2];
285 : int16_t numAnalysisChannels;
286 :
287 151 : num_freq_bands = hDirAC->nbands;
288 151 : l_ts = input_frame / CLDFB_NO_COL_MAX;
289 151 : numAnalysisChannels = FOA_CHANNELS;
290 :
291 :
292 : /* do processing over all CLDFB time slots */
293 755 : for ( block_m_idx = 0; block_m_idx < MAX_PARAM_SPATIAL_SUBFRAMES; block_m_idx++ )
294 : {
295 604 : mrange[0] = hDirAC->block_grouping[block_m_idx];
296 604 : mrange[1] = hDirAC->block_grouping[block_m_idx + 1];
297 :
298 15100 : for ( band_m_idx = 0; band_m_idx < hDirAC->nbands; band_m_idx++ )
299 : {
300 14496 : hDirAC->direction_vector_m[0][block_m_idx][band_m_idx] = 0.0f;
301 14496 : hDirAC->direction_vector_m[1][block_m_idx][band_m_idx] = 0.0f;
302 14496 : hDirAC->direction_vector_m[2][block_m_idx][band_m_idx] = 0.0f;
303 : }
304 :
305 : /* Need to initialize renormalization_factors, and variables to be normalized */
306 604 : set_zero( renormalization_factor_diff, hDirAC->nbands );
307 604 : set_zero( diffuseness_m, hDirAC->nbands );
308 604 : set_zero( hDirAC->energy[block_m_idx], MASA_FREQUENCY_BANDS );
309 :
310 3020 : for ( ts = mrange[0]; ts < mrange[1]; ts++ )
311 : {
312 12080 : for ( i = 0; i < numAnalysisChannels; i++ )
313 : {
314 9664 : cldfbAnalysis_ts( &( data_f[i][l_ts * ts] ), Foa_RealBuffer[i], Foa_ImagBuffer[i], l_ts, hDirAC->cldfbAnaEnc[i] );
315 : }
316 :
317 : /* Compute omni energy for metadata processing */
318 60400 : for ( band_m_idx = 0; band_m_idx < num_freq_bands; band_m_idx++ )
319 : {
320 57984 : brange[0] = hDirAC->band_grouping[band_m_idx];
321 57984 : brange[1] = hDirAC->band_grouping[band_m_idx + 1];
322 202944 : for ( j = brange[0]; j < brange[1]; j++ )
323 : {
324 144960 : hDirAC->energy[block_m_idx][band_m_idx] += Foa_RealBuffer[0][j] * Foa_RealBuffer[0][j] + Foa_ImagBuffer[0][j] * Foa_ImagBuffer[0][j];
325 : }
326 : }
327 :
328 : /* Direction estimation */
329 2416 : computeIntensityVector_ana( hDirAC->band_grouping, Foa_RealBuffer, Foa_ImagBuffer, num_freq_bands, intensity_real );
330 2416 : computeDirectionVectors( intensity_real[0], intensity_real[1], intensity_real[2], 0, num_freq_bands, direction_vector[0], direction_vector[1], direction_vector[2] );
331 :
332 : /* Power estimation for diffuseness */
333 2416 : computeReferencePower_ana( hDirAC->band_grouping, Foa_RealBuffer, Foa_ImagBuffer, reference_power[ts], num_freq_bands );
334 :
335 : /* Fill buffers of length "averaging_length" time slots for intensity and energy */
336 2416 : hDirAC->index_buffer_intensity = ( hDirAC->index_buffer_intensity % DIRAC_NO_COL_AVG_DIFF ) + 1; /* averaging_length = 32 */
337 2416 : index = hDirAC->index_buffer_intensity;
338 9664 : for ( i = 0; i < DIRAC_NUM_DIMS; i++ )
339 : {
340 : /* only real part needed */
341 7248 : mvr2r( intensity_real[i], &( hDirAC->buffer_intensity_real[i][index - 1][0] ), num_freq_bands );
342 : }
343 2416 : mvr2r( reference_power[ts], &( hDirAC->buffer_energy[( index - 1 ) * num_freq_bands] ), num_freq_bands );
344 :
345 2416 : computeDiffuseness( hDirAC->buffer_intensity_real, hDirAC->buffer_energy, num_freq_bands, diffuseness_vector );
346 :
347 60400 : for ( band_m_idx = 0; band_m_idx < hDirAC->nbands; band_m_idx++ )
348 : {
349 57984 : norm_tmp = reference_power[ts][band_m_idx] * ( 1 - diffuseness_vector[band_m_idx] );
350 :
351 57984 : hDirAC->direction_vector_m[0][block_m_idx][band_m_idx] += norm_tmp * direction_vector[0][band_m_idx];
352 57984 : hDirAC->direction_vector_m[1][block_m_idx][band_m_idx] += norm_tmp * direction_vector[1][band_m_idx];
353 57984 : hDirAC->direction_vector_m[2][block_m_idx][band_m_idx] += norm_tmp * direction_vector[2][band_m_idx];
354 :
355 57984 : diffuseness_m[band_m_idx] += reference_power[ts][band_m_idx] * diffuseness_vector[band_m_idx];
356 57984 : renormalization_factor_diff[band_m_idx] += reference_power[ts][band_m_idx];
357 : }
358 : }
359 :
360 15100 : for ( band_m_idx = 0; band_m_idx < hDirAC->nbands; band_m_idx++ )
361 : {
362 57984 : for ( d = 0; d < DIRAC_NUM_DIMS; d++ )
363 : {
364 43488 : dir_v[d] = hDirAC->direction_vector_m[d][block_m_idx][band_m_idx];
365 : }
366 14496 : ivas_qmetadata_direction_vector_to_azimuth_elevation( dir_v, &azimuth_m_values[block_m_idx][band_m_idx], &elevation_m_values[block_m_idx][band_m_idx] );
367 : }
368 :
369 : /* Determine energy ratios */
370 15100 : for ( band_m_idx = 0; band_m_idx < hDirAC->nbands; band_m_idx++ )
371 : {
372 14496 : if ( renormalization_factor_diff[band_m_idx] > EPSILON )
373 : {
374 14472 : diffuseness_m[band_m_idx] /= renormalization_factor_diff[band_m_idx];
375 : }
376 : else
377 : {
378 24 : diffuseness_m[band_m_idx] = 0.0f;
379 : }
380 :
381 14496 : energyRatio[block_m_idx][band_m_idx] = 1.0f - diffuseness_m[band_m_idx];
382 : }
383 :
384 15100 : for ( band_m_idx = 0; band_m_idx < hDirAC->nbands; band_m_idx++ )
385 : {
386 14496 : spreadCoherence[block_m_idx][band_m_idx] = 0.0f;
387 14496 : surroundingCoherence[block_m_idx][band_m_idx] = 0.0f;
388 : }
389 : }
390 :
391 151 : return;
392 : }
393 :
394 :
395 : /* Compute downmix */
396 151 : static void ivas_dirac_dmx(
397 : float data_in_f[][L_FRAME48k],
398 : const int16_t input_frame,
399 : const int16_t nchan_transport )
400 : {
401 : int16_t i;
402 : float data_out_f[MASA_MAX_TRANSPORT_CHANNELS][L_FRAME48k];
403 :
404 151 : if ( nchan_transport == 2 )
405 : {
406 151 : v_add( data_in_f[0], data_in_f[1], data_out_f[0], input_frame );
407 151 : v_multc( data_out_f[0], 0.5f, data_out_f[0], input_frame );
408 :
409 151 : v_sub( data_in_f[0], data_in_f[1], data_out_f[1], input_frame );
410 151 : v_multc( data_out_f[1], 0.5f, data_out_f[1], input_frame );
411 :
412 453 : for ( i = 0; i < nchan_transport; i++ )
413 : {
414 302 : mvr2r( data_out_f[i], data_in_f[i], input_frame );
415 : }
416 : }
417 :
418 151 : return;
419 : }
|