Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2026 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : #include <assert.h>
34 : #include <stdint.h>
35 : #include "options.h"
36 : #ifdef DEBUGGING
37 : #include "debug.h"
38 : #endif
39 : #include <math.h>
40 : #include "prot.h"
41 : #include "ivas_prot.h"
42 : #include "wmc_auto.h"
43 : #include "ivas_rom_com.h"
44 : #include "basop_settings.h"
45 :
46 : /*---------------------------------------------------------------
47 : * sumAbs()
48 : *
49 : * sum of absolute values
50 : * ---------------------------------------------------------------*/
51 :
52 : /*! r: sum abs of all vector elements */
53 124708 : float sumAbs(
54 : const float *vec, /* i : input vector */
55 : const int16_t lvec /* i : length of input vector */
56 : )
57 : {
58 : int16_t i;
59 : float tmp;
60 :
61 124708 : tmp = 0.0f;
62 10101348 : for ( i = 0; i < lvec; i++ )
63 : {
64 9976640 : tmp += fabsf( vec[i] );
65 : }
66 :
67 124708 : return tmp;
68 : }
69 :
70 : /*---------------------------------------------------------------------*
71 : * mvc2c()
72 : *
73 : * Transfer the contents of vector x[] to vector y[] in char format
74 : *---------------------------------------------------------------------*/
75 :
76 217929520 : void mvc2c(
77 : const uint8_t x[], /* i : input vector */
78 : uint8_t y[], /* o : output vector */
79 : const int16_t n /* i : vector size */
80 : )
81 : {
82 : int16_t i;
83 :
84 217929520 : if ( n <= 0 )
85 : {
86 : /* no need to transfer vectors with size 0 */
87 0 : return;
88 : }
89 :
90 217929520 : if ( y < x )
91 : {
92 3740330606 : for ( i = 0; i < n; i++ )
93 : {
94 3569956862 : y[i] = x[i];
95 : }
96 : }
97 : else
98 : {
99 776562905 : for ( i = n - 1; i >= 0; i-- )
100 : {
101 729007129 : y[i] = x[i];
102 : }
103 : }
104 :
105 217929520 : return;
106 : }
107 :
108 :
109 : /*-------------------------------------------------------------------*
110 : * ivas_syn_output()
111 : *
112 : * Output ivas synthesis signal with compensation for saturation
113 : * returns number of clipped samples
114 : *-------------------------------------------------------------------*/
115 :
116 : /*! r: number of clipped samples */
117 42389993 : uint32_t ivas_syn_output(
118 : float *synth[], /* i/o: float synthesis signal */
119 : const int16_t output_frame, /* i : output frame length (one channel) */
120 : const int16_t n_channels, /* i : number of output channels */
121 : int16_t *synth_out /* o : integer 16 bits synthesis signal */
122 : )
123 : {
124 : int16_t i, n;
125 : int16_t synth_loc[MAX_JBM_L_FRAME48k];
126 42389993 : uint32_t noClipping = 0;
127 :
128 : /*-----------------------------------------------------------------*
129 : * float to integer conversion with saturation control
130 : *-----------------------------------------------------------------*/
131 :
132 204855975 : for ( n = 0; n < n_channels; n++ )
133 : {
134 162465982 : noClipping += mvr2s( synth[n], synth_loc, output_frame );
135 :
136 : #ifdef DEBUG_MODE_LFE
137 : if ( n == LFE_CHANNEL )
138 : {
139 : dbgwrite( synth_loc, sizeof( int16_t ), output_frame, 1, "./lfe_out.raw" );
140 : }
141 : #endif
142 67016593502 : for ( i = 0; i < output_frame; i++ )
143 : {
144 66854127520 : synth_out[i * n_channels + n] = synth_loc[i];
145 : }
146 : }
147 :
148 42389993 : return noClipping;
149 : }
150 :
151 :
152 : /*-------------------------------------------------------------------*
153 : * ivas_buffer_interleaved_to_deinterleaved()
154 : *
155 : * Convert an interleaved buffer of audio channels to deinterleaved one
156 : *-------------------------------------------------------------------*/
157 :
158 3126749 : void ivas_buffer_interleaved_to_deinterleaved(
159 : float *audio_in, /* i : interleaved audio buffer */
160 : float *audio_out[], /* o : pointers to each channel of deinterleaved audio buffer */
161 : const int16_t n_channels, /* i : number of channels */
162 : const int16_t frame_length /* i : frame length (one channel) */
163 : )
164 : {
165 : int16_t ch, s;
166 : float buffer[MAX_TRANSPORT_CHANNELS][MAX_JBM_L_FRAME48k]; /* temp buffer needed when "*audio_in" and "*audio_out[]" point to the same memory */
167 :
168 12777465 : for ( ch = 0; ch < n_channels; ch++ )
169 : {
170 7271038334 : for ( s = 0; s < frame_length; s++ )
171 : {
172 7261387618 : buffer[ch][s] = audio_in[s * n_channels + ch];
173 : }
174 : }
175 :
176 12777465 : for ( ch = 0; ch < n_channels; ch++ )
177 : {
178 7271038334 : for ( s = 0; s < frame_length; s++ )
179 : {
180 7261387618 : audio_out[ch][s] = buffer[ch][s];
181 : }
182 : }
183 :
184 3126749 : return;
185 : }
186 :
187 : /*-------------------------------------------------------------------*
188 : * ivas_buffer_deinterleaved_to_interleaved()
189 : *
190 : * Convert a deinterleaved buffer of audio channels to interleaved one
191 : *-------------------------------------------------------------------*/
192 :
193 3736390 : void ivas_buffer_deinterleaved_to_interleaved(
194 : float *audio_in[], /* i : pointers to each channel of deinterleaved audio buffer */
195 : float *audio_out, /* o : interleaved audio buffer */
196 : const int16_t n_channels, /* i : number of channels */
197 : const int16_t frame_length /* i : frame length (one channel) */
198 : )
199 : {
200 : int16_t ch, s;
201 : float buffer[MAX_OUTPUT_CHANNELS + MAX_NUM_OBJECTS][L_FRAME48k]; /* temp buffer needed when "*audio_in[]" and "*audio_out" point to the same memory */
202 :
203 17874088 : for ( ch = 0; ch < n_channels; ch++ )
204 : {
205 10893565218 : for ( s = 0; s < frame_length; s++ )
206 : {
207 10879427520 : buffer[ch][s] = audio_in[ch][s];
208 : }
209 : }
210 :
211 17874088 : for ( ch = 0; ch < n_channels; ch++ )
212 : {
213 10893565218 : for ( s = 0; s < frame_length; s++ )
214 : {
215 10879427520 : audio_out[s * n_channels + ch] = buffer[ch][s];
216 : }
217 : }
218 :
219 3736390 : return;
220 : }
221 :
222 :
223 : /*-------------------------------------------------------------------*
224 : * mvr2r_inc()
225 : *
226 : *
227 : *-------------------------------------------------------------------*/
228 :
229 5208806755 : void mvr2r_inc(
230 : const float x[], /* i : input vector */
231 : const int16_t x_inc, /* i : increment for vector x[] */
232 : float y[], /* o : output vector */
233 : const int16_t y_inc, /* i : increment for vector y[] */
234 : const int16_t n /* i : vector size */
235 : )
236 : {
237 : int16_t i;
238 : int16_t ix;
239 : int16_t iy;
240 :
241 5208806755 : if ( n <= 0 )
242 : {
243 : /* cannot transfer vectors with size 0 */
244 0 : return;
245 : }
246 :
247 5208806755 : if ( y < x )
248 : {
249 4199199512 : ix = 0;
250 4199199512 : iy = 0;
251 60941928709 : for ( i = 0; i < n; i++ )
252 : {
253 56742729197 : y[iy] = x[ix];
254 56742729197 : ix += x_inc;
255 56742729197 : iy += y_inc;
256 : }
257 : }
258 : else
259 : {
260 1009607243 : ix = ( n - 1 ) * x_inc;
261 1009607243 : iy = ( n - 1 ) * y_inc;
262 8981190342 : for ( i = n - 1; i >= 0; i-- )
263 : {
264 7971583099 : y[iy] = x[ix];
265 7971583099 : ix -= x_inc;
266 7971583099 : iy -= y_inc;
267 : }
268 : }
269 :
270 5208806755 : return;
271 : }
272 :
273 : /*-------------------------------------------------------------------*
274 : * v_add_inc()
275 : *
276 : * Addition of two vectors sample by sample with explicit increments
277 : *-------------------------------------------------------------------*/
278 :
279 1509564277 : void v_add_inc(
280 : const float x1[], /* i : Input vector 1 */
281 : const int16_t x_inc, /* i : Increment for input vector 1 */
282 : const float x2[], /* i : Input vector 2 */
283 : const int16_t x2_inc, /* i : Increment for input vector 2 */
284 : float y[], /* o : Output vector that contains vector 1 + vector 2 */
285 : const int16_t y_inc, /* i : increment for vector y[] */
286 : const int16_t N /* i : Vector length */
287 : )
288 : {
289 : int16_t i;
290 1509564277 : int16_t ix1 = 0;
291 1509564277 : int16_t ix2 = 0;
292 1509564277 : int16_t iy = 0;
293 :
294 29882942513 : for ( i = 0; i < N; i++ )
295 : {
296 28373378236 : y[iy] = x1[ix1] + x2[ix2];
297 28373378236 : ix1 += x_inc;
298 28373378236 : ix2 += x2_inc;
299 28373378236 : iy += y_inc;
300 : }
301 :
302 1509564277 : return;
303 : }
304 :
305 :
306 : /*-------------------------------------------------------------------*
307 : * v_mult_inc()
308 : *
309 : * Multiplication of two vectors with explicit increments
310 : *-------------------------------------------------------------------*/
311 :
312 791601736 : void v_mult_inc(
313 : const float x1[], /* i : Input vector 1 */
314 : const int16_t x1_inc, /* i : Increment for input vector 1 */
315 : const float x2[], /* i : Input vector 2 */
316 : const int16_t x2_inc, /* i : Increment for input vector 1 */
317 : float y[], /* o : Output vector that contains vector 1 .* vector 2 */
318 : const int16_t y_inc, /* i : increment for vector y[i] */
319 : const int16_t N /* i : Vector length */
320 : )
321 : {
322 : int16_t i;
323 791601736 : int16_t ix1 = 0;
324 791601736 : int16_t ix2 = 0;
325 791601736 : int16_t iy = 0;
326 :
327 13023020564 : for ( i = 0; i < N; i++ )
328 : {
329 12231418828 : y[iy] = x1[ix1] * x2[ix2];
330 12231418828 : ix1 += x1_inc;
331 12231418828 : ix2 += x2_inc;
332 12231418828 : iy += y_inc;
333 : }
334 :
335 791601736 : return;
336 : }
337 :
338 : /*-------------------------------------------------------------------*
339 : * v_addc()
340 : *
341 : * Addition of constant to vector
342 : *-------------------------------------------------------------------*/
343 :
344 508914345 : void v_addc(
345 : const float x[], /* i : Input vector */
346 : const float c, /* i : Constant */
347 : float y[], /* o : Output vector that contains c*x */
348 : const int16_t N /* i : Vector length */
349 : )
350 : {
351 : int16_t i;
352 :
353 9535574005 : for ( i = 0; i < N; i++ )
354 : {
355 9026659660 : y[i] = c + x[i];
356 : }
357 :
358 508914345 : return;
359 : }
360 :
361 : /*-------------------------------------------------------------------*
362 : * v_min()
363 : *
364 : * minimum of two vectors
365 : *-------------------------------------------------------------------*/
366 :
367 774312 : void v_min(
368 : const float x1[], /* i : Input vector 1 */
369 : const float x2[], /* i : Input vector 2 */
370 : float y[], /* o : Output vector that contains vector 1 .* vector 2 */
371 : const int16_t N /* i : Vector length */
372 : )
373 : {
374 : int16_t i;
375 :
376 12068976 : for ( i = 0; i < N; i++ )
377 : {
378 11294664 : y[i] = ( x1[i] < x2[i] ) ? x1[i] : x2[i];
379 : }
380 :
381 774312 : return;
382 : }
383 :
384 :
385 : /*-------------------------------------------------------------------*
386 : * v_sqrt()
387 : *
388 : * square root of vector
389 : *-------------------------------------------------------------------*/
390 :
391 10692120 : void v_sqrt(
392 : const float x[], /* i : Input vector */
393 : float y[], /* o : Output vector that contains sqrt(x) */
394 : const int16_t N /* i : Vector length */
395 : )
396 : {
397 : int16_t i;
398 :
399 32076360 : for ( i = 0; i < N; i++ )
400 : {
401 21384240 : y[i] = sqrtf( x[i] );
402 : }
403 :
404 10692120 : return;
405 : }
406 :
407 :
408 : /*-------------------------------------------------------------------*
409 : * v_sub_s()
410 : *
411 : * Subtraction of two vectors
412 : *-------------------------------------------------------------------*/
413 :
414 6614530 : void v_sub_s(
415 : const int16_t x1[], /* i : Input vector 1 */
416 : const int16_t x2[], /* i : Input vector 2 */
417 : int16_t y[], /* o : Output vector that contains vector 1 - vector 2 */
418 : const int16_t N /* i : Vector length */
419 : )
420 : {
421 : int16_t i;
422 :
423 25113137 : for ( i = 0; i < N; i++ )
424 : {
425 18498607 : y[i] = x1[i] - x2[i];
426 : }
427 :
428 6614530 : return;
429 : }
430 :
431 :
432 : /*---------------------------------------------------------------------*
433 : * dot_product_cholesky()
434 : *
435 : * Calculates dot product of type x'*A*A'*x, where x is column vector of size m,
436 : * and A is a Cholesky decomposition of some Hermitian matrix S whose size is m*m.
437 : * Therefore, S=A*A' where A is upper triangular matrix of size (m*m+m)/2 (zeros ommitted, column-wise)
438 : *---------------------------------------------------------------------*/
439 :
440 : /*! r: the dot product x'*A*A'*x */
441 331898076 : float dot_product_cholesky(
442 : const float *x, /* i : vector x */
443 : const float *A, /* i : Cholesky matrix A */
444 : const int16_t N /* i : vector & matrix size */
445 : )
446 : {
447 : int16_t i, j;
448 : float suma, tmp_sum;
449 : const float *pt_x, *pt_A;
450 :
451 331898076 : pt_A = A;
452 331898076 : suma = 0;
453 :
454 4314674988 : for ( i = 0; i < N; i++ )
455 : {
456 3982776912 : tmp_sum = 0;
457 3982776912 : pt_x = x;
458 29870826840 : for ( j = 0; j <= i; j++ )
459 : {
460 25888049928 : tmp_sum += *pt_x++ * *pt_A++;
461 : }
462 :
463 3982776912 : suma += tmp_sum * tmp_sum;
464 : }
465 :
466 331898076 : return suma;
467 : }
468 :
469 : /*---------------------------------------------------------------------*
470 : * v_mult_mat()
471 : *
472 : * Multiplication of row vector x by matrix A, where x has size Nr and
473 : * A has size Nr x Nc ans it is stored column-wise in memory.
474 : * The resulting row vector y has size Nc
475 : *---------------------------------------------------------------------*/
476 :
477 36877564 : void v_mult_mat(
478 : float *y, /* o : the product x*A */
479 : const float *x, /* i : vector x */
480 : const float *A, /* i : matrix A */
481 : const int16_t Nr, /* i : number of rows */
482 : const int16_t Nc /* i : number of columns */
483 : )
484 : {
485 : int16_t i, j;
486 : const float *pt_x, *pt_A;
487 : float tmp_y[MAX_V_MULT_MAT];
488 : float *pt_y;
489 :
490 36877564 : pt_y = tmp_y;
491 36877564 : pt_A = A;
492 497847114 : for ( i = 0; i < Nc; i++ )
493 : {
494 460969550 : pt_x = x;
495 460969550 : *pt_y = 0;
496 13368116950 : for ( j = 0; j < Nr; j++ )
497 : {
498 12907147400 : *pt_y += ( *pt_x++ ) * ( *pt_A++ );
499 : }
500 460969550 : pt_y++;
501 : }
502 :
503 36877564 : mvr2r( tmp_y, y, Nc );
504 :
505 36877564 : return;
506 : }
507 :
508 :
509 : /*---------------------------------------------------------------------*
510 : * logsumexp()
511 : *
512 : * Compute logarithm of the sum of exponentials of input vector in a numerically stable way
513 : *---------------------------------------------------------------------*/
514 :
515 : /*! r: log(sum(exp(x)) of the input array x */
516 55316346 : float logsumexp(
517 : const float x[], /* i : input array x */
518 : const int16_t N /* i : number of elements in array x */
519 : )
520 : {
521 : float max_exp;
522 : float sum;
523 : int16_t i;
524 :
525 55316346 : max_exp = x[0];
526 55316346 : sum = 0;
527 331898076 : for ( i = 1; i < N; i++ )
528 : {
529 276581730 : if ( x[i] > max_exp )
530 : {
531 88552692 : max_exp = x[i];
532 : }
533 : }
534 :
535 387214422 : for ( i = 0; i < N; i++ )
536 : {
537 331898076 : sum += expf( x[i] - max_exp );
538 : }
539 :
540 55316346 : return logf( sum ) + max_exp;
541 : }
542 :
543 :
544 : /*---------------------------------------------------------------------*
545 : * lin_interp()
546 : *
547 : * Linearly maps x from source range <x1, x2> to the target range <y1, y2>
548 : *---------------------------------------------------------------------*/
549 :
550 : /*! r: mapped output value */
551 58532527 : float lin_interp(
552 : const float x, /* i : the value to be mapped */
553 : const float x1, /* i : source range interval: low end */
554 : const float y1, /* i : source range interval: high end */
555 : const float x2, /* i : target range interval: low */
556 : const float y2, /* i : target range interval: high */
557 : const int16_t flag_sat /* i : flag to indicate whether to apply saturation */
558 : )
559 : {
560 58532527 : if ( x2 - x1 == 0 )
561 : {
562 0 : return y1;
563 : }
564 58532527 : else if ( flag_sat )
565 : {
566 58517670 : if ( x >= max( x1, x2 ) )
567 : {
568 917409 : return x1 > x2 ? y1 : y2;
569 : }
570 57600261 : else if ( x <= min( x1, x2 ) )
571 : {
572 40457972 : return x1 < x2 ? y1 : y2;
573 : }
574 : }
575 :
576 17157146 : return y1 + ( x - x1 ) * ( y2 - y1 ) / ( x2 - x1 );
577 : }
578 :
579 :
580 : /*-------------------------------------------------------------------*
581 : * check_bounds()
582 : *
583 : * Ensure the input value is within the given limits
584 : *-------------------------------------------------------------------*/
585 :
586 : /*! r: Adjusted value */
587 29778214 : float check_bounds(
588 : const float value, /* i : Input value */
589 : const float low, /* i : Low limit */
590 : const float high /* i : High limit */
591 : )
592 : {
593 : float value_adj;
594 :
595 29778214 : value_adj = min( max( value, low ), high );
596 :
597 29778214 : return value_adj;
598 : }
599 :
600 :
601 : /*-------------------------------------------------------------------*
602 : * check_bounds_s()
603 : *
604 : * Ensure the input value is within the given limits
605 : *-------------------------------------------------------------------*/
606 :
607 : /*! r: Adjusted value */
608 45443715 : int16_t check_bounds_s(
609 : const int16_t value, /* i : Input value */
610 : const int16_t low, /* i : Low limit */
611 : const int16_t high /* i : High limit */
612 : )
613 : {
614 : int16_t value_adj;
615 :
616 45443715 : value_adj = min( max( value, low ), high );
617 :
618 45443715 : return value_adj;
619 : }
620 :
621 :
622 : /*---------------------------------------------------------------------*
623 : * set_zero_l()
624 : *
625 : * Set a vector vec[] of dimension lvec to zero
626 : *---------------------------------------------------------------------*/
627 :
628 467127 : void set_zero_l(
629 : float *vec, /* i/o: input/output vector */
630 : const uint32_t lvec /* i : length of the vector */
631 : )
632 : {
633 : uint32_t i;
634 :
635 1081444486 : for ( i = 0; i < lvec; i++ )
636 : {
637 1080977359 : *vec++ = 0.0f;
638 : }
639 :
640 467127 : return;
641 : }
642 :
643 :
644 : /****************************************************************************/
645 : /* matrix functions */
646 : /* matrices are ordered column-wise in memory */
647 : /****************************************************************************/
648 :
649 : /*---------------------------------------------------------------------*
650 : * matrix product
651 : *
652 : * comput the matrix product of two matrices (Z=X*Y)
653 : *---------------------------------------------------------------------*/
654 :
655 : /*! r: success or failure */
656 1161518372 : int16_t matrix_product(
657 : const float *X, /* i : left hand matrix */
658 : const int16_t rowsX, /* i : number of rows of the left hand matrix */
659 : const int16_t colsX, /* i : number of columns of the left hand matrix */
660 : const int16_t transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication */
661 : const float *Y, /* i : right hand matrix */
662 : const int16_t rowsY, /* i : number of rows of the right hand matrix */
663 : const int16_t colsY, /* i : number of columns of the right hand matrix */
664 : const int16_t transpY, /* i : flag indicating the transposition of the right hand matrix prior to the multiplication */
665 : float *Z /* o : resulting matrix after the matrix multiplication */
666 : )
667 : {
668 : int16_t i, j, k;
669 1161518372 : float *Zp = Z;
670 :
671 : /* Processing */
672 1161518372 : if ( transpX == 1 && transpY == 0 ) /* We use X transpose */
673 : {
674 0 : if ( rowsX != rowsY )
675 : {
676 0 : return EXIT_FAILURE;
677 : }
678 0 : for ( j = 0; j < colsY; ++j )
679 : {
680 0 : for ( i = 0; i < colsX; ++i )
681 : {
682 0 : ( *Zp ) = 0.0f;
683 0 : for ( k = 0; k < rowsX; ++k )
684 : {
685 0 : ( *Zp ) += X[k + i * rowsX] * Y[k + j * rowsY];
686 : }
687 0 : Zp++;
688 : }
689 : }
690 : }
691 1161518372 : else if ( transpX == 0 && transpY == 1 ) /* We use Y transpose */
692 : {
693 44388441 : if ( colsX != colsY )
694 : {
695 0 : return EXIT_FAILURE;
696 : }
697 287341046 : for ( j = 0; j < rowsY; ++j )
698 : {
699 2108694532 : for ( i = 0; i < rowsX; ++i )
700 : {
701 1865741927 : ( *Zp ) = 0.0f;
702 6836283630 : for ( k = 0; k < colsX; ++k )
703 : {
704 4970541703 : ( *Zp ) += X[i + k * rowsX] * Y[j + k * rowsY];
705 : }
706 1865741927 : Zp++;
707 : }
708 : }
709 : }
710 1117129931 : else if ( transpX == 1 && transpY == 1 ) /* We use both transpose */
711 : {
712 5894263 : if ( rowsX != colsY )
713 : {
714 0 : return EXIT_FAILURE;
715 : }
716 41127666 : for ( j = 0; j < rowsY; ++j )
717 : {
718 113747720 : for ( i = 0; i < colsX; ++i )
719 : {
720 78514317 : ( *Zp ) = 0.0f;
721 259685484 : for ( k = 0; k < colsX; ++k )
722 : {
723 181171167 : ( *Zp ) += X[k + i * rowsX] * Y[j + k * rowsY];
724 : }
725 :
726 78514317 : Zp++;
727 : }
728 : }
729 : }
730 : else /* Regular case */
731 : {
732 1111235668 : if ( colsX != rowsY )
733 : {
734 0 : return EXIT_FAILURE;
735 : }
736 :
737 2343214515 : for ( j = 0; j < colsY; ++j )
738 : {
739 9387855473 : for ( i = 0; i < rowsX; ++i )
740 : {
741 8155876626 : ( *Zp ) = 0.0f;
742 38040749092 : for ( k = 0; k < colsX; ++k )
743 : {
744 29884872466 : ( *Zp ) += X[i + k * rowsX] * Y[k + j * rowsY];
745 : }
746 8155876626 : Zp++;
747 : }
748 : }
749 : }
750 :
751 1161518372 : return EXIT_SUCCESS;
752 : }
753 :
754 :
755 : /*---------------------------------------------------------------------*
756 : * matrix_diag_product
757 : *
758 : * compute the product of a matrix with a diagonal of a matrix (Z=X*diag(Y))
759 : *---------------------------------------------------------------------*/
760 :
761 : /*! r: success or failure */
762 64134813 : int16_t matrix_diag_product(
763 : const float *X, /* i : left hand matrix */
764 : const int16_t rowsX, /* i : number of rows of the left hand matrix */
765 : const int16_t colsX, /* i : number of columns of the left hand matrix */
766 : const int16_t transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication */
767 : const float *Y, /* i : right hand diagonal matrix as vector containing the diagonal elements */
768 : const int16_t entriesY, /* i : number of entries in the diagonal */
769 : float *Z /* o : resulting matrix after the matrix multiplication */
770 : )
771 : {
772 : int16_t i, j;
773 64134813 : float *Zp = Z;
774 :
775 : /* Processing */
776 64134813 : if ( transpX == 1 ) /* We use X transpose */
777 : {
778 0 : if ( rowsX != entriesY )
779 : {
780 0 : return EXIT_FAILURE;
781 : }
782 0 : for ( j = 0; j < entriesY; ++j )
783 : {
784 0 : for ( i = 0; i < colsX; ++i )
785 : {
786 0 : *( Zp++ ) = X[j + i * rowsX] * Y[j];
787 : }
788 : }
789 : }
790 : else /* Regular case */
791 : {
792 64134813 : if ( colsX != entriesY )
793 : {
794 0 : return EXIT_FAILURE;
795 : }
796 :
797 314156120 : for ( j = 0; j < entriesY; ++j )
798 : {
799 1518871470 : for ( i = 0; i < rowsX; ++i )
800 : {
801 1268850163 : *( Zp++ ) = *( X++ ) * Y[j];
802 : }
803 : }
804 : }
805 :
806 64134813 : return EXIT_SUCCESS;
807 : }
808 :
809 : /*---------------------------------------------------------------------*
810 : * diag_matrix_product()
811 : *
812 : * compute the matrix product of a diagonal matrix X with a full matrix Y (Z=diag(Y)*X)
813 : *---------------------------------------------------------------------*/
814 :
815 : /*! r: success or failure */
816 27278503 : int16_t diag_matrix_product(
817 : const float *Y, /* i : left hand diagonal matrix as vector containing the diagonal elements */
818 : const int16_t entriesY, /* i : length of the diagonal of the left hand matrix */
819 : const float *X, /* i : right hand matrix */
820 : const int16_t rowsX, /* i : number of rows of the right hand matrix */
821 : const int16_t colsX, /* i : number of columns of the right hand matrix */
822 : const int16_t transpX, /* i : flag indicating the transposition of the right hand matrix prior to the multiplication */
823 : float *Z /* o : resulting matrix after the matrix multiplication */
824 : )
825 : {
826 : int16_t i, j;
827 27278503 : float *Zp = Z;
828 :
829 : /* Processing */
830 27278503 : if ( transpX == 1 ) /* We use X transpose */
831 : {
832 10692120 : if ( colsX != entriesY )
833 : {
834 0 : return EXIT_FAILURE;
835 : }
836 111105120 : for ( i = 0; i < rowsX; ++i )
837 : {
838 301239000 : for ( j = 0; j < entriesY; ++j )
839 : {
840 200826000 : *( Zp++ ) = X[i + j * rowsX] * Y[j];
841 : }
842 : }
843 : }
844 : else /* Regular case */
845 : {
846 16586383 : if ( rowsX != entriesY )
847 : {
848 0 : return EXIT_FAILURE;
849 : }
850 69048882 : for ( i = 0; i < colsX; ++i )
851 : {
852 452969558 : for ( j = 0; j < entriesY; ++j )
853 : {
854 400507059 : *( Zp++ ) = *( X++ ) * Y[j];
855 : }
856 : }
857 : }
858 :
859 27278503 : return EXIT_SUCCESS;
860 : }
861 :
862 :
863 : /*---------------------------------------------------------------------*
864 : * matrix_product_diag()
865 : *
866 : * compute only the main diagonal of X*Y (Z=diag(X*Y))
867 : *---------------------------------------------------------------------*/
868 :
869 : /*! r: success or failure */
870 31554358 : int16_t matrix_product_diag(
871 : const float *X, /* i : left hand matrix */
872 : const int16_t rowsX, /* i : number of rows of the left hand matrix */
873 : const int16_t colsX, /* i : number of columns of the left hand matrix */
874 : const int16_t transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication */
875 : const float *Y, /* i : right hand matrix */
876 : const int16_t rowsY, /* i : number of rows of the right hand matrix */
877 : const int16_t colsY, /* i : number of columns of the right hand matrix */
878 : const int16_t transpY, /* i : flag indicating the transposition of the right hand matrix prior to the multiplication */
879 : float *Z /* o : resulting matrix after the matrix multiplication */
880 : )
881 : {
882 : int16_t j, k;
883 31554358 : float *Zp = Z;
884 :
885 : /* Processing */
886 31554358 : if ( transpX == 1 && transpY == 0 ) /* We use X transpose */
887 : {
888 0 : if ( rowsX != rowsY )
889 : {
890 0 : return EXIT_FAILURE;
891 : }
892 :
893 0 : for ( j = 0; j < colsY; ++j )
894 : {
895 0 : ( *Zp ) = 0.0f;
896 0 : for ( k = 0; k < rowsX; ++k )
897 : {
898 0 : ( *Zp ) += X[k + j * rowsX] * Y[k + j * rowsY];
899 : }
900 0 : Zp++;
901 : }
902 : }
903 31554358 : else if ( transpX == 0 && transpY == 1 ) /* We use Y transpose */
904 : {
905 27260596 : if ( colsX != colsY )
906 : {
907 0 : return EXIT_FAILURE;
908 : }
909 226816946 : for ( j = 0; j < rowsY; ++j )
910 : {
911 199556350 : ( *Zp ) = 0.0f;
912 742424750 : for ( k = 0; k < colsX; ++k )
913 : {
914 542868400 : ( *Zp ) += X[j + k * rowsX] * Y[j + k * rowsY];
915 : }
916 199556350 : Zp++;
917 : }
918 : }
919 4293762 : else if ( transpX == 1 && transpY == 1 ) /* We use both transpose */
920 : {
921 0 : if ( rowsX != colsY )
922 : {
923 0 : return EXIT_FAILURE;
924 : }
925 :
926 0 : for ( j = 0; j < rowsY; ++j )
927 : {
928 :
929 0 : ( *Zp ) = 0.0f;
930 0 : for ( k = 0; k < colsX; ++k )
931 : {
932 0 : ( *Zp ) += X[k + j * rowsX] * Y[j + k * rowsY];
933 : }
934 :
935 0 : Zp++;
936 : }
937 : }
938 : else /* Regular case */
939 : {
940 4293762 : if ( colsX != rowsY )
941 : {
942 0 : return EXIT_FAILURE;
943 : }
944 :
945 44213906 : for ( j = 0; j < colsY; ++j )
946 : {
947 39920144 : ( *Zp ) = 0.0f;
948 79840288 : for ( k = 0; k < colsX; ++k )
949 : {
950 39920144 : ( *Zp ) += X[j + k * rowsX] * Y[k + j * rowsY];
951 : }
952 39920144 : Zp++;
953 : }
954 : }
955 :
956 31554358 : return EXIT_SUCCESS;
957 : }
958 :
959 :
960 : /*---------------------------------------------------------------------*
961 : * cmplx_matrix_square()
962 : *
963 : * compute the square of a complex matrix (Z=X*X)
964 : *---------------------------------------------------------------------*/
965 :
966 : /*! r: success or failure */
967 92571577 : void cmplx_matrix_square(
968 : const float *realX, /* i : real part of the matrix */
969 : const float *imagX, /* i : imaginary part of the matrix */
970 : const int16_t mRows, /* i : number of rows of the matrix */
971 : const int16_t nCols, /* i : number of columns of the matrix */
972 : float *realZ, /* o : real part of the resulting matrix */
973 : float *imagZ /* o : imaginary part of the resulting matrix */
974 : )
975 : {
976 : int16_t i, j, k;
977 : float *realZp, *imagZp;
978 : const float *p_real1, *p_real2, *p_imag1, *p_imag2;
979 :
980 : /* resulting matrix is hermitean, we only need to calc the upper triangle */
981 : /* we assume transposition needed */
982 :
983 : /* column*column = column/column */
984 291595289 : for ( i = 0; i < nCols; i++ )
985 : {
986 518380117 : for ( j = i; j < nCols; j++ )
987 : {
988 319356405 : p_real1 = realX + i * mRows;
989 319356405 : p_imag1 = imagX + i * mRows;
990 319356405 : p_real2 = realX + j * mRows;
991 319356405 : p_imag2 = imagX + j * mRows;
992 319356405 : realZp = realZ + ( i + nCols * j );
993 319356405 : imagZp = imagZ + ( i + nCols * j );
994 319356405 : *( realZp ) = 0.0f;
995 319356405 : *( imagZp ) = 0.0f;
996 :
997 1592770485 : for ( k = 0; k < mRows; k++ )
998 : {
999 1273414080 : *( imagZp ) += *( p_real1 ) * ( *( p_imag2 ) ) - ( *( p_real2 ) ) * ( *( p_imag1 ) );
1000 1273414080 : *( realZp ) += *( p_real1++ ) * ( *( p_real2++ ) ) + *( p_imag1++ ) * ( *( p_imag2++ ) );
1001 : }
1002 : }
1003 : }
1004 :
1005 : /* fill lower triangle */
1006 199023712 : for ( i = 1; i < nCols; i++ )
1007 : {
1008 226784828 : for ( j = 0; j < i; j++ )
1009 : {
1010 120332693 : realZ[i + nCols * j] = realZ[j + nCols * i];
1011 120332693 : imagZ[i + nCols * j] = imagZ[j + nCols * i];
1012 : }
1013 : }
1014 :
1015 92571577 : return;
1016 : }
1017 :
1018 : /*-------------------------------------------------------------------*
1019 : * v_multc_acc()
1020 : *
1021 : * Multiplication of vector by constant, accumulate to the output
1022 : *-------------------------------------------------------------------*/
1023 :
1024 227207384 : void v_multc_acc(
1025 : const float x[], /* i : Input vector */
1026 : const float c, /* i : Constant */
1027 : float y[], /* o : Output vector that contains y + c*x */
1028 : const int16_t N /* i : Vector length */
1029 : )
1030 : {
1031 : int16_t i;
1032 :
1033 19232185824 : for ( i = 0; i < N; i++ )
1034 : {
1035 19004978440 : y[i] += c * x[i];
1036 : }
1037 :
1038 227207384 : return;
1039 : }
1040 :
1041 : /*---------------------------------------------------------------------*
1042 : * lls_interp_n()
1043 : *
1044 : * Linear least squares interpolation of an input vector
1045 : * The function calculates the slope 'a' and the offset 'b' of a LLS estimate for an input vector x such that
1046 : * y_i = a*i + b where i=1,..,N and (a,b) = min(a,b) [sum_N[(y_i - x_i)^2]]
1047 : * the interpolated vector is return as x[], if requested
1048 : *---------------------------------------------------------------------*/
1049 :
1050 :
1051 1352283 : void lls_interp_n(
1052 : float x[], /* i/o: input/output vector */
1053 : const int16_t N, /* i : length of the input vector */
1054 : float *a, /* o : calculated slope */
1055 : float *b, /* o : calculated offset */
1056 : const int16_t upd /* i : use 1 to update x[] with the interpolated output */
1057 : )
1058 : {
1059 : int16_t i;
1060 1352283 : const float n_i[11] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
1061 1352283 : const float one_by_n[11] = { 0, 1, 0.5f, 0.333f, 0.25f, 0.2f, 0.1666f, 0.14286f, 0.125f, 0.111111f, 0.1f };
1062 1352283 : const float sum_i[12] = { 0, 0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55 };
1063 1352283 : const float sum_ii[12] = { 0, 0, 1, 5, 14, 30, 55, 91, 140, 204, 285, 385 };
1064 : float sum_x, sum_ix, slope, offset;
1065 :
1066 1352283 : assert( N > 0 && N <= 10 );
1067 :
1068 1352283 : sum_x = sum_f( x, N );
1069 1352283 : sum_ix = dotp( x, n_i, N );
1070 :
1071 1352283 : slope = ( N * sum_ix - sum_i[N] * sum_x ) / ( N * sum_ii[N] - sum_i[N] * sum_i[N] );
1072 1352283 : offset = ( sum_x - slope * sum_i[N] ) * one_by_n[N];
1073 :
1074 1352283 : if ( upd )
1075 : {
1076 6663195 : for ( i = 0; i < N; i++ )
1077 : {
1078 5330556 : x[i] = slope * i + offset;
1079 : }
1080 : }
1081 :
1082 1352283 : if ( a != NULL )
1083 : {
1084 1352283 : *a = slope;
1085 : }
1086 :
1087 1352283 : if ( b != NULL )
1088 : {
1089 1352283 : *b = offset;
1090 : }
1091 :
1092 1352283 : return;
1093 : }
1094 :
1095 : /* helper function for panning_wrap_angles */
1096 1059887721 : static float wrap_azi(
1097 : const float azi_deg )
1098 : {
1099 1059887721 : float azi = azi_deg;
1100 :
1101 : /* Wrap azimuth value */
1102 1120039339 : while ( azi > 180 )
1103 : {
1104 60151618 : azi -= 360.0f;
1105 : }
1106 :
1107 1066632668 : while ( azi <= -180 )
1108 : {
1109 6744947 : azi += 360;
1110 : }
1111 :
1112 1059887721 : return azi;
1113 : }
1114 :
1115 : /*-------------------------------------------------------------------*
1116 : * panning_wrap_angles()
1117 : *
1118 : * Wrap angles for amplitude panning to the range:
1119 : * azimuth = (-180, 180]
1120 : * elevation = [-90, 90]
1121 : * Considers direction changes from large elevation values
1122 : *-------------------------------------------------------------------*/
1123 1060165116 : void panning_wrap_angles(
1124 : const float azi_deg, /* i : azimuth in degrees for panning direction (positive left) */
1125 : const float ele_deg, /* i : elevation in degrees for panning direction (positive up) */
1126 : float *azi_wrapped, /* o : wrapped azimuth component */
1127 : float *ele_wrapped /* o : wrapped elevation component */
1128 : )
1129 : {
1130 : float azi, ele;
1131 :
1132 1060165116 : azi = azi_deg;
1133 1060165116 : ele = ele_deg;
1134 :
1135 1060165116 : if ( fabsf( ele ) < 90 )
1136 : {
1137 1059887721 : *ele_wrapped = ele;
1138 1059887721 : *azi_wrapped = wrap_azi( azi );
1139 1059887721 : return;
1140 : }
1141 : else
1142 : {
1143 : /* Special case when elevation is a multiple of 90; azimuth is irrelevant */
1144 277395 : if ( ( fmodf( ele, 90 ) == 0 ) && ( fmodf( ele, 180 ) != 0 ) )
1145 : {
1146 277395 : *azi_wrapped = 0;
1147 277395 : while ( ele > 90 )
1148 : {
1149 0 : ele -= 360;
1150 : }
1151 277395 : while ( ele < -90 )
1152 : {
1153 0 : ele += 360;
1154 : }
1155 277395 : *ele_wrapped = ele;
1156 : }
1157 : else
1158 : {
1159 : /* Wrap elevation and adjust azimuth accordingly */
1160 0 : while ( fabsf( ele ) > 90 )
1161 : {
1162 : /* Flip to other hemisphere */
1163 0 : azi += 180;
1164 :
1165 : /* Compensate elevation accordingly */
1166 0 : if ( ele > 90 )
1167 : {
1168 0 : ele = 180 - ele;
1169 : }
1170 0 : else if ( ele < -90 )
1171 : {
1172 0 : ele = -180 - ele;
1173 : }
1174 : }
1175 0 : *azi_wrapped = wrap_azi( azi );
1176 0 : *ele_wrapped = ele;
1177 : }
1178 :
1179 277395 : return;
1180 : }
1181 : }
1182 :
1183 : /*-------------------------------------------------------------------------*
1184 : * v_sort_ind()
1185 : *
1186 : * Sort a float array
1187 : * (modified version of v_sort() to return an index array)
1188 : *-------------------------------------------------------------------------*/
1189 :
1190 981577 : void v_sort_ind(
1191 : float *x, /* i/o: Vector to be sorted */
1192 : int16_t *idx, /* o : Original index positions */
1193 : const int16_t len /* i : vector length */
1194 : )
1195 : {
1196 : int16_t i, j;
1197 : float tempr;
1198 : int16_t tempi;
1199 :
1200 4741857 : for ( i = 0; i < len; i++ )
1201 : {
1202 3760280 : idx[i] = i;
1203 : }
1204 :
1205 3760280 : for ( i = len - 2; i >= 0; i-- )
1206 : {
1207 2778703 : tempr = x[i];
1208 2778703 : tempi = idx[i];
1209 7116110 : for ( j = i + 1; ( j < len ) && ( tempr > x[j] ); j++ )
1210 : {
1211 4337407 : x[j - 1] = x[j];
1212 4337407 : idx[j - 1] = idx[j];
1213 : }
1214 2778703 : x[j - 1] = tempr;
1215 2778703 : idx[j - 1] = tempi;
1216 : }
1217 :
1218 981577 : return;
1219 : }
1220 :
1221 :
1222 : /*-------------------------------------------------------------------*
1223 : * is_IVAS_bitrate()
1224 : *
1225 : * check if the bitrate is IVAS supported active bitrate
1226 : *-------------------------------------------------------------------*/
1227 :
1228 : /*! r: flag indicating a valid bitrate */
1229 793590 : int16_t is_IVAS_bitrate(
1230 : const int32_t ivas_total_brate /* i : IVAS total bitrate */
1231 : )
1232 : {
1233 : int16_t j;
1234 :
1235 793590 : j = SIZE_IVAS_BRATE_TBL - IVAS_NUM_ACTIVE_BRATES; /* skip NO_DATA and SID bitrates */
1236 5231412 : while ( j < SIZE_IVAS_BRATE_TBL && ivas_total_brate != ivas_brate_tbl[j] )
1237 : {
1238 4437822 : j++;
1239 : }
1240 :
1241 793590 : if ( j >= SIZE_IVAS_BRATE_TBL )
1242 : {
1243 0 : return 0;
1244 : }
1245 :
1246 793590 : return 1;
1247 : }
1248 :
1249 :
1250 : /*-------------------------------------------------------------------*
1251 : * is_DTXrate()
1252 : *
1253 : * identify DTX frame bitrates
1254 : *-------------------------------------------------------------------*/
1255 :
1256 190046053 : int16_t is_DTXrate(
1257 : const int32_t ivas_total_brate /* i : IVAS total bitrate */
1258 : )
1259 : {
1260 190046053 : int16_t dtx_rate_flag = 0;
1261 :
1262 190046053 : if ( is_SIDrate( ivas_total_brate ) || ( ivas_total_brate == FRAME_NO_DATA ) )
1263 : {
1264 2564939 : dtx_rate_flag = 1;
1265 : }
1266 :
1267 190046053 : return dtx_rate_flag;
1268 : }
1269 :
1270 : /*-------------------------------------------------------------------*
1271 : * is_SIDrate()
1272 : *
1273 : * identify SID frame bitrates
1274 : *-------------------------------------------------------------------*/
1275 :
1276 211936132 : int16_t is_SIDrate(
1277 : const int32_t ivas_total_brate /* i : IVAS total bitrate */
1278 : )
1279 : {
1280 211936132 : int16_t sid_rate_flag = 0;
1281 :
1282 211936132 : if ( ( ivas_total_brate == SID_1k75 ) ||
1283 211935260 : ( ivas_total_brate == SID_2k40 ) ||
1284 : ( ivas_total_brate == IVAS_SID_5k2 ) )
1285 : {
1286 289072 : sid_rate_flag = 1;
1287 : }
1288 :
1289 211936132 : return sid_rate_flag;
1290 : }
1291 :
1292 :
1293 : /*-------------------------------------------------------------------*
1294 : * rand_triangular_signed()
1295 : *
1296 : * generate a random value with a triangular pdf in [-0.5, 0.5]
1297 : *-------------------------------------------------------------------*/
1298 :
1299 1219252456 : float rand_triangular_signed(
1300 : int16_t *seed )
1301 : {
1302 : float rand_val;
1303 :
1304 1219252456 : rand_val = own_random( seed ) * OUTMAX_INV;
1305 1219252456 : if ( rand_val <= 0.0f )
1306 : {
1307 : /* rand_val in [-1, 0] */
1308 608702245 : return 0.5f * sqrtf( rand_val + 1.0f ) - 0.5f;
1309 : }
1310 : else
1311 : {
1312 : /* rand_val in (0, 1) */
1313 610550211 : return 0.5f - 0.5f * sqrtf( 1.0f - rand_val );
1314 : }
1315 : }
1316 193551 : Word16 matrix_product_fx(
1317 : const Word32 *X_fx, /* i : left hand matrix Qx*/
1318 : const Word16 rowsX, /* i : number of rows of the left hand matrix Q0*/
1319 : const Word16 colsX, /* i : number of columns of the left hand matrix Q0*/
1320 : const Word16 transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication Q0*/
1321 : const Word32 *Y_fx, /* i : right hand matrix Qy*/
1322 : const Word16 rowsY, /* i : number of rows of the right hand matrix Q0*/
1323 : const Word16 colsY, /* i : number of columns of the right hand matrix Q0*/
1324 : const Word16 transpY, /* i : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/
1325 : Word32 *Z_fx /* o : resulting matrix after the matrix multiplication Qx + Qy - 31*/
1326 : )
1327 : {
1328 : Word16 i, j, k;
1329 : Word16 x_idx, y_idx;
1330 193551 : Word32 *Zp_fx = Z_fx;
1331 :
1332 : /* Processing */
1333 :
1334 193551 : if ( transpX == 1 && transpY == 0 ) /* We use X transpose */
1335 : {
1336 193551 : if ( rowsX != rowsY )
1337 : {
1338 0 : return EXIT_FAILURE;
1339 : }
1340 967755 : for ( j = 0; j < colsY; ++j )
1341 : {
1342 4645224 : for ( i = 0; i < colsX; ++i )
1343 : {
1344 3871020 : ( *Zp_fx ) = 0;
1345 :
1346 23226120 : for ( k = 0; k < rowsX; ++k )
1347 : {
1348 19355100 : x_idx = k + i * rowsX; /*Q0*/
1349 19355100 : y_idx = k + j * rowsY; /*Q0*/
1350 19355100 : ( *Zp_fx ) = *Zp_fx + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/
1351 : }
1352 3871020 : Zp_fx++;
1353 : }
1354 : }
1355 : }
1356 0 : else if ( transpX == 0 && transpY == 1 ) /* We use Y transpose */
1357 : {
1358 0 : if ( colsX != colsY )
1359 : {
1360 0 : return EXIT_FAILURE;
1361 : }
1362 0 : for ( j = 0; j < rowsY; ++j )
1363 : {
1364 0 : for ( i = 0; i < rowsX; ++i )
1365 : {
1366 0 : ( *Zp_fx ) = 0;
1367 0 : for ( k = 0; k < colsX; ++k )
1368 : {
1369 0 : x_idx = i + k * rowsX; /*Q0*/
1370 0 : y_idx = j + k * rowsY; /*Q0*/
1371 0 : ( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/
1372 : }
1373 0 : Zp_fx++;
1374 : }
1375 : }
1376 : }
1377 0 : else if ( transpX == 1 && transpY == 1 ) /* We use both transpose */
1378 : {
1379 0 : if ( rowsX != colsY )
1380 : {
1381 0 : return EXIT_FAILURE;
1382 : }
1383 0 : for ( j = 0; j < rowsY; ++j )
1384 : {
1385 0 : for ( i = 0; i < colsX; ++i )
1386 : {
1387 0 : ( *Zp_fx ) = 0;
1388 :
1389 0 : for ( k = 0; k < colsX; ++k )
1390 : {
1391 0 : x_idx = k + i * rowsX; /*Q0*/
1392 0 : y_idx = j + k * rowsY; /*Q0*/
1393 0 : ( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31*/
1394 : }
1395 :
1396 0 : Zp_fx++;
1397 : }
1398 : }
1399 : }
1400 : else /* Regular case */
1401 : {
1402 0 : if ( colsX != rowsY )
1403 : {
1404 0 : return EXIT_FAILURE;
1405 : }
1406 :
1407 0 : for ( j = 0; j < colsY; ++j )
1408 : {
1409 0 : for ( i = 0; i < rowsX; ++i )
1410 : {
1411 0 : ( *Zp_fx ) = 0;
1412 :
1413 0 : for ( k = 0; k < colsX; ++k )
1414 : {
1415 0 : x_idx = i + k * rowsX; /*Q0*/
1416 0 : y_idx = k + j * rowsY; /*Q0*/
1417 0 : ( *Zp_fx ) = ( *Zp_fx ) + Mpy_32_32( X_fx[x_idx], Y_fx[y_idx] ); /*Qx + Qy - 31 L_sat_add() */
1418 : /* TODO: overflow of Z_fx to be checked */
1419 0 : move32();
1420 : }
1421 0 : Zp_fx++;
1422 : }
1423 : }
1424 : }
1425 :
1426 193551 : return EXIT_SUCCESS;
1427 : }
1428 :
1429 352160 : Word16 matrix_product_q30_fx(
1430 : const Word32 *X_fx, /* i : left hand matrix Q31*/
1431 : const Word16 rowsX, /* i : number of rows of the left hand matrix Q0*/
1432 : const Word16 colsX, /* i : number of columns of the left hand matrix Q0*/
1433 : const Word16 transpX, /* i : flag indicating the transposition of the left hand matrix prior to the multiplication Q0*/
1434 : const Word32 *Y_fx, /* i : right hand matrix Q25*/
1435 : const Word16 rowsY, /* i : number of rows of the right hand matrix Q0*/
1436 : const Word16 colsY, /* i : number of columns of the right hand matrix Q0*/
1437 : const Word16 transpY, /* i : flag indicating the transposition of the right hand matrix prior to the multiplication Q0*/
1438 : Word32 *Z_fx /* o : resulting matrix after the matrix multiplication Q30*/
1439 : )
1440 : {
1441 : Word16 i, j, k;
1442 : Word16 x_idx, y_idx;
1443 352160 : Word32 *Zp_fx = Z_fx;
1444 : int64_t W_tmp;
1445 :
1446 : /* Processing */
1447 352160 : test();
1448 352160 : test();
1449 352160 : test();
1450 352160 : if ( transpX == 1 && transpY == 0 ) /* We use X transpose */
1451 : {
1452 158609 : if ( rowsX != rowsY )
1453 : {
1454 0 : return EXIT_FAILURE;
1455 : }
1456 317218 : for ( j = 0; j < colsY; ++j )
1457 : {
1458 1657268 : for ( i = 0; i < colsX; ++i )
1459 : {
1460 1498659 : W_tmp = 0;
1461 17478586 : for ( k = 0; k < rowsX; ++k )
1462 : {
1463 : /*( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[k + j * rowsY] ) ); */
1464 15979927 : x_idx = k + i * rowsX; /* Q0 */
1465 15979927 : y_idx = k + j * rowsY; /* Q0 */
1466 15979927 : W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56 */
1467 : }
1468 1498659 : W_tmp = W_tmp * 64; /* W_shl( W_tmp, 6 ); */ /*Q62*/
1469 1498659 : ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /* W_round64_L( W_tmp ); */ /*Q30*/
1470 1498659 : Zp_fx++;
1471 : }
1472 : }
1473 : }
1474 193551 : else if ( transpX == 0 && transpY == 1 ) /* We use Y transpose */
1475 : {
1476 0 : if ( colsX != colsY )
1477 : {
1478 0 : return EXIT_FAILURE;
1479 : }
1480 0 : for ( j = 0; j < rowsY; ++j )
1481 : {
1482 0 : for ( i = 0; i < rowsX; ++i )
1483 : {
1484 0 : W_tmp = 0;
1485 0 : for ( k = 0; k < colsX; ++k )
1486 : {
1487 : /* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[j + k * rowsY] ) ); */
1488 0 : x_idx = i + k * rowsX; /*Q0*/
1489 0 : y_idx = j + k * rowsY; /*Q0*/
1490 0 : W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56 */
1491 : }
1492 0 : W_tmp = W_tmp * 64; /*Q62*/
1493 0 : ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/
1494 0 : Zp_fx++;
1495 : }
1496 : }
1497 : }
1498 193551 : else if ( transpX == 1 && transpY == 1 ) /* We use both transpose */
1499 : {
1500 0 : if ( rowsX != colsY )
1501 : {
1502 0 : return EXIT_FAILURE;
1503 : }
1504 0 : for ( j = 0; j < rowsY; ++j )
1505 : {
1506 0 : for ( i = 0; i < colsX; ++i )
1507 : {
1508 0 : W_tmp = 0;
1509 0 : for ( k = 0; k < colsX; ++k )
1510 : {
1511 : /* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[k + i * rowsX], Y_fx[j + k * rowsY] ) ); */
1512 0 : x_idx = k + i * rowsX; /*Q0*/
1513 0 : y_idx = j + k * rowsY; /*Q0*/
1514 0 : W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56*/
1515 : }
1516 0 : W_tmp = W_tmp * 64; /*Q62*/
1517 0 : ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/
1518 :
1519 0 : Zp_fx++;
1520 : }
1521 : }
1522 : }
1523 : else /* Regular case */
1524 : {
1525 193551 : if ( colsX != rowsY )
1526 : {
1527 0 : return EXIT_FAILURE;
1528 : }
1529 :
1530 967755 : for ( j = 0; j < colsY; ++j )
1531 : {
1532 4645224 : for ( i = 0; i < rowsX; ++i )
1533 : {
1534 3871020 : W_tmp = 0;
1535 :
1536 19355100 : for ( k = 0; k < colsX; ++k )
1537 : {
1538 : /* ( *Zp_fx ) = L_add( *Zp_fx, Mpy_32_32( X_fx[i + k * rowsX], Y_fx[k + j * rowsY] ) ); */
1539 15484080 : x_idx = i + k * rowsX; /*Q0*/
1540 15484080 : y_idx = k + j * rowsY; /*Q0*/
1541 15484080 : W_tmp += ( (int64_t) X_fx[x_idx] * (int64_t) Y_fx[y_idx] ); /* Q56*/
1542 : }
1543 3871020 : W_tmp = W_tmp * 64; /*Q62*/
1544 3871020 : ( *Zp_fx ) = ( W_tmp + 0x80000000 ) >> 32; /*Q30*/
1545 :
1546 3871020 : Zp_fx++;
1547 : }
1548 : }
1549 : }
1550 :
1551 352160 : return EXIT_SUCCESS;
1552 : }
|