Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #include <assert.h>
40 : #include "prot.h"
41 : #include "ivas_cnst.h"
42 : #include "wmc_auto.h"
43 :
44 : #if __STDC_VERSION__ >= 199901L /* C99 or later: restrict is a language keyword */
45 : #if defined __ICL /* when __ICL is defined, spell the qualifier as __restrict instead */
46 : #define restrict __restrict
47 : #endif
48 : #else /* older than C99: make restrict expand to nothing */
49 : #define restrict
50 : #endif
51 :
52 :
53 : static void fft8( float *vec ); /* defined below with a restrict-qualified parameter */
54 : static void fft10( float *vec ); /* defined below with a restrict-qualified parameter */
55 : static void fft16( float *vec ); /* defined below with a restrict-qualified parameter */
56 : static void fft20( float *vec ); /* defined below; the parameter is named signal there */
57 : static void fft30( float *vec ); /* definition is not part of this chunk */
58 : static void fft5s( float *x, const int16_t stride ); /* fft15 has no forward declaration; it is defined before any use visible here */
59 :
60 :
61 : #define COS_PI_DIV8 9.238795325112867e-1f /* cos(pi/8) */
62 : #define COS_3PI_DIV8 3.826834323650898e-1f /* cos(3*pi/8) */
63 : #define SQRT2PLUS1 2.414213562373095f /* sqrt(2) + 1 */
64 : #define SQRT2MINUS1 4.142135623730952e-1f /* sqrt(2) - 1 */
65 :
66 :
67 : /*******************************************************************************
68 : Functionname: fft8
69 : *******************************************************************************
70 :
71 : Description: In-place 8-point FFT. Complex-valued input takes 52 real additions
72 : and 4 real multiplications (the four INV_SQRT2 products).
73 :
74 : Arguments: vec - pointer to 16 floats: 8 complex points, interleaved real / imaginary parts; overwritten with the result
75 :
76 : Return: none (the result is written back into vec)
77 :
78 : *******************************************************************************/
79 116897328 : static void fft8( float *restrict vec )
80 : {
81 : float temp1[16]; /* first-stage sums / differences; entries 0..3 are reused further down */
82 : float temp2[16]; /* second-stage values consumed by the post-additions */
83 :
84 :
85 : /* Pre-additions: sum and difference of complex points k and k+4 (vec[i] and vec[i+8]) */
86 116897328 : temp1[0] = vec[0] + vec[8];
87 116897328 : temp1[2] = vec[0] - vec[8];
88 116897328 : temp1[1] = vec[1] + vec[9];
89 116897328 : temp1[3] = vec[1] - vec[9];
90 116897328 : temp1[4] = vec[2] + vec[10];
91 116897328 : temp1[6] = vec[2] - vec[10];
92 116897328 : temp1[5] = vec[3] + vec[11];
93 116897328 : temp1[7] = vec[3] - vec[11];
94 116897328 : temp1[8] = vec[4] + vec[12];
95 116897328 : temp1[10] = vec[4] - vec[12];
96 116897328 : temp1[9] = vec[5] + vec[13];
97 116897328 : temp1[11] = vec[5] - vec[13];
98 116897328 : temp1[12] = vec[6] + vec[14];
99 116897328 : temp1[14] = vec[6] - vec[14];
100 116897328 : temp1[13] = vec[7] + vec[15];
101 116897328 : temp1[15] = vec[7] - vec[15];
102 :
103 : /* Pre-additions and core multiplications */
104 116897328 : temp2[0] = temp1[0] + temp1[8];
105 116897328 : temp2[4] = temp1[0] - temp1[8];
106 116897328 : temp2[1] = temp1[1] + temp1[9];
107 116897328 : temp2[5] = temp1[1] - temp1[9];
108 116897328 : temp2[8] = temp1[2] - temp1[11]; /* real and imaginary parts are cross-combined here (even index with odd index) */
109 116897328 : temp2[10] = temp1[2] + temp1[11];
110 116897328 : temp2[9] = temp1[3] + temp1[10];
111 116897328 : temp2[11] = temp1[3] - temp1[10];
112 116897328 : temp2[2] = temp1[4] + temp1[12];
113 116897328 : temp2[7] = temp1[4] - temp1[12];
114 116897328 : temp2[3] = temp1[5] + temp1[13];
115 116897328 : temp2[6] = temp1[13] - temp1[5]; /* note the reversed operand order compared with the lines above */
116 :
117 116897328 : temp1[1] = temp1[6] + temp1[14]; /* temp1[0..3] are overwritten from here on; their earlier values were consumed above */
118 116897328 : temp1[2] = temp1[6] - temp1[14];
119 116897328 : temp1[0] = temp1[7] + temp1[15];
120 116897328 : temp1[3] = temp1[7] - temp1[15];
121 :
122 116897328 : temp2[12] = ( temp1[0] + temp1[2] ) * INV_SQRT2; /* INV_SQRT2 is not defined in this chunk; presumably 1/sqrt(2) from an included header - confirm */
123 116897328 : temp2[14] = ( temp1[0] - temp1[2] ) * INV_SQRT2;
124 116897328 : temp2[13] = ( temp1[3] - temp1[1] ) * INV_SQRT2;
125 116897328 : temp2[15] = ( temp1[1] + temp1[3] ) * -INV_SQRT2;
126 :
127 : /* Post-additions: every vec[0..15] entry is written exactly once; all reads of vec happened in the first stage */
128 116897328 : vec[0] = temp2[0] + temp2[2];
129 116897328 : vec[8] = temp2[0] - temp2[2];
130 116897328 : vec[1] = temp2[1] + temp2[3];
131 116897328 : vec[9] = temp2[1] - temp2[3];
132 116897328 : vec[4] = temp2[4] - temp2[6];
133 116897328 : vec[12] = temp2[4] + temp2[6];
134 116897328 : vec[5] = temp2[5] - temp2[7];
135 116897328 : vec[13] = temp2[5] + temp2[7];
136 116897328 : vec[6] = temp2[8] + temp2[14];
137 116897328 : vec[14] = temp2[8] - temp2[14];
138 116897328 : vec[7] = temp2[9] + temp2[15];
139 116897328 : vec[15] = temp2[9] - temp2[15];
140 116897328 : vec[2] = temp2[10] + temp2[12];
141 116897328 : vec[10] = temp2[10] - temp2[12];
142 116897328 : vec[3] = temp2[11] + temp2[13];
143 116897328 : vec[11] = temp2[11] - temp2[13];
144 :
145 116897328 : return;
146 : }
147 :
148 :
149 : /*******************************************************************************
150 : Functionname: fft16
151 : *******************************************************************************
152 :
153 : Description: In-place 16-point FFT. Complex-valued input takes 144 real additions and
154 : 24 real multiplications.
155 :
156 : Arguments: vec - pointer to 32 floats: 16 complex points, interleaved real / imaginary parts; overwritten with the result
157 :
158 : Return: none (the result is written back through vec)
159 :
160 : *******************************************************************************/
161 : /* fast implementation, completely unrolled and inlined: scalar locals only, no loops and no calls */
162 206504 : static void fft16( float *restrict vec )
163 : {
164 : float temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17,
165 : temp18, temp19, temp110, temp111, temp112, temp113, temp114, temp115; /* temp1<k>: scalar counterpart of temp1[k] in fft8 */
166 : float temp20, temp21, temp22, temp23, temp24, temp25, temp26, temp27,
167 : temp28, temp29, temp210, temp211, temp212, temp213, temp214, temp215; /* temp2<k>: scalar counterpart of temp2[k] in fft8 */
168 : float vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7,
169 : vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15; /* working copies; first the sums, later the differences of the two input halves */
170 :
171 :
172 : /* even: sums of complex points k and k+8 (vec[i] + vec[i+16]) */
173 206504 : vec0 = vec[0] + vec[16];
174 206504 : vec1 = vec[1] + vec[17];
175 206504 : vec2 = vec[2] + vec[18];
176 206504 : vec3 = vec[3] + vec[19];
177 206504 : vec4 = vec[4] + vec[20];
178 206504 : vec5 = vec[5] + vec[21];
179 206504 : vec6 = vec[6] + vec[22];
180 206504 : vec7 = vec[7] + vec[23];
181 206504 : vec8 = vec[8] + vec[24];
182 206504 : vec9 = vec[9] + vec[25];
183 206504 : vec10 = vec[10] + vec[26];
184 206504 : vec11 = vec[11] + vec[27];
185 206504 : vec12 = vec[12] + vec[28];
186 206504 : vec13 = vec[13] + vec[29];
187 206504 : vec14 = vec[14] + vec[30];
188 206504 : vec15 = vec[15] + vec[31];
189 :
190 : /* Pre-additions: from here to the INV_SQRT2 products the statements mirror fft8, with scalars instead of arrays */
191 206504 : temp10 = vec0 + vec8;
192 206504 : temp12 = vec0 - vec8;
193 206504 : temp11 = vec1 + vec9;
194 206504 : temp13 = vec1 - vec9;
195 206504 : temp14 = vec2 + vec10;
196 206504 : temp16 = vec2 - vec10;
197 206504 : temp15 = vec3 + vec11;
198 206504 : temp17 = vec3 - vec11;
199 206504 : temp18 = vec4 + vec12;
200 206504 : temp110 = vec4 - vec12;
201 206504 : temp19 = vec5 + vec13;
202 206504 : temp111 = vec5 - vec13;
203 206504 : temp112 = vec6 + vec14;
204 206504 : temp114 = vec6 - vec14;
205 206504 : temp113 = vec7 + vec15;
206 206504 : temp115 = vec7 - vec15;
207 :
208 : /* Pre-additions and core multiplications */
209 206504 : temp20 = temp10 + temp18;
210 206504 : temp24 = temp10 - temp18;
211 206504 : temp21 = temp11 + temp19;
212 206504 : temp25 = temp11 - temp19;
213 206504 : temp28 = temp12 - temp111;
214 206504 : temp210 = temp12 + temp111;
215 206504 : temp29 = temp13 + temp110;
216 206504 : temp211 = temp13 - temp110;
217 206504 : temp22 = temp14 + temp112;
218 206504 : temp27 = temp14 - temp112;
219 206504 : temp23 = temp15 + temp113;
220 206504 : temp26 = temp113 - temp15;
221 :
222 206504 : temp11 = temp16 + temp114; /* temp10..temp13 are reused from here on; their earlier values were consumed above */
223 206504 : temp12 = temp16 - temp114;
224 206504 : temp10 = temp17 + temp115;
225 206504 : temp13 = temp17 - temp115;
226 :
227 206504 : temp212 = ( temp10 + temp12 ) * INV_SQRT2; /* INV_SQRT2 is not defined in this chunk; presumably 1/sqrt(2) from an included header - confirm */
228 206504 : temp214 = ( temp10 - temp12 ) * INV_SQRT2;
229 206504 : temp213 = ( temp13 - temp11 ) * INV_SQRT2;
230 206504 : temp215 = ( temp11 + temp13 ) * -INV_SQRT2;
231 :
232 :
233 : /* odd: differences of complex points k and k+8 (vec[i] - vec[i+16]); vec0..vec15 are overwritten */
234 206504 : vec0 = vec[0] - vec[16];
235 206504 : vec1 = vec[1] - vec[17];
236 206504 : vec2 = vec[2] - vec[18];
237 206504 : vec3 = vec[3] - vec[19];
238 206504 : vec4 = vec[4] - vec[20];
239 206504 : vec5 = vec[5] - vec[21];
240 206504 : vec6 = vec[6] - vec[22];
241 206504 : vec7 = vec[7] - vec[23];
242 206504 : vec8 = vec[8] - vec[24];
243 206504 : vec9 = vec[9] - vec[25];
244 206504 : vec10 = vec[10] - vec[26];
245 206504 : vec11 = vec[11] - vec[27];
246 206504 : vec12 = vec[12] - vec[28];
247 206504 : vec13 = vec[13] - vec[29];
248 206504 : vec14 = vec[14] - vec[30];
249 206504 : vec15 = vec[15] - vec[31];
250 :
251 : /* Pre-additions and core multiplications (temp14..temp115 are reused for the odd half) */
252 206504 : temp19 = ( vec2 + vec14 ) * -COS_3PI_DIV8;
253 206504 : temp110 = ( vec2 - vec14 ) * COS_PI_DIV8;
254 206504 : temp18 = ( vec3 + vec15 ) * COS_3PI_DIV8;
255 206504 : temp111 = ( vec3 - vec15 ) * COS_PI_DIV8;
256 206504 : temp15 = ( vec4 + vec12 ) * -INV_SQRT2;
257 206504 : temp16 = ( vec4 - vec12 ) * INV_SQRT2;
258 206504 : temp14 = ( vec5 + vec13 ) * INV_SQRT2;
259 206504 : temp17 = ( vec5 - vec13 ) * INV_SQRT2;
260 206504 : temp113 = ( vec6 + vec10 ) * -COS_PI_DIV8;
261 206504 : temp114 = ( vec6 - vec10 ) * COS_3PI_DIV8;
262 206504 : temp112 = ( vec7 + vec11 ) * COS_PI_DIV8;
263 206504 : temp115 = ( vec7 - vec11 ) * COS_3PI_DIV8;
264 :
265 : /* Core multiplications: must run before the += updates below, which modify temp18..temp111 */
266 206504 : vec2 = temp18 * SQRT2PLUS1 - temp112 * SQRT2MINUS1;
267 206504 : vec3 = temp19 * SQRT2PLUS1 - temp113 * SQRT2MINUS1;
268 206504 : vec4 = temp110 * SQRT2MINUS1 - temp114 * SQRT2PLUS1;
269 206504 : vec5 = temp111 * SQRT2MINUS1 - temp115 * SQRT2PLUS1;
270 :
271 : /* Post-additions */
272 206504 : temp18 += temp112;
273 206504 : temp19 += temp113;
274 206504 : temp110 += temp114;
275 206504 : temp111 += temp115;
276 :
277 206504 : vec6 = vec0 + temp14;
278 206504 : vec10 = vec0 - temp14;
279 206504 : vec7 = vec1 + temp15;
280 206504 : vec11 = vec1 - temp15;
281 :
282 206504 : vec12 = temp16 - vec9;
283 206504 : vec14 = temp16 + vec9;
284 206504 : vec13 = vec8 + temp17;
285 206504 : vec15 = vec8 - temp17;
286 :
287 206504 : temp10 = vec6 - vec14;
288 206504 : temp12 = vec6 + vec14;
289 206504 : temp11 = vec7 + vec15;
290 206504 : temp13 = vec7 - vec15;
291 206504 : temp14 = vec10 + vec12;
292 206504 : temp16 = vec10 - vec12;
293 206504 : temp15 = vec11 + vec13;
294 206504 : temp17 = vec11 - vec13;
295 :
296 206504 : vec10 = temp18 + temp110; /* vec10 is written before temp110 is replaced by the difference on the next line */
297 206504 : temp110 = temp18 - temp110;
298 206504 : vec11 = temp19 + temp111; /* same ordering constraint for vec11 / temp111 */
299 206504 : temp111 = temp19 - temp111;
300 :
301 206504 : temp112 = vec2 + vec4;
302 206504 : temp114 = vec2 - vec4;
303 206504 : temp113 = vec3 + vec5;
304 206504 : temp115 = vec3 - vec5;
305 :
306 :
307 : /* Post-additions: 32 sequential stores through vec, which is advanced by each one; all inputs were read above */
308 206504 : *vec++ = temp20 + temp22;
309 206504 : *vec++ = temp21 + temp23;
310 206504 : *vec++ = temp12 + vec10;
311 206504 : *vec++ = temp13 + vec11;
312 206504 : *vec++ = temp210 + temp212;
313 206504 : *vec++ = temp211 + temp213;
314 206504 : *vec++ = temp10 + temp112;
315 206504 : *vec++ = temp11 + temp113;
316 206504 : *vec++ = temp24 - temp26;
317 206504 : *vec++ = temp25 - temp27;
318 206504 : *vec++ = temp16 + temp114;
319 206504 : *vec++ = temp17 + temp115;
320 206504 : *vec++ = temp28 + temp214;
321 206504 : *vec++ = temp29 + temp215;
322 206504 : *vec++ = temp14 + temp110;
323 206504 : *vec++ = temp15 + temp111;
324 206504 : *vec++ = temp20 - temp22; /* second half of the output: same operand pairs as the first half with the opposite sign */
325 206504 : *vec++ = temp21 - temp23;
326 206504 : *vec++ = temp12 - vec10;
327 206504 : *vec++ = temp13 - vec11;
328 206504 : *vec++ = temp210 - temp212;
329 206504 : *vec++ = temp211 - temp213;
330 206504 : *vec++ = temp10 - temp112;
331 206504 : *vec++ = temp11 - temp113;
332 206504 : *vec++ = temp24 + temp26;
333 206504 : *vec++ = temp25 + temp27;
334 206504 : *vec++ = temp16 - temp114;
335 206504 : *vec++ = temp17 - temp115;
336 206504 : *vec++ = temp28 - temp214;
337 206504 : *vec++ = temp29 - temp215;
338 206504 : *vec++ = temp14 - temp110;
339 206504 : *vec++ = temp15 - temp111;
340 :
341 206504 : return;
342 : }
343 :
344 :
345 : /*******************************************************************************
346 : Functionname: fft15
347 : *******************************************************************************
348 :
349 : Description: In-place 15-point FFT. Complex-valued input takes 176 real additions
350 : and 34 real multiplications.
351 :
352 : Arguments: vec - pointer to 30 floats: 15 complex points, interleaved real / imaginary parts; overwritten with the result
353 :
354 : Return: none (the result is written back through vec)
355 :
356 : *******************************************************************************/
357 2749403376 : static void fft15( float *restrict vec )
358 : {
359 :
360 : float r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16, r17; /* real-part intermediates; several are reassigned between stages */
361 : float i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16, i17; /* imaginary-part intermediates; several are reassigned between stages */
362 : float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9,
363 : tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17, tmp18, tmp19,
364 : tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27, tmp28, tmp29; /* in the pre- and post-addition stages even indices hold real values and odd indices imaginary values */
365 :
366 :
367 : /* Pre-additions real part (even vec indices; vec[0] is added separately when r0 is formed) */
368 2749403376 : r1 = vec[2] + vec[8];
369 2749403376 : r2 = vec[2] - vec[8];
370 2749403376 : r3 = vec[4] + vec[16];
371 2749403376 : r4 = vec[4] - vec[16];
372 2749403376 : r5 = vec[6] + vec[24];
373 2749403376 : r6 = vec[6] - vec[24];
374 2749403376 : r7 = vec[10] + vec[20];
375 2749403376 : r8 = vec[10] - vec[20];
376 2749403376 : r9 = vec[12] + vec[18];
377 2749403376 : r10 = vec[12] - vec[18];
378 2749403376 : r11 = vec[14] + vec[26];
379 2749403376 : r12 = vec[14] - vec[26];
380 2749403376 : r13 = vec[22] + vec[28];
381 2749403376 : r14 = vec[22] - vec[28];
382 :
383 2749403376 : tmp2 = r1 + r3;
384 2749403376 : tmp4 = r1 - r3;
385 2749403376 : tmp6 = r2 + r14;
386 2749403376 : tmp8 = r2 - r14;
387 2749403376 : tmp10 = r4 + r12;
388 2749403376 : tmp12 = r4 - r12;
389 2749403376 : tmp14 = r5 + r9;
390 2749403376 : tmp16 = r5 - r9;
391 2749403376 : tmp18 = r11 + r13;
392 2749403376 : tmp20 = r11 - r13;
393 :
394 : /* Pre-additions imaginary part (odd vec indices; same pairing as the real part, shifted by one) */
395 2749403376 : i1 = vec[3] + vec[9];
396 2749403376 : i2 = vec[3] - vec[9];
397 2749403376 : i3 = vec[5] + vec[17];
398 2749403376 : i4 = vec[5] - vec[17];
399 2749403376 : i5 = vec[7] + vec[25];
400 2749403376 : i6 = vec[7] - vec[25];
401 2749403376 : i7 = vec[11] + vec[21];
402 2749403376 : i8 = vec[11] - vec[21];
403 2749403376 : i9 = vec[13] + vec[19];
404 2749403376 : i10 = vec[13] - vec[19];
405 2749403376 : i11 = vec[15] + vec[27];
406 2749403376 : i12 = vec[15] - vec[27];
407 2749403376 : i13 = vec[23] + vec[29];
408 2749403376 : i14 = vec[23] - vec[29];
409 :
410 2749403376 : tmp3 = i1 + i3;
411 2749403376 : tmp5 = i1 - i3;
412 2749403376 : tmp7 = i2 + i14;
413 2749403376 : tmp9 = i2 - i14;
414 2749403376 : tmp11 = i4 + i12;
415 2749403376 : tmp13 = i4 - i12;
416 2749403376 : tmp15 = i5 + i9;
417 2749403376 : tmp17 = i5 - i9;
418 2749403376 : tmp19 = i11 + i13;
419 2749403376 : tmp21 = i11 - i13;
420 :
421 :
422 : /* Pre-additions and core multiplications (statement order matters: r/i/tmp names are read and then reassigned) */
423 2749403376 : tmp28 = tmp4 + tmp20;
424 2749403376 : tmp29 = tmp5 + tmp21;
425 2749403376 : r4 = tmp2 + tmp18;
426 2749403376 : i4 = tmp3 + tmp19;
427 2749403376 : r3 = ( r4 + tmp14 ) * -1.25f;
428 2749403376 : i3 = ( i4 + tmp15 ) * -1.25f;
429 2749403376 : r2 = ( tmp29 - i8 ) * -8.660254037844387e-1f; /* constant is sqrt(3)/2; real output is built from imaginary inputs */
430 2749403376 : i2 = ( tmp28 - r8 ) * 8.660254037844387e-1f; /* imaginary output is built from real inputs, opposite sign */
431 2749403376 : r1 = r4 + r7;
432 2749403376 : i1 = i4 + i7;
433 2749403376 : r0 = r1 + vec[0] + tmp14; /* sum of all 15 real inputs; stored as the first output below */
434 2749403376 : i0 = i1 + vec[1] + tmp15; /* sum of all 15 imaginary inputs; stored as the second output below */
435 2749403376 : r7 = tmp4 - tmp20;
436 2749403376 : i7 = tmp5 - tmp21;
437 2749403376 : r8 = ( tmp3 - tmp19 ) * -4.841229182759272e-1f; /* r8 / i8 are reassigned only after their old values were used for r2 / i2 */
438 2749403376 : i8 = ( tmp2 - tmp18 ) * 4.841229182759272e-1f;
439 2749403376 : tmp0 = tmp6 + r10;
440 2749403376 : tmp1 = tmp7 + i10;
441 2749403376 : tmp2 = r6 - tmp10;
442 2749403376 : tmp3 = i6 - tmp11;
443 2749403376 : r10 = tmp7 * -2.308262652881440f;
444 2749403376 : i10 = tmp6 * 2.308262652881440f;
445 2749403376 : r11 = tmp8 * 1.332676064001459f;
446 2749403376 : i11 = tmp9 * 1.332676064001459f;
447 2749403376 : r6 = ( r7 - tmp16 ) * 5.590169943749475e-1f; /* constant is sqrt(5)/4 */
448 2749403376 : i6 = ( i7 - tmp17 ) * 5.590169943749475e-1f;
449 2749403376 : r12 = ( tmp1 + tmp3 ) * 5.877852522924733e-1f; /* constant is sin(pi/5) */
450 2749403376 : i12 = ( tmp0 + tmp2 ) * -5.877852522924733e-1f;
451 2749403376 : r13 = ( tmp7 - tmp11 ) * -8.816778784387098e-1f;
452 2749403376 : i13 = ( tmp6 - tmp10 ) * 8.816778784387098e-1f;
453 2749403376 : r14 = ( tmp8 + tmp12 ) * 5.090369604551274e-1f;
454 2749403376 : i14 = ( tmp9 + tmp13 ) * 5.090369604551274e-1f;
455 2749403376 : r16 = tmp11 * 5.449068960040204e-1f;
456 2749403376 : i16 = tmp10 * -5.449068960040204e-1f;
457 2749403376 : r17 = tmp12 * 3.146021430912046e-1f;
458 2749403376 : i17 = tmp13 * 3.146021430912046e-1f;
459 :
460 2749403376 : r4 *= 1.875f; /* in-place scaling of values computed above */
461 2749403376 : i4 *= 1.875f;
462 2749403376 : r1 *= -1.5f;
463 2749403376 : i1 *= -1.5f;
464 2749403376 : r7 *= -8.385254915624212e-1f;
465 2749403376 : i7 *= -8.385254915624212e-1f;
466 2749403376 : r5 = tmp29 * 1.082531754730548f;
467 2749403376 : i5 = tmp28 * -1.082531754730548f;
468 2749403376 : r9 = tmp1 * 1.538841768587627f;
469 2749403376 : i9 = tmp0 * -1.538841768587627f;
470 2749403376 : r15 = tmp3 * 3.632712640026803e-1f;
471 2749403376 : i15 = tmp2 * -3.632712640026803e-1f;
472 :
473 :
474 : /* Post-additions real part */
475 2749403376 : tmp2 = r0 + r1;
476 2749403376 : tmp4 = r3 + r6;
477 2749403376 : tmp6 = r3 - r6;
478 2749403376 : tmp8 = r4 + r5;
479 2749403376 : tmp10 = r4 - r5;
480 2749403376 : tmp12 = r7 + r8;
481 2749403376 : tmp14 = r7 - r8;
482 2749403376 : tmp16 = r13 + r16;
483 2749403376 : tmp18 = r14 + r17;
484 2749403376 : tmp20 = r10 - r13;
485 2749403376 : tmp22 = r11 - r14;
486 2749403376 : tmp24 = r12 + r15;
487 2749403376 : tmp26 = r12 - r9;
488 :
489 2749403376 : r1 = tmp2 + r2; /* r1 is formed first because the next line overwrites r2 */
490 2749403376 : r2 = tmp2 - r2;
491 2749403376 : r3 = tmp4 + tmp26;
492 2749403376 : r4 = tmp4 - tmp26;
493 2749403376 : r5 = tmp6 + tmp24;
494 2749403376 : r6 = tmp6 - tmp24;
495 2749403376 : r7 = tmp16 + tmp18;
496 2749403376 : r8 = tmp16 - tmp18;
497 2749403376 : r9 = tmp20 - tmp22;
498 2749403376 : r10 = tmp20 + tmp22;
499 2749403376 : r11 = r1 + tmp8;
500 2749403376 : r12 = r2 + tmp10;
501 2749403376 : r13 = r11 - tmp12;
502 2749403376 : r14 = r12 - tmp14;
503 2749403376 : r15 = r12 + tmp14;
504 2749403376 : r16 = r11 + tmp12;
505 :
506 : /* Post-additions imaginary part (same statements as the real part with i / odd tmp names) */
507 2749403376 : tmp3 = i0 + i1;
508 2749403376 : tmp5 = i3 + i6;
509 2749403376 : tmp7 = i3 - i6;
510 2749403376 : tmp9 = i4 + i5;
511 2749403376 : tmp11 = i4 - i5;
512 2749403376 : tmp13 = i7 + i8;
513 2749403376 : tmp15 = i7 - i8;
514 2749403376 : tmp17 = i13 + i16;
515 2749403376 : tmp19 = i14 + i17;
516 2749403376 : tmp21 = i10 - i13;
517 2749403376 : tmp23 = i11 - i14;
518 2749403376 : tmp25 = i12 + i15;
519 2749403376 : tmp27 = i12 - i9;
520 :
521 2749403376 : i1 = tmp3 + i2; /* i1 is formed first because the next line overwrites i2 */
522 2749403376 : i2 = tmp3 - i2;
523 2749403376 : i3 = tmp5 + tmp27;
524 2749403376 : i4 = tmp5 - tmp27;
525 2749403376 : i5 = tmp7 + tmp25;
526 2749403376 : i6 = tmp7 - tmp25;
527 2749403376 : i7 = tmp17 + tmp19;
528 2749403376 : i8 = tmp17 - tmp19;
529 2749403376 : i9 = tmp21 - tmp23;
530 2749403376 : i10 = tmp21 + tmp23;
531 2749403376 : i11 = i1 + tmp9;
532 2749403376 : i12 = i2 + tmp11;
533 2749403376 : i13 = i11 - tmp13;
534 2749403376 : i14 = i12 - tmp15;
535 2749403376 : i15 = i12 + tmp15;
536 2749403376 : i16 = i11 + tmp13;
537 :
538 2749403376 : *vec++ = r0; /* 30 sequential stores (real, imaginary alternating); vec is advanced by each one and every input was read earlier */
539 2749403376 : *vec++ = i0;
540 2749403376 : *vec++ = r13 + r5 + r7;
541 2749403376 : *vec++ = i13 + i5 + i7;
542 2749403376 : *vec++ = r15 + r3 - r9;
543 2749403376 : *vec++ = i15 + i3 - i9;
544 2749403376 : *vec++ = r0 + r4;
545 2749403376 : *vec++ = i0 + i4;
546 2749403376 : *vec++ = r13 + r6 - r7;
547 2749403376 : *vec++ = i13 + i6 - i7;
548 2749403376 : *vec++ = r2;
549 2749403376 : *vec++ = i2;
550 2749403376 : *vec++ = r0 + r5;
551 2749403376 : *vec++ = i0 + i5;
552 2749403376 : *vec++ = r16 + r3 - r10;
553 2749403376 : *vec++ = i16 + i3 - i10;
554 2749403376 : *vec++ = r15 + r4 + r9;
555 2749403376 : *vec++ = i15 + i4 + i9;
556 2749403376 : *vec++ = r0 + r6;
557 2749403376 : *vec++ = i0 + i6;
558 2749403376 : *vec++ = r1;
559 2749403376 : *vec++ = i1;
560 2749403376 : *vec++ = r14 + r5 + r8;
561 2749403376 : *vec++ = i14 + i5 + i8;
562 2749403376 : *vec++ = r0 + r3;
563 2749403376 : *vec++ = i0 + i3;
564 2749403376 : *vec++ = r16 + r4 + r10;
565 2749403376 : *vec++ = i16 + i4 + i10;
566 2749403376 : *vec++ = r14 + r6 - r8;
567 2749403376 : *vec++ = i14 + i6 - i8;
568 :
569 2749403376 : return;
570 : }
571 :
572 : /*******************************************************************************
573 : Functionname: fft5s
574 : *******************************************************************************
575 :
576 : Description: In-place 5-point FFT on strided data.
577 :
578 : Arguments: x - pointer to input data (interleaved real / imaginary parts); point k has its real part at x[k * stride] and its imaginary part at x[k * stride + 1]
579 : stride - distance, in floats, between consecutive complex points
580 :
581 : Return: none (the five complex points are overwritten with the result)
582 :
583 : *******************************************************************************/
584 : static const float C51 = 0.9510565162951535f; /* sin(2*pi/5) */
585 : static const float C52 = -1.5388417685876270f; /* -( sin(2*pi/5) + sin(pi/5) ) */
586 : static const float C53 = -0.3632712640026803f; /* -( sin(2*pi/5) - sin(pi/5) ) */
587 : static const float C54 = 0.5590169943749475f; /* sqrt(5)/4 */
588 : static const float C55 = -1.25f; /* -5/4; these five constants are also used by fft10 and fft20 */
589 :
590 533276 : static void fft5s( float *x, const int16_t stride )
591 : {
592 : float r1, r2, r3, r4; /* real-part intermediates */
593 : float s1, s2, s3, s4; /* imaginary-part intermediates */
594 : float t; /* scratch value, reused several times */
595 : /* real part: pair point 1 with 4 and point 2 with 3 */
596 533276 : r1 = x[1 * stride] + x[4 * stride];
597 533276 : r4 = x[1 * stride] - x[4 * stride];
598 533276 : r3 = x[2 * stride] + x[3 * stride];
599 533276 : r2 = x[2 * stride] - x[3 * stride];
600 533276 : t = ( r1 - r3 ) * C54;
601 533276 : r1 = r1 + r3;
602 533276 : x[0] = x[0] + r1; /* output bin 0 (real): sum of all five real inputs, stored immediately */
603 533276 : r1 = x[0] + ( r1 * C55 ); /* uses the updated x[0] written on the previous line */
604 533276 : r3 = r1 - t;
605 533276 : r1 = r1 + t;
606 533276 : t = ( r4 + r2 ) * C51;
607 533276 : r4 = t + ( r4 * C52 );
608 533276 : r2 = t + ( r2 * C53 );
609 :
610 : /* imaginary part: identical sequence applied to the elements at offset +1 */
611 533276 : s1 = x[1 * stride + 1] + x[4 * stride + 1];
612 533276 : s4 = x[1 * stride + 1] - x[4 * stride + 1];
613 533276 : s3 = x[2 * stride + 1] + x[3 * stride + 1];
614 533276 : s2 = x[2 * stride + 1] - x[3 * stride + 1];
615 533276 : t = ( s1 - s3 ) * C54;
616 533276 : s1 = s1 + s3;
617 533276 : x[1] = x[1] + s1; /* output bin 0 (imaginary): sum of all five imaginary inputs */
618 533276 : s1 = x[1] + ( s1 * C55 ); /* uses the updated x[1] written on the previous line */
619 533276 : s3 = s1 - t;
620 533276 : s1 = s1 + t;
621 533276 : t = ( s4 + s2 ) * C51;
622 533276 : s4 = t + ( s4 * C52 );
623 533276 : s2 = t + ( s2 * C53 );
624 :
625 : /* combination: points 1..4 are written only now, after all their inputs were read above */
626 533276 : x[1 * stride] = r1 + s2;
627 533276 : x[4 * stride] = r1 - s2;
628 533276 : x[2 * stride] = r3 - s4;
629 533276 : x[3 * stride] = r3 + s4;
630 :
631 533276 : x[1 * stride + 1] = s1 - r2;
632 533276 : x[4 * stride + 1] = s1 + r2;
633 533276 : x[2 * stride + 1] = s3 + r4;
634 533276 : x[3 * stride + 1] = s3 - r4;
635 533276 : }
636 :
637 :
638 : /**
639 : * \brief Function performs a complex 10-point FFT
640 : * The FFT is performed inplace. The result of the FFT
641 : * is scaled by SCALEFACTOR10 bits. NOTE(review): SCALEFACTOR10 is not referenced in the code below - confirm.
642 : *
643 : * WOPS FLC version: 1093 cycles
644 : * WOPS with 32x16 bit multiplications: 196 cycles
645 : *
646 : * \param [i/o] vec interleaved complex data, 20 floats: vec[2k] = real part, vec[2k + 1] = imaginary part, k = 0..9
647 : * (the separate re / im / s parameters previously listed here do not exist in this signature;
648 : * the function takes this single pointer and uses fixed indices)
649 : *
650 : * \return void
651 : */
652 739233110 : static void fft10( float *restrict vec )
653 : {
654 : float t; /* scratch value, reused several times */
655 : float r1, r2, r3, r4; /* real-part intermediates of the current 5-point stage */
656 : float s1, s2, s3, s4; /* imaginary-part intermediates of the current 5-point stage */
657 : float y00, y01, y02, y03, y04, y05, y06, y07, y08, y09; /* outputs of the two 5-point stages, combined at the end */
658 : float y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
659 :
660 : /* 2 fft5 stages: the first uses the even-numbered complex points, the second the odd-numbered ones */
661 :
662 : /* real part (complex points 6, 4, 2, 8 and 0) */
663 739233110 : r1 = vec[12] + vec[8];
664 739233110 : r4 = vec[12] - vec[8];
665 739233110 : r3 = vec[4] + vec[16];
666 739233110 : r2 = vec[4] - vec[16];
667 739233110 : t = ( r1 - r3 ) * C54;
668 739233110 : r1 = r1 + r3;
669 739233110 : y00 = vec[0] + r1;
670 739233110 : r1 = y00 + ( r1 * C55 );
671 739233110 : r3 = r1 - t;
672 739233110 : r1 = r1 + t;
673 739233110 : t = ( r4 + r2 ) * C51;
674 739233110 : r4 = t + ( r4 * C52 );
675 739233110 : r2 = t + ( r2 * C53 );
676 :
677 : /* imaginary part (same points, offset +1) */
678 739233110 : s1 = vec[13] + vec[9];
679 739233110 : s4 = vec[13] - vec[9];
680 739233110 : s3 = vec[5] + vec[17];
681 739233110 : s2 = vec[5] - vec[17];
682 739233110 : t = ( s1 - s3 ) * C54;
683 739233110 : s1 = s1 + s3;
684 739233110 : y01 = vec[1] + s1;
685 739233110 : s1 = y01 + ( s1 * C55 );
686 739233110 : s3 = s1 - t;
687 739233110 : s1 = s1 + t;
688 739233110 : t = ( s4 + s2 ) * C51;
689 739233110 : s4 = t + ( s4 * C52 );
690 739233110 : s2 = t + ( s2 * C53 );
691 :
692 : /* combination: results are kept in locals, vec is not modified yet */
693 739233110 : y04 = r1 + s2;
694 739233110 : y16 = r1 - s2;
695 739233110 : y08 = r3 - s4;
696 739233110 : y12 = r3 + s4;
697 739233110 : y05 = s1 - r2;
698 739233110 : y17 = s1 + r2;
699 739233110 : y09 = s3 + r4;
700 739233110 : y13 = s3 - r4;
701 :
702 : /* real part (complex points 1, 9, 7, 3 and 5) */
703 739233110 : r1 = vec[2] + vec[18];
704 739233110 : r4 = vec[2] - vec[18];
705 739233110 : r3 = vec[14] + vec[6];
706 739233110 : r2 = vec[14] - vec[6];
707 739233110 : t = ( r1 - r3 ) * C54;
708 739233110 : r1 = r1 + r3;
709 739233110 : y02 = vec[10] + r1;
710 739233110 : r1 = y02 + ( r1 * C55 );
711 739233110 : r3 = r1 - t;
712 739233110 : r1 = r1 + t;
713 739233110 : t = ( r4 + r2 ) * C51;
714 739233110 : r4 = t + ( r4 * C52 );
715 739233110 : r2 = t + ( r2 * C53 );
716 :
717 : /* imaginary part (same points, offset +1) */
718 739233110 : s1 = vec[3] + vec[19];
719 739233110 : s4 = vec[3] - vec[19];
720 739233110 : s3 = vec[15] + vec[7];
721 739233110 : s2 = vec[15] - vec[7];
722 739233110 : t = ( s1 - s3 ) * C54;
723 739233110 : s1 = s1 + s3;
724 739233110 : y03 = vec[11] + s1;
725 739233110 : s1 = y03 + ( s1 * C55 );
726 739233110 : s3 = s1 - t;
727 739233110 : s1 = s1 + t;
728 739233110 : t = ( s4 + s2 ) * C51;
729 739233110 : s4 = t + ( s4 * C52 );
730 739233110 : s2 = t + ( s2 * C53 );
731 :
732 : /* combination */
733 739233110 : y06 = r1 + s2;
734 739233110 : y18 = r1 - s2;
735 739233110 : y10 = r3 - s4;
736 739233110 : y14 = r3 + s4;
737 739233110 : y07 = s1 - r2;
738 739233110 : y19 = s1 + r2;
739 739233110 : y11 = s3 + r4;
740 739233110 : y15 = s3 - r4;
741 :
742 : /* 5 fft2 stages: sums and differences of the two 5-point results; only here is vec overwritten */
743 739233110 : vec[0] = y00 + y02; /* bin 0 (real): sum of all ten real inputs */
744 739233110 : vec[1] = y01 + y03;
745 739233110 : vec[2] = y12 - y14;
746 739233110 : vec[3] = y13 - y15;
747 739233110 : vec[4] = y04 + y06;
748 739233110 : vec[5] = y05 + y07;
749 739233110 : vec[6] = y16 - y18;
750 739233110 : vec[7] = y17 - y19;
751 739233110 : vec[8] = y08 + y10;
752 739233110 : vec[9] = y09 + y11;
753 739233110 : vec[10] = y00 - y02;
754 739233110 : vec[11] = y01 - y03;
755 739233110 : vec[12] = y12 + y14;
756 739233110 : vec[13] = y13 + y15;
757 739233110 : vec[14] = y04 - y06;
758 739233110 : vec[15] = y05 - y07;
759 739233110 : vec[16] = y16 + y18;
760 739233110 : vec[17] = y17 + y19;
761 739233110 : vec[18] = y08 - y10;
762 739233110 : vec[19] = y09 - y11;
763 :
764 739233110 : return;
765 : }
766 :
767 : /**
768 : * \brief Function performs a complex 20-point FFT
769 : * The FFT is performed inplace. The result of the FFT
770 : * is scaled by SCALEFACTOR20 bits.
771 : *
772 : * WOPS FLC version: 1509 cycles
773 : * WOPS with 32x16 bit multiplications: 432 cycles
774 : *
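775 : * \param [i/o] signal interleaved complex data: real parts at signal[2k], imaginary parts at signal[2k + 1]
776 : * (the function body aliases these as re = signal and im = signal + 1
777 : * and indexes both with a fixed stride s = 2; there are no separate re / im / s parameters)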
778 : *
779 : * \return void
780 : */
781 804268550 : static void fft20( float *signal )
782 : {
783 804268550 : const int16_t s = 2;
784 804268550 : float *re = signal, *im = signal + 1;
785 : float r1, r2, r3, r4;
786 : float s1, s2, s3, s4;
787 : float x0, x1, x2, x3, x4;
788 : float t, t0, t1, t2, t3, t4, t5, t6, t7;
789 : float y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
790 : float y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
791 : float y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
792 : float y30, y31, y32, y33, y34, y35, y36, y37, y38, y39;
793 :
794 : /* 1. FFT5 stage */
795 :
796 : /* real part */
797 804268550 : x0 = re[s * 0];
798 804268550 : x1 = re[s * 16];
799 804268550 : x2 = re[s * 12];
800 804268550 : x3 = re[s * 8];
801 804268550 : x4 = re[s * 4];
802 804268550 : r1 = x1 + x4;
803 804268550 : r4 = x1 - x4;
804 804268550 : r3 = x2 + x3;
805 804268550 : r2 = x2 - x3;
806 804268550 : t = ( r1 - r3 ) * C54;
807 804268550 : r1 = r1 + r3;
808 804268550 : y00 = x0 + r1;
809 804268550 : r1 = y00 + ( r1 * C55 );
810 804268550 : r3 = r1 - t;
811 804268550 : r1 = r1 + t;
812 804268550 : t = ( r4 + r2 ) * C51;
813 804268550 : r4 = t + ( r4 * C52 );
814 804268550 : r2 = t + ( r2 * C53 );
815 :
816 : /* imaginary part */
817 804268550 : x0 = im[s * 0];
818 804268550 : x1 = im[s * 16];
819 804268550 : x2 = im[s * 12];
820 804268550 : x3 = im[s * 8];
821 804268550 : x4 = im[s * 4];
822 804268550 : s1 = x1 + x4;
823 804268550 : s4 = x1 - x4;
824 804268550 : s3 = x2 + x3;
825 804268550 : s2 = x2 - x3;
826 804268550 : t = ( s1 - s3 ) * C54;
827 804268550 : s1 = ( s1 + s3 );
828 804268550 : y01 = ( x0 + s1 );
829 804268550 : s1 = y01 + ( s1 * C55 );
830 804268550 : s3 = ( s1 - t );
831 804268550 : s1 = ( s1 + t );
832 804268550 : t = ( s4 + s2 ) * C51;
833 804268550 : s4 = t + ( s4 * C52 );
834 804268550 : s2 = t + ( s2 * C53 );
835 :
836 : /* combination */
837 804268550 : y08 = ( r1 + s2 );
838 804268550 : y32 = ( r1 - s2 );
839 804268550 : y16 = ( r3 - s4 );
840 804268550 : y24 = ( r3 + s4 );
841 :
842 804268550 : y09 = ( s1 - r2 );
843 804268550 : y33 = ( s1 + r2 );
844 804268550 : y17 = ( s3 + r4 );
845 804268550 : y25 = ( s3 - r4 );
846 :
847 : /* 2. FFT5 stage */
848 :
849 : /* real part */
850 804268550 : x0 = re[s * 5];
851 804268550 : x1 = re[s * 1];
852 804268550 : x2 = re[s * 17];
853 804268550 : x3 = re[s * 13];
854 804268550 : x4 = re[s * 9];
855 804268550 : r1 = ( x1 + x4 );
856 804268550 : r4 = ( x1 - x4 );
857 804268550 : r3 = ( x2 + x3 );
858 804268550 : r2 = ( x2 - x3 );
859 804268550 : t = ( r1 - r3 ) * C54;
860 804268550 : r1 = ( r1 + r3 );
861 804268550 : y02 = ( x0 + r1 );
862 804268550 : r1 = y02 + ( r1 * C55 );
863 804268550 : r3 = ( r1 - t );
864 804268550 : r1 = ( r1 + t );
865 804268550 : t = ( r4 + r2 ) * C51;
866 804268550 : r4 = t + ( r4 * C52 );
867 804268550 : r2 = t + ( r2 * C53 );
868 :
869 : /* imaginary part */
870 804268550 : x0 = im[s * 5];
871 804268550 : x1 = im[s * 1];
872 804268550 : x2 = im[s * 17];
873 804268550 : x3 = im[s * 13];
874 804268550 : x4 = im[s * 9];
875 804268550 : s1 = ( x1 + x4 );
876 804268550 : s4 = ( x1 - x4 );
877 804268550 : s3 = ( x2 + x3 );
878 804268550 : s2 = ( x2 - x3 );
879 804268550 : t = ( s1 - s3 ) * C54;
880 804268550 : s1 = ( s1 + s3 );
881 804268550 : y03 = ( x0 + s1 );
882 804268550 : s1 = y03 + ( s1 * C55 );
883 804268550 : s3 = ( s1 - t );
884 804268550 : s1 = ( s1 + t );
885 804268550 : t = ( s4 + s2 ) * C51;
886 804268550 : s4 = t + ( s4 * C52 );
887 804268550 : s2 = t + ( s2 * C53 );
888 :
889 : /* combination */
890 804268550 : y10 = ( r1 + s2 );
891 804268550 : y34 = ( r1 - s2 );
892 804268550 : y18 = ( r3 - s4 );
893 804268550 : y26 = ( r3 + s4 );
894 :
895 804268550 : y11 = ( s1 - r2 );
896 804268550 : y35 = ( s1 + r2 );
897 804268550 : y19 = ( s3 + r4 );
898 804268550 : y27 = ( s3 - r4 );
899 :
900 : /* 3. FFT5 stage */
901 :
902 : /* real part */
903 804268550 : x0 = re[s * 10];
904 804268550 : x1 = re[s * 6];
905 804268550 : x2 = re[s * 2];
906 804268550 : x3 = re[s * 18];
907 804268550 : x4 = re[s * 14];
908 804268550 : r1 = ( x1 + x4 );
909 804268550 : r4 = ( x1 - x4 );
910 804268550 : r3 = ( x2 + x3 );
911 804268550 : r2 = ( x2 - x3 );
912 804268550 : t = ( r1 - r3 ) * C54;
913 804268550 : r1 = ( r1 + r3 );
914 804268550 : y04 = ( x0 + r1 );
915 804268550 : r1 = y04 + ( r1 * C55 );
916 804268550 : r3 = ( r1 - t );
917 804268550 : r1 = ( r1 + t );
918 804268550 : t = ( r4 + r2 ) * C51;
919 804268550 : r4 = t + ( r4 * C52 );
920 804268550 : r2 = t + ( r2 * C53 );
921 :
922 : /* imaginary part */
923 804268550 : x0 = im[s * 10];
924 804268550 : x1 = im[s * 6];
925 804268550 : x2 = im[s * 2];
926 804268550 : x3 = im[s * 18];
927 804268550 : x4 = im[s * 14];
928 804268550 : s1 = ( x1 + x4 );
929 804268550 : s4 = ( x1 - x4 );
930 804268550 : s3 = ( x2 + x3 );
931 804268550 : s2 = ( x2 - x3 );
932 804268550 : t = ( s1 - s3 ) * C54;
933 804268550 : s1 = ( s1 + s3 );
934 804268550 : y05 = ( x0 + s1 );
935 804268550 : s1 = y05 + ( s1 * C55 );
936 804268550 : s3 = ( s1 - t );
937 804268550 : s1 = ( s1 + t );
938 804268550 : t = ( s4 + s2 ) * C51;
939 804268550 : s4 = t + ( s4 * C52 );
940 804268550 : s2 = t + ( s2 * C53 );
941 :
942 : /* combination */
943 804268550 : y12 = ( r1 + s2 );
944 804268550 : y36 = ( r1 - s2 );
945 804268550 : y20 = ( r3 - s4 );
946 804268550 : y28 = ( r3 + s4 );
947 :
948 804268550 : y13 = ( s1 - r2 );
949 804268550 : y37 = ( s1 + r2 );
950 804268550 : y21 = ( s3 + r4 );
951 804268550 : y29 = ( s3 - r4 );
952 :
953 : /* 4. FFT5 stage */
954 :
955 : /* real part */
956 804268550 : x0 = re[s * 15];
957 804268550 : x1 = re[s * 11];
958 804268550 : x2 = re[s * 7];
959 804268550 : x3 = re[s * 3];
960 804268550 : x4 = re[s * 19];
961 804268550 : r1 = ( x1 + x4 );
962 804268550 : r4 = ( x1 - x4 );
963 804268550 : r3 = ( x2 + x3 );
964 804268550 : r2 = ( x2 - x3 );
965 804268550 : t = ( r1 - r3 ) * C54;
966 804268550 : r1 = ( r1 + r3 );
967 804268550 : y06 = ( x0 + r1 );
968 804268550 : r1 = y06 + ( r1 * C55 );
969 804268550 : r3 = ( r1 - t );
970 804268550 : r1 = ( r1 + t );
971 804268550 : t = ( r4 + r2 ) * C51;
972 804268550 : r4 = t + ( r4 * C52 );
973 804268550 : r2 = t + ( r2 * C53 );
974 :
975 : /* imaginary part */
976 804268550 : x0 = im[s * 15];
977 804268550 : x1 = im[s * 11];
978 804268550 : x2 = im[s * 7];
979 804268550 : x3 = im[s * 3];
980 804268550 : x4 = im[s * 19];
981 804268550 : s1 = ( x1 + x4 );
982 804268550 : s4 = ( x1 - x4 );
983 804268550 : s3 = ( x2 + x3 );
984 804268550 : s2 = ( x2 - x3 );
985 804268550 : t = ( s1 - s3 ) * C54;
986 804268550 : s1 = ( s1 + s3 );
987 804268550 : y07 = ( x0 + s1 );
988 804268550 : s1 = y07 + ( s1 * C55 );
989 804268550 : s3 = ( s1 - t );
990 804268550 : s1 = ( s1 + t );
991 804268550 : t = ( s4 + s2 ) * C51;
992 804268550 : s4 = t + ( s4 * C52 );
993 804268550 : s2 = t + ( s2 * C53 );
994 :
995 : /* combination */
996 804268550 : y14 = ( r1 + s2 );
997 804268550 : y38 = ( r1 - s2 );
998 804268550 : y22 = ( r3 - s4 );
999 804268550 : y30 = ( r3 + s4 );
1000 :
1001 804268550 : y15 = ( s1 - r2 );
1002 804268550 : y39 = ( s1 + r2 );
1003 804268550 : y23 = ( s3 + r4 );
1004 804268550 : y31 = ( s3 - r4 );
1005 :
1006 :
1007 : /* 1. FFT4 stage */
1008 :
1009 : /* Pre-additions */
1010 804268550 : t0 = ( y00 + y04 );
1011 804268550 : t2 = ( y00 - y04 );
1012 804268550 : t1 = ( y01 + y05 );
1013 804268550 : t3 = ( y01 - y05 );
1014 804268550 : t4 = ( y02 + y06 );
1015 804268550 : t7 = ( y02 - y06 );
1016 804268550 : t5 = ( y07 + y03 );
1017 804268550 : t6 = ( y07 - y03 );
1018 :
1019 : /* Post-additions */
1020 804268550 : re[s * 0] = ( t0 + t4 );
1021 804268550 : im[s * 0] = ( t1 + t5 );
1022 804268550 : re[s * 5] = ( t2 - t6 );
1023 804268550 : im[s * 5] = ( t3 - t7 );
1024 804268550 : re[s * 10] = ( t0 - t4 );
1025 804268550 : im[s * 10] = ( t1 - t5 );
1026 804268550 : re[s * 15] = ( t2 + t6 );
1027 804268550 : im[s * 15] = ( t3 + t7 );
1028 :
1029 : /* 2. FFT4 stage */
1030 :
1031 : /* Pre-additions */
1032 804268550 : t0 = ( y08 + y12 );
1033 804268550 : t2 = ( y08 - y12 );
1034 804268550 : t1 = ( y09 + y13 );
1035 804268550 : t3 = ( y09 - y13 );
1036 804268550 : t4 = ( y10 + y14 );
1037 804268550 : t7 = ( y10 - y14 );
1038 804268550 : t5 = ( y15 + y11 );
1039 804268550 : t6 = ( y15 - y11 );
1040 :
1041 : /* Post-additions */
1042 804268550 : re[s * 4] = ( t0 + t4 );
1043 804268550 : im[s * 4] = ( t1 + t5 );
1044 804268550 : re[s * 9] = ( t2 - t6 );
1045 804268550 : im[s * 9] = ( t3 - t7 );
1046 804268550 : re[s * 14] = ( t0 - t4 );
1047 804268550 : im[s * 14] = ( t1 - t5 );
1048 804268550 : re[s * 19] = ( t2 + t6 );
1049 804268550 : im[s * 19] = ( t3 + t7 );
1050 :
1051 :
1052 : /* 3. FFT4 stage */
1053 :
1054 : /* Pre-additions */
1055 804268550 : t0 = ( y16 + y20 );
1056 804268550 : t2 = ( y16 - y20 );
1057 804268550 : t1 = ( y17 + y21 );
1058 804268550 : t3 = ( y17 - y21 );
1059 804268550 : t4 = ( y18 + y22 );
1060 804268550 : t7 = ( y18 - y22 );
1061 804268550 : t5 = ( y23 + y19 );
1062 804268550 : t6 = ( y23 - y19 );
1063 :
1064 : /* Post-additions */
1065 804268550 : re[s * 8] = ( t0 + t4 );
1066 804268550 : im[s * 8] = ( t1 + t5 );
1067 804268550 : re[s * 13] = ( t2 - t6 );
1068 804268550 : im[s * 13] = ( t3 - t7 );
1069 804268550 : re[s * 18] = ( t0 - t4 );
1070 804268550 : im[s * 18] = ( t1 - t5 );
1071 804268550 : re[s * 3] = ( t2 + t6 );
1072 804268550 : im[s * 3] = ( t3 + t7 );
1073 :
1074 : /* 4. FFT4 stage */
1075 :
1076 : /* Pre-additions */
1077 804268550 : t0 = ( y24 + y28 );
1078 804268550 : t2 = ( y24 - y28 );
1079 804268550 : t1 = ( y25 + y29 );
1080 804268550 : t3 = ( y25 - y29 );
1081 804268550 : t4 = ( y26 + y30 );
1082 804268550 : t7 = ( y26 - y30 );
1083 804268550 : t5 = ( y31 + y27 );
1084 804268550 : t6 = ( y31 - y27 );
1085 :
1086 : /* Post-additions */
1087 804268550 : re[s * 12] = ( t0 + t4 );
1088 804268550 : im[s * 12] = ( t1 + t5 );
1089 804268550 : re[s * 17] = ( t2 - t6 );
1090 804268550 : im[s * 17] = ( t3 - t7 );
1091 804268550 : re[s * 2] = ( t0 - t4 );
1092 804268550 : im[s * 2] = ( t1 - t5 );
1093 804268550 : re[s * 7] = ( t2 + t6 );
1094 804268550 : im[s * 7] = ( t3 + t7 );
1095 :
1096 : /* 5. FFT4 stage */
1097 :
1098 : /* Pre-additions */
1099 804268550 : t0 = ( y32 + y36 );
1100 804268550 : t2 = ( y32 - y36 );
1101 804268550 : t1 = ( y33 + y37 );
1102 804268550 : t3 = ( y33 - y37 );
1103 804268550 : t4 = ( y34 + y38 );
1104 804268550 : t7 = ( y34 - y38 );
1105 804268550 : t5 = ( y39 + y35 );
1106 804268550 : t6 = ( y39 - y35 );
1107 :
1108 : /* Post-additions */
1109 804268550 : re[s * 16] = ( t0 + t4 );
1110 804268550 : im[s * 16] = ( t1 + t5 );
1111 804268550 : re[s * 1] = ( t2 - t6 );
1112 804268550 : im[s * 1] = ( t3 - t7 );
1113 804268550 : re[s * 6] = ( t0 - t4 );
1114 804268550 : im[s * 6] = ( t1 - t5 );
1115 804268550 : re[s * 11] = ( t2 + t6 );
1116 804268550 : im[s * 11] = ( t3 + t7 );
1117 :
1118 804268550 : return;
1119 : }
1120 :
1121 : /*******************************************************************************
1122 : Functionname: fft30
1123 : *******************************************************************************
1124 :
1125 : Description: 30-point FFT, computed in place. The 30 complex inputs are copied in a permuted order into two 15-element scratch blocks, each block is passed to fft15(), and the results are merged back into the input buffer using only sums and differences (no multiplications appear in the merge stage).
1126 :
1127 : Arguments: in - pointer to data (interleaved real / imaginary parts); 60 floats are read and then overwritten with the result
1128 :
1129 : Return: none
1130 :
1131 : *******************************************************************************/
1132 :
1133 1374701688 : static void fft30( float *restrict in )
1134 : {
1135 : int16_t i;
1136 : float temp[60]; /* scratch: two blocks of 15 interleaved complex values (30 floats each) */
1137 1374701688 : float *temp_l = temp; /* write/read cursor for the lower block, starting at complex element 0 */
1138 1374701688 : float *temp_lu = temp + 2 * 8; /* read cursor for the lower block, starting at complex element 8 */
1139 1374701688 : float *temp_h = temp + 2 * 15; /* write/read cursor for the upper block, starting at complex element 0 */
1140 1374701688 : float *temp_hu = temp + 2 * 15 + 2 * 8; /* read cursor for the upper block, starting at complex element 8 */
1141 1374701688 : float *in_l = in + 2 * 0; /* cursor over complex samples 0..14 of the buffer */
1142 1374701688 : float *in_h = in + 2 * 15; /* cursor over complex samples 15..29 of the buffer */
1143 10997613504 : for ( i = 0; i < 7; i++ ) /* each pass moves four complex samples: two from each half of the input */
1144 : {
1145 9622911816 : *temp_l++ = *in_l++; /* lower block <- sample 2i (re) */
1146 9622911816 : *temp_l++ = *in_l++; /* lower block <- sample 2i (im) */
1147 9622911816 : *temp_h++ = *in_h++; /* upper block <- sample 15+2i (re) */
1148 9622911816 : *temp_h++ = *in_h++; /* upper block <- sample 15+2i (im) */
1149 9622911816 : *temp_l++ = *in_h++; /* lower block <- sample 16+2i (re): the halves are crossed here */
1150 9622911816 : *temp_l++ = *in_h++; /* lower block <- sample 16+2i (im) */
1151 9622911816 : *temp_h++ = *in_l++; /* upper block <- sample 2i+1 (re) */
1152 9622911816 : *temp_h++ = *in_l++; /* upper block <- sample 2i+1 (im) */
1153 : }
1154 1374701688 : *temp_l++ = *in_l++; /* loop covered 14 samples per half; copy the 15th: lower block <- sample 14 (re) */
1155 1374701688 : *temp_l++ = *in_l++; /* lower block <- sample 14 (im) */
1156 1374701688 : *temp_h++ = *in_h++; /* upper block <- sample 29 (re) */
1157 1374701688 : *temp_h++ = *in_h++; /* upper block <- sample 29 (im); lower block now holds the even-indexed samples 0,16,2,18,...,14 and the upper block the odd-indexed samples 15,1,17,3,...,29 */
1158 1374701688 : temp_l = temp; /* rewind both cursors to the start of their blocks */
1159 1374701688 : temp_h = temp + 30;
1160 1374701688 : fft15( temp_l ); /* fft15() is defined outside this excerpt; presumably an in-place 15-point FFT on interleaved data - confirm */
1161 1374701688 : fft15( temp_h );
1162 :
1163 1374701688 : in_l = in + 2 * 0; /* rewind the buffer cursors; from here on they are used for writing the output */
1164 1374701688 : in_h = in + 2 * 15;
1165 10997613504 : for ( i = 0; i < 7; i++ ) /* each pass writes four output values: two into each half of the buffer */
1166 : {
1167 9622911816 : *in_l++ = *temp_l + *temp_h; /* output 2i (re) = lower[i] + upper[i] */
1168 9622911816 : *in_h++ = *temp_l++ - *temp_h++; /* output 15+2i (re) = lower[i] - upper[i]; cursors advance only after both uses */
1169 9622911816 : *in_l++ = *temp_l + *temp_h; /* output 2i (im) */
1170 9622911816 : *in_h++ = *temp_l++ - *temp_h++; /* output 15+2i (im) */
1171 :
1172 9622911816 : *in_h++ = *temp_lu + *temp_hu; /* output 16+2i (re) = lower[8+i] + upper[8+i] */
1173 9622911816 : *in_l++ = *temp_lu++ - *temp_hu++; /* output 2i+1 (re) = lower[8+i] - upper[8+i] */
1174 9622911816 : *in_h++ = *temp_lu + *temp_hu; /* output 16+2i (im) */
1175 9622911816 : *in_l++ = *temp_lu++ - *temp_hu++; /* output 2i+1 (im) */
1176 : }
1177 1374701688 : *in_l++ = *temp_l + *temp_h; /* last pair, from element 7 of each block: output 14 (re) */
1178 1374701688 : *in_h++ = *temp_l++ - *temp_h++; /* output 29 (re) */
1179 1374701688 : *in_l++ = *temp_l + *temp_h; /* output 14 (im) */
1180 1374701688 : *in_h++ = *temp_l++ - *temp_h++; /* output 29 (im) */
1181 :
1182 1374701688 : return;
1183 : }
1184 :
1185 : /*-------------------------------------------------------------------*
1186 : * fft_cldfb()
1187 : *
1188 : * Interface function to the FFT subroutines: selects the fixed-length
      * routine matching 'size' (5, 8, 10, 16, 20 or 30) and runs it on 'data'.
      * Any other size hits assert( 0 ); if assertions are compiled out
      * (NDEBUG), the function returns without touching 'data'.
1189 : *--------------------------------------------------------------------*/
1190 3035840456 : void fft_cldfb(
1191 : float *data, /* i/o: input/output vector, handed unchanged to the selected routine */
1192 : const int16_t size /* i : size of fft operation; must be one of the case labels below */
1193 : )
1194 : {
1195 :
1196 3035840456 : switch ( size )
1197 : {
1198 533276 : case 5:
1199 533276 : fft5s( data, 2 ); /* only routine called with a second argument; presumably the element stride for interleaved data - confirm against fft5s() */
1200 533276 : break;
1201 116897328 : case 8:
1202 116897328 : fft8( data );
1203 116897328 : break;
1204 739233110 : case 10:
1205 739233110 : fft10( data );
1206 739233110 : break;
1207 206504 : case 16:
1208 206504 : fft16( data );
1209 206504 : break;
1210 804268550 : case 20:
1211 804268550 : fft20( data );
1212 804268550 : break;
1213 1374701688 : case 30:
1214 1374701688 : fft30( data );
1215 1374701688 : break;
1216 :
1217 0 : default: /* unsupported size: treated as a programming error, not a runtime condition */
1218 0 : assert( 0 ); /* this listing records zero executions of this branch */
1219 : break;
1220 : }
1221 :
1222 3035840456 : return;
1223 : }
|