Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #include <assert.h>
40 : #include "prot.h"
41 : #include "ivas_cnst.h"
42 : #include "wmc_auto.h"
43 : /* Portability shim for `restrict`: builds with __STDC_VERSION__ >= 199901L keep the keyword (mapped to __restrict when __ICL is defined); all other builds define it away to nothing. */
44 : #if __STDC_VERSION__ >= 199901L
45 : #if defined __ICL
46 : #define restrict __restrict
47 : #endif
48 : #else
49 : #define restrict
50 : #endif
51 :
52 : /* Forward declarations of file-local fixed-length kernels. The definitions of fft8, fft10 and fft16 further down add the `restrict` qualifier to the pointer; fft15 has no prototype here. */
53 : static void fft8( float *vec );
54 : static void fft10( float *vec );
55 : static void fft16( float *vec );
56 : static void fft20( float *vec );
57 : static void fft30( float *vec );
58 : static void fft5s( float *x, const int16_t stride );
59 :
60 : /* Constants used by fft16 below; the decimal values equal cos(pi/8), cos(3*pi/8), sqrt(2)+1 and sqrt(2)-1 respectively. */
61 : #define COS_PI_DIV8 9.238795325112867e-1f
62 : #define COS_3PI_DIV8 3.826834323650898e-1f
63 : #define SQRT2PLUS1 2.414213562373095f
64 : #define SQRT2MINUS1 4.142135623730952e-1f
65 :
66 :
67 : /*******************************************************************************
68 : Functionname: fft8
69 : *******************************************************************************
70 :
71 : Description: In-place 8-point complex FFT, fully unrolled. Takes 52 real additions
72 : and 4 real multiplications (each by +/-INV_SQRT2).
73 :
74 : Arguments: vec - [i/o] 16 floats, interleaved real / imaginary parts (real at even, imaginary at odd indices); overwritten with the result
75 :
76 : Return: none
77 :
78 : *******************************************************************************/
79 7719738 : static void fft8( float *restrict vec )
80 : {
81 : float temp1[16];
82 : float temp2[16];
83 : /* All 16 inputs are read into temp1 before any element of vec is overwritten; vec is written only in the post-additions. */
84 :
85 : /* Pre-additions: sum and difference of each complex sample k with sample k+4 (float index +8) */
86 7719738 : temp1[0] = vec[0] + vec[8];
87 7719738 : temp1[2] = vec[0] - vec[8];
88 7719738 : temp1[1] = vec[1] + vec[9];
89 7719738 : temp1[3] = vec[1] - vec[9];
90 7719738 : temp1[4] = vec[2] + vec[10];
91 7719738 : temp1[6] = vec[2] - vec[10];
92 7719738 : temp1[5] = vec[3] + vec[11];
93 7719738 : temp1[7] = vec[3] - vec[11];
94 7719738 : temp1[8] = vec[4] + vec[12];
95 7719738 : temp1[10] = vec[4] - vec[12];
96 7719738 : temp1[9] = vec[5] + vec[13];
97 7719738 : temp1[11] = vec[5] - vec[13];
98 7719738 : temp1[12] = vec[6] + vec[14];
99 7719738 : temp1[14] = vec[6] - vec[14];
100 7719738 : temp1[13] = vec[7] + vec[15];
101 7719738 : temp1[15] = vec[7] - vec[15];
102 :
103 : /* Pre-additions and core multiplications; temp2[8..11] combine a real-part term with an imaginary-part term */
104 7719738 : temp2[0] = temp1[0] + temp1[8];
105 7719738 : temp2[4] = temp1[0] - temp1[8];
106 7719738 : temp2[1] = temp1[1] + temp1[9];
107 7719738 : temp2[5] = temp1[1] - temp1[9];
108 7719738 : temp2[8] = temp1[2] - temp1[11];
109 7719738 : temp2[10] = temp1[2] + temp1[11];
110 7719738 : temp2[9] = temp1[3] + temp1[10];
111 7719738 : temp2[11] = temp1[3] - temp1[10];
112 7719738 : temp2[2] = temp1[4] + temp1[12];
113 7719738 : temp2[7] = temp1[4] - temp1[12];
114 7719738 : temp2[3] = temp1[5] + temp1[13];
115 7719738 : temp2[6] = temp1[13] - temp1[5];
116 : /* temp1[0..3] are reused as scratch from here on; their earlier values are not read again */
117 7719738 : temp1[1] = temp1[6] + temp1[14];
118 7719738 : temp1[2] = temp1[6] - temp1[14];
119 7719738 : temp1[0] = temp1[7] + temp1[15];
120 7719738 : temp1[3] = temp1[7] - temp1[15];
121 : /* The only multiplications of the kernel; INV_SQRT2 is defined outside this listing */
122 7719738 : temp2[12] = ( temp1[0] + temp1[2] ) * INV_SQRT2;
123 7719738 : temp2[14] = ( temp1[0] - temp1[2] ) * INV_SQRT2;
124 7719738 : temp2[13] = ( temp1[3] - temp1[1] ) * INV_SQRT2;
125 7719738 : temp2[15] = ( temp1[1] + temp1[3] ) * -INV_SQRT2;
126 :
127 : /* Post-additions: write the 16 result floats back into vec */
128 7719738 : vec[0] = temp2[0] + temp2[2];
129 7719738 : vec[8] = temp2[0] - temp2[2];
130 7719738 : vec[1] = temp2[1] + temp2[3];
131 7719738 : vec[9] = temp2[1] - temp2[3];
132 7719738 : vec[4] = temp2[4] - temp2[6];
133 7719738 : vec[12] = temp2[4] + temp2[6];
134 7719738 : vec[5] = temp2[5] - temp2[7];
135 7719738 : vec[13] = temp2[5] + temp2[7];
136 7719738 : vec[6] = temp2[8] + temp2[14];
137 7719738 : vec[14] = temp2[8] - temp2[14];
138 7719738 : vec[7] = temp2[9] + temp2[15];
139 7719738 : vec[15] = temp2[9] - temp2[15];
140 7719738 : vec[2] = temp2[10] + temp2[12];
141 7719738 : vec[10] = temp2[10] - temp2[12];
142 7719738 : vec[3] = temp2[11] + temp2[13];
143 7719738 : vec[11] = temp2[11] - temp2[13];
144 :
145 7719738 : return;
146 : }
147 :
148 :
149 : /*******************************************************************************
150 : Functionname: fft16
151 : *******************************************************************************
152 :
153 : Description: In-place 16-point complex FFT. Takes 144 real additions and
154 : 24 real multiplications.
155 : The inputs are first split into sums ("even") and differences ("odd") of complex samples k and k+8; the even branch then repeats the fft8 butterflies.
156 : Arguments: vec - [i/o] 32 floats, interleaved real / imaginary parts; overwritten with the result
157 :
158 : Return: none
159 :
160 : *******************************************************************************/
161 : /* fast implementation, completely unrolled and inlined */
162 4086 : static void fft16( float *restrict vec )
163 : {
164 : float temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17,
165 : temp18, temp19, temp110, temp111, temp112, temp113, temp114, temp115;
166 : float temp20, temp21, temp22, temp23, temp24, temp25, temp26, temp27,
167 : temp28, temp29, temp210, temp211, temp212, temp213, temp214, temp215;
168 : float vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7,
169 : vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
170 : /* Naming: temp1N / temp2N are scalar counterparts of temp1[N] / temp2[N] in fft8 (e.g. temp110 is element 10 of set 1); vecN are local scratch values, not vec[N]. */
171 :
172 : /* even: sum of each input float with the float 16 positions later, i.e. complex sample k plus sample k+8 */
173 4086 : vec0 = vec[0] + vec[16];
174 4086 : vec1 = vec[1] + vec[17];
175 4086 : vec2 = vec[2] + vec[18];
176 4086 : vec3 = vec[3] + vec[19];
177 4086 : vec4 = vec[4] + vec[20];
178 4086 : vec5 = vec[5] + vec[21];
179 4086 : vec6 = vec[6] + vec[22];
180 4086 : vec7 = vec[7] + vec[23];
181 4086 : vec8 = vec[8] + vec[24];
182 4086 : vec9 = vec[9] + vec[25];
183 4086 : vec10 = vec[10] + vec[26];
184 4086 : vec11 = vec[11] + vec[27];
185 4086 : vec12 = vec[12] + vec[28];
186 4086 : vec13 = vec[13] + vec[29];
187 4086 : vec14 = vec[14] + vec[30];
188 4086 : vec15 = vec[15] + vec[31];
189 :
190 : /* Pre-additions (same pattern as the first fft8 stage, applied to the sums above) */
191 4086 : temp10 = vec0 + vec8;
192 4086 : temp12 = vec0 - vec8;
193 4086 : temp11 = vec1 + vec9;
194 4086 : temp13 = vec1 - vec9;
195 4086 : temp14 = vec2 + vec10;
196 4086 : temp16 = vec2 - vec10;
197 4086 : temp15 = vec3 + vec11;
198 4086 : temp17 = vec3 - vec11;
199 4086 : temp18 = vec4 + vec12;
200 4086 : temp110 = vec4 - vec12;
201 4086 : temp19 = vec5 + vec13;
202 4086 : temp111 = vec5 - vec13;
203 4086 : temp112 = vec6 + vec14;
204 4086 : temp114 = vec6 - vec14;
205 4086 : temp113 = vec7 + vec15;
206 4086 : temp115 = vec7 - vec15;
207 :
208 : /* Pre-additions and core multiplications (even branch) */
209 4086 : temp20 = temp10 + temp18;
210 4086 : temp24 = temp10 - temp18;
211 4086 : temp21 = temp11 + temp19;
212 4086 : temp25 = temp11 - temp19;
213 4086 : temp28 = temp12 - temp111;
214 4086 : temp210 = temp12 + temp111;
215 4086 : temp29 = temp13 + temp110;
216 4086 : temp211 = temp13 - temp110;
217 4086 : temp22 = temp14 + temp112;
218 4086 : temp27 = temp14 - temp112;
219 4086 : temp23 = temp15 + temp113;
220 4086 : temp26 = temp113 - temp15;
221 : /* temp10..temp13 are reused as scratch from here on; their earlier values are not read again */
222 4086 : temp11 = temp16 + temp114;
223 4086 : temp12 = temp16 - temp114;
224 4086 : temp10 = temp17 + temp115;
225 4086 : temp13 = temp17 - temp115;
226 :
227 4086 : temp212 = ( temp10 + temp12 ) * INV_SQRT2;
228 4086 : temp214 = ( temp10 - temp12 ) * INV_SQRT2;
229 4086 : temp213 = ( temp13 - temp11 ) * INV_SQRT2;
230 4086 : temp215 = ( temp11 + temp13 ) * -INV_SQRT2;
231 :
232 :
233 : /* odd: differences of the same input pairs; vec[] has not been written yet, so the original inputs are read again */
234 4086 : vec0 = vec[0] - vec[16];
235 4086 : vec1 = vec[1] - vec[17];
236 4086 : vec2 = vec[2] - vec[18];
237 4086 : vec3 = vec[3] - vec[19];
238 4086 : vec4 = vec[4] - vec[20];
239 4086 : vec5 = vec[5] - vec[21];
240 4086 : vec6 = vec[6] - vec[22];
241 4086 : vec7 = vec[7] - vec[23];
242 4086 : vec8 = vec[8] - vec[24];
243 4086 : vec9 = vec[9] - vec[25];
244 4086 : vec10 = vec[10] - vec[26];
245 4086 : vec11 = vec[11] - vec[27];
246 4086 : vec12 = vec[12] - vec[28];
247 4086 : vec13 = vec[13] - vec[29];
248 4086 : vec14 = vec[14] - vec[30];
249 4086 : vec15 = vec[15] - vec[31];
250 :
251 : /* Pre-additions and core multiplications (odd branch); temp14..temp115 are reused, their even-branch values were consumed above */
252 4086 : temp19 = ( vec2 + vec14 ) * -COS_3PI_DIV8;
253 4086 : temp110 = ( vec2 - vec14 ) * COS_PI_DIV8;
254 4086 : temp18 = ( vec3 + vec15 ) * COS_3PI_DIV8;
255 4086 : temp111 = ( vec3 - vec15 ) * COS_PI_DIV8;
256 4086 : temp15 = ( vec4 + vec12 ) * -INV_SQRT2;
257 4086 : temp16 = ( vec4 - vec12 ) * INV_SQRT2;
258 4086 : temp14 = ( vec5 + vec13 ) * INV_SQRT2;
259 4086 : temp17 = ( vec5 - vec13 ) * INV_SQRT2;
260 4086 : temp113 = ( vec6 + vec10 ) * -COS_PI_DIV8;
261 4086 : temp114 = ( vec6 - vec10 ) * COS_3PI_DIV8;
262 4086 : temp112 = ( vec7 + vec11 ) * COS_PI_DIV8;
263 4086 : temp115 = ( vec7 - vec11 ) * COS_3PI_DIV8;
264 :
265 : /* Core multiplications; vec2..vec5 become scratch here, their differences were consumed just above */
266 4086 : vec2 = temp18 * SQRT2PLUS1 - temp112 * SQRT2MINUS1;
267 4086 : vec3 = temp19 * SQRT2PLUS1 - temp113 * SQRT2MINUS1;
268 4086 : vec4 = temp110 * SQRT2MINUS1 - temp114 * SQRT2PLUS1;
269 4086 : vec5 = temp111 * SQRT2MINUS1 - temp115 * SQRT2PLUS1;
270 :
271 : /* Post-additions */
272 4086 : temp18 += temp112;
273 4086 : temp19 += temp113;
274 4086 : temp110 += temp114;
275 4086 : temp111 += temp115;
276 : /* vec6, vec7 and vec10..vec15 are likewise reused as scratch below; vec0, vec1, vec8, vec9 still hold odd-branch differences */
277 4086 : vec6 = vec0 + temp14;
278 4086 : vec10 = vec0 - temp14;
279 4086 : vec7 = vec1 + temp15;
280 4086 : vec11 = vec1 - temp15;
281 :
282 4086 : vec12 = temp16 - vec9;
283 4086 : vec14 = temp16 + vec9;
284 4086 : vec13 = vec8 + temp17;
285 4086 : vec15 = vec8 - temp17;
286 :
287 4086 : temp10 = vec6 - vec14;
288 4086 : temp12 = vec6 + vec14;
289 4086 : temp11 = vec7 + vec15;
290 4086 : temp13 = vec7 - vec15;
291 4086 : temp14 = vec10 + vec12;
292 4086 : temp16 = vec10 - vec12;
293 4086 : temp15 = vec11 + vec13;
294 4086 : temp17 = vec11 - vec13;
295 :
296 4086 : vec10 = temp18 + temp110;
297 4086 : temp110 = temp18 - temp110;
298 4086 : vec11 = temp19 + temp111;
299 4086 : temp111 = temp19 - temp111;
300 :
301 4086 : temp112 = vec2 + vec4;
302 4086 : temp114 = vec2 - vec4;
303 4086 : temp113 = vec3 + vec5;
304 4086 : temp115 = vec3 - vec5;
305 :
306 :
307 : /* Post-additions: the 32 results are stored in order through the local pointer; even-branch values (temp2N) land on even-numbered complex bins, odd-branch values on odd-numbered bins */
308 4086 : *vec++ = temp20 + temp22;
309 4086 : *vec++ = temp21 + temp23;
310 4086 : *vec++ = temp12 + vec10;
311 4086 : *vec++ = temp13 + vec11;
312 4086 : *vec++ = temp210 + temp212;
313 4086 : *vec++ = temp211 + temp213;
314 4086 : *vec++ = temp10 + temp112;
315 4086 : *vec++ = temp11 + temp113;
316 4086 : *vec++ = temp24 - temp26;
317 4086 : *vec++ = temp25 - temp27;
318 4086 : *vec++ = temp16 + temp114;
319 4086 : *vec++ = temp17 + temp115;
320 4086 : *vec++ = temp28 + temp214;
321 4086 : *vec++ = temp29 + temp215;
322 4086 : *vec++ = temp14 + temp110;
323 4086 : *vec++ = temp15 + temp111;
324 4086 : *vec++ = temp20 - temp22;
325 4086 : *vec++ = temp21 - temp23;
326 4086 : *vec++ = temp12 - vec10;
327 4086 : *vec++ = temp13 - vec11;
328 4086 : *vec++ = temp210 - temp212;
329 4086 : *vec++ = temp211 - temp213;
330 4086 : *vec++ = temp10 - temp112;
331 4086 : *vec++ = temp11 - temp113;
332 4086 : *vec++ = temp24 + temp26;
333 4086 : *vec++ = temp25 + temp27;
334 4086 : *vec++ = temp16 - temp114;
335 4086 : *vec++ = temp17 - temp115;
336 4086 : *vec++ = temp28 - temp214;
337 4086 : *vec++ = temp29 - temp215;
338 4086 : *vec++ = temp14 - temp110;
339 4086 : *vec++ = temp15 - temp111;
340 :
341 4086 : return;
342 : }
343 :
344 :
345 : /*******************************************************************************
346 : Functionname: fft15
347 : *******************************************************************************
348 :
349 : Description: In-place 15-point complex FFT, fully unrolled. Takes 176 real additions
350 : and 34 real multiplications.
351 :
352 : Arguments: vec - [i/o] 30 floats, interleaved real / imaginary parts (real at even, imaginary at odd indices); overwritten with the result
353 :
354 : Return: none
355 :
356 : *******************************************************************************/
357 258247112 : static void fft15( float *restrict vec )
358 : {
359 : /* rN / iN are real / imaginary intermediates. Even-numbered tmpN are built from rN values and odd-numbered tmpN from iN values (directly or via other tmpN of the same parity). */
360 : float r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16, r17;
361 : float i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16, i17;
362 : float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9,
363 : tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17, tmp18, tmp19,
364 : tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27, tmp28, tmp29;
365 :
366 :
367 : /* Pre-additions real part (even indices of vec; vec[0] is read later, when r0 is formed) */
368 258247112 : r1 = vec[2] + vec[8];
369 258247112 : r2 = vec[2] - vec[8];
370 258247112 : r3 = vec[4] + vec[16];
371 258247112 : r4 = vec[4] - vec[16];
372 258247112 : r5 = vec[6] + vec[24];
373 258247112 : r6 = vec[6] - vec[24];
374 258247112 : r7 = vec[10] + vec[20];
375 258247112 : r8 = vec[10] - vec[20];
376 258247112 : r9 = vec[12] + vec[18];
377 258247112 : r10 = vec[12] - vec[18];
378 258247112 : r11 = vec[14] + vec[26];
379 258247112 : r12 = vec[14] - vec[26];
380 258247112 : r13 = vec[22] + vec[28];
381 258247112 : r14 = vec[22] - vec[28];
382 :
383 258247112 : tmp2 = r1 + r3;
384 258247112 : tmp4 = r1 - r3;
385 258247112 : tmp6 = r2 + r14;
386 258247112 : tmp8 = r2 - r14;
387 258247112 : tmp10 = r4 + r12;
388 258247112 : tmp12 = r4 - r12;
389 258247112 : tmp14 = r5 + r9;
390 258247112 : tmp16 = r5 - r9;
391 258247112 : tmp18 = r11 + r13;
392 258247112 : tmp20 = r11 - r13;
393 :
394 : /* Pre-additions imaginary part (odd indices of vec; vec[1] is read later, when i0 is formed) */
395 258247112 : i1 = vec[3] + vec[9];
396 258247112 : i2 = vec[3] - vec[9];
397 258247112 : i3 = vec[5] + vec[17];
398 258247112 : i4 = vec[5] - vec[17];
399 258247112 : i5 = vec[7] + vec[25];
400 258247112 : i6 = vec[7] - vec[25];
401 258247112 : i7 = vec[11] + vec[21];
402 258247112 : i8 = vec[11] - vec[21];
403 258247112 : i9 = vec[13] + vec[19];
404 258247112 : i10 = vec[13] - vec[19];
405 258247112 : i11 = vec[15] + vec[27];
406 258247112 : i12 = vec[15] - vec[27];
407 258247112 : i13 = vec[23] + vec[29];
408 258247112 : i14 = vec[23] - vec[29];
409 :
410 258247112 : tmp3 = i1 + i3;
411 258247112 : tmp5 = i1 - i3;
412 258247112 : tmp7 = i2 + i14;
413 258247112 : tmp9 = i2 - i14;
414 258247112 : tmp11 = i4 + i12;
415 258247112 : tmp13 = i4 - i12;
416 258247112 : tmp15 = i5 + i9;
417 258247112 : tmp17 = i5 - i9;
418 258247112 : tmp19 = i11 + i13;
419 258247112 : tmp21 = i11 - i13;
420 :
421 : /* Statement order matters below: r6, r7, r8, r10 (and i6, i7, i8, i10) and tmp2, tmp3 are overwritten shortly after the last read of their pre-addition values. Several rN are formed from imaginary-path terms and the paired iN from real-path terms with the opposite sign of the constant. */
422 : /* Pre-additions and core multiplications */
423 258247112 : tmp28 = tmp4 + tmp20;
424 258247112 : tmp29 = tmp5 + tmp21;
425 258247112 : r4 = tmp2 + tmp18;
426 258247112 : i4 = tmp3 + tmp19;
427 258247112 : r3 = ( r4 + tmp14 ) * -1.25f;
428 258247112 : i3 = ( i4 + tmp15 ) * -1.25f;
429 258247112 : r2 = ( tmp29 - i8 ) * -8.660254037844387e-1f;
430 258247112 : i2 = ( tmp28 - r8 ) * 8.660254037844387e-1f;
431 258247112 : r1 = r4 + r7;
432 258247112 : i1 = i4 + i7;
433 258247112 : r0 = r1 + vec[0] + tmp14;
434 258247112 : i0 = i1 + vec[1] + tmp15;
435 258247112 : r7 = tmp4 - tmp20;
436 258247112 : i7 = tmp5 - tmp21;
437 258247112 : r8 = ( tmp3 - tmp19 ) * -4.841229182759272e-1f;
438 258247112 : i8 = ( tmp2 - tmp18 ) * 4.841229182759272e-1f;
439 258247112 : tmp0 = tmp6 + r10;
440 258247112 : tmp1 = tmp7 + i10;
441 258247112 : tmp2 = r6 - tmp10;
442 258247112 : tmp3 = i6 - tmp11;
443 258247112 : r10 = tmp7 * -2.308262652881440f;
444 258247112 : i10 = tmp6 * 2.308262652881440f;
445 258247112 : r11 = tmp8 * 1.332676064001459f;
446 258247112 : i11 = tmp9 * 1.332676064001459f;
447 258247112 : r6 = ( r7 - tmp16 ) * 5.590169943749475e-1f;
448 258247112 : i6 = ( i7 - tmp17 ) * 5.590169943749475e-1f;
449 258247112 : r12 = ( tmp1 + tmp3 ) * 5.877852522924733e-1f;
450 258247112 : i12 = ( tmp0 + tmp2 ) * -5.877852522924733e-1f;
451 258247112 : r13 = ( tmp7 - tmp11 ) * -8.816778784387098e-1f;
452 258247112 : i13 = ( tmp6 - tmp10 ) * 8.816778784387098e-1f;
453 258247112 : r14 = ( tmp8 + tmp12 ) * 5.090369604551274e-1f;
454 258247112 : i14 = ( tmp9 + tmp13 ) * 5.090369604551274e-1f;
455 258247112 : r16 = tmp11 * 5.449068960040204e-1f;
456 258247112 : i16 = tmp10 * -5.449068960040204e-1f;
457 258247112 : r17 = tmp12 * 3.146021430912046e-1f;
458 258247112 : i17 = tmp13 * 3.146021430912046e-1f;
459 : /* In-place scaling of values formed above, plus the remaining products of tmp0..tmp3, tmp28 and tmp29 */
460 258247112 : r4 *= 1.875f;
461 258247112 : i4 *= 1.875f;
462 258247112 : r1 *= -1.5f;
463 258247112 : i1 *= -1.5f;
464 258247112 : r7 *= -8.385254915624212e-1f;
465 258247112 : i7 *= -8.385254915624212e-1f;
466 258247112 : r5 = tmp29 * 1.082531754730548f;
467 258247112 : i5 = tmp28 * -1.082531754730548f;
468 258247112 : r9 = tmp1 * 1.538841768587627f;
469 258247112 : i9 = tmp0 * -1.538841768587627f;
470 258247112 : r15 = tmp3 * 3.632712640026803e-1f;
471 258247112 : i15 = tmp2 * -3.632712640026803e-1f;
472 :
473 :
474 : /* Post-additions real part */
475 258247112 : tmp2 = r0 + r1;
476 258247112 : tmp4 = r3 + r6;
477 258247112 : tmp6 = r3 - r6;
478 258247112 : tmp8 = r4 + r5;
479 258247112 : tmp10 = r4 - r5;
480 258247112 : tmp12 = r7 + r8;
481 258247112 : tmp14 = r7 - r8;
482 258247112 : tmp16 = r13 + r16;
483 258247112 : tmp18 = r14 + r17;
484 258247112 : tmp20 = r10 - r13;
485 258247112 : tmp22 = r11 - r14;
486 258247112 : tmp24 = r12 + r15;
487 258247112 : tmp26 = r12 - r9;
488 :
489 258247112 : r1 = tmp2 + r2;
490 258247112 : r2 = tmp2 - r2;
491 258247112 : r3 = tmp4 + tmp26;
492 258247112 : r4 = tmp4 - tmp26;
493 258247112 : r5 = tmp6 + tmp24;
494 258247112 : r6 = tmp6 - tmp24;
495 258247112 : r7 = tmp16 + tmp18;
496 258247112 : r8 = tmp16 - tmp18;
497 258247112 : r9 = tmp20 - tmp22;
498 258247112 : r10 = tmp20 + tmp22;
499 258247112 : r11 = r1 + tmp8;
500 258247112 : r12 = r2 + tmp10;
501 258247112 : r13 = r11 - tmp12;
502 258247112 : r14 = r12 - tmp14;
503 258247112 : r15 = r12 + tmp14;
504 258247112 : r16 = r11 + tmp12;
505 :
506 : /* Post-additions imaginary part (mirrors the real-part sequence with iN and odd-numbered tmpN) */
507 258247112 : tmp3 = i0 + i1;
508 258247112 : tmp5 = i3 + i6;
509 258247112 : tmp7 = i3 - i6;
510 258247112 : tmp9 = i4 + i5;
511 258247112 : tmp11 = i4 - i5;
512 258247112 : tmp13 = i7 + i8;
513 258247112 : tmp15 = i7 - i8;
514 258247112 : tmp17 = i13 + i16;
515 258247112 : tmp19 = i14 + i17;
516 258247112 : tmp21 = i10 - i13;
517 258247112 : tmp23 = i11 - i14;
518 258247112 : tmp25 = i12 + i15;
519 258247112 : tmp27 = i12 - i9;
520 :
521 258247112 : i1 = tmp3 + i2;
522 258247112 : i2 = tmp3 - i2;
523 258247112 : i3 = tmp5 + tmp27;
524 258247112 : i4 = tmp5 - tmp27;
525 258247112 : i5 = tmp7 + tmp25;
526 258247112 : i6 = tmp7 - tmp25;
527 258247112 : i7 = tmp17 + tmp19;
528 258247112 : i8 = tmp17 - tmp19;
529 258247112 : i9 = tmp21 - tmp23;
530 258247112 : i10 = tmp21 + tmp23;
531 258247112 : i11 = i1 + tmp9;
532 258247112 : i12 = i2 + tmp11;
533 258247112 : i13 = i11 - tmp13;
534 258247112 : i14 = i12 - tmp15;
535 258247112 : i15 = i12 + tmp15;
536 258247112 : i16 = i11 + tmp13;
537 : /* Store the 30 result floats in order (real, imaginary alternating) through the local pointer; all reads of vec[] have completed by this point */
538 258247112 : *vec++ = r0;
539 258247112 : *vec++ = i0;
540 258247112 : *vec++ = r13 + r5 + r7;
541 258247112 : *vec++ = i13 + i5 + i7;
542 258247112 : *vec++ = r15 + r3 - r9;
543 258247112 : *vec++ = i15 + i3 - i9;
544 258247112 : *vec++ = r0 + r4;
545 258247112 : *vec++ = i0 + i4;
546 258247112 : *vec++ = r13 + r6 - r7;
547 258247112 : *vec++ = i13 + i6 - i7;
548 258247112 : *vec++ = r2;
549 258247112 : *vec++ = i2;
550 258247112 : *vec++ = r0 + r5;
551 258247112 : *vec++ = i0 + i5;
552 258247112 : *vec++ = r16 + r3 - r10;
553 258247112 : *vec++ = i16 + i3 - i10;
554 258247112 : *vec++ = r15 + r4 + r9;
555 258247112 : *vec++ = i15 + i4 + i9;
556 258247112 : *vec++ = r0 + r6;
557 258247112 : *vec++ = i0 + i6;
558 258247112 : *vec++ = r1;
559 258247112 : *vec++ = i1;
560 258247112 : *vec++ = r14 + r5 + r8;
561 258247112 : *vec++ = i14 + i5 + i8;
562 258247112 : *vec++ = r0 + r3;
563 258247112 : *vec++ = i0 + i3;
564 258247112 : *vec++ = r16 + r4 + r10;
565 258247112 : *vec++ = i16 + i4 + i10;
566 258247112 : *vec++ = r14 + r6 - r8;
567 258247112 : *vec++ = i14 + i6 - i8;
568 :
569 258247112 : return;
570 : }
571 :
572 : /*******************************************************************************
573 : Functionname: fft5s
574 : *******************************************************************************
575 :
576 : Description: 5-point FFT.
577 :
578 : Arguments: x - pointer to input data (interleaved real / imaginary parts)
579 : stride - stride for input data
580 :
581 : Return: none
582 :
583 : *******************************************************************************/
584 : static const float C51 = 0.9510565162951535f;
585 : static const float C52 = -1.5388417685876270f;
586 : static const float C53 = -0.3632712640026803f;
587 : static const float C54 = 0.5590169943749475f;
588 : static const float C55 = -1.25f;
589 :
590 0 : static void fft5s( float *x, const int16_t stride )
591 : {
592 : float r1, r2, r3, r4;
593 : float s1, s2, s3, s4;
594 : float t;
595 : /* real part */
596 0 : r1 = x[1 * stride] + x[4 * stride];
597 0 : r4 = x[1 * stride] - x[4 * stride];
598 0 : r3 = x[2 * stride] + x[3 * stride];
599 0 : r2 = x[2 * stride] - x[3 * stride];
600 0 : t = ( r1 - r3 ) * C54;
601 0 : r1 = r1 + r3;
602 0 : x[0] = x[0] + r1;
603 0 : r1 = x[0] + ( r1 * C55 );
604 0 : r3 = r1 - t;
605 0 : r1 = r1 + t;
606 0 : t = ( r4 + r2 ) * C51;
607 0 : r4 = t + ( r4 * C52 );
608 0 : r2 = t + ( r2 * C53 );
609 :
610 : /* imaginary part */
611 0 : s1 = x[1 * stride + 1] + x[4 * stride + 1];
612 0 : s4 = x[1 * stride + 1] - x[4 * stride + 1];
613 0 : s3 = x[2 * stride + 1] + x[3 * stride + 1];
614 0 : s2 = x[2 * stride + 1] - x[3 * stride + 1];
615 0 : t = ( s1 - s3 ) * C54;
616 0 : s1 = s1 + s3;
617 0 : x[1] = x[1] + s1;
618 0 : s1 = x[1] + ( s1 * C55 );
619 0 : s3 = s1 - t;
620 0 : s1 = s1 + t;
621 0 : t = ( s4 + s2 ) * C51;
622 0 : s4 = t + ( s4 * C52 );
623 0 : s2 = t + ( s2 * C53 );
624 :
625 : /* combination */
626 0 : x[1 * stride] = r1 + s2;
627 0 : x[4 * stride] = r1 - s2;
628 0 : x[2 * stride] = r3 - s4;
629 0 : x[3 * stride] = r3 + s4;
630 :
631 0 : x[1 * stride + 1] = s1 - r2;
632 0 : x[4 * stride + 1] = s1 + r2;
633 0 : x[2 * stride + 1] = s3 + r4;
634 0 : x[3 * stride + 1] = s3 - r4;
635 0 : }
636 :
637 :
638 : /**
639 : * \brief Function performs a complex 10-point FFT
640 : * The FFT is performed inplace, as two 5-point stages followed by five 2-point stages.
641 : * NOTE(review): an earlier revision of this header said the result is scaled by SCALEFACTOR10 bits; no scaling is visible in this float code (vec[0] becomes the plain sum of the ten real inputs) - confirm.
642 : *
643 : * WOPS FLC version: 1093 cycles
644 : * WOPS with 32x16 bit multiplications: 196 cycles
645 : *
646 : * \param [i/o] vec 20 floats, interleaved real / imaginary parts (real at even, imaginary at odd indices); overwritten with the result
647 : *
648 : * The function takes this single pointer only; there are no separate real / imaginary / stride arguments.
649 : *
650 : * \return void
651 : */
652 14228256 : static void fft10( float *restrict vec )
653 : {
654 : float t;
655 : float r1, r2, r3, r4;
656 : float s1, s2, s3, s4;
657 : float y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
658 : float y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
659 : /* yNN is element NN of a 20-float intermediate (even NN real, odd NN imaginary); each fft5 stage fills ten of them. */
660 : /* 2 fft5 stages: the first works on the even-numbered complex samples (float indices 0, 4, 8, 12, 16), the second on the odd-numbered ones (2, 6, 10, 14, 18); both use the same arithmetic sequence as fft5s */
661 :
662 : /* real part */
663 14228256 : r1 = vec[12] + vec[8];
664 14228256 : r4 = vec[12] - vec[8];
665 14228256 : r3 = vec[4] + vec[16];
666 14228256 : r2 = vec[4] - vec[16];
667 14228256 : t = ( r1 - r3 ) * C54;
668 14228256 : r1 = r1 + r3;
669 14228256 : y00 = vec[0] + r1;
670 14228256 : r1 = y00 + ( r1 * C55 );
671 14228256 : r3 = r1 - t;
672 14228256 : r1 = r1 + t;
673 14228256 : t = ( r4 + r2 ) * C51;
674 14228256 : r4 = t + ( r4 * C52 );
675 14228256 : r2 = t + ( r2 * C53 );
676 :
677 : /* imaginary part */
678 14228256 : s1 = vec[13] + vec[9];
679 14228256 : s4 = vec[13] - vec[9];
680 14228256 : s3 = vec[5] + vec[17];
681 14228256 : s2 = vec[5] - vec[17];
682 14228256 : t = ( s1 - s3 ) * C54;
683 14228256 : s1 = s1 + s3;
684 14228256 : y01 = vec[1] + s1;
685 14228256 : s1 = y01 + ( s1 * C55 );
686 14228256 : s3 = s1 - t;
687 14228256 : s1 = s1 + t;
688 14228256 : t = ( s4 + s2 ) * C51;
689 14228256 : s4 = t + ( s4 * C52 );
690 14228256 : s2 = t + ( s2 * C53 );
691 :
692 : /* combination (results of the first fft5 stage) */
693 14228256 : y04 = r1 + s2;
694 14228256 : y16 = r1 - s2;
695 14228256 : y08 = r3 - s4;
696 14228256 : y12 = r3 + s4;
697 14228256 : y05 = s1 - r2;
698 14228256 : y17 = s1 + r2;
699 14228256 : y09 = s3 + r4;
700 14228256 : y13 = s3 - r4;
701 :
702 : /* real part (second fft5 stage; r1..r4, s1..s4 and t are reused) */
703 14228256 : r1 = vec[2] + vec[18];
704 14228256 : r4 = vec[2] - vec[18];
705 14228256 : r3 = vec[14] + vec[6];
706 14228256 : r2 = vec[14] - vec[6];
707 14228256 : t = ( r1 - r3 ) * C54;
708 14228256 : r1 = r1 + r3;
709 14228256 : y02 = vec[10] + r1;
710 14228256 : r1 = y02 + ( r1 * C55 );
711 14228256 : r3 = r1 - t;
712 14228256 : r1 = r1 + t;
713 14228256 : t = ( r4 + r2 ) * C51;
714 14228256 : r4 = t + ( r4 * C52 );
715 14228256 : r2 = t + ( r2 * C53 );
716 :
717 : /* imaginary part */
718 14228256 : s1 = vec[3] + vec[19];
719 14228256 : s4 = vec[3] - vec[19];
720 14228256 : s3 = vec[15] + vec[7];
721 14228256 : s2 = vec[15] - vec[7];
722 14228256 : t = ( s1 - s3 ) * C54;
723 14228256 : s1 = s1 + s3;
724 14228256 : y03 = vec[11] + s1;
725 14228256 : s1 = y03 + ( s1 * C55 );
726 14228256 : s3 = s1 - t;
727 14228256 : s1 = s1 + t;
728 14228256 : t = ( s4 + s2 ) * C51;
729 14228256 : s4 = t + ( s4 * C52 );
730 14228256 : s2 = t + ( s2 * C53 );
731 :
732 : /* combination (results of the second fft5 stage) */
733 14228256 : y06 = r1 + s2;
734 14228256 : y18 = r1 - s2;
735 14228256 : y10 = r3 - s4;
736 14228256 : y14 = r3 + s4;
737 14228256 : y07 = s1 - r2;
738 14228256 : y19 = s1 + r2;
739 14228256 : y11 = s3 + r4;
740 14228256 : y15 = s3 - r4;
741 :
742 : /* 5 fft2 stages: sum / difference of one value from each fft5 stage, written back to vec (first write to vec in this function) */
743 14228256 : vec[0] = y00 + y02;
744 14228256 : vec[1] = y01 + y03;
745 14228256 : vec[2] = y12 - y14;
746 14228256 : vec[3] = y13 - y15;
747 14228256 : vec[4] = y04 + y06;
748 14228256 : vec[5] = y05 + y07;
749 14228256 : vec[6] = y16 - y18;
750 14228256 : vec[7] = y17 - y19;
751 14228256 : vec[8] = y08 + y10;
752 14228256 : vec[9] = y09 + y11;
753 14228256 : vec[10] = y00 - y02;
754 14228256 : vec[11] = y01 - y03;
755 14228256 : vec[12] = y12 + y14;
756 14228256 : vec[13] = y13 + y15;
757 14228256 : vec[14] = y04 - y06;
758 14228256 : vec[15] = y05 - y07;
759 14228256 : vec[16] = y16 + y18;
760 14228256 : vec[17] = y17 + y19;
761 14228256 : vec[18] = y08 - y10;
762 14228256 : vec[19] = y09 - y11;
763 :
764 14228256 : return;
765 : }
766 :
767 : /**
768 : * \brief Function performs a complex 20-point FFT
769 : * The FFT is performed inplace. The result of the FFT
770 : * is scaled by SCALEFACTOR20 bits.
771 : *
772 : * WOPS FLC version: 1509 cycles
773 : * WOPS with 32x16 bit multiplications: 432 cycles
774 : *
775 : * \param [i/o] re real input / output
776 : * \param [i/o] im imag input / output
777 : * \param [i ] s stride real and imag input / output
778 : *
779 : * \return void
780 : */
781 16731354 : static void fft20( float *signal )
782 : {
783 16731354 : const int16_t s = 2;
784 16731354 : float *re = signal, *im = signal + 1;
785 : float r1, r2, r3, r4;
786 : float s1, s2, s3, s4;
787 : float x0, x1, x2, x3, x4;
788 : float t, t0, t1, t2, t3, t4, t5, t6, t7;
789 : float y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
790 : float y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
791 : float y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
792 : float y30, y31, y32, y33, y34, y35, y36, y37, y38, y39;
793 :
794 : /* 1. FFT5 stage */
795 :
796 : /* real part */
797 16731354 : x0 = re[s * 0];
798 16731354 : x1 = re[s * 16];
799 16731354 : x2 = re[s * 12];
800 16731354 : x3 = re[s * 8];
801 16731354 : x4 = re[s * 4];
802 16731354 : r1 = x1 + x4;
803 16731354 : r4 = x1 - x4;
804 16731354 : r3 = x2 + x3;
805 16731354 : r2 = x2 - x3;
806 16731354 : t = ( r1 - r3 ) * C54;
807 16731354 : r1 = r1 + r3;
808 16731354 : y00 = x0 + r1;
809 16731354 : r1 = y00 + ( r1 * C55 );
810 16731354 : r3 = r1 - t;
811 16731354 : r1 = r1 + t;
812 16731354 : t = ( r4 + r2 ) * C51;
813 16731354 : r4 = t + ( r4 * C52 );
814 16731354 : r2 = t + ( r2 * C53 );
815 :
816 : /* imaginary part */
817 16731354 : x0 = im[s * 0];
818 16731354 : x1 = im[s * 16];
819 16731354 : x2 = im[s * 12];
820 16731354 : x3 = im[s * 8];
821 16731354 : x4 = im[s * 4];
822 16731354 : s1 = x1 + x4;
823 16731354 : s4 = x1 - x4;
824 16731354 : s3 = x2 + x3;
825 16731354 : s2 = x2 - x3;
826 16731354 : t = ( s1 - s3 ) * C54;
827 16731354 : s1 = ( s1 + s3 );
828 16731354 : y01 = ( x0 + s1 );
829 16731354 : s1 = y01 + ( s1 * C55 );
830 16731354 : s3 = ( s1 - t );
831 16731354 : s1 = ( s1 + t );
832 16731354 : t = ( s4 + s2 ) * C51;
833 16731354 : s4 = t + ( s4 * C52 );
834 16731354 : s2 = t + ( s2 * C53 );
835 :
836 : /* combination */
837 16731354 : y08 = ( r1 + s2 );
838 16731354 : y32 = ( r1 - s2 );
839 16731354 : y16 = ( r3 - s4 );
840 16731354 : y24 = ( r3 + s4 );
841 :
842 16731354 : y09 = ( s1 - r2 );
843 16731354 : y33 = ( s1 + r2 );
844 16731354 : y17 = ( s3 + r4 );
845 16731354 : y25 = ( s3 - r4 );
846 :
847 : /* 2. FFT5 stage */
848 :
849 : /* real part */
850 16731354 : x0 = re[s * 5];
851 16731354 : x1 = re[s * 1];
852 16731354 : x2 = re[s * 17];
853 16731354 : x3 = re[s * 13];
854 16731354 : x4 = re[s * 9];
855 16731354 : r1 = ( x1 + x4 );
856 16731354 : r4 = ( x1 - x4 );
857 16731354 : r3 = ( x2 + x3 );
858 16731354 : r2 = ( x2 - x3 );
859 16731354 : t = ( r1 - r3 ) * C54;
860 16731354 : r1 = ( r1 + r3 );
861 16731354 : y02 = ( x0 + r1 );
862 16731354 : r1 = y02 + ( r1 * C55 );
863 16731354 : r3 = ( r1 - t );
864 16731354 : r1 = ( r1 + t );
865 16731354 : t = ( r4 + r2 ) * C51;
866 16731354 : r4 = t + ( r4 * C52 );
867 16731354 : r2 = t + ( r2 * C53 );
868 :
869 : /* imaginary part */
870 16731354 : x0 = im[s * 5];
871 16731354 : x1 = im[s * 1];
872 16731354 : x2 = im[s * 17];
873 16731354 : x3 = im[s * 13];
874 16731354 : x4 = im[s * 9];
875 16731354 : s1 = ( x1 + x4 );
876 16731354 : s4 = ( x1 - x4 );
877 16731354 : s3 = ( x2 + x3 );
878 16731354 : s2 = ( x2 - x3 );
879 16731354 : t = ( s1 - s3 ) * C54;
880 16731354 : s1 = ( s1 + s3 );
881 16731354 : y03 = ( x0 + s1 );
882 16731354 : s1 = y03 + ( s1 * C55 );
883 16731354 : s3 = ( s1 - t );
884 16731354 : s1 = ( s1 + t );
885 16731354 : t = ( s4 + s2 ) * C51;
886 16731354 : s4 = t + ( s4 * C52 );
887 16731354 : s2 = t + ( s2 * C53 );
888 :
889 : /* combination */
890 16731354 : y10 = ( r1 + s2 );
891 16731354 : y34 = ( r1 - s2 );
892 16731354 : y18 = ( r3 - s4 );
893 16731354 : y26 = ( r3 + s4 );
894 :
895 16731354 : y11 = ( s1 - r2 );
896 16731354 : y35 = ( s1 + r2 );
897 16731354 : y19 = ( s3 + r4 );
898 16731354 : y27 = ( s3 - r4 );
899 :
900 : /* 3. FFT5 stage */
901 :
902 : /* real part */
903 16731354 : x0 = re[s * 10];
904 16731354 : x1 = re[s * 6];
905 16731354 : x2 = re[s * 2];
906 16731354 : x3 = re[s * 18];
907 16731354 : x4 = re[s * 14];
908 16731354 : r1 = ( x1 + x4 );
909 16731354 : r4 = ( x1 - x4 );
910 16731354 : r3 = ( x2 + x3 );
911 16731354 : r2 = ( x2 - x3 );
912 16731354 : t = ( r1 - r3 ) * C54;
913 16731354 : r1 = ( r1 + r3 );
914 16731354 : y04 = ( x0 + r1 );
915 16731354 : r1 = y04 + ( r1 * C55 );
916 16731354 : r3 = ( r1 - t );
917 16731354 : r1 = ( r1 + t );
918 16731354 : t = ( r4 + r2 ) * C51;
919 16731354 : r4 = t + ( r4 * C52 );
920 16731354 : r2 = t + ( r2 * C53 );
921 :
922 : /* imaginary part */
923 16731354 : x0 = im[s * 10];
924 16731354 : x1 = im[s * 6];
925 16731354 : x2 = im[s * 2];
926 16731354 : x3 = im[s * 18];
927 16731354 : x4 = im[s * 14];
928 16731354 : s1 = ( x1 + x4 );
929 16731354 : s4 = ( x1 - x4 );
930 16731354 : s3 = ( x2 + x3 );
931 16731354 : s2 = ( x2 - x3 );
932 16731354 : t = ( s1 - s3 ) * C54;
933 16731354 : s1 = ( s1 + s3 );
934 16731354 : y05 = ( x0 + s1 );
935 16731354 : s1 = y05 + ( s1 * C55 );
936 16731354 : s3 = ( s1 - t );
937 16731354 : s1 = ( s1 + t );
938 16731354 : t = ( s4 + s2 ) * C51;
939 16731354 : s4 = t + ( s4 * C52 );
940 16731354 : s2 = t + ( s2 * C53 );
941 :
942 : /* combination */
943 16731354 : y12 = ( r1 + s2 );
944 16731354 : y36 = ( r1 - s2 );
945 16731354 : y20 = ( r3 - s4 );
946 16731354 : y28 = ( r3 + s4 );
947 :
948 16731354 : y13 = ( s1 - r2 );
949 16731354 : y37 = ( s1 + r2 );
950 16731354 : y21 = ( s3 + r4 );
951 16731354 : y29 = ( s3 - r4 );
952 :
953 : /* 4. FFT5 stage */
954 :
955 : /* real part */
956 16731354 : x0 = re[s * 15];
957 16731354 : x1 = re[s * 11];
958 16731354 : x2 = re[s * 7];
959 16731354 : x3 = re[s * 3];
960 16731354 : x4 = re[s * 19];
961 16731354 : r1 = ( x1 + x4 );
962 16731354 : r4 = ( x1 - x4 );
963 16731354 : r3 = ( x2 + x3 );
964 16731354 : r2 = ( x2 - x3 );
965 16731354 : t = ( r1 - r3 ) * C54;
966 16731354 : r1 = ( r1 + r3 );
967 16731354 : y06 = ( x0 + r1 );
968 16731354 : r1 = y06 + ( r1 * C55 );
969 16731354 : r3 = ( r1 - t );
970 16731354 : r1 = ( r1 + t );
971 16731354 : t = ( r4 + r2 ) * C51;
972 16731354 : r4 = t + ( r4 * C52 );
973 16731354 : r2 = t + ( r2 * C53 );
974 :
975 : /* imaginary part */
976 16731354 : x0 = im[s * 15];
977 16731354 : x1 = im[s * 11];
978 16731354 : x2 = im[s * 7];
979 16731354 : x3 = im[s * 3];
980 16731354 : x4 = im[s * 19];
981 16731354 : s1 = ( x1 + x4 );
982 16731354 : s4 = ( x1 - x4 );
983 16731354 : s3 = ( x2 + x3 );
984 16731354 : s2 = ( x2 - x3 );
985 16731354 : t = ( s1 - s3 ) * C54;
986 16731354 : s1 = ( s1 + s3 );
987 16731354 : y07 = ( x0 + s1 );
988 16731354 : s1 = y07 + ( s1 * C55 );
989 16731354 : s3 = ( s1 - t );
990 16731354 : s1 = ( s1 + t );
991 16731354 : t = ( s4 + s2 ) * C51;
992 16731354 : s4 = t + ( s4 * C52 );
993 16731354 : s2 = t + ( s2 * C53 );
994 :
995 : /* combination */
996 16731354 : y14 = ( r1 + s2 );
997 16731354 : y38 = ( r1 - s2 );
998 16731354 : y22 = ( r3 - s4 );
999 16731354 : y30 = ( r3 + s4 );
1000 :
1001 16731354 : y15 = ( s1 - r2 );
1002 16731354 : y39 = ( s1 + r2 );
1003 16731354 : y23 = ( s3 + r4 );
1004 16731354 : y31 = ( s3 - r4 );
1005 :
1006 :
1007 : /* 1. FFT4 stage */
1008 :
1009 : /* Pre-additions */
1010 16731354 : t0 = ( y00 + y04 );
1011 16731354 : t2 = ( y00 - y04 );
1012 16731354 : t1 = ( y01 + y05 );
1013 16731354 : t3 = ( y01 - y05 );
1014 16731354 : t4 = ( y02 + y06 );
1015 16731354 : t7 = ( y02 - y06 );
1016 16731354 : t5 = ( y07 + y03 );
1017 16731354 : t6 = ( y07 - y03 );
1018 :
1019 : /* Post-additions */
1020 16731354 : re[s * 0] = ( t0 + t4 );
1021 16731354 : im[s * 0] = ( t1 + t5 );
1022 16731354 : re[s * 5] = ( t2 - t6 );
1023 16731354 : im[s * 5] = ( t3 - t7 );
1024 16731354 : re[s * 10] = ( t0 - t4 );
1025 16731354 : im[s * 10] = ( t1 - t5 );
1026 16731354 : re[s * 15] = ( t2 + t6 );
1027 16731354 : im[s * 15] = ( t3 + t7 );
1028 :
1029 : /* 2. FFT4 stage */
1030 :
1031 : /* Pre-additions */
1032 16731354 : t0 = ( y08 + y12 );
1033 16731354 : t2 = ( y08 - y12 );
1034 16731354 : t1 = ( y09 + y13 );
1035 16731354 : t3 = ( y09 - y13 );
1036 16731354 : t4 = ( y10 + y14 );
1037 16731354 : t7 = ( y10 - y14 );
1038 16731354 : t5 = ( y15 + y11 );
1039 16731354 : t6 = ( y15 - y11 );
1040 :
1041 : /* Post-additions */
1042 16731354 : re[s * 4] = ( t0 + t4 );
1043 16731354 : im[s * 4] = ( t1 + t5 );
1044 16731354 : re[s * 9] = ( t2 - t6 );
1045 16731354 : im[s * 9] = ( t3 - t7 );
1046 16731354 : re[s * 14] = ( t0 - t4 );
1047 16731354 : im[s * 14] = ( t1 - t5 );
1048 16731354 : re[s * 19] = ( t2 + t6 );
1049 16731354 : im[s * 19] = ( t3 + t7 );
1050 :
1051 :
1052 : /* 3. FFT4 stage */
1053 :
1054 : /* Pre-additions */
1055 16731354 : t0 = ( y16 + y20 );
1056 16731354 : t2 = ( y16 - y20 );
1057 16731354 : t1 = ( y17 + y21 );
1058 16731354 : t3 = ( y17 - y21 );
1059 16731354 : t4 = ( y18 + y22 );
1060 16731354 : t7 = ( y18 - y22 );
1061 16731354 : t5 = ( y23 + y19 );
1062 16731354 : t6 = ( y23 - y19 );
1063 :
1064 : /* Post-additions */
1065 16731354 : re[s * 8] = ( t0 + t4 );
1066 16731354 : im[s * 8] = ( t1 + t5 );
1067 16731354 : re[s * 13] = ( t2 - t6 );
1068 16731354 : im[s * 13] = ( t3 - t7 );
1069 16731354 : re[s * 18] = ( t0 - t4 );
1070 16731354 : im[s * 18] = ( t1 - t5 );
1071 16731354 : re[s * 3] = ( t2 + t6 );
1072 16731354 : im[s * 3] = ( t3 + t7 );
1073 :
1074 : /* 4. FFT4 stage */
1075 :
1076 : /* Pre-additions */
1077 16731354 : t0 = ( y24 + y28 );
1078 16731354 : t2 = ( y24 - y28 );
1079 16731354 : t1 = ( y25 + y29 );
1080 16731354 : t3 = ( y25 - y29 );
1081 16731354 : t4 = ( y26 + y30 );
1082 16731354 : t7 = ( y26 - y30 );
1083 16731354 : t5 = ( y31 + y27 );
1084 16731354 : t6 = ( y31 - y27 );
1085 :
1086 : /* Post-additions */
1087 16731354 : re[s * 12] = ( t0 + t4 );
1088 16731354 : im[s * 12] = ( t1 + t5 );
1089 16731354 : re[s * 17] = ( t2 - t6 );
1090 16731354 : im[s * 17] = ( t3 - t7 );
1091 16731354 : re[s * 2] = ( t0 - t4 );
1092 16731354 : im[s * 2] = ( t1 - t5 );
1093 16731354 : re[s * 7] = ( t2 + t6 );
1094 16731354 : im[s * 7] = ( t3 + t7 );
1095 :
1096 : /* 5. FFT4 stage */
1097 :
1098 : /* Pre-additions */
1099 16731354 : t0 = ( y32 + y36 );
1100 16731354 : t2 = ( y32 - y36 );
1101 16731354 : t1 = ( y33 + y37 );
1102 16731354 : t3 = ( y33 - y37 );
1103 16731354 : t4 = ( y34 + y38 );
1104 16731354 : t7 = ( y34 - y38 );
1105 16731354 : t5 = ( y39 + y35 );
1106 16731354 : t6 = ( y39 - y35 );
1107 :
1108 : /* Post-additions */
1109 16731354 : re[s * 16] = ( t0 + t4 );
1110 16731354 : im[s * 16] = ( t1 + t5 );
1111 16731354 : re[s * 1] = ( t2 - t6 );
1112 16731354 : im[s * 1] = ( t3 - t7 );
1113 16731354 : re[s * 6] = ( t0 - t4 );
1114 16731354 : im[s * 6] = ( t1 - t5 );
1115 16731354 : re[s * 11] = ( t2 + t6 );
1116 16731354 : im[s * 11] = ( t3 + t7 );
1117 :
1118 16731354 : return;
1119 : }
1120 :
/*******************************************************************************
 Functionname:  fft30
 *******************************************************************************

 Description:   30-point FFT, computed in place.

                The 30 complex input samples are split into two groups of 15:
                  - group "lo" takes the even-numbered samples of the first
                    half of the input and the odd-numbered samples of the
                    second half,
                  - group "hi" takes the remaining samples.
                Each group is transformed with fft15(); the two results are
                then merged with sum/difference butterflies and written back
                to the input buffer.

 Arguments:     in - pointer to data (interleaved real / imaginary parts),
                     60 floats are read and overwritten

 Return:        none

*******************************************************************************/

static void fft30( float *restrict in )
{
    float temp[60];
    float *const lo = temp;          /* first 15-point work buffer  (floats  0..29) */
    float *const hi = temp + 2 * 15; /* second 15-point work buffer (floats 30..59) */
    int16_t k, c;

    /* Distribute the input over the two work buffers. Every pass handles two
       consecutive complex samples of each input half; c selects re / im. */
    for ( k = 0; k < 7; k++ )
    {
        for ( c = 0; c < 2; c++ )
        {
            lo[4 * k + c] = in[4 * k + c];
            hi[4 * k + c] = in[2 * 15 + 4 * k + c];
            lo[4 * k + 2 + c] = in[2 * 15 + 4 * k + 2 + c];
            hi[4 * k + 2 + c] = in[4 * k + 2 + c];
        }
    }

    /* Last (15th) complex sample of each half has no partner in the loop above */
    for ( c = 0; c < 2; c++ )
    {
        lo[28 + c] = in[28 + c];
        hi[28 + c] = in[2 * 15 + 28 + c];
    }

    fft15( lo );
    fft15( hi );

    /* Merge: bins 0..6 of the sub-transforms feed the even output slots,
       bins 8..14 feed the odd output slots with sum and difference swapped
       between the two output halves. */
    for ( k = 0; k < 7; k++ )
    {
        for ( c = 0; c < 2; c++ )
        {
            const float a_lo = lo[2 * k + c];
            const float a_hi = hi[2 * k + c];
            const float b_lo = lo[2 * 8 + 2 * k + c];
            const float b_hi = hi[2 * 8 + 2 * k + c];

            in[4 * k + c] = a_lo + a_hi;
            in[2 * 15 + 4 * k + c] = a_lo - a_hi;

            in[2 * 15 + 4 * k + 2 + c] = b_lo + b_hi;
            in[4 * k + 2 + c] = b_lo - b_hi;
        }
    }

    /* Bin 7 of the sub-transforms completes the last even output slot */
    for ( c = 0; c < 2; c++ )
    {
        const float a_lo = lo[14 + c];
        const float a_hi = hi[14 + c];

        in[28 + c] = a_lo + a_hi;
        in[2 * 15 + 28 + c] = a_lo - a_hi;
    }

    return;
}
1184 :
/*-------------------------------------------------------------------*
 * fft_cldfb()
 *
 * Entry point that forwards to the fixed-length FFT routine matching
 * the requested size. Handled sizes are 5, 8, 10, 16, 20 and 30; any
 * other value triggers assert( 0 ), and when assertions are compiled
 * out the data is left untouched.
 *--------------------------------------------------------------------*/
void fft_cldfb(
    float *data,       /* i/o: input/output vector  */
    const int16_t size /* i  : size of fft operation */
)
{
    if ( size == 5 )
    {
        /* the 5-point routine additionally takes a stride argument */
        fft5s( data, 2 );
    }
    else if ( size == 8 )
    {
        fft8( data );
    }
    else if ( size == 10 )
    {
        fft10( data );
    }
    else if ( size == 16 )
    {
        fft16( data );
    }
    else if ( size == 20 )
    {
        fft20( data );
    }
    else if ( size == 30 )
    {
        fft30( data );
    }
    else
    {
        /* unsupported transform length */
        assert( 0 );
    }

    return;
}
|