Line data Source code
1 : /******************************************************************************************************
2 :
3 : (C) 2022-2025 IVAS codec Public Collaboration with portions copyright Dolby International AB, Ericsson AB,
4 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
5 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
6 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
7 : contributors to this repository. All Rights Reserved.
8 :
9 : This software is protected by copyright law and by international treaties.
10 : The IVAS codec Public Collaboration consisting of Dolby International AB, Ericsson AB,
11 : Fraunhofer-Gesellschaft zur Foerderung der angewandten Forschung e.V., Huawei Technologies Co. LTD.,
12 : Koninklijke Philips N.V., Nippon Telegraph and Telephone Corporation, Nokia Technologies Oy, Orange,
13 : Panasonic Holdings Corporation, Qualcomm Technologies, Inc., VoiceAge Corporation, and other
14 : contributors to this repository retain full ownership rights in their respective contributions in
15 : the software. This notice grants no license of any kind, including but not limited to patent
16 : license, nor is any license granted by implication, estoppel or otherwise.
17 :
18 : Contributors are required to enter into the IVAS codec Public Collaboration agreement before making
19 : contributions.
20 :
21 : This software is provided "AS IS", without any express or implied warranties. The software is in the
22 : development stage. It is intended exclusively for experts who have experience with such software and
23 : solely for the purpose of inspection. All implied warranties of non-infringement, merchantability
24 : and fitness for a particular purpose are hereby disclaimed and excluded.
25 :
26 : Any dispute, controversy or claim arising under or in relation to providing this software shall be
27 : submitted to and settled by the final, binding jurisdiction of the courts of Munich, Germany in
28 : accordance with the laws of the Federal Republic of Germany excluding its conflict of law rules and
29 : the United Nations Convention on Contracts on the International Sales of Goods.
30 :
31 : *******************************************************************************************************/
32 :
33 : /*====================================================================================
34 : EVS Codec 3GPP TS26.443 Nov 04, 2021. Version 12.14.0 / 13.10.0 / 14.6.0 / 15.4.0 / 16.3.0
35 : ====================================================================================*/
36 :
37 : #include <stdint.h>
38 : #include "options.h"
39 : #include <assert.h>
40 : #include "prot.h"
41 : #include "ivas_cnst.h"
42 : #include "wmc_auto.h"
43 :
44 : #if __STDC_VERSION__ >= 199901L /* C99 or newer: restrict is a language keyword */
45 : #if defined __ICL /* when __ICL is defined the keyword is spelled __restrict */
46 : #define restrict __restrict
47 : #endif
48 : #else /* language level below C99: the qualifier is compiled away */
49 : #define restrict
50 : #endif
51 :
52 :
53 : static void fft8( float *vec ); /* 8-point kernel; the definition adds restrict to vec */
54 : static void fft10( float *vec ); /* 10-point kernel; the definition adds restrict to vec */
55 : static void fft16( float *vec ); /* 16-point kernel; the definition adds restrict to vec */
56 : static void fft20( float *vec ); /* 20-point kernel; the definition names the parameter signal */
57 : static void fft30( float *vec ); /* definition is not part of this excerpt */
58 : static void fft5s( float *x, const int16_t stride ); /* 5-point kernel addressing its data with a stride */
59 :
60 :
61 : #define COS_PI_DIV8 9.238795325112867e-1f /* cos( pi / 8 ) */
62 : #define COS_3PI_DIV8 3.826834323650898e-1f /* cos( 3 * pi / 8 ) */
63 : #define SQRT2PLUS1 2.414213562373095f /* sqrt( 2 ) + 1 */
64 : #define SQRT2MINUS1 4.142135623730952e-1f /* sqrt( 2 ) - 1 */
65 :
66 :
67 : /*******************************************************************************
68 : Functionname: fft8
69 : *******************************************************************************
70 :
71 : Description: 8-point FFT. Complex-valued input takes 52 real additions
72 : and 4 real multiplications. The transform is done in place:
73 : all 16 inputs are combined into local arrays before vec is overwritten.
74 : Arguments: vec - pointer to data (interleaved real / imaginary parts),
75 : 16 floats: vec[2k] real part, vec[2k+1] imaginary part, k = 0..7
76 : Return: none (the result is left in vec[0..15])
77 :
78 : *******************************************************************************/
79 8692744 : static void fft8( float *restrict vec )
80 : {
81 : float temp1[16]; /* first-pass sums / differences; slots 0..3 are recycled as scratch further down */
82 : float temp2[16]; /* second-pass values that feed the final output sums */
83 :
84 :
85 : /* Pre-additions: sum and difference of vec[i] and vec[i+8], i.e. of complex sample k and sample k+4 */
86 8692744 : temp1[0] = vec[0] + vec[8];
87 8692744 : temp1[2] = vec[0] - vec[8];
88 8692744 : temp1[1] = vec[1] + vec[9];
89 8692744 : temp1[3] = vec[1] - vec[9];
90 8692744 : temp1[4] = vec[2] + vec[10];
91 8692744 : temp1[6] = vec[2] - vec[10];
92 8692744 : temp1[5] = vec[3] + vec[11];
93 8692744 : temp1[7] = vec[3] - vec[11];
94 8692744 : temp1[8] = vec[4] + vec[12];
95 8692744 : temp1[10] = vec[4] - vec[12];
96 8692744 : temp1[9] = vec[5] + vec[13];
97 8692744 : temp1[11] = vec[5] - vec[13];
98 8692744 : temp1[12] = vec[6] + vec[14];
99 8692744 : temp1[14] = vec[6] - vec[14];
100 8692744 : temp1[13] = vec[7] + vec[15];
101 8692744 : temp1[15] = vec[7] - vec[15];
102 :
103 : /* Pre-additions and core multiplications: second butterfly layer built from temp1 */
104 8692744 : temp2[0] = temp1[0] + temp1[8];
105 8692744 : temp2[4] = temp1[0] - temp1[8];
106 8692744 : temp2[1] = temp1[1] + temp1[9];
107 8692744 : temp2[5] = temp1[1] - temp1[9];
108 8692744 : temp2[8] = temp1[2] - temp1[11]; /* real slot combined with an imaginary slot (and vice versa below) */
109 8692744 : temp2[10] = temp1[2] + temp1[11];
110 8692744 : temp2[9] = temp1[3] + temp1[10];
111 8692744 : temp2[11] = temp1[3] - temp1[10];
112 8692744 : temp2[2] = temp1[4] + temp1[12];
113 8692744 : temp2[7] = temp1[4] - temp1[12];
114 8692744 : temp2[3] = temp1[5] + temp1[13];
115 8692744 : temp2[6] = temp1[13] - temp1[5]; /* note the reversed operand order compared with temp2[7] */
116 : /* temp1[0..3] have been fully consumed above, so they are overwritten here as scratch; keep this order */
117 8692744 : temp1[1] = temp1[6] + temp1[14];
118 8692744 : temp1[2] = temp1[6] - temp1[14];
119 8692744 : temp1[0] = temp1[7] + temp1[15];
120 8692744 : temp1[3] = temp1[7] - temp1[15];
121 : /* the only four multiplications; INV_SQRT2 is defined outside this excerpt (presumably 1 / sqrt( 2 )) */
122 8692744 : temp2[12] = ( temp1[0] + temp1[2] ) * INV_SQRT2;
123 8692744 : temp2[14] = ( temp1[0] - temp1[2] ) * INV_SQRT2;
124 8692744 : temp2[13] = ( temp1[3] - temp1[1] ) * INV_SQRT2;
125 8692744 : temp2[15] = ( temp1[1] + temp1[3] ) * -INV_SQRT2;
126 :
127 : /* Post-additions: the 16 results overwrite vec[0..15] */
128 8692744 : vec[0] = temp2[0] + temp2[2];
129 8692744 : vec[8] = temp2[0] - temp2[2];
130 8692744 : vec[1] = temp2[1] + temp2[3];
131 8692744 : vec[9] = temp2[1] - temp2[3];
132 8692744 : vec[4] = temp2[4] - temp2[6];
133 8692744 : vec[12] = temp2[4] + temp2[6];
134 8692744 : vec[5] = temp2[5] - temp2[7];
135 8692744 : vec[13] = temp2[5] + temp2[7];
136 8692744 : vec[6] = temp2[8] + temp2[14];
137 8692744 : vec[14] = temp2[8] - temp2[14];
138 8692744 : vec[7] = temp2[9] + temp2[15];
139 8692744 : vec[15] = temp2[9] - temp2[15];
140 8692744 : vec[2] = temp2[10] + temp2[12];
141 8692744 : vec[10] = temp2[10] - temp2[12];
142 8692744 : vec[3] = temp2[11] + temp2[13];
143 8692744 : vec[11] = temp2[11] - temp2[13];
144 :
145 8692744 : return;
146 : }
147 :
148 :
149 : /*******************************************************************************
150 : Functionname: fft16
151 : *******************************************************************************
152 :
153 : Description: 16-point FFT. Complex-valued input takes 144 real additions and
154 : 24 real multiplications. The sums vec[k] + vec[k+16] ("even" section) and the
155 : differences vec[k] - vec[k+16] ("odd" section) are processed separately and merged by the final stores.
156 : Arguments: vec - pointer to data (interleaved real / imaginary parts),
157 : 32 floats: vec[2k] real part, vec[2k+1] imaginary part, k = 0..15
158 : Return: none (vec[0..31] is overwritten in place)
159 :
160 : *******************************************************************************/
161 : /* fast implementation, completely unrolled and inlined */
162 10176 : static void fft16( float *restrict vec )
163 : {
164 : float temp10, temp11, temp12, temp13, temp14, temp15, temp16, temp17,
165 : temp18, temp19, temp110, temp111, temp112, temp113, temp114, temp115;
166 : float temp20, temp21, temp22, temp23, temp24, temp25, temp26, temp27,
167 : temp28, temp29, temp210, temp211, temp212, temp213, temp214, temp215;
168 : float vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7,
169 : vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15;
170 :
171 :
172 : /* even: each of the first 16 floats is added to the float 16 positions later */
173 10176 : vec0 = vec[0] + vec[16];
174 10176 : vec1 = vec[1] + vec[17];
175 10176 : vec2 = vec[2] + vec[18];
176 10176 : vec3 = vec[3] + vec[19];
177 10176 : vec4 = vec[4] + vec[20];
178 10176 : vec5 = vec[5] + vec[21];
179 10176 : vec6 = vec[6] + vec[22];
180 10176 : vec7 = vec[7] + vec[23];
181 10176 : vec8 = vec[8] + vec[24];
182 10176 : vec9 = vec[9] + vec[25];
183 10176 : vec10 = vec[10] + vec[26];
184 10176 : vec11 = vec[11] + vec[27];
185 10176 : vec12 = vec[12] + vec[28];
186 10176 : vec13 = vec[13] + vec[29];
187 10176 : vec14 = vec[14] + vec[30];
188 10176 : vec15 = vec[15] + vec[31];
189 :
190 : /* Pre-additions: same butterfly layout as the 8-point kernel, applied to the sums held in vec0..vec15 */
191 10176 : temp10 = vec0 + vec8;
192 10176 : temp12 = vec0 - vec8;
193 10176 : temp11 = vec1 + vec9;
194 10176 : temp13 = vec1 - vec9;
195 10176 : temp14 = vec2 + vec10;
196 10176 : temp16 = vec2 - vec10;
197 10176 : temp15 = vec3 + vec11;
198 10176 : temp17 = vec3 - vec11;
199 10176 : temp18 = vec4 + vec12;
200 10176 : temp110 = vec4 - vec12;
201 10176 : temp19 = vec5 + vec13;
202 10176 : temp111 = vec5 - vec13;
203 10176 : temp112 = vec6 + vec14;
204 10176 : temp114 = vec6 - vec14;
205 10176 : temp113 = vec7 + vec15;
206 10176 : temp115 = vec7 - vec15;
207 :
208 : /* Pre-additions and core multiplications: results temp20..temp215 are kept until the final stores */
209 10176 : temp20 = temp10 + temp18;
210 10176 : temp24 = temp10 - temp18;
211 10176 : temp21 = temp11 + temp19;
212 10176 : temp25 = temp11 - temp19;
213 10176 : temp28 = temp12 - temp111;
214 10176 : temp210 = temp12 + temp111;
215 10176 : temp29 = temp13 + temp110;
216 10176 : temp211 = temp13 - temp110;
217 10176 : temp22 = temp14 + temp112;
218 10176 : temp27 = temp14 - temp112;
219 10176 : temp23 = temp15 + temp113;
220 10176 : temp26 = temp113 - temp15;
221 : /* temp10..temp13 were consumed above and are recycled as scratch; keep this order */
222 10176 : temp11 = temp16 + temp114;
223 10176 : temp12 = temp16 - temp114;
224 10176 : temp10 = temp17 + temp115;
225 10176 : temp13 = temp17 - temp115;
226 :
227 10176 : temp212 = ( temp10 + temp12 ) * INV_SQRT2;
228 10176 : temp214 = ( temp10 - temp12 ) * INV_SQRT2;
229 10176 : temp213 = ( temp13 - temp11 ) * INV_SQRT2;
230 10176 : temp215 = ( temp11 + temp13 ) * -INV_SQRT2;
231 :
232 :
233 : /* odd: differences of the same float pairs; vec[] has not been written yet, so the inputs are re-read */
234 10176 : vec0 = vec[0] - vec[16];
235 10176 : vec1 = vec[1] - vec[17];
236 10176 : vec2 = vec[2] - vec[18];
237 10176 : vec3 = vec[3] - vec[19];
238 10176 : vec4 = vec[4] - vec[20];
239 10176 : vec5 = vec[5] - vec[21];
240 10176 : vec6 = vec[6] - vec[22];
241 10176 : vec7 = vec[7] - vec[23];
242 10176 : vec8 = vec[8] - vec[24];
243 10176 : vec9 = vec[9] - vec[25];
244 10176 : vec10 = vec[10] - vec[26];
245 10176 : vec11 = vec[11] - vec[27];
246 10176 : vec12 = vec[12] - vec[28];
247 10176 : vec13 = vec[13] - vec[29];
248 10176 : vec14 = vec[14] - vec[30];
249 10176 : vec15 = vec[15] - vec[31];
250 :
251 : /* Pre-additions and core multiplications: twelve products with COS_PI_DIV8, COS_3PI_DIV8 and INV_SQRT2 */
252 10176 : temp19 = ( vec2 + vec14 ) * -COS_3PI_DIV8;
253 10176 : temp110 = ( vec2 - vec14 ) * COS_PI_DIV8;
254 10176 : temp18 = ( vec3 + vec15 ) * COS_3PI_DIV8;
255 10176 : temp111 = ( vec3 - vec15 ) * COS_PI_DIV8;
256 10176 : temp15 = ( vec4 + vec12 ) * -INV_SQRT2;
257 10176 : temp16 = ( vec4 - vec12 ) * INV_SQRT2;
258 10176 : temp14 = ( vec5 + vec13 ) * INV_SQRT2;
259 10176 : temp17 = ( vec5 - vec13 ) * INV_SQRT2;
260 10176 : temp113 = ( vec6 + vec10 ) * -COS_PI_DIV8;
261 10176 : temp114 = ( vec6 - vec10 ) * COS_3PI_DIV8;
262 10176 : temp112 = ( vec7 + vec11 ) * COS_PI_DIV8;
263 10176 : temp115 = ( vec7 - vec11 ) * COS_3PI_DIV8;
264 :
265 : /* Core multiplications: must run before the += lines below, which modify temp18..temp111 */
266 10176 : vec2 = temp18 * SQRT2PLUS1 - temp112 * SQRT2MINUS1;
267 10176 : vec3 = temp19 * SQRT2PLUS1 - temp113 * SQRT2MINUS1;
268 10176 : vec4 = temp110 * SQRT2MINUS1 - temp114 * SQRT2PLUS1;
269 10176 : vec5 = temp111 * SQRT2MINUS1 - temp115 * SQRT2PLUS1;
270 :
271 : /* Post-additions */
272 10176 : temp18 += temp112;
273 10176 : temp19 += temp113;
274 10176 : temp110 += temp114;
275 10176 : temp111 += temp115;
276 :
277 10176 : vec6 = vec0 + temp14;
278 10176 : vec10 = vec0 - temp14;
279 10176 : vec7 = vec1 + temp15;
280 10176 : vec11 = vec1 - temp15;
281 :
282 10176 : vec12 = temp16 - vec9;
283 10176 : vec14 = temp16 + vec9;
284 10176 : vec13 = vec8 + temp17;
285 10176 : vec15 = vec8 - temp17;
286 :
287 10176 : temp10 = vec6 - vec14;
288 10176 : temp12 = vec6 + vec14;
289 10176 : temp11 = vec7 + vec15;
290 10176 : temp13 = vec7 - vec15;
291 10176 : temp14 = vec10 + vec12;
292 10176 : temp16 = vec10 - vec12;
293 10176 : temp15 = vec11 + vec13;
294 10176 : temp17 = vec11 - vec13;
295 :
296 10176 : vec10 = temp18 + temp110; /* the sum is taken first because the next line overwrites temp110 */
297 10176 : temp110 = temp18 - temp110;
298 10176 : vec11 = temp19 + temp111; /* same pattern for temp111 */
299 10176 : temp111 = temp19 - temp111;
300 :
301 10176 : temp112 = vec2 + vec4;
302 10176 : temp114 = vec2 - vec4;
303 10176 : temp113 = vec3 + vec5;
304 10176 : temp115 = vec3 - vec5;
305 :
306 : /* every input float has been read into locals by now, so the sequential stores below cannot clobber pending data */
307 : /* Post-additions: 32 floats are stored in order through the advancing pointer, alternating "even" and "odd" results */
308 10176 : *vec++ = temp20 + temp22;
309 10176 : *vec++ = temp21 + temp23;
310 10176 : *vec++ = temp12 + vec10;
311 10176 : *vec++ = temp13 + vec11;
312 10176 : *vec++ = temp210 + temp212;
313 10176 : *vec++ = temp211 + temp213;
314 10176 : *vec++ = temp10 + temp112;
315 10176 : *vec++ = temp11 + temp113;
316 10176 : *vec++ = temp24 - temp26;
317 10176 : *vec++ = temp25 - temp27;
318 10176 : *vec++ = temp16 + temp114;
319 10176 : *vec++ = temp17 + temp115;
320 10176 : *vec++ = temp28 + temp214;
321 10176 : *vec++ = temp29 + temp215;
322 10176 : *vec++ = temp14 + temp110;
323 10176 : *vec++ = temp15 + temp111;
324 10176 : *vec++ = temp20 - temp22;
325 10176 : *vec++ = temp21 - temp23;
326 10176 : *vec++ = temp12 - vec10;
327 10176 : *vec++ = temp13 - vec11;
328 10176 : *vec++ = temp210 - temp212;
329 10176 : *vec++ = temp211 - temp213;
330 10176 : *vec++ = temp10 - temp112;
331 10176 : *vec++ = temp11 - temp113;
332 10176 : *vec++ = temp24 + temp26;
333 10176 : *vec++ = temp25 + temp27;
334 10176 : *vec++ = temp16 - temp114;
335 10176 : *vec++ = temp17 - temp115;
336 10176 : *vec++ = temp28 - temp214;
337 10176 : *vec++ = temp29 - temp215;
338 10176 : *vec++ = temp14 - temp110;
339 10176 : *vec++ = temp15 - temp111;
340 :
341 10176 : return;
342 : }
343 :
344 :
345 : /*******************************************************************************
346 : Functionname: fft15
347 : *******************************************************************************
348 :
349 : Description: 15-point FFT. Complex-valued input takes 176 real additions
350 : and 34 real multiplications. Fully unrolled; all 30 input floats are read
351 : into locals before the first result is stored, so the transform works in place.
352 : Arguments: vec - pointer to data (interleaved real / imaginary parts),
353 : 30 floats: vec[2k] real part, vec[2k+1] imaginary part, k = 0..14
354 : Return: none (vec[0..29] is overwritten through the advancing pointer)
355 :
356 : *******************************************************************************/
357 307011888 : static void fft15( float *restrict vec )
358 : {
359 : /* rN / iN hold real / imaginary intermediates; tmpN are shared scratch (even N used for real, odd N for imaginary terms below) */
360 : float r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15, r16, r17;
361 : float i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16, i17;
362 : float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, tmp9,
363 : tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17, tmp18, tmp19,
364 : tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27, tmp28, tmp29;
365 :
366 :
367 : /* Pre-additions real part: pairwise sums / differences of even-indexed floats (vec[0] is added later) */
368 307011888 : r1 = vec[2] + vec[8];
369 307011888 : r2 = vec[2] - vec[8];
370 307011888 : r3 = vec[4] + vec[16];
371 307011888 : r4 = vec[4] - vec[16];
372 307011888 : r5 = vec[6] + vec[24];
373 307011888 : r6 = vec[6] - vec[24];
374 307011888 : r7 = vec[10] + vec[20];
375 307011888 : r8 = vec[10] - vec[20];
376 307011888 : r9 = vec[12] + vec[18];
377 307011888 : r10 = vec[12] - vec[18];
378 307011888 : r11 = vec[14] + vec[26];
379 307011888 : r12 = vec[14] - vec[26];
380 307011888 : r13 = vec[22] + vec[28];
381 307011888 : r14 = vec[22] - vec[28];
382 :
383 307011888 : tmp2 = r1 + r3;
384 307011888 : tmp4 = r1 - r3;
385 307011888 : tmp6 = r2 + r14;
386 307011888 : tmp8 = r2 - r14;
387 307011888 : tmp10 = r4 + r12;
388 307011888 : tmp12 = r4 - r12;
389 307011888 : tmp14 = r5 + r9;
390 307011888 : tmp16 = r5 - r9;
391 307011888 : tmp18 = r11 + r13;
392 307011888 : tmp20 = r11 - r13;
393 :
394 : /* Pre-additions imaginary part: the same pattern on the odd-indexed floats (vec[1] is added later) */
395 307011888 : i1 = vec[3] + vec[9];
396 307011888 : i2 = vec[3] - vec[9];
397 307011888 : i3 = vec[5] + vec[17];
398 307011888 : i4 = vec[5] - vec[17];
399 307011888 : i5 = vec[7] + vec[25];
400 307011888 : i6 = vec[7] - vec[25];
401 307011888 : i7 = vec[11] + vec[21];
402 307011888 : i8 = vec[11] - vec[21];
403 307011888 : i9 = vec[13] + vec[19];
404 307011888 : i10 = vec[13] - vec[19];
405 307011888 : i11 = vec[15] + vec[27];
406 307011888 : i12 = vec[15] - vec[27];
407 307011888 : i13 = vec[23] + vec[29];
408 307011888 : i14 = vec[23] - vec[29];
409 :
410 307011888 : tmp3 = i1 + i3;
411 307011888 : tmp5 = i1 - i3;
412 307011888 : tmp7 = i2 + i14;
413 307011888 : tmp9 = i2 - i14;
414 307011888 : tmp11 = i4 + i12;
415 307011888 : tmp13 = i4 - i12;
416 307011888 : tmp15 = i5 + i9;
417 307011888 : tmp17 = i5 - i9;
418 307011888 : tmp19 = i11 + i13;
419 307011888 : tmp21 = i11 - i13;
420 :
421 : /* Names are recycled heavily below (e.g. r8 / i8 and r7 / i7 are read before they are reassigned), so statement order matters */
422 : /* Pre-additions and core multiplications */
423 307011888 : tmp28 = tmp4 + tmp20;
424 307011888 : tmp29 = tmp5 + tmp21;
425 307011888 : r4 = tmp2 + tmp18;
426 307011888 : i4 = tmp3 + tmp19;
427 307011888 : r3 = ( r4 + tmp14 ) * -1.25f;
428 307011888 : i3 = ( i4 + tmp15 ) * -1.25f;
429 307011888 : r2 = ( tmp29 - i8 ) * -8.660254037844387e-1f; /* real term built from imaginary inputs; i8 still holds its pre-addition value */
430 307011888 : i2 = ( tmp28 - r8 ) * 8.660254037844387e-1f; /* imaginary term built from real inputs; r8 still holds its pre-addition value */
431 307011888 : r1 = r4 + r7;
432 307011888 : i1 = i4 + i7;
433 307011888 : r0 = r1 + vec[0] + tmp14; /* sum of all 15 real inputs */
434 307011888 : i0 = i1 + vec[1] + tmp15; /* sum of all 15 imaginary inputs */
435 307011888 : r7 = tmp4 - tmp20;
436 307011888 : i7 = tmp5 - tmp21;
437 307011888 : r8 = ( tmp3 - tmp19 ) * -4.841229182759272e-1f;
438 307011888 : i8 = ( tmp2 - tmp18 ) * 4.841229182759272e-1f;
439 307011888 : tmp0 = tmp6 + r10;
440 307011888 : tmp1 = tmp7 + i10;
441 307011888 : tmp2 = r6 - tmp10;
442 307011888 : tmp3 = i6 - tmp11;
443 307011888 : r10 = tmp7 * -2.308262652881440f;
444 307011888 : i10 = tmp6 * 2.308262652881440f;
445 307011888 : r11 = tmp8 * 1.332676064001459f;
446 307011888 : i11 = tmp9 * 1.332676064001459f;
447 307011888 : r6 = ( r7 - tmp16 ) * 5.590169943749475e-1f;
448 307011888 : i6 = ( i7 - tmp17 ) * 5.590169943749475e-1f;
449 307011888 : r12 = ( tmp1 + tmp3 ) * 5.877852522924733e-1f;
450 307011888 : i12 = ( tmp0 + tmp2 ) * -5.877852522924733e-1f;
451 307011888 : r13 = ( tmp7 - tmp11 ) * -8.816778784387098e-1f;
452 307011888 : i13 = ( tmp6 - tmp10 ) * 8.816778784387098e-1f;
453 307011888 : r14 = ( tmp8 + tmp12 ) * 5.090369604551274e-1f;
454 307011888 : i14 = ( tmp9 + tmp13 ) * 5.090369604551274e-1f;
455 307011888 : r16 = tmp11 * 5.449068960040204e-1f;
456 307011888 : i16 = tmp10 * -5.449068960040204e-1f;
457 307011888 : r17 = tmp12 * 3.146021430912046e-1f;
458 307011888 : i17 = tmp13 * 3.146021430912046e-1f;
459 : /* remaining products: some scale a value in place, others fill r5 / r9 / r15 (and the i counterparts) */
460 307011888 : r4 *= 1.875f;
461 307011888 : i4 *= 1.875f;
462 307011888 : r1 *= -1.5f;
463 307011888 : i1 *= -1.5f;
464 307011888 : r7 *= -8.385254915624212e-1f;
465 307011888 : i7 *= -8.385254915624212e-1f;
466 307011888 : r5 = tmp29 * 1.082531754730548f;
467 307011888 : i5 = tmp28 * -1.082531754730548f;
468 307011888 : r9 = tmp1 * 1.538841768587627f;
469 307011888 : i9 = tmp0 * -1.538841768587627f;
470 307011888 : r15 = tmp3 * 3.632712640026803e-1f;
471 307011888 : i15 = tmp2 * -3.632712640026803e-1f;
472 :
473 :
474 : /* Post-additions real part: no multiplications from here on */
475 307011888 : tmp2 = r0 + r1;
476 307011888 : tmp4 = r3 + r6;
477 307011888 : tmp6 = r3 - r6;
478 307011888 : tmp8 = r4 + r5;
479 307011888 : tmp10 = r4 - r5;
480 307011888 : tmp12 = r7 + r8;
481 307011888 : tmp14 = r7 - r8;
482 307011888 : tmp16 = r13 + r16;
483 307011888 : tmp18 = r14 + r17;
484 307011888 : tmp20 = r10 - r13;
485 307011888 : tmp22 = r11 - r14;
486 307011888 : tmp24 = r12 + r15;
487 307011888 : tmp26 = r12 - r9;
488 : /* r1..r16 are reassigned from the tmp values; r0 keeps the input sum */
489 307011888 : r1 = tmp2 + r2;
490 307011888 : r2 = tmp2 - r2;
491 307011888 : r3 = tmp4 + tmp26;
492 307011888 : r4 = tmp4 - tmp26;
493 307011888 : r5 = tmp6 + tmp24;
494 307011888 : r6 = tmp6 - tmp24;
495 307011888 : r7 = tmp16 + tmp18;
496 307011888 : r8 = tmp16 - tmp18;
497 307011888 : r9 = tmp20 - tmp22;
498 307011888 : r10 = tmp20 + tmp22;
499 307011888 : r11 = r1 + tmp8;
500 307011888 : r12 = r2 + tmp10;
501 307011888 : r13 = r11 - tmp12;
502 307011888 : r14 = r12 - tmp14;
503 307011888 : r15 = r12 + tmp14;
504 307011888 : r16 = r11 + tmp12;
505 :
506 : /* Post-additions imaginary part: mirrors the real-part sequence with the odd-numbered tmp slots */
507 307011888 : tmp3 = i0 + i1;
508 307011888 : tmp5 = i3 + i6;
509 307011888 : tmp7 = i3 - i6;
510 307011888 : tmp9 = i4 + i5;
511 307011888 : tmp11 = i4 - i5;
512 307011888 : tmp13 = i7 + i8;
513 307011888 : tmp15 = i7 - i8;
514 307011888 : tmp17 = i13 + i16;
515 307011888 : tmp19 = i14 + i17;
516 307011888 : tmp21 = i10 - i13;
517 307011888 : tmp23 = i11 - i14;
518 307011888 : tmp25 = i12 + i15;
519 307011888 : tmp27 = i12 - i9;
520 :
521 307011888 : i1 = tmp3 + i2;
522 307011888 : i2 = tmp3 - i2;
523 307011888 : i3 = tmp5 + tmp27;
524 307011888 : i4 = tmp5 - tmp27;
525 307011888 : i5 = tmp7 + tmp25;
526 307011888 : i6 = tmp7 - tmp25;
527 307011888 : i7 = tmp17 + tmp19;
528 307011888 : i8 = tmp17 - tmp19;
529 307011888 : i9 = tmp21 - tmp23;
530 307011888 : i10 = tmp21 + tmp23;
531 307011888 : i11 = i1 + tmp9;
532 307011888 : i12 = i2 + tmp11;
533 307011888 : i13 = i11 - tmp13;
534 307011888 : i14 = i12 - tmp15;
535 307011888 : i15 = i12 + tmp15;
536 307011888 : i16 = i11 + tmp13;
537 : /* store the 30 result floats in order (real, imaginary, real, ...); every input was read before this point */
538 307011888 : *vec++ = r0;
539 307011888 : *vec++ = i0;
540 307011888 : *vec++ = r13 + r5 + r7;
541 307011888 : *vec++ = i13 + i5 + i7;
542 307011888 : *vec++ = r15 + r3 - r9;
543 307011888 : *vec++ = i15 + i3 - i9;
544 307011888 : *vec++ = r0 + r4;
545 307011888 : *vec++ = i0 + i4;
546 307011888 : *vec++ = r13 + r6 - r7;
547 307011888 : *vec++ = i13 + i6 - i7;
548 307011888 : *vec++ = r2;
549 307011888 : *vec++ = i2;
550 307011888 : *vec++ = r0 + r5;
551 307011888 : *vec++ = i0 + i5;
552 307011888 : *vec++ = r16 + r3 - r10;
553 307011888 : *vec++ = i16 + i3 - i10;
554 307011888 : *vec++ = r15 + r4 + r9;
555 307011888 : *vec++ = i15 + i4 + i9;
556 307011888 : *vec++ = r0 + r6;
557 307011888 : *vec++ = i0 + i6;
558 307011888 : *vec++ = r1;
559 307011888 : *vec++ = i1;
560 307011888 : *vec++ = r14 + r5 + r8;
561 307011888 : *vec++ = i14 + i5 + i8;
562 307011888 : *vec++ = r0 + r3;
563 307011888 : *vec++ = i0 + i3;
564 307011888 : *vec++ = r16 + r4 + r10;
565 307011888 : *vec++ = i16 + i4 + i10;
566 307011888 : *vec++ = r14 + r6 - r8;
567 307011888 : *vec++ = i14 + i6 - i8;
568 :
569 307011888 : return;
570 : }
571 :
572 : /*******************************************************************************
573 : Functionname: fft5s
574 : *******************************************************************************
575 :
576 : Description: 5-point FFT, computed in place on five strided complex values.
577 :
578 : Arguments: x - pointer to input data (interleaved real / imaginary parts);
579 : stride - distance in floats between elements: element k is x[k * stride] (real), x[k * stride + 1] (imaginary)
580 :
581 : Return: none (the five elements are overwritten with the result)
582 :
583 : *******************************************************************************/
584 : static const float C51 = 0.9510565162951535f; /* sin( 2 * pi / 5 ) */
585 : static const float C52 = -1.5388417685876270f; /* -( sin( 2 * pi / 5 ) + sin( pi / 5 ) ) */
586 : static const float C53 = -0.3632712640026803f; /* -( sin( 2 * pi / 5 ) - sin( pi / 5 ) ) */
587 : static const float C54 = 0.5590169943749475f; /* sqrt( 5 ) / 4 */
588 : static const float C55 = -1.25f; /* -5 / 4 */
589 :
590 0 : static void fft5s( float *x, const int16_t stride )
591 : {
592 : float r1, r2, r3, r4;
593 : float s1, s2, s3, s4;
594 : float t;
595 : /* real part: combines x[0] with the floats at x[k * stride], k = 1..4 */
596 0 : r1 = x[1 * stride] + x[4 * stride];
597 0 : r4 = x[1 * stride] - x[4 * stride];
598 0 : r3 = x[2 * stride] + x[3 * stride];
599 0 : r2 = x[2 * stride] - x[3 * stride];
600 0 : t = ( r1 - r3 ) * C54;
601 0 : r1 = r1 + r3;
602 0 : x[0] = x[0] + r1; /* element 0 result (sum of the five real parts) is stored immediately */
603 0 : r1 = x[0] + ( r1 * C55 ); /* reads the x[0] value stored on the previous line */
604 0 : r3 = r1 - t;
605 0 : r1 = r1 + t;
606 0 : t = ( r4 + r2 ) * C51;
607 0 : r4 = t + ( r4 * C52 );
608 0 : r2 = t + ( r2 * C53 );
609 :
610 : /* imaginary part: same sequence on x[1] and x[k * stride + 1] */
611 0 : s1 = x[1 * stride + 1] + x[4 * stride + 1];
612 0 : s4 = x[1 * stride + 1] - x[4 * stride + 1];
613 0 : s3 = x[2 * stride + 1] + x[3 * stride + 1];
614 0 : s2 = x[2 * stride + 1] - x[3 * stride + 1];
615 0 : t = ( s1 - s3 ) * C54;
616 0 : s1 = s1 + s3;
617 0 : x[1] = x[1] + s1; /* element 0 imaginary result, stored immediately */
618 0 : s1 = x[1] + ( s1 * C55 ); /* reads the x[1] value stored on the previous line */
619 0 : s3 = s1 - t;
620 0 : s1 = s1 + t;
621 0 : t = ( s4 + s2 ) * C51;
622 0 : s4 = t + ( s4 * C52 );
623 0 : s2 = t + ( s2 * C53 );
624 :
625 : /* combination: real and imaginary terms are cross-combined and written back to elements 1..4 */
626 0 : x[1 * stride] = r1 + s2;
627 0 : x[4 * stride] = r1 - s2;
628 0 : x[2 * stride] = r3 - s4;
629 0 : x[3 * stride] = r3 + s4;
630 :
631 0 : x[1 * stride + 1] = s1 - r2;
632 0 : x[4 * stride + 1] = s1 + r2;
633 0 : x[2 * stride + 1] = s3 + r4;
634 0 : x[3 * stride + 1] = s3 - r4;
635 0 : }
636 :
637 :
638 : /**
639 : * \brief Function performs a complex 10-point FFT
640 : * The FFT is performed inplace on 20 interleaved floats. This float code applies no
641 : * scaling (the SCALEFACTOR10 scaling named by the earlier header text is not present): vec[0] / vec[1] become the plain input sums.
642 : *
643 : * WOPS FLC version: 1093 cycles
644 : * WOPS with 32x16 bit multiplications: 196 cycles
645 : * (cycle figures are carried over unchanged; they cannot be checked against this float code)
646 : * \param [i/o] vec interleaved input / output: vec[2k] real part, vec[2k+1] imaginary part, k = 0..9
647 : * (the function has no separate re / im / stride arguments)
648 : *
649 : *
650 : * \return void
651 : */
652 19610972 : static void fft10( float *restrict vec )
653 : {
654 : float t;
655 : float r1, r2, r3, r4;
656 : float s1, s2, s3, s4;
657 : float y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
658 : float y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
659 :
660 : /* 2 fft5 stages: the first uses complex samples 0, 2, 4, 6, 8, the second uses samples 1, 3, 5, 7, 9 */
661 :
662 : /* real part (first stage) */
663 19610972 : r1 = vec[12] + vec[8];
664 19610972 : r4 = vec[12] - vec[8];
665 19610972 : r3 = vec[4] + vec[16];
666 19610972 : r2 = vec[4] - vec[16];
667 19610972 : t = ( r1 - r3 ) * C54;
668 19610972 : r1 = r1 + r3;
669 19610972 : y00 = vec[0] + r1; /* sum of the five even-sample real parts */
670 19610972 : r1 = y00 + ( r1 * C55 );
671 19610972 : r3 = r1 - t;
672 19610972 : r1 = r1 + t;
673 19610972 : t = ( r4 + r2 ) * C51;
674 19610972 : r4 = t + ( r4 * C52 );
675 19610972 : r2 = t + ( r2 * C53 );
676 :
677 : /* imaginary part (first stage) */
678 19610972 : s1 = vec[13] + vec[9];
679 19610972 : s4 = vec[13] - vec[9];
680 19610972 : s3 = vec[5] + vec[17];
681 19610972 : s2 = vec[5] - vec[17];
682 19610972 : t = ( s1 - s3 ) * C54;
683 19610972 : s1 = s1 + s3;
684 19610972 : y01 = vec[1] + s1;
685 19610972 : s1 = y01 + ( s1 * C55 );
686 19610972 : s3 = s1 - t;
687 19610972 : s1 = s1 + t;
688 19610972 : t = ( s4 + s2 ) * C51;
689 19610972 : s4 = t + ( s4 * C52 );
690 19610972 : s2 = t + ( s2 * C53 );
691 :
692 : /* combination: first-stage results are parked in y-locals so r / s / t can be reused by the second stage */
693 19610972 : y04 = r1 + s2;
694 19610972 : y16 = r1 - s2;
695 19610972 : y08 = r3 - s4;
696 19610972 : y12 = r3 + s4;
697 19610972 : y05 = s1 - r2;
698 19610972 : y17 = s1 + r2;
699 19610972 : y09 = s3 + r4;
700 19610972 : y13 = s3 - r4;
701 :
702 : /* real part (second stage, centred on vec[10]) */
703 19610972 : r1 = vec[2] + vec[18];
704 19610972 : r4 = vec[2] - vec[18];
705 19610972 : r3 = vec[14] + vec[6];
706 19610972 : r2 = vec[14] - vec[6];
707 19610972 : t = ( r1 - r3 ) * C54;
708 19610972 : r1 = r1 + r3;
709 19610972 : y02 = vec[10] + r1; /* sum of the five odd-sample real parts */
710 19610972 : r1 = y02 + ( r1 * C55 );
711 19610972 : r3 = r1 - t;
712 19610972 : r1 = r1 + t;
713 19610972 : t = ( r4 + r2 ) * C51;
714 19610972 : r4 = t + ( r4 * C52 );
715 19610972 : r2 = t + ( r2 * C53 );
716 :
717 : /* imaginary part (second stage, centred on vec[11]) */
718 19610972 : s1 = vec[3] + vec[19];
719 19610972 : s4 = vec[3] - vec[19];
720 19610972 : s3 = vec[15] + vec[7];
721 19610972 : s2 = vec[15] - vec[7];
722 19610972 : t = ( s1 - s3 ) * C54;
723 19610972 : s1 = s1 + s3;
724 19610972 : y03 = vec[11] + s1;
725 19610972 : s1 = y03 + ( s1 * C55 );
726 19610972 : s3 = s1 - t;
727 19610972 : s1 = s1 + t;
728 19610972 : t = ( s4 + s2 ) * C51;
729 19610972 : s4 = t + ( s4 * C52 );
730 19610972 : s2 = t + ( s2 * C53 );
731 :
732 : /* combination (second stage) */
733 19610972 : y06 = r1 + s2;
734 19610972 : y18 = r1 - s2;
735 19610972 : y10 = r3 - s4;
736 19610972 : y14 = r3 + s4;
737 19610972 : y07 = s1 - r2;
738 19610972 : y19 = s1 + r2;
739 19610972 : y11 = s3 + r4;
740 19610972 : y15 = s3 - r4;
741 :
742 : /* 5 fft2 stages: sums and differences of the two 5-point results; this is the first point at which vec is written */
743 19610972 : vec[0] = y00 + y02;
744 19610972 : vec[1] = y01 + y03;
745 19610972 : vec[2] = y12 - y14;
746 19610972 : vec[3] = y13 - y15;
747 19610972 : vec[4] = y04 + y06;
748 19610972 : vec[5] = y05 + y07;
749 19610972 : vec[6] = y16 - y18;
750 19610972 : vec[7] = y17 - y19;
751 19610972 : vec[8] = y08 + y10;
752 19610972 : vec[9] = y09 + y11;
753 19610972 : vec[10] = y00 - y02;
754 19610972 : vec[11] = y01 - y03;
755 19610972 : vec[12] = y12 + y14;
756 19610972 : vec[13] = y13 + y15;
757 19610972 : vec[14] = y04 - y06;
758 19610972 : vec[15] = y05 - y07;
759 19610972 : vec[16] = y16 + y18;
760 19610972 : vec[17] = y17 + y19;
761 19610972 : vec[18] = y08 - y10;
762 19610972 : vec[19] = y09 - y11;
763 :
764 19610972 : return;
765 : }
766 :
767 : /**
768 : * \brief Function performs a complex 20-point FFT
769 : * The FFT is performed inplace. The result of the FFT
770 : * is scaled by SCALEFACTOR20 bits.
771 : *
772 : * WOPS FLC version: 1509 cycles
773 : * WOPS with 32x16 bit multiplications: 432 cycles
774 : *
775 : * \param [i/o] signal interleaved real / imaginary input / output; the body accesses it
776 : * through re = signal and im = signal + 1 with a fixed stride s = 2
777 : * (re, im and s are locals of the function, not parameters)
778 : *
779 : * \return void
780 : */
781 20347704 : static void fft20( float *signal )
782 : {
783 20347704 : const int16_t s = 2;
784 20347704 : float *re = signal, *im = signal + 1;
785 : float r1, r2, r3, r4;
786 : float s1, s2, s3, s4;
787 : float x0, x1, x2, x3, x4;
788 : float t, t0, t1, t2, t3, t4, t5, t6, t7;
789 : float y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
790 : float y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
791 : float y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
792 : float y30, y31, y32, y33, y34, y35, y36, y37, y38, y39;
793 :
794 : /* 1. FFT5 stage */
795 :
796 : /* real part */
797 20347704 : x0 = re[s * 0];
798 20347704 : x1 = re[s * 16];
799 20347704 : x2 = re[s * 12];
800 20347704 : x3 = re[s * 8];
801 20347704 : x4 = re[s * 4];
802 20347704 : r1 = x1 + x4;
803 20347704 : r4 = x1 - x4;
804 20347704 : r3 = x2 + x3;
805 20347704 : r2 = x2 - x3;
806 20347704 : t = ( r1 - r3 ) * C54;
807 20347704 : r1 = r1 + r3;
808 20347704 : y00 = x0 + r1;
809 20347704 : r1 = y00 + ( r1 * C55 );
810 20347704 : r3 = r1 - t;
811 20347704 : r1 = r1 + t;
812 20347704 : t = ( r4 + r2 ) * C51;
813 20347704 : r4 = t + ( r4 * C52 );
814 20347704 : r2 = t + ( r2 * C53 );
815 :
816 : /* imaginary part */
817 20347704 : x0 = im[s * 0];
818 20347704 : x1 = im[s * 16];
819 20347704 : x2 = im[s * 12];
820 20347704 : x3 = im[s * 8];
821 20347704 : x4 = im[s * 4];
822 20347704 : s1 = x1 + x4;
823 20347704 : s4 = x1 - x4;
824 20347704 : s3 = x2 + x3;
825 20347704 : s2 = x2 - x3;
826 20347704 : t = ( s1 - s3 ) * C54;
827 20347704 : s1 = ( s1 + s3 );
828 20347704 : y01 = ( x0 + s1 );
829 20347704 : s1 = y01 + ( s1 * C55 );
830 20347704 : s3 = ( s1 - t );
831 20347704 : s1 = ( s1 + t );
832 20347704 : t = ( s4 + s2 ) * C51;
833 20347704 : s4 = t + ( s4 * C52 );
834 20347704 : s2 = t + ( s2 * C53 );
835 :
836 : /* combination */
837 20347704 : y08 = ( r1 + s2 );
838 20347704 : y32 = ( r1 - s2 );
839 20347704 : y16 = ( r3 - s4 );
840 20347704 : y24 = ( r3 + s4 );
841 :
842 20347704 : y09 = ( s1 - r2 );
843 20347704 : y33 = ( s1 + r2 );
844 20347704 : y17 = ( s3 + r4 );
845 20347704 : y25 = ( s3 - r4 );
846 :
847 : /* 2. FFT5 stage */
848 :
849 : /* real part */
850 20347704 : x0 = re[s * 5];
851 20347704 : x1 = re[s * 1];
852 20347704 : x2 = re[s * 17];
853 20347704 : x3 = re[s * 13];
854 20347704 : x4 = re[s * 9];
855 20347704 : r1 = ( x1 + x4 );
856 20347704 : r4 = ( x1 - x4 );
857 20347704 : r3 = ( x2 + x3 );
858 20347704 : r2 = ( x2 - x3 );
859 20347704 : t = ( r1 - r3 ) * C54;
860 20347704 : r1 = ( r1 + r3 );
861 20347704 : y02 = ( x0 + r1 );
862 20347704 : r1 = y02 + ( r1 * C55 );
863 20347704 : r3 = ( r1 - t );
864 20347704 : r1 = ( r1 + t );
865 20347704 : t = ( r4 + r2 ) * C51;
866 20347704 : r4 = t + ( r4 * C52 );
867 20347704 : r2 = t + ( r2 * C53 );
868 :
869 : /* imaginary part */
870 20347704 : x0 = im[s * 5];
871 20347704 : x1 = im[s * 1];
872 20347704 : x2 = im[s * 17];
873 20347704 : x3 = im[s * 13];
874 20347704 : x4 = im[s * 9];
875 20347704 : s1 = ( x1 + x4 );
876 20347704 : s4 = ( x1 - x4 );
877 20347704 : s3 = ( x2 + x3 );
878 20347704 : s2 = ( x2 - x3 );
879 20347704 : t = ( s1 - s3 ) * C54;
880 20347704 : s1 = ( s1 + s3 );
881 20347704 : y03 = ( x0 + s1 );
882 20347704 : s1 = y03 + ( s1 * C55 );
883 20347704 : s3 = ( s1 - t );
884 20347704 : s1 = ( s1 + t );
885 20347704 : t = ( s4 + s2 ) * C51;
886 20347704 : s4 = t + ( s4 * C52 );
887 20347704 : s2 = t + ( s2 * C53 );
888 :
889 : /* combination */
890 20347704 : y10 = ( r1 + s2 );
891 20347704 : y34 = ( r1 - s2 );
892 20347704 : y18 = ( r3 - s4 );
893 20347704 : y26 = ( r3 + s4 );
894 :
895 20347704 : y11 = ( s1 - r2 );
896 20347704 : y35 = ( s1 + r2 );
897 20347704 : y19 = ( s3 + r4 );
898 20347704 : y27 = ( s3 - r4 );
899 :
900 : /* 3. FFT5 stage */
901 :
902 : /* real part */
903 20347704 : x0 = re[s * 10];
904 20347704 : x1 = re[s * 6];
905 20347704 : x2 = re[s * 2];
906 20347704 : x3 = re[s * 18];
907 20347704 : x4 = re[s * 14];
908 20347704 : r1 = ( x1 + x4 );
909 20347704 : r4 = ( x1 - x4 );
910 20347704 : r3 = ( x2 + x3 );
911 20347704 : r2 = ( x2 - x3 );
912 20347704 : t = ( r1 - r3 ) * C54;
913 20347704 : r1 = ( r1 + r3 );
914 20347704 : y04 = ( x0 + r1 );
915 20347704 : r1 = y04 + ( r1 * C55 );
916 20347704 : r3 = ( r1 - t );
917 20347704 : r1 = ( r1 + t );
918 20347704 : t = ( r4 + r2 ) * C51;
919 20347704 : r4 = t + ( r4 * C52 );
920 20347704 : r2 = t + ( r2 * C53 );
921 :
922 : /* imaginary part */
923 20347704 : x0 = im[s * 10];
924 20347704 : x1 = im[s * 6];
925 20347704 : x2 = im[s * 2];
926 20347704 : x3 = im[s * 18];
927 20347704 : x4 = im[s * 14];
928 20347704 : s1 = ( x1 + x4 );
929 20347704 : s4 = ( x1 - x4 );
930 20347704 : s3 = ( x2 + x3 );
931 20347704 : s2 = ( x2 - x3 );
932 20347704 : t = ( s1 - s3 ) * C54;
933 20347704 : s1 = ( s1 + s3 );
934 20347704 : y05 = ( x0 + s1 );
935 20347704 : s1 = y05 + ( s1 * C55 );
936 20347704 : s3 = ( s1 - t );
937 20347704 : s1 = ( s1 + t );
938 20347704 : t = ( s4 + s2 ) * C51;
939 20347704 : s4 = t + ( s4 * C52 );
940 20347704 : s2 = t + ( s2 * C53 );
941 :
942 : /* combination */
943 20347704 : y12 = ( r1 + s2 );
944 20347704 : y36 = ( r1 - s2 );
945 20347704 : y20 = ( r3 - s4 );
946 20347704 : y28 = ( r3 + s4 );
947 :
948 20347704 : y13 = ( s1 - r2 );
949 20347704 : y37 = ( s1 + r2 );
950 20347704 : y21 = ( s3 + r4 );
951 20347704 : y29 = ( s3 - r4 );
952 :
953 : /* 4. FFT5 stage */
954 :
955 : /* real part */
956 20347704 : x0 = re[s * 15];
957 20347704 : x1 = re[s * 11];
958 20347704 : x2 = re[s * 7];
959 20347704 : x3 = re[s * 3];
960 20347704 : x4 = re[s * 19];
961 20347704 : r1 = ( x1 + x4 );
962 20347704 : r4 = ( x1 - x4 );
963 20347704 : r3 = ( x2 + x3 );
964 20347704 : r2 = ( x2 - x3 );
965 20347704 : t = ( r1 - r3 ) * C54;
966 20347704 : r1 = ( r1 + r3 );
967 20347704 : y06 = ( x0 + r1 );
968 20347704 : r1 = y06 + ( r1 * C55 );
969 20347704 : r3 = ( r1 - t );
970 20347704 : r1 = ( r1 + t );
971 20347704 : t = ( r4 + r2 ) * C51;
972 20347704 : r4 = t + ( r4 * C52 );
973 20347704 : r2 = t + ( r2 * C53 );
974 :
975 : /* imaginary part */
976 20347704 : x0 = im[s * 15];
977 20347704 : x1 = im[s * 11];
978 20347704 : x2 = im[s * 7];
979 20347704 : x3 = im[s * 3];
980 20347704 : x4 = im[s * 19];
981 20347704 : s1 = ( x1 + x4 );
982 20347704 : s4 = ( x1 - x4 );
983 20347704 : s3 = ( x2 + x3 );
984 20347704 : s2 = ( x2 - x3 );
985 20347704 : t = ( s1 - s3 ) * C54;
986 20347704 : s1 = ( s1 + s3 );
987 20347704 : y07 = ( x0 + s1 );
988 20347704 : s1 = y07 + ( s1 * C55 );
989 20347704 : s3 = ( s1 - t );
990 20347704 : s1 = ( s1 + t );
991 20347704 : t = ( s4 + s2 ) * C51;
992 20347704 : s4 = t + ( s4 * C52 );
993 20347704 : s2 = t + ( s2 * C53 );
994 :
995 : /* combination */
996 20347704 : y14 = ( r1 + s2 );
997 20347704 : y38 = ( r1 - s2 );
998 20347704 : y22 = ( r3 - s4 );
999 20347704 : y30 = ( r3 + s4 );
1000 :
1001 20347704 : y15 = ( s1 - r2 );
1002 20347704 : y39 = ( s1 + r2 );
1003 20347704 : y23 = ( s3 + r4 );
1004 20347704 : y31 = ( s3 - r4 );
1005 :
1006 :
1007 : /* 1. FFT4 stage */
1008 :
1009 : /* Pre-additions */
1010 20347704 : t0 = ( y00 + y04 );
1011 20347704 : t2 = ( y00 - y04 );
1012 20347704 : t1 = ( y01 + y05 );
1013 20347704 : t3 = ( y01 - y05 );
1014 20347704 : t4 = ( y02 + y06 );
1015 20347704 : t7 = ( y02 - y06 );
1016 20347704 : t5 = ( y07 + y03 );
1017 20347704 : t6 = ( y07 - y03 );
1018 :
1019 : /* Post-additions */
1020 20347704 : re[s * 0] = ( t0 + t4 );
1021 20347704 : im[s * 0] = ( t1 + t5 );
1022 20347704 : re[s * 5] = ( t2 - t6 );
1023 20347704 : im[s * 5] = ( t3 - t7 );
1024 20347704 : re[s * 10] = ( t0 - t4 );
1025 20347704 : im[s * 10] = ( t1 - t5 );
1026 20347704 : re[s * 15] = ( t2 + t6 );
1027 20347704 : im[s * 15] = ( t3 + t7 );
1028 :
1029 : /* 2. FFT4 stage */
1030 :
1031 : /* Pre-additions */
1032 20347704 : t0 = ( y08 + y12 );
1033 20347704 : t2 = ( y08 - y12 );
1034 20347704 : t1 = ( y09 + y13 );
1035 20347704 : t3 = ( y09 - y13 );
1036 20347704 : t4 = ( y10 + y14 );
1037 20347704 : t7 = ( y10 - y14 );
1038 20347704 : t5 = ( y15 + y11 );
1039 20347704 : t6 = ( y15 - y11 );
1040 :
1041 : /* Post-additions */
1042 20347704 : re[s * 4] = ( t0 + t4 );
1043 20347704 : im[s * 4] = ( t1 + t5 );
1044 20347704 : re[s * 9] = ( t2 - t6 );
1045 20347704 : im[s * 9] = ( t3 - t7 );
1046 20347704 : re[s * 14] = ( t0 - t4 );
1047 20347704 : im[s * 14] = ( t1 - t5 );
1048 20347704 : re[s * 19] = ( t2 + t6 );
1049 20347704 : im[s * 19] = ( t3 + t7 );
1050 :
1051 :
1052 : /* 3. FFT4 stage */
1053 :
1054 : /* Pre-additions */
1055 20347704 : t0 = ( y16 + y20 );
1056 20347704 : t2 = ( y16 - y20 );
1057 20347704 : t1 = ( y17 + y21 );
1058 20347704 : t3 = ( y17 - y21 );
1059 20347704 : t4 = ( y18 + y22 );
1060 20347704 : t7 = ( y18 - y22 );
1061 20347704 : t5 = ( y23 + y19 );
1062 20347704 : t6 = ( y23 - y19 );
1063 :
1064 : /* Post-additions */
1065 20347704 : re[s * 8] = ( t0 + t4 );
1066 20347704 : im[s * 8] = ( t1 + t5 );
1067 20347704 : re[s * 13] = ( t2 - t6 );
1068 20347704 : im[s * 13] = ( t3 - t7 );
1069 20347704 : re[s * 18] = ( t0 - t4 );
1070 20347704 : im[s * 18] = ( t1 - t5 );
1071 20347704 : re[s * 3] = ( t2 + t6 );
1072 20347704 : im[s * 3] = ( t3 + t7 );
1073 :
1074 : /* 4. FFT4 stage */
1075 :
1076 : /* Pre-additions */
1077 20347704 : t0 = ( y24 + y28 );
1078 20347704 : t2 = ( y24 - y28 );
1079 20347704 : t1 = ( y25 + y29 );
1080 20347704 : t3 = ( y25 - y29 );
1081 20347704 : t4 = ( y26 + y30 );
1082 20347704 : t7 = ( y26 - y30 );
1083 20347704 : t5 = ( y31 + y27 );
1084 20347704 : t6 = ( y31 - y27 );
1085 :
1086 : /* Post-additions */
1087 20347704 : re[s * 12] = ( t0 + t4 );
1088 20347704 : im[s * 12] = ( t1 + t5 );
1089 20347704 : re[s * 17] = ( t2 - t6 );
1090 20347704 : im[s * 17] = ( t3 - t7 );
1091 20347704 : re[s * 2] = ( t0 - t4 );
1092 20347704 : im[s * 2] = ( t1 - t5 );
1093 20347704 : re[s * 7] = ( t2 + t6 );
1094 20347704 : im[s * 7] = ( t3 + t7 );
1095 :
1096 : /* 5. FFT4 stage */
1097 :
1098 : /* Pre-additions */
1099 20347704 : t0 = ( y32 + y36 );
1100 20347704 : t2 = ( y32 - y36 );
1101 20347704 : t1 = ( y33 + y37 );
1102 20347704 : t3 = ( y33 - y37 );
1103 20347704 : t4 = ( y34 + y38 );
1104 20347704 : t7 = ( y34 - y38 );
1105 20347704 : t5 = ( y39 + y35 );
1106 20347704 : t6 = ( y39 - y35 );
1107 :
1108 : /* Post-additions */
1109 20347704 : re[s * 16] = ( t0 + t4 );
1110 20347704 : im[s * 16] = ( t1 + t5 );
1111 20347704 : re[s * 1] = ( t2 - t6 );
1112 20347704 : im[s * 1] = ( t3 - t7 );
1113 20347704 : re[s * 6] = ( t0 - t4 );
1114 20347704 : im[s * 6] = ( t1 - t5 );
1115 20347704 : re[s * 11] = ( t2 + t6 );
1116 20347704 : im[s * 11] = ( t3 + t7 );
1117 :
1118 20347704 : return;
1119 : }
1120 :
/*******************************************************************************
 Functionname: fft30
 *******************************************************************************

 Description:  30-point FFT, computed in place from two 15-point sub-transforms.

               The 30 complex inputs are first distributed over two 15-point
               work buffers, each buffer is transformed by fft15(), and the
               two results are merged back into the caller's buffer using
               sums and differences.

 Arguments:    in - pointer to data (interleaved real / imaginary parts),
                    60 floats are read and then overwritten

 Return:       none

*******************************************************************************/

static void fft30( float *restrict in )
{
    int16_t k;
    float work[60];
    float *const lower = work;          /* first 15-point sub-sequence  */
    float *const upper = work + 2 * 15; /* second 15-point sub-sequence */

    /* Distribute the input: even positions keep their half, odd positions
       are taken from the opposite half of the input buffer. */
    for ( k = 0; k < 15; k++ )
    {
        const int16_t odd = k & 1;
        const float *src_lower = in + 2 * ( odd ? k + 15 : k );
        const float *src_upper = in + 2 * ( odd ? k : k + 15 );

        lower[2 * k] = src_lower[0];
        lower[2 * k + 1] = src_lower[1];
        upper[2 * k] = src_upper[0];
        upper[2 * k + 1] = src_upper[1];
    }

    fft15( lower );
    fft15( upper );

    /* Sub-transform bins 0..7: the sum goes to the even bins of the first
       output half, the difference to the matching bins of the second half. */
    for ( k = 0; k < 8; k++ )
    {
        const float *lo = lower + 2 * k;
        const float *hi = upper + 2 * k;
        float *out_first = in + 4 * k;
        float *out_second = in + 2 * 15 + 4 * k;

        out_first[0] = lo[0] + hi[0];
        out_second[0] = lo[0] - hi[0];
        out_first[1] = lo[1] + hi[1];
        out_second[1] = lo[1] - hi[1];
    }

    /* Sub-transform bins 8..14: roles are swapped, the sum goes to the second
       output half and the difference to the odd bins of the first half. */
    for ( k = 0; k < 7; k++ )
    {
        const float *lo = lower + 2 * ( 8 + k );
        const float *hi = upper + 2 * ( 8 + k );
        float *out_first = in + 4 * k + 2;
        float *out_second = in + 2 * 15 + 4 * k + 2;

        out_second[0] = lo[0] + hi[0];
        out_first[0] = lo[0] - hi[0];
        out_second[1] = lo[1] + hi[1];
        out_first[1] = lo[1] - hi[1];
    }

    return;
}
1184 :
/*-------------------------------------------------------------------*
 * fft_cldfb()
 *
 * Interface function to the FFT subroutines: forwards the data vector
 * to the transform routine matching the requested size.
 * Handled sizes are 5, 8, 10, 16, 20 and 30; any other size triggers
 * an assertion.
 *--------------------------------------------------------------------*/
void fft_cldfb(
    float *data,       /* i/o: input/output vector */
    const int16_t size /* size of fft operation */
)
{
    if ( size == 5 )
    {
        /* the 5-point routine takes an explicit stride argument */
        fft5s( data, 2 );
    }
    else if ( size == 8 )
    {
        fft8( data );
    }
    else if ( size == 10 )
    {
        fft10( data );
    }
    else if ( size == 16 )
    {
        fft16( data );
    }
    else if ( size == 20 )
    {
        fft20( data );
    }
    else if ( size == 30 )
    {
        fft30( data );
    }
    else
    {
        /* unsupported transform length */
        assert( 0 );
    }

    return;
}
|