aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/linden/indra/newview/llfft.cpp
diff options
context:
space:
mode:
authorJacek Antonelli2008-08-15 23:44:46 -0500
committerJacek Antonelli2008-08-15 23:44:46 -0500
commit38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4 (patch)
treeadca584755d22ca041a2dbfc35d4eca01f70b32c /linden/indra/newview/llfft.cpp
parentREADME.txt (diff)
downloadmeta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.zip
meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.gz
meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.bz2
meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.xz
Second Life viewer sources 1.13.2.12
Diffstat (limited to 'linden/indra/newview/llfft.cpp')
-rw-r--r--linden/indra/newview/llfft.cpp576
1 files changed, 576 insertions, 0 deletions
diff --git a/linden/indra/newview/llfft.cpp b/linden/indra/newview/llfft.cpp
new file mode 100644
index 0000000..9652786
--- /dev/null
+++ b/linden/indra/newview/llfft.cpp
@@ -0,0 +1,576 @@
1/**
2 * @file llfft.cpp
3 * @brief FFT function implementations
4 *
5 * Copyright (c) 2003-2007, Linden Research, Inc.
6 *
7 * The source code in this file ("Source Code") is provided by Linden Lab
8 * to you under the terms of the GNU General Public License, version 2.0
9 * ("GPL"), unless you have obtained a separate licensing agreement
10 * ("Other License"), formally executed by you and Linden Lab. Terms of
11 * the GPL can be found in doc/GPL-license.txt in this distribution, or
12 * online at http://secondlife.com/developers/opensource/gplv2
13 *
14 * There are special exceptions to the terms and conditions of the GPL as
15 * it is applied to this Source Code. View the full text of the exception
16 * in the file doc/FLOSS-exception.txt in this software distribution, or
17 * online at http://secondlife.com/developers/opensource/flossexception
18 *
19 * By copying, modifying or distributing this software, you acknowledge
20 * that you have read and understood your obligations described above,
21 * and agree to abide by those obligations.
22 *
23 * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
24 * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
25 * COMPLETENESS OR PERFORMANCE.
26 */
27
28/*
29 * Fast Fourier Transform
30 *
31 */
32#include "llviewerprecompiledheaders.h"
33
34#include "llfft.h"
35#include "llerror.h"
36
37/*
38**
39*********************************************************************
40** Forward and inverse discrete Fourier transforms on complex data **
41*********************************************************************
42**
43**
44** forward_fft(array, rows, cols)
45** COMPLEX *array;
46** S32 rows, cols;
47**
48** inverse_fft(array, rows, cols)
49** COMPLEX *array;
50** S32 rows, cols;
51**
52** These entry points compute the forward and inverse DFT's, respectively,
53** of a single-precision COMPLEX array.
54**
55** The result is a COMPLEX array of the same size, returned in
56** the same space as the input array. That is, the original array is
57** overwritten and destroyed.
58**
59** Rows and columns must each be an integral power of 2.
60**
61** These routines return integer value -1 if an error was detected,
62** 0 otherwise
63**
64** This implementation of the DFT uses the transform pair defined as follows.
65**
66** Let there be two COMPLEX arrays each with n rows and m columns
67** Index them as
68** f(x,y): 0 <= x <= m - 1, 0 <= y <= n - 1
69** F(u,v): -m/2 <= u <= m/2 - 1, -n/2 <= v <= n/2 - 1
70**
71** Then the forward and inverse transforms are related as
72**
73** Forward:
74**
75** F(u,v) = \sum_{x=0}^{m-1} \sum_{y=0}^{n-1}
76** f(x,y) \exp{-2\pi i (ux/m + vy/n)}
77**
78**
79** Inverse:
80**
81** f(x,y) = 1/(mn) \sum_{u=-m/2}^{m/2-1} \sum_{v=-n/2}^{n/2-1}
82** F(u,v) \exp{2\pi i (ux/m + vy/n)}
83**
84** Therefore, the transforms have these properties:
85** 1. \sum_x \sum_y f(x,y) = F(0,0)
86** 2. m n \sum_x \sum_y |f(x,y)|^2 = \sum_u \sum_v |F(u,v)|^2
87**
88*/
89
90
91//DPCOMPLEX *stageBuff; /* buffer to hold a row or column at a time */
92//COMPLEX *bigBuff; /* a pointer to the input array */
93
94
95/*
96 * These macros move complex data between bigBuff and
97 * stageBuff
98 */
99
100inline void LoadRow(DPCOMPLEX* stageBuff, COMPLEX* bigBuff, U32 row, U32 cols)
101{
102 for (U32 j = row*cols, k = 0 ; k < cols ; j++, k++)
103 {
104 stageBuff[k].re = bigBuff[j].re;
105 stageBuff[k].im = bigBuff[j].im;
106 }
107}
108
109inline void StoreRow(DPCOMPLEX* stageBuff, COMPLEX* bigBuff, U32 row, U32 cols)
110{
111 for (U32 j = row*cols, k = 0 ; k < cols ; j++, k++)
112 {
113 bigBuff[j].re = (F32)stageBuff[k].re;
114 bigBuff[j].im = (F32)stageBuff[k].im;
115 }
116}
117
118inline void LoadCol(DPCOMPLEX* stageBuff, COMPLEX* bigBuff, U32 col, U32 rows, U32 cols)
119{
120 for (U32 j = col,k = 0 ; k < rows ; j+=cols, k++)
121 {
122 stageBuff[k].re = bigBuff[j].re;
123 stageBuff[k].im = bigBuff[j].im;
124 }
125}
126
127inline void StoreCol(DPCOMPLEX* stageBuff, COMPLEX* bigBuff, U32 col, U32 rows, U32 cols)
128{
129 for (U32 j = col,k = 0 ; k < rows ; j+=cols, k++)
130 {
131 bigBuff[j].re = (F32)stageBuff[k].re;
132 bigBuff[j].im = (F32)stageBuff[k].im;
133 }
134}
135
136
137/* do something with an error message */
138inline void handle_error(S8* msg)
139{
140 llerrs << msg << llendl;
141}
142
143
144/*
145** compute DFT: forward if direction==0, inverse if direction==1
146** array must be COMPLEX
147*/
148BOOL fft(const LLFFTPlan& plan, COMPLEX *array, S32 rows, S32 cols, S32 direction)
149{
150 S32 i;
151
152 if (!plan.valid() || plan.rows() != rows || plan.cols() != cols)
153 return FALSE;
154
155 /* compute transform row by row */
156
157 if(cols>1)
158 {
159 for(i=0;i<rows;i++)
160 {
161 LoadRow(plan.buffer(), array, i, cols);
162 FFT842(direction, cols, plan.buffer());
163 StoreRow(plan.buffer(), array, i, cols);
164 }
165 }
166
167 if(rows<2) /* done */
168 {
169 //freeBuffer();
170 return TRUE;
171 }
172
173
174 /* compute transform column by column */
175
176 for(i=0;i<cols;i++)
177 {
178 LoadCol(plan.buffer(), array, i, rows, cols);
179 FFT842(direction, rows, plan.buffer());
180 StoreCol(plan.buffer(), array, i, rows, cols);
181 }
182
183 //freeBuffer();
184
185 return TRUE;
186}
187
188
189
190/*
191** FFT842
192** This routine replaces the input DPCOMPLEX vector by its
193** finite discrete complex fourier transform if in==0.
194** It replaces the input DPCOMPLEX vector by its
195** finite discrete complex inverse fourier transform if in==1.
196**
197** in - FORWARD or INVERSE
198** n - length of vector
199** b - input vector
200**
201** It performs as many base 8 iterations as possible and
202** then finishes with a base 4 iteration or a base 2
203** iteration if needed.
204**
205** Ported from the FORTRAN code in Programming for Digital Signal Processing,
206** IEEE Press 1979, Section 1, by G. D. Bergland and M. T. Dolan
207**
208*/
209void FFT842(S32 in, S32 n, DPCOMPLEX *b)
210{
211 F64 fn, r, fi;
212
213 S32 L[16],L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15;
214 S32 j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14;
215 S32 i, j, ij, ji, ij1, ji1;
216 S32 n2pow, n8pow, nthpo, ipass, nxtlt, lengt;
217
218 n2pow = fastlog2(n);
219 nthpo = n;
220 fn = 1.0 / (F64)nthpo;
221
222
223 if(in==FORWARD)
224 {
225 /* take conjugate */
226 for(i=0;i<n;i++)
227 {
228 b[i].im *= -1.0;
229 }
230 }
231
232 if(in==INVERSE)
233 {
234 /* scramble inputs */
235 for(i=0,j=n/2;j<n;i++,j++)
236 {
237 r = b[j].re;
238 fi = b[j].im;
239 b[j].re = b[i].re;
240 b[j].im = b[i].im;
241 b[i].re = r;
242 b[i].im = fi;
243 }
244 }
245
246 n8pow = n2pow/3;
247
248 if(n8pow)
249 {
250 /* radix 8 iterations */
251 for(ipass=1;ipass<=n8pow;ipass++)
252 {
253 nxtlt = 0x1 << (n2pow - 3*ipass);
254 lengt = 8*nxtlt;
255 R8TX(nxtlt,nthpo,lengt,
256 b,b+nxtlt,b+2*nxtlt,
257 b+3*nxtlt,b+4*nxtlt,b+5*nxtlt,
258 b+6*nxtlt,b+7*nxtlt);
259 }
260 }
261
262 if(n2pow%3 == 1)
263 {
264 /* radix 2 iteration needed */
265 R2TX(nthpo,b,b+1);
266 }
267
268
269 if(n2pow%3 == 2)
270 {
271 /* radix 4 iteration needed */
272 R4TX(nthpo,b,b+1,b+2,b+3);
273 }
274
275
276
277 for(j=1;j<=15;j++)
278 {
279 L[j] = 1;
280 if(j-n2pow <= 0) L[j] = 0x1 << (n2pow + 1 - j);
281 }
282 L15=L[1];L14=L[2];L13=L[3];L12=L[4];L11=L[5];L10=L[6];L9=L[7];
283 L8=L[8];L7=L[9];L6=L[10];L5=L[11];L4=L[12];L3=L[13];L2=L[14];L1=L[15];
284
285 ij = 1;
286
287 for(j1=1;j1<=L1;j1++)
288 for(j2=j1;j2<=L2;j2+=L1)
289 for(j3=j2;j3<=L3;j3+=L2)
290 for(j4=j3;j4<=L4;j4+=L3)
291 for(j5=j4;j5<=L5;j5+=L4)
292 for(j6=j5;j6<=L6;j6+=L5)
293 for(j7=j6;j7<=L7;j7+=L6)
294 for(j8=j7;j8<=L8;j8+=L7)
295 for(j9=j8;j9<=L9;j9+=L8)
296 for(j10=j9;j10<=L10;j10+=L9)
297 for(j11=j10;j11<=L11;j11+=L10)
298 for(j12=j11;j12<=L12;j12+=L11)
299 for(j13=j12;j13<=L13;j13+=L12)
300 for(j14=j13;j14<=L14;j14+=L13)
301 for(ji=j14;ji<=L15;ji+=L14)
302 {
303 ij1 = ij-1;
304 ji1 = ji-1;
305
306 if(ij-ji<0)
307 {
308 r = b[ij1].re;
309 b[ij1].re = b[ji1].re;
310 b[ji1].re = r;
311 fi = b[ij1].im;
312 b[ij1].im = b[ji1].im;
313 b[ji1].im = fi;
314 }
315 ij++;
316 }
317
318 if(in==FORWARD) // take conjugates & unscramble outputs
319 {
320 for(i=0,j=n/2;j<n;i++,j++)
321 {
322 r = b[j].re;
323 fi = b[j].im;
324 b[j].re = b[i].re;
325 b[j].im = -b[i].im;
326 b[i].re = r;
327 b[i].im = -fi;
328 }
329 }
330
331 if(in==INVERSE) // scale outputs
332 {
333 for(i=0;i<nthpo;i++)
334 {
335 b[i].re *= fn;
336 b[i].im *= fn;
337 }
338 }
339}
340
341
342/*
343** radix 2 iteration subroutine
344*/
345void R2TX(S32 nthpo, DPCOMPLEX *c0, DPCOMPLEX *c1)
346{
347 S32 k,kk;
348 F64 *cr0, *ci0, *cr1, *ci1, r1, fi1;
349
350 cr0 = &(c0[0].re);
351 ci0 = &(c0[0].im);
352 cr1 = &(c1[0].re);
353 ci1 = &(c1[0].im);
354
355 for(k = 0; k < nthpo; k += 2)
356 {
357 kk = k*2;
358
359 r1 = cr0[kk] + cr1[kk];
360 cr1[kk] = cr0[kk] - cr1[kk];
361 cr0[kk] = r1;
362 fi1 = ci0[kk] + ci1[kk];
363 ci1[kk] = ci0[kk] - ci1[kk];
364 ci0[kk] = fi1;
365 }
366}
367
368
369/*
370** radix 4 iteration subroutine
371*/
372void R4TX(S32 nthpo, DPCOMPLEX *c0, DPCOMPLEX *c1, DPCOMPLEX *c2, DPCOMPLEX *c3)
373{
374 S32 k,kk;
375 F64 *cr0, *ci0, *cr1, *ci1, *cr2, *ci2, *cr3, *ci3;
376 F64 r1,r2,r3,r4,i1,i2,i3,i4;
377
378 cr0 = &(c0[0].re);
379 cr1 = &(c1[0].re);
380 cr2 = &(c2[0].re);
381 cr3 = &(c3[0].re);
382 ci0 = &(c0[0].im);
383 ci1 = &(c1[0].im);
384 ci2 = &(c2[0].im);
385 ci3 = &(c3[0].im);
386
387 for(k = 1; k <= nthpo; k += 4)
388 {
389 kk = (k-1)*2; /* real and imag parts alternate */
390
391 r1 = cr0[kk] + cr2[kk];
392 r2 = cr0[kk] - cr2[kk];
393 r3 = cr1[kk] + cr3[kk];
394 r4 = cr1[kk] - cr3[kk];
395 i1 = ci0[kk] + ci2[kk];
396 i2 = ci0[kk] - ci2[kk];
397 i3 = ci1[kk] + ci3[kk];
398 i4 = ci1[kk] - ci3[kk];
399 cr0[kk] = r1 + r3;
400 ci0[kk] = i1 + i3;
401 cr1[kk] = r1 - r3;
402 ci1[kk] = i1 - i3;
403 cr2[kk] = r2 - i4;
404 ci2[kk] = i2 + r4;
405 cr3[kk] = r2 + i4;
406 ci3[kk] = i2 - r4;
407 }
408}
409
410
411
412/*
413** radix 8 iteration subroutine
414*/
415void R8TX(S32 nxtlt, S32 nthpo, S32 lengt, DPCOMPLEX *cc0, DPCOMPLEX *cc1, DPCOMPLEX *cc2,
416 DPCOMPLEX *cc3, DPCOMPLEX *cc4, DPCOMPLEX *cc5, DPCOMPLEX *cc6, DPCOMPLEX *cc7)
417{
418 S32 j,k,kk;
419 F64 scale, arg, tr, ti;
420 F64 c1,c2,c3,c4,c5,c6,c7;
421 F64 s1,s2,s3,s4,s5,s6,s7;
422 F64 ar0,ar1,ar2,ar3,ar4,ar5,ar6,ar7;
423 F64 ai0,ai1,ai2,ai3,ai4,ai5,ai6,ai7;
424 F64 br0,br1,br2,br3,br4,br5,br6,br7;
425 F64 bi0,bi1,bi2,bi3,bi4,bi5,bi6,bi7;
426
427 F64 *cr0,*cr1,*cr2,*cr3,*cr4,*cr5,*cr6,*cr7;
428 F64 *ci0,*ci1,*ci2,*ci3,*ci4,*ci5,*ci6,*ci7;
429
430 cr0 = &(cc0[0].re);
431 cr1 = &(cc1[0].re);
432 cr2 = &(cc2[0].re);
433 cr3 = &(cc3[0].re);
434 cr4 = &(cc4[0].re);
435 cr5 = &(cc5[0].re);
436 cr6 = &(cc6[0].re);
437 cr7 = &(cc7[0].re);
438
439 ci0 = &(cc0[0].im);
440 ci1 = &(cc1[0].im);
441 ci2 = &(cc2[0].im);
442 ci3 = &(cc3[0].im);
443 ci4 = &(cc4[0].im);
444 ci5 = &(cc5[0].im);
445 ci6 = &(cc6[0].im);
446 ci7 = &(cc7[0].im);
447
448
449 scale = F_TWO_PI/lengt;
450
451 for(j = 1; j <= nxtlt; j++)
452 {
453 arg = (j-1)*scale;
454 c1 = cos(arg);
455 s1 = sin(arg);
456 c2 = c1*c1 - s1*s1;
457 s2 = c1*s1 + c1*s1;
458 c3 = c1*c2 - s1*s2;
459 s3 = c2*s1 + s2*c1;
460 c4 = c2*c2 - s2*s2;
461 s4 = c2*s2 + c2*s2;
462 c5 = c2*c3 - s2*s3;
463 s5 = c3*s2 + s3*c2;
464 c6 = c3*c3 - s3*s3;
465 s6 = c3*s3 + c3*s3;
466 c7 = c3*c4 - s3*s4;
467 s7 = c4*s3 + s4*c3;
468
469 for(k = j; k <= nthpo; k += lengt)
470 {
471 kk = (k-1)*2; /* index by twos; re & im alternate */
472
473 ar0 = cr0[kk] + cr4[kk];
474 ar1 = cr1[kk] + cr5[kk];
475 ar2 = cr2[kk] + cr6[kk];
476 ar3 = cr3[kk] + cr7[kk];
477 ar4 = cr0[kk] - cr4[kk];
478 ar5 = cr1[kk] - cr5[kk];
479 ar6 = cr2[kk] - cr6[kk];
480 ar7 = cr3[kk] - cr7[kk];
481 ai0 = ci0[kk] + ci4[kk];
482 ai1 = ci1[kk] + ci5[kk];
483 ai2 = ci2[kk] + ci6[kk];
484 ai3 = ci3[kk] + ci7[kk];
485 ai4 = ci0[kk] - ci4[kk];
486 ai5 = ci1[kk] - ci5[kk];
487 ai6 = ci2[kk] - ci6[kk];
488 ai7 = ci3[kk] - ci7[kk];
489 br0 = ar0 + ar2;
490 br1 = ar1 + ar3;
491 br2 = ar0 - ar2;
492 br3 = ar1 - ar3;
493 br4 = ar4 - ai6;
494 br5 = ar5 - ai7;
495 br6 = ar4 + ai6;
496 br7 = ar5 + ai7;
497 bi0 = ai0 + ai2;
498 bi1 = ai1 + ai3;
499 bi2 = ai0 - ai2;
500 bi3 = ai1 - ai3;
501 bi4 = ai4 + ar6;
502 bi5 = ai5 + ar7;
503 bi6 = ai4 - ar6;
504 bi7 = ai5 - ar7;
505 cr0[kk] = br0 + br1;
506 ci0[kk] = bi0 + bi1;
507 if(j > 1)
508 {
509 cr1[kk] = c4*(br0-br1) - s4*(bi0-bi1);
510 cr2[kk] = c2*(br2-bi3) - s2*(bi2+br3);
511 cr3[kk] = c6*(br2+bi3) - s6*(bi2-br3);
512 ci1[kk] = c4*(bi0-bi1) + s4*(br0-br1);
513 ci2[kk] = c2*(bi2+br3) + s2*(br2-bi3);
514 ci3[kk] = c6*(bi2-br3) + s6*(br2+bi3);
515 tr = OO_SQRT2*(br5-bi5);
516 ti = OO_SQRT2*(br5+bi5);
517 cr4[kk] = c1*(br4+tr) - s1*(bi4+ti);
518 ci4[kk] = c1*(bi4+ti) + s1*(br4+tr);
519 cr5[kk] = c5*(br4-tr) - s5*(bi4-ti);
520 ci5[kk] = c5*(bi4-ti) + s5*(br4-tr);
521 tr = -OO_SQRT2*(br7+bi7);
522 ti = OO_SQRT2*(br7-bi7);
523 cr6[kk] = c3*(br6+tr) - s3*(bi6+ti);
524 ci6[kk] = c3*(bi6+ti) + s3*(br6+tr);
525 cr7[kk] = c7*(br6-tr) - s7*(bi6-ti);
526 ci7[kk] = c7*(bi6-ti) + s7*(br6-tr);
527 }
528 else
529 {
530 cr1[kk] = br0 - br1;
531 cr2[kk] = br2 - bi3;
532 cr3[kk] = br2 + bi3;
533 ci1[kk] = bi0 - bi1;
534 ci2[kk] = bi2 + br3;
535 ci3[kk] = bi2 - br3;
536 tr = OO_SQRT2*(br5-bi5);
537 ti = OO_SQRT2*(br5+bi5);
538 cr4[kk] = br4 + tr;
539 ci4[kk] = bi4 + ti;
540 cr5[kk] = br4 - tr;
541 ci5[kk] = bi4 - ti;
542 tr = -OO_SQRT2*(br7+bi7);
543 ti = OO_SQRT2*(br7-bi7);
544 cr6[kk] = br6 + tr;
545 ci6[kk] = bi6 + ti;
546 cr7[kk] = br6 - tr;
547 ci7[kk] = bi6 - ti;
548 }
549 }
550 }
551}
552
553
554/* see if exactly one bit is set in integer argument */
555S32 power_of_2(S32 n)
556{
557 S32 bits=0;
558 while(n)
559 {
560 bits += n & 1;
561 n >>= 1;
562 }
563 return(bits==1);
564}
565
566/* get binary log of integer argument; exact if n a power of 2 */
567S32 fastlog2(S32 n)
568{
569 S32 log = -1;
570 while(n)
571 {
572 log++;
573 n >>= 1;
574 }
575 return(log);
576}