Second Life viewer sources 1.13.2.12

author: Jacek Antonelli 2008-08-15 23:44:46 -0500
committer: Jacek Antonelli 2008-08-15 23:44:46 -0500
commit: 38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4 (patch)
tree: adca584755d22ca041a2dbfc35d4eca01f70b32c /linden/indra/newview/llfft.cpp
parent: README.txt (diff)
download: meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.zip
meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.gz
meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.bz2
meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.xz
1 files changed, 576 insertions, 0 deletions
diff --git a/linden/indra/newview/llfft.cpp b/linden/indra/newview/llfft.cpp
new file mode 100644
index 0000000..9652786
--- /dev/null
+++ b/linden/indra/newview/llfft.cpp
@@ -0,0 +1,576 @@
+/** 
+ * @file llfft.cpp
+ * @brief FFT function implementations
+ *
+ * Copyright (c) 2003-2007, Linden Research, Inc.
+ * 
+ * The source code in this file ("Source Code") is provided by Linden Lab
+ * to you under the terms of the GNU General Public License, version 2.0
+ * ("GPL"), unless you have obtained a separate licensing agreement
+ * ("Other License"), formally executed by you and Linden Lab.  Terms of
+ * the GPL can be found in doc/GPL-license.txt in this distribution, or
+ * online at http://secondlife.com/developers/opensource/gplv2
+ * 
+ * There are special exceptions to the terms and conditions of the GPL as
+ * it is applied to this Source Code. View the full text of the exception
+ * in the file doc/FLOSS-exception.txt in this software distribution, or
+ * online at http://secondlife.com/developers/opensource/flossexception
+ * 
+ * By copying, modifying or distributing this software, you acknowledge
+ * that you have read and understood your obligations described above,
+ * and agree to abide by those obligations.
+ * 
+ * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
+ * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
+ * COMPLETENESS OR PERFORMANCE.
+ */
+/*
+ *  Fast Fourier Transform
+ *
+ */
+#include "llviewerprecompiledheaders.h"
+#include "llfft.h"
+#include "llerror.h"
+/*
+**
+*********************************************************************
+** Forward and inverse discrete Fourier transforms on complex data **
+*********************************************************************
+**
+**
+** forward_fft(array, rows, cols)
+**               COMPLEX *array;
+**               S32 rows, cols;
+**
+** inverse_fft(array, rows, cols)
+**               COMPLEX *array;
+**               S32 rows, cols;
+**
+** These entry points compute the forward and inverse DFT's, respectively, 
+** of a single-precision COMPLEX array.
+**
+** The result is a COMPLEX array of the same size, returned in 
+** the same space as the input array.  That is, the original array is
+** overwritten and destroyed.
+**
+** Rows and columns must each be an integral power of 2.
+**
+** These routines return integer value -1 if an error was detected,
+** 0 otherwise
+**
+** This implementation of the DFT uses the transform pair defined as follows.
+**
+** Let there be two COMPLEX arrays each with n rows and m columns
+** Index them as 
+** f(x,y):    0 <= x <= m - 1,  0 <= y <= n - 1
+** F(u,v):    -m/2 <= u <= m/2 - 1,  -n/2 <= v <= n/2 - 1
+**
+** Then the forward and inverse transforms are related as
+**
+** Forward:
+**
+** F(u,v) = \sum_{x=0}^{m-1} \sum_{y=0}^{n-1} 
+**                      f(x,y) \exp{-2\pi i (ux/m + vy/n)}
+**
+**
+** Inverse:
+**
+** f(x,y) = 1/(mn) \sum_{u=-m/2}^{m/2-1} \sum_{v=-n/2}^{n/2-1} 
+**                      F(u,v) \exp{2\pi i (ux/m + vy/n)}
+**  
+** Therefore, the transforms have these properties:
+** 1.  \sum_x \sum_y  f(x,y) = F(0,0)
+** 2.  m n \sum_x \sum_y |f(x,y)|^2 = \sum_u \sum_v |F(u,v)|^2
+**
+*/
+//DPCOMPLEX     *stageBuff;  /* buffer to hold a row or column at a time */
+//COMPLEX       *bigBuff;    /* a pointer to the input array */
+/*
+ *      These macros move complex data between bigBuff and
+ *      stageBuff
+ */
+inline void LoadRow(DPCOMPLEX* stageBuff, COMPLEX* bigBuff, U32 row, U32 cols)
+{
+    for (U32 j = row*cols, k = 0 ; k < cols ; j++, k++)
+        {
+                stageBuff[k].re = bigBuff[j].re;
+                stageBuff[k].im = bigBuff[j].im;
+        }
+}
+inline void StoreRow(DPCOMPLEX* stageBuff, COMPLEX* bigBuff, U32 row, U32 cols)
+{
+    for (U32 j = row*cols, k = 0 ; k < cols ; j++, k++)
+        {
+                bigBuff[j].re = (F32)stageBuff[k].re;
+                bigBuff[j].im = (F32)stageBuff[k].im;
+        }
+}
+inline void LoadCol(DPCOMPLEX* stageBuff, COMPLEX* bigBuff, U32 col, U32 rows, U32 cols)
+{
+    for (U32 j = col,k = 0 ; k < rows ; j+=cols, k++)
+        {
+                stageBuff[k].re = bigBuff[j].re;
+                stageBuff[k].im = bigBuff[j].im;
+        }
+}
+inline void StoreCol(DPCOMPLEX* stageBuff, COMPLEX* bigBuff, U32 col, U32 rows, U32 cols)
+{
+    for (U32 j = col,k = 0 ; k < rows ; j+=cols, k++)
+        {
+                bigBuff[j].re = (F32)stageBuff[k].re;
+                bigBuff[j].im = (F32)stageBuff[k].im;
+        }
+}
+/* do something with an error message */
+inline void handle_error(S8* msg)
+{
+        llerrs << msg << llendl;
+}
+/* 
+** compute DFT: forward if direction==0, inverse if direction==1
+** array must be COMPLEX 
+*/ 
+BOOL fft(const LLFFTPlan& plan, COMPLEX *array, S32 rows, S32 cols, S32 direction)
+{
+        S32 i;
+        
+        if (!plan.valid() || plan.rows() != rows || plan.cols() != cols)
+                return FALSE;
+        
+        /* compute transform row by row */
+        
+        if(cols>1)
+        {
+                for(i=0;i<rows;i++) 
+                {
+                        LoadRow(plan.buffer(), array, i, cols);
+                        FFT842(direction, cols, plan.buffer());
+                        StoreRow(plan.buffer(), array, i, cols);
+                }
+        }
+        
+        if(rows<2) /* done */
+        {
+                //freeBuffer();
+                return TRUE;
+        }
+        
+        
+        /* compute transform column by column */
+        
+        for(i=0;i<cols;i++) 
+        {
+                LoadCol(plan.buffer(), array, i, rows, cols);
+                FFT842(direction, rows, plan.buffer());
+                StoreCol(plan.buffer(), array, i, rows, cols);
+        }
+        
+        //freeBuffer();
+        
+        return TRUE;
+}
+/*
+** FFT842
+** This routine replaces the input DPCOMPLEX vector by its
+** finite discrete complex fourier transform if in==0.
+** It replaces the input DPCOMPLEX vector by its
+** finite discrete complex inverse fourier transform if in==1.
+**
+**     in - FORWARD or INVERSE
+**     n - length of vector
+**     b - input vector
+**
+** It performs as many base 8 iterations as possible and
+** then finishes with a base 4 iteration or a base 2 
+** iteration if needed.
+**
+** Ported from the FORTRAN code in Programming for Digital Signal Processing,
+** IEEE Press 1979, Section 1, by G. D. Bergland and M. T. Dolan
+**
+*/
+void FFT842(S32 in, S32 n, DPCOMPLEX *b)
+{
+        F64 fn, r, fi;
+        
+        S32 L[16],L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15;
+        S32 j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14;
+        S32 i, j, ij, ji, ij1, ji1;
+        S32 n2pow, n8pow, nthpo, ipass, nxtlt, lengt;
+        
+        n2pow = fastlog2(n);
+        nthpo = n;
+        fn = 1.0 / (F64)nthpo;
+        
+        
+        if(in==FORWARD)
+        {
+                /* take conjugate  */
+                for(i=0;i<n;i++)
+                {
+                        b[i].im *= -1.0;
+                }
+        }
+        
+        if(in==INVERSE)
+        {
+                /* scramble inputs */
+                for(i=0,j=n/2;j<n;i++,j++) 
+                {
+                        r  = b[j].re;
+                        fi = b[j].im;
+                        b[j].re = b[i].re;
+                        b[j].im = b[i].im;
+                        b[i].re = r;
+                        b[i].im = fi;
+                }
+        }
+        
+        n8pow = n2pow/3;
+        
+        if(n8pow) 
+    {
+                /* radix 8 iterations */
+                for(ipass=1;ipass<=n8pow;ipass++) 
+                {
+                        nxtlt = 0x1 << (n2pow - 3*ipass);
+                        lengt = 8*nxtlt;
+                        R8TX(nxtlt,nthpo,lengt,
+                                b,b+nxtlt,b+2*nxtlt,
+                                b+3*nxtlt,b+4*nxtlt,b+5*nxtlt,
+                                b+6*nxtlt,b+7*nxtlt);
+                }
+    }
+        
+        if(n2pow%3 == 1) 
+    {
+                /* radix 2 iteration needed */
+                R2TX(nthpo,b,b+1); 
+    }
+        
+        
+        if(n2pow%3 == 2)  
+    {
+                /* radix 4 iteration needed */
+                R4TX(nthpo,b,b+1,b+2,b+3); 
+    }
+        
+        
+        
+        for(j=1;j<=15;j++) 
+    {
+                L[j] = 1;
+                if(j-n2pow <= 0) L[j] = 0x1 << (n2pow + 1 - j);
+    }
+        L15=L[1];L14=L[2];L13=L[3];L12=L[4];L11=L[5];L10=L[6];L9=L[7];
+        L8=L[8];L7=L[9];L6=L[10];L5=L[11];L4=L[12];L3=L[13];L2=L[14];L1=L[15];
+        
+        ij = 1;
+    
+        for(j1=1;j1<=L1;j1++)
+                for(j2=j1;j2<=L2;j2+=L1)
+                        for(j3=j2;j3<=L3;j3+=L2)
+                                for(j4=j3;j4<=L4;j4+=L3)
+                                        for(j5=j4;j5<=L5;j5+=L4)
+                                                for(j6=j5;j6<=L6;j6+=L5)
+                                                        for(j7=j6;j7<=L7;j7+=L6)
+                                                                for(j8=j7;j8<=L8;j8+=L7)
+                                                                        for(j9=j8;j9<=L9;j9+=L8)
+                                                                                for(j10=j9;j10<=L10;j10+=L9)
+                                                                                        for(j11=j10;j11<=L11;j11+=L10)
+                                                                                                for(j12=j11;j12<=L12;j12+=L11)
+                                                                                                        for(j13=j12;j13<=L13;j13+=L12)
+                                                                                                                for(j14=j13;j14<=L14;j14+=L13)
+                                                                                                                        for(ji=j14;ji<=L15;ji+=L14) 
+                                                                                                                        {
+                                                                                                                                ij1 = ij-1;
+                                                                                                                                ji1 = ji-1;
+                                                                                                                                
+                                                                                                                                if(ij-ji<0) 
+                                                                                                                                {
+                                                                                                                                        r = b[ij1].re;
+                                                                                                                                        b[ij1].re = b[ji1].re;
+                                                                                                                                        b[ji1].re = r;
+                                                                                                                                        fi = b[ij1].im;
+                                                                                                                                        b[ij1].im = b[ji1].im;
+                                                                                                                                        b[ji1].im = fi;
+                                                                                                                                }
+                                                                                                                                ij++;
+                                                                                                                        }
+                                                                                                                        
+                                                                                                                        if(in==FORWARD)  // take conjugates & unscramble outputs
+                                                                                                                        {
+                                                                                                                                for(i=0,j=n/2;j<n;i++,j++)
+                                                                                                                                {
+                                                                                                                                        r = b[j].re; 
+                                                                                                                                        fi = b[j].im;
+                                                                                                                                        b[j].re = b[i].re;
+                                                                                                                                        b[j].im = -b[i].im;
+                                                                                                                                        b[i].re = r;
+                                                                                                                                        b[i].im = -fi;
+                                                                                                                                }
+                                                                                                                        }
+                                                                                                                        if(in==INVERSE) // scale outputs
+                                                                                                                        {
+                                                                                                                                for(i=0;i<nthpo;i++) 
+                                                                                                                                {
+                                                                                                                                        b[i].re *= fn;
+                                                                                                                                        b[i].im *= fn;
+                                                                                                                                }
+                                                                                                                        }
+}
+/*
+** radix 2 iteration subroutine
+*/
+void R2TX(S32 nthpo, DPCOMPLEX *c0, DPCOMPLEX *c1)
+{
+        S32 k,kk;
+        F64 *cr0, *ci0, *cr1, *ci1, r1, fi1;
+        
+        cr0 = &(c0[0].re);
+        ci0 = &(c0[0].im);
+        cr1 = &(c1[0].re);
+        ci1 = &(c1[0].im);
+        
+        for(k = 0; k < nthpo; k += 2) 
+    {
+                kk = k*2;
+                
+                r1 = cr0[kk] + cr1[kk];
+                cr1[kk] = cr0[kk] - cr1[kk];
+                cr0[kk] = r1;
+                fi1 = ci0[kk] + ci1[kk];
+                ci1[kk] = ci0[kk] - ci1[kk];
+                ci0[kk] = fi1;
+    }
+}
+/*
+** radix 4 iteration subroutine
+*/
+void R4TX(S32 nthpo, DPCOMPLEX *c0, DPCOMPLEX *c1, DPCOMPLEX *c2, DPCOMPLEX *c3)
+{
+        S32 k,kk;
+        F64 *cr0, *ci0, *cr1, *ci1, *cr2, *ci2, *cr3, *ci3;
+        F64 r1,r2,r3,r4,i1,i2,i3,i4;
+        
+        cr0 = &(c0[0].re);
+        cr1 = &(c1[0].re);
+        cr2 = &(c2[0].re);
+        cr3 = &(c3[0].re);
+        ci0 = &(c0[0].im);
+        ci1 = &(c1[0].im);
+        ci2 = &(c2[0].im);
+        ci3 = &(c3[0].im);
+        
+        for(k = 1; k <= nthpo; k += 4) 
+    {
+                kk = (k-1)*2;  /* real and imag parts alternate */
+                
+                r1 = cr0[kk] + cr2[kk];
+                r2 = cr0[kk] - cr2[kk];
+                r3 = cr1[kk] + cr3[kk];
+                r4 = cr1[kk] - cr3[kk];
+                i1 = ci0[kk] + ci2[kk];
+                i2 = ci0[kk] - ci2[kk];
+                i3 = ci1[kk] + ci3[kk];
+                i4 = ci1[kk] - ci3[kk];
+                cr0[kk] = r1 + r3;
+                ci0[kk] = i1 + i3;
+                cr1[kk] = r1 - r3;
+                ci1[kk] = i1 - i3;
+                cr2[kk] = r2 - i4;
+                ci2[kk] = i2 + r4;
+                cr3[kk] = r2 + i4;
+                ci3[kk] = i2 - r4;
+    }
+}
+        
+/*
+** radix 8 iteration subroutine
+*/
+void R8TX(S32 nxtlt, S32 nthpo, S32 lengt, DPCOMPLEX *cc0, DPCOMPLEX *cc1, DPCOMPLEX *cc2,
+                  DPCOMPLEX *cc3, DPCOMPLEX *cc4, DPCOMPLEX *cc5, DPCOMPLEX *cc6, DPCOMPLEX *cc7)
+{
+        S32 j,k,kk;
+        F64 scale, arg, tr, ti;
+        F64 c1,c2,c3,c4,c5,c6,c7;
+        F64 s1,s2,s3,s4,s5,s6,s7;
+        F64 ar0,ar1,ar2,ar3,ar4,ar5,ar6,ar7;
+        F64 ai0,ai1,ai2,ai3,ai4,ai5,ai6,ai7;
+        F64 br0,br1,br2,br3,br4,br5,br6,br7;
+        F64 bi0,bi1,bi2,bi3,bi4,bi5,bi6,bi7;
+        
+        F64 *cr0,*cr1,*cr2,*cr3,*cr4,*cr5,*cr6,*cr7;
+        F64 *ci0,*ci1,*ci2,*ci3,*ci4,*ci5,*ci6,*ci7;
+        
+        cr0 = &(cc0[0].re);
+        cr1 = &(cc1[0].re);
+        cr2 = &(cc2[0].re);
+        cr3 = &(cc3[0].re);
+        cr4 = &(cc4[0].re);
+        cr5 = &(cc5[0].re);
+        cr6 = &(cc6[0].re);
+        cr7 = &(cc7[0].re);
+        
+        ci0 = &(cc0[0].im);
+        ci1 = &(cc1[0].im);
+        ci2 = &(cc2[0].im);
+        ci3 = &(cc3[0].im);
+        ci4 = &(cc4[0].im);
+        ci5 = &(cc5[0].im);
+        ci6 = &(cc6[0].im);
+        ci7 = &(cc7[0].im);
+        
+        
+        scale = F_TWO_PI/lengt;
+        
+        for(j = 1; j <= nxtlt; j++) 
+    {
+                arg = (j-1)*scale;
+                c1 = cos(arg);
+                s1 = sin(arg);
+                c2 = c1*c1 - s1*s1;
+                s2 = c1*s1 + c1*s1;
+                c3 = c1*c2 - s1*s2;
+                s3 = c2*s1 + s2*c1;
+                c4 = c2*c2 - s2*s2;
+                s4 = c2*s2 + c2*s2;
+                c5 = c2*c3 - s2*s3;
+                s5 = c3*s2 + s3*c2;
+                c6 = c3*c3 - s3*s3;
+                s6 = c3*s3 + c3*s3;
+                c7 = c3*c4 - s3*s4;
+                s7 = c4*s3 + s4*c3;
+                
+                for(k = j; k <= nthpo; k += lengt) 
+                {
+                        kk = (k-1)*2; /* index by twos; re & im alternate */
+                        
+                        ar0 = cr0[kk] + cr4[kk];
+                        ar1 = cr1[kk] + cr5[kk];
+                        ar2 = cr2[kk] + cr6[kk];
+                        ar3 = cr3[kk] + cr7[kk];
+                        ar4 = cr0[kk] - cr4[kk];
+                        ar5 = cr1[kk] - cr5[kk];
+                        ar6 = cr2[kk] - cr6[kk];
+                        ar7 = cr3[kk] - cr7[kk];
+                        ai0 = ci0[kk] + ci4[kk];
+                        ai1 = ci1[kk] + ci5[kk];
+                        ai2 = ci2[kk] + ci6[kk];
+                        ai3 = ci3[kk] + ci7[kk];
+                        ai4 = ci0[kk] - ci4[kk];
+                        ai5 = ci1[kk] - ci5[kk];
+                        ai6 = ci2[kk] - ci6[kk];
+                        ai7 = ci3[kk] - ci7[kk];
+                        br0 = ar0 + ar2;
+                        br1 = ar1 + ar3;
+                        br2 = ar0 - ar2;
+                        br3 = ar1 - ar3;
+                        br4 = ar4 - ai6;
+                        br5 = ar5 - ai7;
+                        br6 = ar4 + ai6;
+                        br7 = ar5 + ai7;
+                        bi0 = ai0 + ai2;
+                        bi1 = ai1 + ai3;
+                        bi2 = ai0 - ai2;
+                        bi3 = ai1 - ai3;
+                        bi4 = ai4 + ar6;
+                        bi5 = ai5 + ar7;
+                        bi6 = ai4 - ar6;
+                        bi7 = ai5 - ar7;
+                        cr0[kk] = br0 + br1;
+                        ci0[kk] = bi0 + bi1;
+                        if(j > 1) 
+                        {
+                                cr1[kk] = c4*(br0-br1) - s4*(bi0-bi1);
+                                cr2[kk] = c2*(br2-bi3) - s2*(bi2+br3);
+                                cr3[kk] = c6*(br2+bi3) - s6*(bi2-br3);
+                                ci1[kk] = c4*(bi0-bi1) + s4*(br0-br1);
+                                ci2[kk] = c2*(bi2+br3) + s2*(br2-bi3);
+                                ci3[kk] = c6*(bi2-br3) + s6*(br2+bi3);
+                                tr = OO_SQRT2*(br5-bi5);
+                                ti = OO_SQRT2*(br5+bi5);
+                                cr4[kk] = c1*(br4+tr) - s1*(bi4+ti);
+                                ci4[kk] = c1*(bi4+ti) + s1*(br4+tr);
+                                cr5[kk] = c5*(br4-tr) - s5*(bi4-ti);
+                                ci5[kk] = c5*(bi4-ti) + s5*(br4-tr);
+                                tr = -OO_SQRT2*(br7+bi7);
+                                ti = OO_SQRT2*(br7-bi7);
+                                cr6[kk] = c3*(br6+tr) - s3*(bi6+ti);
+                                ci6[kk] = c3*(bi6+ti) + s3*(br6+tr);
+                                cr7[kk] = c7*(br6-tr) - s7*(bi6-ti);
+                                ci7[kk] = c7*(bi6-ti) + s7*(br6-tr);
+                        }
+                        else 
+                        {
+                                cr1[kk] = br0 - br1;
+                                cr2[kk] = br2 - bi3;
+                                cr3[kk] = br2 + bi3;
+                                ci1[kk] = bi0 - bi1;
+                                ci2[kk] = bi2 + br3;
+                                ci3[kk] = bi2 - br3;
+                                tr = OO_SQRT2*(br5-bi5);
+                                ti = OO_SQRT2*(br5+bi5);
+                                cr4[kk] = br4 + tr;
+                                ci4[kk] = bi4 + ti;
+                                cr5[kk] = br4 - tr;
+                                ci5[kk] = bi4 - ti;
+                                tr = -OO_SQRT2*(br7+bi7);
+                                ti = OO_SQRT2*(br7-bi7);
+                                cr6[kk] = br6 + tr;
+                                ci6[kk] = bi6 + ti;
+                                cr7[kk] = br6 - tr;
+                                ci7[kk] = bi6 - ti;
+                        }
+                }
+    }
+}
+        
+/* see if exactly one bit is set in integer argument */
+S32 power_of_2(S32 n)
+{
+        S32 bits=0;
+        while(n)
+        {
+                bits += n & 1;
+                n >>= 1;
+        }
+        return(bits==1);
+}
+/* get binary log of integer argument; exact if n a power of 2 */
+S32 fastlog2(S32 n)
+{
+        S32 log = -1;
+        while(n)
+        {
+                log++;
+                n >>= 1;
+        }
+        return(log);
+}
author	Jacek Antonelli	2008-08-15 23:44:46 -0500
committer	Jacek Antonelli	2008-08-15 23:44:46 -0500
commit	38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4 (patch)
tree	adca584755d22ca041a2dbfc35d4eca01f70b32c /linden/indra/newview/llfft.cpp
parent	README.txt (diff)
download	meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.zip meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.gz meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.bz2 meta-impy-38d6d37f2d982fa959e9e8a4a3f7e1ccfad7b5d4.tar.xz

diff --git a/linden/indra/newview/llfft.cpp b/linden/indra/newview/llfft.cpp new file mode 100644 index 0000000..9652786 --- /dev/null +++ b/linden/indra/newview/llfft.cpp
@@ -0,0 +1,576 @@
	1	/**
	2	* @file llfft.cpp
	3	* @brief FFT function implementations
	4	*
	5	* Copyright (c) 2003-2007, Linden Research, Inc.
	6	*
	7	* The source code in this file ("Source Code") is provided by Linden Lab
	8	* to you under the terms of the GNU General Public License, version 2.0
	9	* ("GPL"), unless you have obtained a separate licensing agreement
	10	* ("Other License"), formally executed by you and Linden Lab. Terms of
	11	* the GPL can be found in doc/GPL-license.txt in this distribution, or
	12	* online at http://secondlife.com/developers/opensource/gplv2
	13	*
	14	* There are special exceptions to the terms and conditions of the GPL as
	15	* it is applied to this Source Code. View the full text of the exception
	16	* in the file doc/FLOSS-exception.txt in this software distribution, or
	17	* online at http://secondlife.com/developers/opensource/flossexception
	18	*
	19	* By copying, modifying or distributing this software, you acknowledge
	20	* that you have read and understood your obligations described above,
	21	* and agree to abide by those obligations.
	22	*
	23	* ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
	24	* WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
	25	* COMPLETENESS OR PERFORMANCE.
	26	*/
	27
	28	/*
	29	* Fast Fourier Transform
	30	*
	31	*/
	32	#include "llviewerprecompiledheaders.h"
	33
	34	#include "llfft.h"
	35	#include "llerror.h"
	36
	37	/*
	38	**
	39	*********************************************************************
	40	Forward and inverse discrete Fourier transforms on complex data
	41	*********************************************************************
	42	**
	43	**
	44	** forward_fft(array, rows, cols)
	45	** COMPLEX *array;
	46	** S32 rows, cols;
	47	**
	48	** inverse_fft(array, rows, cols)
	49	** COMPLEX *array;
	50	** S32 rows, cols;
	51	**
	52	** These entry points compute the forward and inverse DFT's, respectively,
	53	** of a single-precision COMPLEX array.
	54	**
	55	** The result is a COMPLEX array of the same size, returned in
	56	** the same space as the input array. That is, the original array is
	57	** overwritten and destroyed.
	58	**
	59	** Rows and columns must each be an integral power of 2.
	60	**
	61	** These routines return integer value -1 if an error was detected,
	62	** 0 otherwise
	63	**
	64	** This implementation of the DFT uses the transform pair defined as follows.
	65	**
	66	** Let there be two COMPLEX arrays each with n rows and m columns
	67	** Index them as
	68	** f(x,y): 0 <= x <= m - 1, 0 <= y <= n - 1
	69	** F(u,v): -m/2 <= u <= m/2 - 1, -n/2 <= v <= n/2 - 1
	70	**
	71	** Then the forward and inverse transforms are related as
	72	**
	73	** Forward:
	74	**
	75	** F(u,v) = \sum_{x=0}^{m-1} \sum_{y=0}^{n-1}
	76	** f(x,y) \exp{-2\pi i (ux/m + vy/n)}
	77	**
	78	**
	79	** Inverse:
	80	**
	81	** f(x,y) = 1/(mn) \sum_{u=-m/2}^{m/2-1} \sum_{v=-n/2}^{n/2-1}
	82	** F(u,v) \exp{2\pi i (ux/m + vy/n)}
	83	**
	84	** Therefore, the transforms have these properties:
	85	** 1. \sum_x \sum_y f(x,y) = F(0,0)
	86	** 2. m n \sum_x \sum_y \|f(x,y)\|^2 = \sum_u \sum_v \|F(u,v)\|^2
	87	**
	88	*/
	89
	90
	91	//DPCOMPLEX stageBuff; / buffer to hold a row or column at a time */
	92	//COMPLEX bigBuff; / a pointer to the input array */
	93
	94
	95	/*
	96	* These macros move complex data between bigBuff and
	97	* stageBuff
	98	*/
	99
	100	inline void LoadRow(DPCOMPLEX* stageBuff, COMPLEX* bigBuff, U32 row, U32 cols)
	101	{
	102	for (U32 j = row*cols, k = 0 ; k < cols ; j++, k++)
	103	{
	104	stageBuff[k].re = bigBuff[j].re;
	105	stageBuff[k].im = bigBuff[j].im;
	106	}
	107	}
	108
	109	inline void StoreRow(DPCOMPLEX* stageBuff, COMPLEX* bigBuff, U32 row, U32 cols)
	110	{
	111	for (U32 j = row*cols, k = 0 ; k < cols ; j++, k++)
	112	{
	113	bigBuff[j].re = (F32)stageBuff[k].re;
	114	bigBuff[j].im = (F32)stageBuff[k].im;
	115	}
	116	}
	117
	118	inline void LoadCol(DPCOMPLEX* stageBuff, COMPLEX* bigBuff, U32 col, U32 rows, U32 cols)
	119	{
	120	for (U32 j = col,k = 0 ; k < rows ; j+=cols, k++)
	121	{
	122	stageBuff[k].re = bigBuff[j].re;
	123	stageBuff[k].im = bigBuff[j].im;
	124	}
	125	}
	126
	127	inline void StoreCol(DPCOMPLEX* stageBuff, COMPLEX* bigBuff, U32 col, U32 rows, U32 cols)
	128	{
	129	for (U32 j = col,k = 0 ; k < rows ; j+=cols, k++)
	130	{
	131	bigBuff[j].re = (F32)stageBuff[k].re;
	132	bigBuff[j].im = (F32)stageBuff[k].im;
	133	}
	134	}
	135
	136
	137	/* do something with an error message */
	138	inline void handle_error(S8* msg)
	139	{
	140	llerrs << msg << llendl;
	141	}
	142
	143
	144	/*
	145	** compute DFT: forward if direction==0, inverse if direction==1
	146	** array must be COMPLEX
	147	*/
	148	BOOL fft(const LLFFTPlan& plan, COMPLEX *array, S32 rows, S32 cols, S32 direction)
	149	{
	150	S32 i;
	151
	152	if (!plan.valid() \|\| plan.rows() != rows \|\| plan.cols() != cols)
	153	return FALSE;
	154
	155	/* compute transform row by row */
	156
	157	if(cols>1)
	158	{
	159	for(i=0;i<rows;i++)
	160	{
	161	LoadRow(plan.buffer(), array, i, cols);
	162	FFT842(direction, cols, plan.buffer());
	163	StoreRow(plan.buffer(), array, i, cols);
	164	}
	165	}
	166
	167	if(rows<2) /* done */
	168	{
	169	//freeBuffer();
	170	return TRUE;
	171	}
	172
	173
	174	/* compute transform column by column */
	175
	176	for(i=0;i<cols;i++)
	177	{
	178	LoadCol(plan.buffer(), array, i, rows, cols);
	179	FFT842(direction, rows, plan.buffer());
	180	StoreCol(plan.buffer(), array, i, rows, cols);
	181	}
	182
	183	//freeBuffer();
	184
	185	return TRUE;
	186	}
	187
	188
	189
	190	/*
	191	** FFT842
	192	** This routine replaces the input DPCOMPLEX vector by its
	193	** finite discrete complex fourier transform if in==0.
	194	** It replaces the input DPCOMPLEX vector by its
	195	** finite discrete complex inverse fourier transform if in==1.
	196	**
	197	** in - FORWARD or INVERSE
	198	** n - length of vector
	199	** b - input vector
	200	**
	201	** It performs as many base 8 iterations as possible and
	202	** then finishes with a base 4 iteration or a base 2
	203	** iteration if needed.
	204	**
	205	** Ported from the FORTRAN code in Programming for Digital Signal Processing,
	206	** IEEE Press 1979, Section 1, by G. D. Bergland and M. T. Dolan
	207	**
	208	*/
	209	void FFT842(S32 in, S32 n, DPCOMPLEX *b)
	210	{
	211	F64 fn, r, fi;
	212
	213	S32 L[16],L1,L2,L3,L4,L5,L6,L7,L8,L9,L10,L11,L12,L13,L14,L15;
	214	S32 j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14;
	215	S32 i, j, ij, ji, ij1, ji1;
	216	S32 n2pow, n8pow, nthpo, ipass, nxtlt, lengt;
	217
	218	n2pow = fastlog2(n);
	219	nthpo = n;
	220	fn = 1.0 / (F64)nthpo;
	221
	222
	223	if(in==FORWARD)
	224	{
	225	/* take conjugate */
	226	for(i=0;i<n;i++)
	227	{
	228	b[i].im *= -1.0;
	229	}
	230	}
	231
	232	if(in==INVERSE)
	233	{
	234	/* scramble inputs */
	235	for(i=0,j=n/2;j<n;i++,j++)
	236	{
	237	r = b[j].re;
	238	fi = b[j].im;
	239	b[j].re = b[i].re;
	240	b[j].im = b[i].im;
	241	b[i].re = r;
	242	b[i].im = fi;
	243	}
	244	}
	245
	246	n8pow = n2pow/3;
	247
	248	if(n8pow)
	249	{
	250	/* radix 8 iterations */
	251	for(ipass=1;ipass<=n8pow;ipass++)
	252	{
	253	nxtlt = 0x1 << (n2pow - 3*ipass);
	254	lengt = 8*nxtlt;
	255	R8TX(nxtlt,nthpo,lengt,
	256	b,b+nxtlt,b+2*nxtlt,
	257	b+3nxtlt,b+4nxtlt,b+5*nxtlt,
	258	b+6nxtlt,b+7nxtlt);
	259	}
	260	}
	261
	262	if(n2pow%3 == 1)
	263	{
	264	/* radix 2 iteration needed */
	265	R2TX(nthpo,b,b+1);
	266	}
	267
	268
	269	if(n2pow%3 == 2)
	270	{
	271	/* radix 4 iteration needed */
	272	R4TX(nthpo,b,b+1,b+2,b+3);
	273	}
	274
	275
	276
	277	for(j=1;j<=15;j++)
	278	{
	279	L[j] = 1;
	280	if(j-n2pow <= 0) L[j] = 0x1 << (n2pow + 1 - j);
	281	}
	282	L15=L[1];L14=L[2];L13=L[3];L12=L[4];L11=L[5];L10=L[6];L9=L[7];
	283	L8=L[8];L7=L[9];L6=L[10];L5=L[11];L4=L[12];L3=L[13];L2=L[14];L1=L[15];
	284
	285	ij = 1;
	286
	287	for(j1=1;j1<=L1;j1++)
	288	for(j2=j1;j2<=L2;j2+=L1)
	289	for(j3=j2;j3<=L3;j3+=L2)
	290	for(j4=j3;j4<=L4;j4+=L3)
	291	for(j5=j4;j5<=L5;j5+=L4)
	292	for(j6=j5;j6<=L6;j6+=L5)
	293	for(j7=j6;j7<=L7;j7+=L6)
	294	for(j8=j7;j8<=L8;j8+=L7)
	295	for(j9=j8;j9<=L9;j9+=L8)
	296	for(j10=j9;j10<=L10;j10+=L9)
	297	for(j11=j10;j11<=L11;j11+=L10)
	298	for(j12=j11;j12<=L12;j12+=L11)
	299	for(j13=j12;j13<=L13;j13+=L12)
	300	for(j14=j13;j14<=L14;j14+=L13)
	301	for(ji=j14;ji<=L15;ji+=L14)
	302	{
	303	ij1 = ij-1;
	304	ji1 = ji-1;
	305
	306	if(ij-ji<0)
	307	{
	308	r = b[ij1].re;
	309	b[ij1].re = b[ji1].re;
	310	b[ji1].re = r;
	311	fi = b[ij1].im;
	312	b[ij1].im = b[ji1].im;
	313	b[ji1].im = fi;
	314	}
	315	ij++;
	316	}
	317
	318	if(in==FORWARD) // take conjugates & unscramble outputs
	319	{
	320	for(i=0,j=n/2;j<n;i++,j++)
	321	{
	322	r = b[j].re;
	323	fi = b[j].im;
	324	b[j].re = b[i].re;
	325	b[j].im = -b[i].im;
	326	b[i].re = r;
	327	b[i].im = -fi;
	328	}
	329	}
	330
	331	if(in==INVERSE) // scale outputs
	332	{
	333	for(i=0;i<nthpo;i++)
	334	{
	335	b[i].re *= fn;
	336	b[i].im *= fn;
	337	}
	338	}
	339	}
	340
	341
	342	/*
	343	** radix 2 iteration subroutine
	344	*/
	345	void R2TX(S32 nthpo, DPCOMPLEX c0, DPCOMPLEX c1)
	346	{
	347	S32 k,kk;
	348	F64 cr0, ci0, cr1, ci1, r1, fi1;
	349
	350	cr0 = &(c0[0].re);
	351	ci0 = &(c0[0].im);
	352	cr1 = &(c1[0].re);
	353	ci1 = &(c1[0].im);
	354
	355	for(k = 0; k < nthpo; k += 2)
	356	{
	357	kk = k*2;
	358
	359	r1 = cr0[kk] + cr1[kk];
	360	cr1[kk] = cr0[kk] - cr1[kk];
	361	cr0[kk] = r1;
	362	fi1 = ci0[kk] + ci1[kk];
	363	ci1[kk] = ci0[kk] - ci1[kk];
	364	ci0[kk] = fi1;
	365	}
	366	}
	367
	368
	369	/*
	370	** radix 4 iteration subroutine
	371	*/
	372	void R4TX(S32 nthpo, DPCOMPLEX c0, DPCOMPLEX c1, DPCOMPLEX c2, DPCOMPLEX c3)
	373	{
	374	S32 k,kk;
	375	F64 cr0, ci0, cr1, ci1, cr2, ci2, cr3, ci3;
	376	F64 r1,r2,r3,r4,i1,i2,i3,i4;
	377
	378	cr0 = &(c0[0].re);
	379	cr1 = &(c1[0].re);
	380	cr2 = &(c2[0].re);
	381	cr3 = &(c3[0].re);
	382	ci0 = &(c0[0].im);
	383	ci1 = &(c1[0].im);
	384	ci2 = &(c2[0].im);
	385	ci3 = &(c3[0].im);
	386
	387	for(k = 1; k <= nthpo; k += 4)
	388	{
	389	kk = (k-1)2; / real and imag parts alternate */
	390
	391	r1 = cr0[kk] + cr2[kk];
	392	r2 = cr0[kk] - cr2[kk];
	393	r3 = cr1[kk] + cr3[kk];
	394	r4 = cr1[kk] - cr3[kk];
	395	i1 = ci0[kk] + ci2[kk];
	396	i2 = ci0[kk] - ci2[kk];
	397	i3 = ci1[kk] + ci3[kk];
	398	i4 = ci1[kk] - ci3[kk];
	399	cr0[kk] = r1 + r3;
	400	ci0[kk] = i1 + i3;
	401	cr1[kk] = r1 - r3;
	402	ci1[kk] = i1 - i3;
	403	cr2[kk] = r2 - i4;
	404	ci2[kk] = i2 + r4;
	405	cr3[kk] = r2 + i4;
	406	ci3[kk] = i2 - r4;
	407	}
	408	}
	409
	410
	411
	412	/*
	413	** radix 8 iteration subroutine
	414	*/
	415	void R8TX(S32 nxtlt, S32 nthpo, S32 lengt, DPCOMPLEX cc0, DPCOMPLEX cc1, DPCOMPLEX *cc2,
	416	DPCOMPLEX cc3, DPCOMPLEX cc4, DPCOMPLEX cc5, DPCOMPLEX cc6, DPCOMPLEX *cc7)
	417	{
	418	S32 j,k,kk;
	419	F64 scale, arg, tr, ti;
	420	F64 c1,c2,c3,c4,c5,c6,c7;
	421	F64 s1,s2,s3,s4,s5,s6,s7;
	422	F64 ar0,ar1,ar2,ar3,ar4,ar5,ar6,ar7;
	423	F64 ai0,ai1,ai2,ai3,ai4,ai5,ai6,ai7;
	424	F64 br0,br1,br2,br3,br4,br5,br6,br7;
	425	F64 bi0,bi1,bi2,bi3,bi4,bi5,bi6,bi7;
	426
	427	F64 cr0,cr1,cr2,cr3,cr4,cr5,cr6,cr7;
	428	F64 ci0,ci1,ci2,ci3,ci4,ci5,ci6,ci7;
	429
	430	cr0 = &(cc0[0].re);
	431	cr1 = &(cc1[0].re);
	432	cr2 = &(cc2[0].re);
	433	cr3 = &(cc3[0].re);
	434	cr4 = &(cc4[0].re);
	435	cr5 = &(cc5[0].re);
	436	cr6 = &(cc6[0].re);
	437	cr7 = &(cc7[0].re);
	438
	439	ci0 = &(cc0[0].im);
	440	ci1 = &(cc1[0].im);
	441	ci2 = &(cc2[0].im);
	442	ci3 = &(cc3[0].im);
	443	ci4 = &(cc4[0].im);
	444	ci5 = &(cc5[0].im);
	445	ci6 = &(cc6[0].im);
	446	ci7 = &(cc7[0].im);
	447
	448
	449	scale = F_TWO_PI/lengt;
	450
	451	for(j = 1; j <= nxtlt; j++)
	452	{
	453	arg = (j-1)*scale;
	454	c1 = cos(arg);
	455	s1 = sin(arg);
	456	c2 = c1c1 - s1s1;
	457	s2 = c1s1 + c1s1;
	458	c3 = c1c2 - s1s2;
	459	s3 = c2s1 + s2c1;
	460	c4 = c2c2 - s2s2;
	461	s4 = c2s2 + c2s2;
	462	c5 = c2c3 - s2s3;
	463	s5 = c3s2 + s3c2;
	464	c6 = c3c3 - s3s3;
	465	s6 = c3s3 + c3s3;
	466	c7 = c3c4 - s3s4;
	467	s7 = c4s3 + s4c3;
	468
	469	for(k = j; k <= nthpo; k += lengt)
	470	{
	471	kk = (k-1)2; / index by twos; re & im alternate */
	472
	473	ar0 = cr0[kk] + cr4[kk];
	474	ar1 = cr1[kk] + cr5[kk];
	475	ar2 = cr2[kk] + cr6[kk];
	476	ar3 = cr3[kk] + cr7[kk];
	477	ar4 = cr0[kk] - cr4[kk];
	478	ar5 = cr1[kk] - cr5[kk];
	479	ar6 = cr2[kk] - cr6[kk];
	480	ar7 = cr3[kk] - cr7[kk];
	481	ai0 = ci0[kk] + ci4[kk];
	482	ai1 = ci1[kk] + ci5[kk];
	483	ai2 = ci2[kk] + ci6[kk];
	484	ai3 = ci3[kk] + ci7[kk];
	485	ai4 = ci0[kk] - ci4[kk];
	486	ai5 = ci1[kk] - ci5[kk];
	487	ai6 = ci2[kk] - ci6[kk];
	488	ai7 = ci3[kk] - ci7[kk];
	489	br0 = ar0 + ar2;
	490	br1 = ar1 + ar3;
	491	br2 = ar0 - ar2;
	492	br3 = ar1 - ar3;
	493	br4 = ar4 - ai6;
	494	br5 = ar5 - ai7;
	495	br6 = ar4 + ai6;
	496	br7 = ar5 + ai7;
	497	bi0 = ai0 + ai2;
	498	bi1 = ai1 + ai3;
	499	bi2 = ai0 - ai2;
	500	bi3 = ai1 - ai3;
	501	bi4 = ai4 + ar6;
	502	bi5 = ai5 + ar7;
	503	bi6 = ai4 - ar6;
	504	bi7 = ai5 - ar7;
	505	cr0[kk] = br0 + br1;
	506	ci0[kk] = bi0 + bi1;
	507	if(j > 1)
	508	{
	509	cr1[kk] = c4(br0-br1) - s4(bi0-bi1);
	510	cr2[kk] = c2(br2-bi3) - s2(bi2+br3);
	511	cr3[kk] = c6(br2+bi3) - s6(bi2-br3);
	512	ci1[kk] = c4(bi0-bi1) + s4(br0-br1);
	513	ci2[kk] = c2(bi2+br3) + s2(br2-bi3);
	514	ci3[kk] = c6(bi2-br3) + s6(br2+bi3);
	515	tr = OO_SQRT2*(br5-bi5);
	516	ti = OO_SQRT2*(br5+bi5);
	517	cr4[kk] = c1(br4+tr) - s1(bi4+ti);
	518	ci4[kk] = c1(bi4+ti) + s1(br4+tr);
	519	cr5[kk] = c5(br4-tr) - s5(bi4-ti);
	520	ci5[kk] = c5(bi4-ti) + s5(br4-tr);
	521	tr = -OO_SQRT2*(br7+bi7);
	522	ti = OO_SQRT2*(br7-bi7);
	523	cr6[kk] = c3(br6+tr) - s3(bi6+ti);
	524	ci6[kk] = c3(bi6+ti) + s3(br6+tr);
	525	cr7[kk] = c7(br6-tr) - s7(bi6-ti);
	526	ci7[kk] = c7(bi6-ti) + s7(br6-tr);
	527	}
	528	else
	529	{
	530	cr1[kk] = br0 - br1;
	531	cr2[kk] = br2 - bi3;
	532	cr3[kk] = br2 + bi3;
	533	ci1[kk] = bi0 - bi1;
	534	ci2[kk] = bi2 + br3;
	535	ci3[kk] = bi2 - br3;
	536	tr = OO_SQRT2*(br5-bi5);
	537	ti = OO_SQRT2*(br5+bi5);
	538	cr4[kk] = br4 + tr;
	539	ci4[kk] = bi4 + ti;
	540	cr5[kk] = br4 - tr;
	541	ci5[kk] = bi4 - ti;
	542	tr = -OO_SQRT2*(br7+bi7);
	543	ti = OO_SQRT2*(br7-bi7);
	544	cr6[kk] = br6 + tr;
	545	ci6[kk] = bi6 + ti;
	546	cr7[kk] = br6 - tr;
	547	ci7[kk] = bi6 - ti;
	548	}
	549	}
	550	}
	551	}
	552
	553
	554	/* see if exactly one bit is set in integer argument */
	555	S32 power_of_2(S32 n)
	556	{
	557	S32 bits=0;
	558	while(n)
	559	{
	560	bits += n & 1;
	561	n >>= 1;
	562	}
	563	return(bits==1);
	564	}
	565
	566	/* get binary log of integer argument; exact if n a power of 2 */
	567	S32 fastlog2(S32 n)
	568	{
	569	S32 log = -1;
	570	while(n)
	571	{
	572	log++;
	573	n >>= 1;
	574	}
	575	return(log);
	576	}