From f205de7847da7ae1c10212d82e7042d0100b4ce0 Mon Sep 17 00:00:00 2001 From: dan miller Date: Fri, 19 Oct 2007 05:24:38 +0000 Subject: from the start... checking in ode-0.9 --- libraries/ode-0.9/ode/src/fastltsolve.c | 199 ++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 libraries/ode-0.9/ode/src/fastltsolve.c (limited to 'libraries/ode-0.9/ode/src/fastltsolve.c') diff --git a/libraries/ode-0.9/ode/src/fastltsolve.c b/libraries/ode-0.9/ode/src/fastltsolve.c new file mode 100644 index 0000000..eb950f6 --- /dev/null +++ b/libraries/ode-0.9/ode/src/fastltsolve.c @@ -0,0 +1,199 @@ +/* generated code, do not edit. */ + +#include "ode/matrix.h" + +/* solve L^T * x=b, with b containing 1 right hand side. + * L is an n*n lower triangular matrix with ones on the diagonal. + * L is stored by rows and its leading dimension is lskip. + * b is an n*1 matrix that contains the right hand side. + * b is overwritten with x. + * this processes blocks of 4. + */ + +void dSolveL1T (const dReal *L, dReal *B, int n, int lskip1) +{ + /* declare variables - Z matrix, p and q vectors, etc */ + dReal Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex; + const dReal *ell; + int lskip2,lskip3,i,j; + /* special handling for L and B because we're solving L1 *transpose* */ + L = L + (n-1)*(lskip1+1); + B = B + n-1; + lskip1 = -lskip1; + /* compute lskip values */ + lskip2 = 2*lskip1; + lskip3 = 3*lskip1; + /* compute all 4 x 1 blocks of X */ + for (i=0; i <= n-4; i+=4) { + /* compute all 4 x 1 block of X, from rows i..i+4-1 */ + /* set the Z matrix to 0 */ + Z11=0; + Z21=0; + Z31=0; + Z41=0; + ell = L - i; + ex = B; + /* the inner loop that computes outer products and adds them to Z */ + for (j=i-4; j >= 0; j -= 4) { + /* load p and q values */ + p1=ell[0]; + q1=ex[0]; + p2=ell[-1]; + p3=ell[-2]; + p4=ell[-3]; + /* compute outer product and add it to the Z matrix */ + m11 = p1 * q1; + m21 = p2 * q1; + m31 = p3 * q1; + m41 = p4 * q1; + ell += lskip1; + Z11 += m11; + Z21 += m21; + Z31 += m31; + Z41 += m41; + /* load p and q values */ + p1=ell[0]; + q1=ex[-1]; + p2=ell[-1]; + p3=ell[-2]; + p4=ell[-3]; + /* compute outer product and add it to the Z matrix */ + m11 = p1 * q1; + m21 = p2 * q1; + m31 = p3 * q1; + m41 = p4 * q1; + ell += lskip1; + Z11 += m11; + Z21 += m21; + Z31 += m31; + Z41 += m41; + /* load p and q values */ + p1=ell[0]; + q1=ex[-2]; + p2=ell[-1]; + p3=ell[-2]; + p4=ell[-3]; + /* compute outer product and add it to the Z matrix */ + m11 = p1 * q1; + m21 = p2 * q1; + m31 = p3 * q1; + m41 = p4 * q1; + ell += lskip1; + Z11 += m11; + Z21 += m21; + Z31 += m31; + Z41 += m41; + /* load p and q values */ + p1=ell[0]; + q1=ex[-3]; + p2=ell[-1]; + p3=ell[-2]; + p4=ell[-3]; + /* compute outer product and add it to the Z matrix */ + m11 = p1 * q1; + m21 = p2 * q1; + m31 = p3 * q1; + m41 = p4 * q1; + ell += lskip1; + ex -= 4; + Z11 += m11; + Z21 += m21; + Z31 += m31; + Z41 += m41; + /* end of inner loop */ + } + /* compute left-over iterations */ + j += 4; + for (; j > 0; j--) { + /* load p and q values */ + p1=ell[0]; + q1=ex[0]; + p2=ell[-1]; + p3=ell[-2]; + p4=ell[-3]; + /* compute outer product and add it to the Z matrix */ + m11 = p1 * q1; + m21 = p2 * q1; + m31 = p3 * q1; + m41 = p4 * q1; + ell += lskip1; + ex -= 1; + Z11 += m11; + Z21 += m21; + Z31 += m31; + Z41 += m41; + } + /* finish computing the X(i) block */ + Z11 = ex[0] - Z11; + ex[0] = Z11; + p1 = ell[-1]; + Z21 = ex[-1] - Z21 - p1*Z11; + ex[-1] = Z21; + p1 = ell[-2]; + p2 = ell[-2+lskip1]; + Z31 = ex[-2] - Z31 - p1*Z11 - p2*Z21; + ex[-2] = Z31; + p1 = ell[-3]; + p2 = ell[-3+lskip1]; + p3 = ell[-3+lskip2]; + Z41 = ex[-3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31; + ex[-3] = Z41; + /* end of outer loop */ + } + /* compute rows at end that are not a multiple of block size */ + for (; i < n; i++) { + /* compute all 1 x 1 block of X, from rows i..i+1-1 */ + /* set the Z matrix to 0 */ + Z11=0; + ell = L - i; + ex = B; + /* the inner loop that computes outer products and adds them to Z */ + for (j=i-4; j >= 0; j -= 4) { + /* load p and q values */ + p1=ell[0]; + q1=ex[0]; + /* compute outer product and add it to the Z matrix */ + m11 = p1 * q1; + ell += lskip1; + Z11 += m11; + /* load p and q values */ + p1=ell[0]; + q1=ex[-1]; + /* compute outer product and add it to the Z matrix */ + m11 = p1 * q1; + ell += lskip1; + Z11 += m11; + /* load p and q values */ + p1=ell[0]; + q1=ex[-2]; + /* compute outer product and add it to the Z matrix */ + m11 = p1 * q1; + ell += lskip1; + Z11 += m11; + /* load p and q values */ + p1=ell[0]; + q1=ex[-3]; + /* compute outer product and add it to the Z matrix */ + m11 = p1 * q1; + ell += lskip1; + ex -= 4; + Z11 += m11; + /* end of inner loop */ + } + /* compute left-over iterations */ + j += 4; + for (; j > 0; j--) { + /* load p and q values */ + p1=ell[0]; + q1=ex[0]; + /* compute outer product and add it to the Z matrix */ + m11 = p1 * q1; + ell += lskip1; + ex -= 1; + Z11 += m11; + } + /* finish computing the X(i) block */ + Z11 = ex[0] - Z11; + ex[0] = Z11; + } +} -- cgit v1.1