aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/ode-0.9\/ode/src/fastltsolve.c
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/ode-0.9\/ode/src/fastltsolve.c')
-rwxr-xr-xlibraries/ode-0.9\/ode/src/fastltsolve.c199
1 files changed, 199 insertions, 0 deletions
diff --git a/libraries/ode-0.9\/ode/src/fastltsolve.c b/libraries/ode-0.9\/ode/src/fastltsolve.c
new file mode 100755
index 0000000..eb950f6
--- /dev/null
+++ b/libraries/ode-0.9\/ode/src/fastltsolve.c
@@ -0,0 +1,199 @@
1/* generated code, do not edit. */
2
3#include "ode/matrix.h"
4
5/* solve L^T * x=b, with b containing 1 right hand side.
6 * L is an n*n lower triangular matrix with ones on the diagonal.
7 * L is stored by rows and its leading dimension is lskip.
8 * b is an n*1 matrix that contains the right hand side.
9 * b is overwritten with x.
10 * this processes blocks of 4.
11 */
12
13void dSolveL1T (const dReal *L, dReal *B, int n, int lskip1)
14{
15 /* declare variables - Z matrix, p and q vectors, etc */
16 dReal Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex;
17 const dReal *ell;
18 int lskip2,lskip3,i,j;
19 /* special handling for L and B because we're solving L1 *transpose* */
20 L = L + (n-1)*(lskip1+1);
21 B = B + n-1;
22 lskip1 = -lskip1;
23 /* compute lskip values */
24 lskip2 = 2*lskip1;
25 lskip3 = 3*lskip1;
26 /* compute all 4 x 1 blocks of X */
27 for (i=0; i <= n-4; i+=4) {
28 /* compute all 4 x 1 block of X, from rows i..i+4-1 */
29 /* set the Z matrix to 0 */
30 Z11=0;
31 Z21=0;
32 Z31=0;
33 Z41=0;
34 ell = L - i;
35 ex = B;
36 /* the inner loop that computes outer products and adds them to Z */
37 for (j=i-4; j >= 0; j -= 4) {
38 /* load p and q values */
39 p1=ell[0];
40 q1=ex[0];
41 p2=ell[-1];
42 p3=ell[-2];
43 p4=ell[-3];
44 /* compute outer product and add it to the Z matrix */
45 m11 = p1 * q1;
46 m21 = p2 * q1;
47 m31 = p3 * q1;
48 m41 = p4 * q1;
49 ell += lskip1;
50 Z11 += m11;
51 Z21 += m21;
52 Z31 += m31;
53 Z41 += m41;
54 /* load p and q values */
55 p1=ell[0];
56 q1=ex[-1];
57 p2=ell[-1];
58 p3=ell[-2];
59 p4=ell[-3];
60 /* compute outer product and add it to the Z matrix */
61 m11 = p1 * q1;
62 m21 = p2 * q1;
63 m31 = p3 * q1;
64 m41 = p4 * q1;
65 ell += lskip1;
66 Z11 += m11;
67 Z21 += m21;
68 Z31 += m31;
69 Z41 += m41;
70 /* load p and q values */
71 p1=ell[0];
72 q1=ex[-2];
73 p2=ell[-1];
74 p3=ell[-2];
75 p4=ell[-3];
76 /* compute outer product and add it to the Z matrix */
77 m11 = p1 * q1;
78 m21 = p2 * q1;
79 m31 = p3 * q1;
80 m41 = p4 * q1;
81 ell += lskip1;
82 Z11 += m11;
83 Z21 += m21;
84 Z31 += m31;
85 Z41 += m41;
86 /* load p and q values */
87 p1=ell[0];
88 q1=ex[-3];
89 p2=ell[-1];
90 p3=ell[-2];
91 p4=ell[-3];
92 /* compute outer product and add it to the Z matrix */
93 m11 = p1 * q1;
94 m21 = p2 * q1;
95 m31 = p3 * q1;
96 m41 = p4 * q1;
97 ell += lskip1;
98 ex -= 4;
99 Z11 += m11;
100 Z21 += m21;
101 Z31 += m31;
102 Z41 += m41;
103 /* end of inner loop */
104 }
105 /* compute left-over iterations */
106 j += 4;
107 for (; j > 0; j--) {
108 /* load p and q values */
109 p1=ell[0];
110 q1=ex[0];
111 p2=ell[-1];
112 p3=ell[-2];
113 p4=ell[-3];
114 /* compute outer product and add it to the Z matrix */
115 m11 = p1 * q1;
116 m21 = p2 * q1;
117 m31 = p3 * q1;
118 m41 = p4 * q1;
119 ell += lskip1;
120 ex -= 1;
121 Z11 += m11;
122 Z21 += m21;
123 Z31 += m31;
124 Z41 += m41;
125 }
126 /* finish computing the X(i) block */
127 Z11 = ex[0] - Z11;
128 ex[0] = Z11;
129 p1 = ell[-1];
130 Z21 = ex[-1] - Z21 - p1*Z11;
131 ex[-1] = Z21;
132 p1 = ell[-2];
133 p2 = ell[-2+lskip1];
134 Z31 = ex[-2] - Z31 - p1*Z11 - p2*Z21;
135 ex[-2] = Z31;
136 p1 = ell[-3];
137 p2 = ell[-3+lskip1];
138 p3 = ell[-3+lskip2];
139 Z41 = ex[-3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31;
140 ex[-3] = Z41;
141 /* end of outer loop */
142 }
143 /* compute rows at end that are not a multiple of block size */
144 for (; i < n; i++) {
145 /* compute all 1 x 1 block of X, from rows i..i+1-1 */
146 /* set the Z matrix to 0 */
147 Z11=0;
148 ell = L - i;
149 ex = B;
150 /* the inner loop that computes outer products and adds them to Z */
151 for (j=i-4; j >= 0; j -= 4) {
152 /* load p and q values */
153 p1=ell[0];
154 q1=ex[0];
155 /* compute outer product and add it to the Z matrix */
156 m11 = p1 * q1;
157 ell += lskip1;
158 Z11 += m11;
159 /* load p and q values */
160 p1=ell[0];
161 q1=ex[-1];
162 /* compute outer product and add it to the Z matrix */
163 m11 = p1 * q1;
164 ell += lskip1;
165 Z11 += m11;
166 /* load p and q values */
167 p1=ell[0];
168 q1=ex[-2];
169 /* compute outer product and add it to the Z matrix */
170 m11 = p1 * q1;
171 ell += lskip1;
172 Z11 += m11;
173 /* load p and q values */
174 p1=ell[0];
175 q1=ex[-3];
176 /* compute outer product and add it to the Z matrix */
177 m11 = p1 * q1;
178 ell += lskip1;
179 ex -= 4;
180 Z11 += m11;
181 /* end of inner loop */
182 }
183 /* compute left-over iterations */
184 j += 4;
185 for (; j > 0; j--) {
186 /* load p and q values */
187 p1=ell[0];
188 q1=ex[0];
189 /* compute outer product and add it to the Z matrix */
190 m11 = p1 * q1;
191 ell += lskip1;
192 ex -= 1;
193 Z11 += m11;
194 }
195 /* finish computing the X(i) block */
196 Z11 = ex[0] - Z11;
197 ex[0] = Z11;
198 }
199}