aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/ode-0.9/ode/src/fastlsolve.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--libraries/ode-0.9/ode/src/fastlsolve.c298
1 files changed, 298 insertions, 0 deletions
diff --git a/libraries/ode-0.9/ode/src/fastlsolve.c b/libraries/ode-0.9/ode/src/fastlsolve.c
new file mode 100644
index 0000000..0ae99d6
--- /dev/null
+++ b/libraries/ode-0.9/ode/src/fastlsolve.c
@@ -0,0 +1,298 @@
1/* generated code, do not edit. */
2
3#include "ode/matrix.h"
4
5/* solve L*X=B, with B containing 1 right hand sides.
6 * L is an n*n lower triangular matrix with ones on the diagonal.
7 * L is stored by rows and its leading dimension is lskip.
8 * B is an n*1 matrix that contains the right hand sides.
9 * B is stored by columns and its leading dimension is also lskip.
10 * B is overwritten with X.
11 * this processes blocks of 4*4.
12 * if this is in the factorizer source file, n must be a multiple of 4.
13 */
14
15void dSolveL1 (const dReal *L, dReal *B, int n, int lskip1)
16{
17 /* declare variables - Z matrix, p and q vectors, etc */
18 dReal Z11,Z21,Z31,Z41,p1,q1,p2,p3,p4,*ex;
19 const dReal *ell;
20 int lskip2,lskip3,i,j;
21 /* compute lskip values */
22 lskip2 = 2*lskip1;
23 lskip3 = 3*lskip1;
24 /* compute all 4 x 1 blocks of X */
25 for (i=0; i <= n-4; i+=4) {
26 /* compute all 4 x 1 block of X, from rows i..i+4-1 */
27 /* set the Z matrix to 0 */
28 Z11=0;
29 Z21=0;
30 Z31=0;
31 Z41=0;
32 ell = L + i*lskip1;
33 ex = B;
34 /* the inner loop that computes outer products and adds them to Z */
35 for (j=i-12; j >= 0; j -= 12) {
36 /* load p and q values */
37 p1=ell[0];
38 q1=ex[0];
39 p2=ell[lskip1];
40 p3=ell[lskip2];
41 p4=ell[lskip3];
42 /* compute outer product and add it to the Z matrix */
43 Z11 += p1 * q1;
44 Z21 += p2 * q1;
45 Z31 += p3 * q1;
46 Z41 += p4 * q1;
47 /* load p and q values */
48 p1=ell[1];
49 q1=ex[1];
50 p2=ell[1+lskip1];
51 p3=ell[1+lskip2];
52 p4=ell[1+lskip3];
53 /* compute outer product and add it to the Z matrix */
54 Z11 += p1 * q1;
55 Z21 += p2 * q1;
56 Z31 += p3 * q1;
57 Z41 += p4 * q1;
58 /* load p and q values */
59 p1=ell[2];
60 q1=ex[2];
61 p2=ell[2+lskip1];
62 p3=ell[2+lskip2];
63 p4=ell[2+lskip3];
64 /* compute outer product and add it to the Z matrix */
65 Z11 += p1 * q1;
66 Z21 += p2 * q1;
67 Z31 += p3 * q1;
68 Z41 += p4 * q1;
69 /* load p and q values */
70 p1=ell[3];
71 q1=ex[3];
72 p2=ell[3+lskip1];
73 p3=ell[3+lskip2];
74 p4=ell[3+lskip3];
75 /* compute outer product and add it to the Z matrix */
76 Z11 += p1 * q1;
77 Z21 += p2 * q1;
78 Z31 += p3 * q1;
79 Z41 += p4 * q1;
80 /* load p and q values */
81 p1=ell[4];
82 q1=ex[4];
83 p2=ell[4+lskip1];
84 p3=ell[4+lskip2];
85 p4=ell[4+lskip3];
86 /* compute outer product and add it to the Z matrix */
87 Z11 += p1 * q1;
88 Z21 += p2 * q1;
89 Z31 += p3 * q1;
90 Z41 += p4 * q1;
91 /* load p and q values */
92 p1=ell[5];
93 q1=ex[5];
94 p2=ell[5+lskip1];
95 p3=ell[5+lskip2];
96 p4=ell[5+lskip3];
97 /* compute outer product and add it to the Z matrix */
98 Z11 += p1 * q1;
99 Z21 += p2 * q1;
100 Z31 += p3 * q1;
101 Z41 += p4 * q1;
102 /* load p and q values */
103 p1=ell[6];
104 q1=ex[6];
105 p2=ell[6+lskip1];
106 p3=ell[6+lskip2];
107 p4=ell[6+lskip3];
108 /* compute outer product and add it to the Z matrix */
109 Z11 += p1 * q1;
110 Z21 += p2 * q1;
111 Z31 += p3 * q1;
112 Z41 += p4 * q1;
113 /* load p and q values */
114 p1=ell[7];
115 q1=ex[7];
116 p2=ell[7+lskip1];
117 p3=ell[7+lskip2];
118 p4=ell[7+lskip3];
119 /* compute outer product and add it to the Z matrix */
120 Z11 += p1 * q1;
121 Z21 += p2 * q1;
122 Z31 += p3 * q1;
123 Z41 += p4 * q1;
124 /* load p and q values */
125 p1=ell[8];
126 q1=ex[8];
127 p2=ell[8+lskip1];
128 p3=ell[8+lskip2];
129 p4=ell[8+lskip3];
130 /* compute outer product and add it to the Z matrix */
131 Z11 += p1 * q1;
132 Z21 += p2 * q1;
133 Z31 += p3 * q1;
134 Z41 += p4 * q1;
135 /* load p and q values */
136 p1=ell[9];
137 q1=ex[9];
138 p2=ell[9+lskip1];
139 p3=ell[9+lskip2];
140 p4=ell[9+lskip3];
141 /* compute outer product and add it to the Z matrix */
142 Z11 += p1 * q1;
143 Z21 += p2 * q1;
144 Z31 += p3 * q1;
145 Z41 += p4 * q1;
146 /* load p and q values */
147 p1=ell[10];
148 q1=ex[10];
149 p2=ell[10+lskip1];
150 p3=ell[10+lskip2];
151 p4=ell[10+lskip3];
152 /* compute outer product and add it to the Z matrix */
153 Z11 += p1 * q1;
154 Z21 += p2 * q1;
155 Z31 += p3 * q1;
156 Z41 += p4 * q1;
157 /* load p and q values */
158 p1=ell[11];
159 q1=ex[11];
160 p2=ell[11+lskip1];
161 p3=ell[11+lskip2];
162 p4=ell[11+lskip3];
163 /* compute outer product and add it to the Z matrix */
164 Z11 += p1 * q1;
165 Z21 += p2 * q1;
166 Z31 += p3 * q1;
167 Z41 += p4 * q1;
168 /* advance pointers */
169 ell += 12;
170 ex += 12;
171 /* end of inner loop */
172 }
173 /* compute left-over iterations */
174 j += 12;
175 for (; j > 0; j--) {
176 /* load p and q values */
177 p1=ell[0];
178 q1=ex[0];
179 p2=ell[lskip1];
180 p3=ell[lskip2];
181 p4=ell[lskip3];
182 /* compute outer product and add it to the Z matrix */
183 Z11 += p1 * q1;
184 Z21 += p2 * q1;
185 Z31 += p3 * q1;
186 Z41 += p4 * q1;
187 /* advance pointers */
188 ell += 1;
189 ex += 1;
190 }
191 /* finish computing the X(i) block */
192 Z11 = ex[0] - Z11;
193 ex[0] = Z11;
194 p1 = ell[lskip1];
195 Z21 = ex[1] - Z21 - p1*Z11;
196 ex[1] = Z21;
197 p1 = ell[lskip2];
198 p2 = ell[1+lskip2];
199 Z31 = ex[2] - Z31 - p1*Z11 - p2*Z21;
200 ex[2] = Z31;
201 p1 = ell[lskip3];
202 p2 = ell[1+lskip3];
203 p3 = ell[2+lskip3];
204 Z41 = ex[3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31;
205 ex[3] = Z41;
206 /* end of outer loop */
207 }
208 /* compute rows at end that are not a multiple of block size */
209 for (; i < n; i++) {
210 /* compute all 1 x 1 block of X, from rows i..i+1-1 */
211 /* set the Z matrix to 0 */
212 Z11=0;
213 ell = L + i*lskip1;
214 ex = B;
215 /* the inner loop that computes outer products and adds them to Z */
216 for (j=i-12; j >= 0; j -= 12) {
217 /* load p and q values */
218 p1=ell[0];
219 q1=ex[0];
220 /* compute outer product and add it to the Z matrix */
221 Z11 += p1 * q1;
222 /* load p and q values */
223 p1=ell[1];
224 q1=ex[1];
225 /* compute outer product and add it to the Z matrix */
226 Z11 += p1 * q1;
227 /* load p and q values */
228 p1=ell[2];
229 q1=ex[2];
230 /* compute outer product and add it to the Z matrix */
231 Z11 += p1 * q1;
232 /* load p and q values */
233 p1=ell[3];
234 q1=ex[3];
235 /* compute outer product and add it to the Z matrix */
236 Z11 += p1 * q1;
237 /* load p and q values */
238 p1=ell[4];
239 q1=ex[4];
240 /* compute outer product and add it to the Z matrix */
241 Z11 += p1 * q1;
242 /* load p and q values */
243 p1=ell[5];
244 q1=ex[5];
245 /* compute outer product and add it to the Z matrix */
246 Z11 += p1 * q1;
247 /* load p and q values */
248 p1=ell[6];
249 q1=ex[6];
250 /* compute outer product and add it to the Z matrix */
251 Z11 += p1 * q1;
252 /* load p and q values */
253 p1=ell[7];
254 q1=ex[7];
255 /* compute outer product and add it to the Z matrix */
256 Z11 += p1 * q1;
257 /* load p and q values */
258 p1=ell[8];
259 q1=ex[8];
260 /* compute outer product and add it to the Z matrix */
261 Z11 += p1 * q1;
262 /* load p and q values */
263 p1=ell[9];
264 q1=ex[9];
265 /* compute outer product and add it to the Z matrix */
266 Z11 += p1 * q1;
267 /* load p and q values */
268 p1=ell[10];
269 q1=ex[10];
270 /* compute outer product and add it to the Z matrix */
271 Z11 += p1 * q1;
272 /* load p and q values */
273 p1=ell[11];
274 q1=ex[11];
275 /* compute outer product and add it to the Z matrix */
276 Z11 += p1 * q1;
277 /* advance pointers */
278 ell += 12;
279 ex += 12;
280 /* end of inner loop */
281 }
282 /* compute left-over iterations */
283 j += 12;
284 for (; j > 0; j--) {
285 /* load p and q values */
286 p1=ell[0];
287 q1=ex[0];
288 /* compute outer product and add it to the Z matrix */
289 Z11 += p1 * q1;
290 /* advance pointers */
291 ell += 1;
292 ex += 1;
293 }
294 /* finish computing the X(i) block */
295 Z11 = ex[0] - Z11;
296 ex[0] = Z11;
297 }
298}