diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/ode-0.9/ode/src/fastldlt.c | 381 |
1 files changed, 381 insertions, 0 deletions
diff --git a/libraries/ode-0.9/ode/src/fastldlt.c b/libraries/ode-0.9/ode/src/fastldlt.c new file mode 100644 index 0000000..df2ea6e --- /dev/null +++ b/libraries/ode-0.9/ode/src/fastldlt.c | |||
@@ -0,0 +1,381 @@ | |||
1 | /* generated code, do not edit. */ | ||
2 | |||
3 | #include "ode/matrix.h" | ||
4 | |||
5 | /* solve L*X=B, with B containing 1 right hand sides. | ||
6 | * L is an n*n lower triangular matrix with ones on the diagonal. | ||
7 | * L is stored by rows and its leading dimension is lskip. | ||
8 | * B is an n*1 matrix that contains the right hand sides. | ||
9 | * B is stored by columns and its leading dimension is also lskip. | ||
10 | * B is overwritten with X. | ||
11 | * this processes blocks of 2*2. | ||
12 | * if this is in the factorizer source file, n must be a multiple of 2. | ||
13 | */ | ||
14 | |||
15 | static void dSolveL1_1 (const dReal *L, dReal *B, int n, int lskip1) | ||
16 | { | ||
17 | /* declare variables - Z matrix, p and q vectors, etc */ | ||
18 | dReal Z11,m11,Z21,m21,p1,q1,p2,*ex; | ||
19 | const dReal *ell; | ||
20 | int i,j; | ||
21 | /* compute all 2 x 1 blocks of X */ | ||
22 | for (i=0; i < n; i+=2) { | ||
23 | /* compute all 2 x 1 block of X, from rows i..i+2-1 */ | ||
24 | /* set the Z matrix to 0 */ | ||
25 | Z11=0; | ||
26 | Z21=0; | ||
27 | ell = L + i*lskip1; | ||
28 | ex = B; | ||
29 | /* the inner loop that computes outer products and adds them to Z */ | ||
30 | for (j=i-2; j >= 0; j -= 2) { | ||
31 | /* compute outer product and add it to the Z matrix */ | ||
32 | p1=ell[0]; | ||
33 | q1=ex[0]; | ||
34 | m11 = p1 * q1; | ||
35 | p2=ell[lskip1]; | ||
36 | m21 = p2 * q1; | ||
37 | Z11 += m11; | ||
38 | Z21 += m21; | ||
39 | /* compute outer product and add it to the Z matrix */ | ||
40 | p1=ell[1]; | ||
41 | q1=ex[1]; | ||
42 | m11 = p1 * q1; | ||
43 | p2=ell[1+lskip1]; | ||
44 | m21 = p2 * q1; | ||
45 | /* advance pointers */ | ||
46 | ell += 2; | ||
47 | ex += 2; | ||
48 | Z11 += m11; | ||
49 | Z21 += m21; | ||
50 | /* end of inner loop */ | ||
51 | } | ||
52 | /* compute left-over iterations */ | ||
53 | j += 2; | ||
54 | for (; j > 0; j--) { | ||
55 | /* compute outer product and add it to the Z matrix */ | ||
56 | p1=ell[0]; | ||
57 | q1=ex[0]; | ||
58 | m11 = p1 * q1; | ||
59 | p2=ell[lskip1]; | ||
60 | m21 = p2 * q1; | ||
61 | /* advance pointers */ | ||
62 | ell += 1; | ||
63 | ex += 1; | ||
64 | Z11 += m11; | ||
65 | Z21 += m21; | ||
66 | } | ||
67 | /* finish computing the X(i) block */ | ||
68 | Z11 = ex[0] - Z11; | ||
69 | ex[0] = Z11; | ||
70 | p1 = ell[lskip1]; | ||
71 | Z21 = ex[1] - Z21 - p1*Z11; | ||
72 | ex[1] = Z21; | ||
73 | /* end of outer loop */ | ||
74 | } | ||
75 | } | ||
76 | |||
77 | /* solve L*X=B, with B containing 2 right hand sides. | ||
78 | * L is an n*n lower triangular matrix with ones on the diagonal. | ||
79 | * L is stored by rows and its leading dimension is lskip. | ||
80 | * B is an n*2 matrix that contains the right hand sides. | ||
81 | * B is stored by columns and its leading dimension is also lskip. | ||
82 | * B is overwritten with X. | ||
83 | * this processes blocks of 2*2. | ||
84 | * if this is in the factorizer source file, n must be a multiple of 2. | ||
85 | */ | ||
86 | |||
87 | static void dSolveL1_2 (const dReal *L, dReal *B, int n, int lskip1) | ||
88 | { | ||
89 | /* declare variables - Z matrix, p and q vectors, etc */ | ||
90 | dReal Z11,m11,Z12,m12,Z21,m21,Z22,m22,p1,q1,p2,q2,*ex; | ||
91 | const dReal *ell; | ||
92 | int i,j; | ||
93 | /* compute all 2 x 2 blocks of X */ | ||
94 | for (i=0; i < n; i+=2) { | ||
95 | /* compute all 2 x 2 block of X, from rows i..i+2-1 */ | ||
96 | /* set the Z matrix to 0 */ | ||
97 | Z11=0; | ||
98 | Z12=0; | ||
99 | Z21=0; | ||
100 | Z22=0; | ||
101 | ell = L + i*lskip1; | ||
102 | ex = B; | ||
103 | /* the inner loop that computes outer products and adds them to Z */ | ||
104 | for (j=i-2; j >= 0; j -= 2) { | ||
105 | /* compute outer product and add it to the Z matrix */ | ||
106 | p1=ell[0]; | ||
107 | q1=ex[0]; | ||
108 | m11 = p1 * q1; | ||
109 | q2=ex[lskip1]; | ||
110 | m12 = p1 * q2; | ||
111 | p2=ell[lskip1]; | ||
112 | m21 = p2 * q1; | ||
113 | m22 = p2 * q2; | ||
114 | Z11 += m11; | ||
115 | Z12 += m12; | ||
116 | Z21 += m21; | ||
117 | Z22 += m22; | ||
118 | /* compute outer product and add it to the Z matrix */ | ||
119 | p1=ell[1]; | ||
120 | q1=ex[1]; | ||
121 | m11 = p1 * q1; | ||
122 | q2=ex[1+lskip1]; | ||
123 | m12 = p1 * q2; | ||
124 | p2=ell[1+lskip1]; | ||
125 | m21 = p2 * q1; | ||
126 | m22 = p2 * q2; | ||
127 | /* advance pointers */ | ||
128 | ell += 2; | ||
129 | ex += 2; | ||
130 | Z11 += m11; | ||
131 | Z12 += m12; | ||
132 | Z21 += m21; | ||
133 | Z22 += m22; | ||
134 | /* end of inner loop */ | ||
135 | } | ||
136 | /* compute left-over iterations */ | ||
137 | j += 2; | ||
138 | for (; j > 0; j--) { | ||
139 | /* compute outer product and add it to the Z matrix */ | ||
140 | p1=ell[0]; | ||
141 | q1=ex[0]; | ||
142 | m11 = p1 * q1; | ||
143 | q2=ex[lskip1]; | ||
144 | m12 = p1 * q2; | ||
145 | p2=ell[lskip1]; | ||
146 | m21 = p2 * q1; | ||
147 | m22 = p2 * q2; | ||
148 | /* advance pointers */ | ||
149 | ell += 1; | ||
150 | ex += 1; | ||
151 | Z11 += m11; | ||
152 | Z12 += m12; | ||
153 | Z21 += m21; | ||
154 | Z22 += m22; | ||
155 | } | ||
156 | /* finish computing the X(i) block */ | ||
157 | Z11 = ex[0] - Z11; | ||
158 | ex[0] = Z11; | ||
159 | Z12 = ex[lskip1] - Z12; | ||
160 | ex[lskip1] = Z12; | ||
161 | p1 = ell[lskip1]; | ||
162 | Z21 = ex[1] - Z21 - p1*Z11; | ||
163 | ex[1] = Z21; | ||
164 | Z22 = ex[1+lskip1] - Z22 - p1*Z12; | ||
165 | ex[1+lskip1] = Z22; | ||
166 | /* end of outer loop */ | ||
167 | } | ||
168 | } | ||
169 | |||
170 | |||
171 | void dFactorLDLT (dReal *A, dReal *d, int n, int nskip1) | ||
172 | { | ||
173 | int i,j; | ||
174 | dReal sum,*ell,*dee,dd,p1,p2,q1,q2,Z11,m11,Z21,m21,Z22,m22; | ||
175 | if (n < 1) return; | ||
176 | |||
177 | for (i=0; i<=n-2; i += 2) { | ||
178 | /* solve L*(D*l)=a, l is scaled elements in 2 x i block at A(i,0) */ | ||
179 | dSolveL1_2 (A,A+i*nskip1,i,nskip1); | ||
180 | /* scale the elements in a 2 x i block at A(i,0), and also */ | ||
181 | /* compute Z = the outer product matrix that we'll need. */ | ||
182 | Z11 = 0; | ||
183 | Z21 = 0; | ||
184 | Z22 = 0; | ||
185 | ell = A+i*nskip1; | ||
186 | dee = d; | ||
187 | for (j=i-6; j >= 0; j -= 6) { | ||
188 | p1 = ell[0]; | ||
189 | p2 = ell[nskip1]; | ||
190 | dd = dee[0]; | ||
191 | q1 = p1*dd; | ||
192 | q2 = p2*dd; | ||
193 | ell[0] = q1; | ||
194 | ell[nskip1] = q2; | ||
195 | m11 = p1*q1; | ||
196 | m21 = p2*q1; | ||
197 | m22 = p2*q2; | ||
198 | Z11 += m11; | ||
199 | Z21 += m21; | ||
200 | Z22 += m22; | ||
201 | p1 = ell[1]; | ||
202 | p2 = ell[1+nskip1]; | ||
203 | dd = dee[1]; | ||
204 | q1 = p1*dd; | ||
205 | q2 = p2*dd; | ||
206 | ell[1] = q1; | ||
207 | ell[1+nskip1] = q2; | ||
208 | m11 = p1*q1; | ||
209 | m21 = p2*q1; | ||
210 | m22 = p2*q2; | ||
211 | Z11 += m11; | ||
212 | Z21 += m21; | ||
213 | Z22 += m22; | ||
214 | p1 = ell[2]; | ||
215 | p2 = ell[2+nskip1]; | ||
216 | dd = dee[2]; | ||
217 | q1 = p1*dd; | ||
218 | q2 = p2*dd; | ||
219 | ell[2] = q1; | ||
220 | ell[2+nskip1] = q2; | ||
221 | m11 = p1*q1; | ||
222 | m21 = p2*q1; | ||
223 | m22 = p2*q2; | ||
224 | Z11 += m11; | ||
225 | Z21 += m21; | ||
226 | Z22 += m22; | ||
227 | p1 = ell[3]; | ||
228 | p2 = ell[3+nskip1]; | ||
229 | dd = dee[3]; | ||
230 | q1 = p1*dd; | ||
231 | q2 = p2*dd; | ||
232 | ell[3] = q1; | ||
233 | ell[3+nskip1] = q2; | ||
234 | m11 = p1*q1; | ||
235 | m21 = p2*q1; | ||
236 | m22 = p2*q2; | ||
237 | Z11 += m11; | ||
238 | Z21 += m21; | ||
239 | Z22 += m22; | ||
240 | p1 = ell[4]; | ||
241 | p2 = ell[4+nskip1]; | ||
242 | dd = dee[4]; | ||
243 | q1 = p1*dd; | ||
244 | q2 = p2*dd; | ||
245 | ell[4] = q1; | ||
246 | ell[4+nskip1] = q2; | ||
247 | m11 = p1*q1; | ||
248 | m21 = p2*q1; | ||
249 | m22 = p2*q2; | ||
250 | Z11 += m11; | ||
251 | Z21 += m21; | ||
252 | Z22 += m22; | ||
253 | p1 = ell[5]; | ||
254 | p2 = ell[5+nskip1]; | ||
255 | dd = dee[5]; | ||
256 | q1 = p1*dd; | ||
257 | q2 = p2*dd; | ||
258 | ell[5] = q1; | ||
259 | ell[5+nskip1] = q2; | ||
260 | m11 = p1*q1; | ||
261 | m21 = p2*q1; | ||
262 | m22 = p2*q2; | ||
263 | Z11 += m11; | ||
264 | Z21 += m21; | ||
265 | Z22 += m22; | ||
266 | ell += 6; | ||
267 | dee += 6; | ||
268 | } | ||
269 | /* compute left-over iterations */ | ||
270 | j += 6; | ||
271 | for (; j > 0; j--) { | ||
272 | p1 = ell[0]; | ||
273 | p2 = ell[nskip1]; | ||
274 | dd = dee[0]; | ||
275 | q1 = p1*dd; | ||
276 | q2 = p2*dd; | ||
277 | ell[0] = q1; | ||
278 | ell[nskip1] = q2; | ||
279 | m11 = p1*q1; | ||
280 | m21 = p2*q1; | ||
281 | m22 = p2*q2; | ||
282 | Z11 += m11; | ||
283 | Z21 += m21; | ||
284 | Z22 += m22; | ||
285 | ell++; | ||
286 | dee++; | ||
287 | } | ||
288 | /* solve for diagonal 2 x 2 block at A(i,i) */ | ||
289 | Z11 = ell[0] - Z11; | ||
290 | Z21 = ell[nskip1] - Z21; | ||
291 | Z22 = ell[1+nskip1] - Z22; | ||
292 | dee = d + i; | ||
293 | /* factorize 2 x 2 block Z,dee */ | ||
294 | /* factorize row 1 */ | ||
295 | dee[0] = dRecip(Z11); | ||
296 | /* factorize row 2 */ | ||
297 | sum = 0; | ||
298 | q1 = Z21; | ||
299 | q2 = q1 * dee[0]; | ||
300 | Z21 = q2; | ||
301 | sum += q1*q2; | ||
302 | dee[1] = dRecip(Z22 - sum); | ||
303 | /* done factorizing 2 x 2 block */ | ||
304 | ell[nskip1] = Z21; | ||
305 | } | ||
306 | /* compute the (less than 2) rows at the bottom */ | ||
307 | switch (n-i) { | ||
308 | case 0: | ||
309 | break; | ||
310 | |||
311 | case 1: | ||
312 | dSolveL1_1 (A,A+i*nskip1,i,nskip1); | ||
313 | /* scale the elements in a 1 x i block at A(i,0), and also */ | ||
314 | /* compute Z = the outer product matrix that we'll need. */ | ||
315 | Z11 = 0; | ||
316 | ell = A+i*nskip1; | ||
317 | dee = d; | ||
318 | for (j=i-6; j >= 0; j -= 6) { | ||
319 | p1 = ell[0]; | ||
320 | dd = dee[0]; | ||
321 | q1 = p1*dd; | ||
322 | ell[0] = q1; | ||
323 | m11 = p1*q1; | ||
324 | Z11 += m11; | ||
325 | p1 = ell[1]; | ||
326 | dd = dee[1]; | ||
327 | q1 = p1*dd; | ||
328 | ell[1] = q1; | ||
329 | m11 = p1*q1; | ||
330 | Z11 += m11; | ||
331 | p1 = ell[2]; | ||
332 | dd = dee[2]; | ||
333 | q1 = p1*dd; | ||
334 | ell[2] = q1; | ||
335 | m11 = p1*q1; | ||
336 | Z11 += m11; | ||
337 | p1 = ell[3]; | ||
338 | dd = dee[3]; | ||
339 | q1 = p1*dd; | ||
340 | ell[3] = q1; | ||
341 | m11 = p1*q1; | ||
342 | Z11 += m11; | ||
343 | p1 = ell[4]; | ||
344 | dd = dee[4]; | ||
345 | q1 = p1*dd; | ||
346 | ell[4] = q1; | ||
347 | m11 = p1*q1; | ||
348 | Z11 += m11; | ||
349 | p1 = ell[5]; | ||
350 | dd = dee[5]; | ||
351 | q1 = p1*dd; | ||
352 | ell[5] = q1; | ||
353 | m11 = p1*q1; | ||
354 | Z11 += m11; | ||
355 | ell += 6; | ||
356 | dee += 6; | ||
357 | } | ||
358 | /* compute left-over iterations */ | ||
359 | j += 6; | ||
360 | for (; j > 0; j--) { | ||
361 | p1 = ell[0]; | ||
362 | dd = dee[0]; | ||
363 | q1 = p1*dd; | ||
364 | ell[0] = q1; | ||
365 | m11 = p1*q1; | ||
366 | Z11 += m11; | ||
367 | ell++; | ||
368 | dee++; | ||
369 | } | ||
370 | /* solve for diagonal 1 x 1 block at A(i,i) */ | ||
371 | Z11 = ell[0] - Z11; | ||
372 | dee = d + i; | ||
373 | /* factorize 1 x 1 block Z,dee */ | ||
374 | /* factorize row 1 */ | ||
375 | dee[0] = dRecip(Z11); | ||
376 | /* done factorizing 1 x 1 block */ | ||
377 | break; | ||
378 | |||
379 | default: *((char*)0)=0; /* this should never happen! */ | ||
380 | } | ||
381 | } | ||