diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/ode-0.9/ode/src/fastlsolve.c | 298 |
1 files changed, 298 insertions, 0 deletions
diff --git a/libraries/ode-0.9/ode/src/fastlsolve.c b/libraries/ode-0.9/ode/src/fastlsolve.c new file mode 100644 index 0000000..0ae99d6 --- /dev/null +++ b/libraries/ode-0.9/ode/src/fastlsolve.c | |||
@@ -0,0 +1,298 @@ | |||
1 | /* generated code, do not edit. */ | ||
2 | |||
3 | #include "ode/matrix.h" | ||
4 | |||
5 | /* solve L*X=B, with B containing 1 right hand sides. | ||
6 | * L is an n*n lower triangular matrix with ones on the diagonal. | ||
7 | * L is stored by rows and its leading dimension is lskip. | ||
8 | * B is an n*1 matrix that contains the right hand sides. | ||
9 | * B is stored by columns and its leading dimension is also lskip. | ||
10 | * B is overwritten with X. | ||
11 | * this processes blocks of 4*4. | ||
12 | * if this is in the factorizer source file, n must be a multiple of 4. | ||
13 | */ | ||
14 | |||
15 | void dSolveL1 (const dReal *L, dReal *B, int n, int lskip1) | ||
16 | { | ||
17 | /* declare variables - Z matrix, p and q vectors, etc */ | ||
18 | dReal Z11,Z21,Z31,Z41,p1,q1,p2,p3,p4,*ex; | ||
19 | const dReal *ell; | ||
20 | int lskip2,lskip3,i,j; | ||
21 | /* compute lskip values */ | ||
22 | lskip2 = 2*lskip1; | ||
23 | lskip3 = 3*lskip1; | ||
24 | /* compute all 4 x 1 blocks of X */ | ||
25 | for (i=0; i <= n-4; i+=4) { | ||
26 | /* compute all 4 x 1 block of X, from rows i..i+4-1 */ | ||
27 | /* set the Z matrix to 0 */ | ||
28 | Z11=0; | ||
29 | Z21=0; | ||
30 | Z31=0; | ||
31 | Z41=0; | ||
32 | ell = L + i*lskip1; | ||
33 | ex = B; | ||
34 | /* the inner loop that computes outer products and adds them to Z */ | ||
35 | for (j=i-12; j >= 0; j -= 12) { | ||
36 | /* load p and q values */ | ||
37 | p1=ell[0]; | ||
38 | q1=ex[0]; | ||
39 | p2=ell[lskip1]; | ||
40 | p3=ell[lskip2]; | ||
41 | p4=ell[lskip3]; | ||
42 | /* compute outer product and add it to the Z matrix */ | ||
43 | Z11 += p1 * q1; | ||
44 | Z21 += p2 * q1; | ||
45 | Z31 += p3 * q1; | ||
46 | Z41 += p4 * q1; | ||
47 | /* load p and q values */ | ||
48 | p1=ell[1]; | ||
49 | q1=ex[1]; | ||
50 | p2=ell[1+lskip1]; | ||
51 | p3=ell[1+lskip2]; | ||
52 | p4=ell[1+lskip3]; | ||
53 | /* compute outer product and add it to the Z matrix */ | ||
54 | Z11 += p1 * q1; | ||
55 | Z21 += p2 * q1; | ||
56 | Z31 += p3 * q1; | ||
57 | Z41 += p4 * q1; | ||
58 | /* load p and q values */ | ||
59 | p1=ell[2]; | ||
60 | q1=ex[2]; | ||
61 | p2=ell[2+lskip1]; | ||
62 | p3=ell[2+lskip2]; | ||
63 | p4=ell[2+lskip3]; | ||
64 | /* compute outer product and add it to the Z matrix */ | ||
65 | Z11 += p1 * q1; | ||
66 | Z21 += p2 * q1; | ||
67 | Z31 += p3 * q1; | ||
68 | Z41 += p4 * q1; | ||
69 | /* load p and q values */ | ||
70 | p1=ell[3]; | ||
71 | q1=ex[3]; | ||
72 | p2=ell[3+lskip1]; | ||
73 | p3=ell[3+lskip2]; | ||
74 | p4=ell[3+lskip3]; | ||
75 | /* compute outer product and add it to the Z matrix */ | ||
76 | Z11 += p1 * q1; | ||
77 | Z21 += p2 * q1; | ||
78 | Z31 += p3 * q1; | ||
79 | Z41 += p4 * q1; | ||
80 | /* load p and q values */ | ||
81 | p1=ell[4]; | ||
82 | q1=ex[4]; | ||
83 | p2=ell[4+lskip1]; | ||
84 | p3=ell[4+lskip2]; | ||
85 | p4=ell[4+lskip3]; | ||
86 | /* compute outer product and add it to the Z matrix */ | ||
87 | Z11 += p1 * q1; | ||
88 | Z21 += p2 * q1; | ||
89 | Z31 += p3 * q1; | ||
90 | Z41 += p4 * q1; | ||
91 | /* load p and q values */ | ||
92 | p1=ell[5]; | ||
93 | q1=ex[5]; | ||
94 | p2=ell[5+lskip1]; | ||
95 | p3=ell[5+lskip2]; | ||
96 | p4=ell[5+lskip3]; | ||
97 | /* compute outer product and add it to the Z matrix */ | ||
98 | Z11 += p1 * q1; | ||
99 | Z21 += p2 * q1; | ||
100 | Z31 += p3 * q1; | ||
101 | Z41 += p4 * q1; | ||
102 | /* load p and q values */ | ||
103 | p1=ell[6]; | ||
104 | q1=ex[6]; | ||
105 | p2=ell[6+lskip1]; | ||
106 | p3=ell[6+lskip2]; | ||
107 | p4=ell[6+lskip3]; | ||
108 | /* compute outer product and add it to the Z matrix */ | ||
109 | Z11 += p1 * q1; | ||
110 | Z21 += p2 * q1; | ||
111 | Z31 += p3 * q1; | ||
112 | Z41 += p4 * q1; | ||
113 | /* load p and q values */ | ||
114 | p1=ell[7]; | ||
115 | q1=ex[7]; | ||
116 | p2=ell[7+lskip1]; | ||
117 | p3=ell[7+lskip2]; | ||
118 | p4=ell[7+lskip3]; | ||
119 | /* compute outer product and add it to the Z matrix */ | ||
120 | Z11 += p1 * q1; | ||
121 | Z21 += p2 * q1; | ||
122 | Z31 += p3 * q1; | ||
123 | Z41 += p4 * q1; | ||
124 | /* load p and q values */ | ||
125 | p1=ell[8]; | ||
126 | q1=ex[8]; | ||
127 | p2=ell[8+lskip1]; | ||
128 | p3=ell[8+lskip2]; | ||
129 | p4=ell[8+lskip3]; | ||
130 | /* compute outer product and add it to the Z matrix */ | ||
131 | Z11 += p1 * q1; | ||
132 | Z21 += p2 * q1; | ||
133 | Z31 += p3 * q1; | ||
134 | Z41 += p4 * q1; | ||
135 | /* load p and q values */ | ||
136 | p1=ell[9]; | ||
137 | q1=ex[9]; | ||
138 | p2=ell[9+lskip1]; | ||
139 | p3=ell[9+lskip2]; | ||
140 | p4=ell[9+lskip3]; | ||
141 | /* compute outer product and add it to the Z matrix */ | ||
142 | Z11 += p1 * q1; | ||
143 | Z21 += p2 * q1; | ||
144 | Z31 += p3 * q1; | ||
145 | Z41 += p4 * q1; | ||
146 | /* load p and q values */ | ||
147 | p1=ell[10]; | ||
148 | q1=ex[10]; | ||
149 | p2=ell[10+lskip1]; | ||
150 | p3=ell[10+lskip2]; | ||
151 | p4=ell[10+lskip3]; | ||
152 | /* compute outer product and add it to the Z matrix */ | ||
153 | Z11 += p1 * q1; | ||
154 | Z21 += p2 * q1; | ||
155 | Z31 += p3 * q1; | ||
156 | Z41 += p4 * q1; | ||
157 | /* load p and q values */ | ||
158 | p1=ell[11]; | ||
159 | q1=ex[11]; | ||
160 | p2=ell[11+lskip1]; | ||
161 | p3=ell[11+lskip2]; | ||
162 | p4=ell[11+lskip3]; | ||
163 | /* compute outer product and add it to the Z matrix */ | ||
164 | Z11 += p1 * q1; | ||
165 | Z21 += p2 * q1; | ||
166 | Z31 += p3 * q1; | ||
167 | Z41 += p4 * q1; | ||
168 | /* advance pointers */ | ||
169 | ell += 12; | ||
170 | ex += 12; | ||
171 | /* end of inner loop */ | ||
172 | } | ||
173 | /* compute left-over iterations */ | ||
174 | j += 12; | ||
175 | for (; j > 0; j--) { | ||
176 | /* load p and q values */ | ||
177 | p1=ell[0]; | ||
178 | q1=ex[0]; | ||
179 | p2=ell[lskip1]; | ||
180 | p3=ell[lskip2]; | ||
181 | p4=ell[lskip3]; | ||
182 | /* compute outer product and add it to the Z matrix */ | ||
183 | Z11 += p1 * q1; | ||
184 | Z21 += p2 * q1; | ||
185 | Z31 += p3 * q1; | ||
186 | Z41 += p4 * q1; | ||
187 | /* advance pointers */ | ||
188 | ell += 1; | ||
189 | ex += 1; | ||
190 | } | ||
191 | /* finish computing the X(i) block */ | ||
192 | Z11 = ex[0] - Z11; | ||
193 | ex[0] = Z11; | ||
194 | p1 = ell[lskip1]; | ||
195 | Z21 = ex[1] - Z21 - p1*Z11; | ||
196 | ex[1] = Z21; | ||
197 | p1 = ell[lskip2]; | ||
198 | p2 = ell[1+lskip2]; | ||
199 | Z31 = ex[2] - Z31 - p1*Z11 - p2*Z21; | ||
200 | ex[2] = Z31; | ||
201 | p1 = ell[lskip3]; | ||
202 | p2 = ell[1+lskip3]; | ||
203 | p3 = ell[2+lskip3]; | ||
204 | Z41 = ex[3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31; | ||
205 | ex[3] = Z41; | ||
206 | /* end of outer loop */ | ||
207 | } | ||
208 | /* compute rows at end that are not a multiple of block size */ | ||
209 | for (; i < n; i++) { | ||
210 | /* compute all 1 x 1 block of X, from rows i..i+1-1 */ | ||
211 | /* set the Z matrix to 0 */ | ||
212 | Z11=0; | ||
213 | ell = L + i*lskip1; | ||
214 | ex = B; | ||
215 | /* the inner loop that computes outer products and adds them to Z */ | ||
216 | for (j=i-12; j >= 0; j -= 12) { | ||
217 | /* load p and q values */ | ||
218 | p1=ell[0]; | ||
219 | q1=ex[0]; | ||
220 | /* compute outer product and add it to the Z matrix */ | ||
221 | Z11 += p1 * q1; | ||
222 | /* load p and q values */ | ||
223 | p1=ell[1]; | ||
224 | q1=ex[1]; | ||
225 | /* compute outer product and add it to the Z matrix */ | ||
226 | Z11 += p1 * q1; | ||
227 | /* load p and q values */ | ||
228 | p1=ell[2]; | ||
229 | q1=ex[2]; | ||
230 | /* compute outer product and add it to the Z matrix */ | ||
231 | Z11 += p1 * q1; | ||
232 | /* load p and q values */ | ||
233 | p1=ell[3]; | ||
234 | q1=ex[3]; | ||
235 | /* compute outer product and add it to the Z matrix */ | ||
236 | Z11 += p1 * q1; | ||
237 | /* load p and q values */ | ||
238 | p1=ell[4]; | ||
239 | q1=ex[4]; | ||
240 | /* compute outer product and add it to the Z matrix */ | ||
241 | Z11 += p1 * q1; | ||
242 | /* load p and q values */ | ||
243 | p1=ell[5]; | ||
244 | q1=ex[5]; | ||
245 | /* compute outer product and add it to the Z matrix */ | ||
246 | Z11 += p1 * q1; | ||
247 | /* load p and q values */ | ||
248 | p1=ell[6]; | ||
249 | q1=ex[6]; | ||
250 | /* compute outer product and add it to the Z matrix */ | ||
251 | Z11 += p1 * q1; | ||
252 | /* load p and q values */ | ||
253 | p1=ell[7]; | ||
254 | q1=ex[7]; | ||
255 | /* compute outer product and add it to the Z matrix */ | ||
256 | Z11 += p1 * q1; | ||
257 | /* load p and q values */ | ||
258 | p1=ell[8]; | ||
259 | q1=ex[8]; | ||
260 | /* compute outer product and add it to the Z matrix */ | ||
261 | Z11 += p1 * q1; | ||
262 | /* load p and q values */ | ||
263 | p1=ell[9]; | ||
264 | q1=ex[9]; | ||
265 | /* compute outer product and add it to the Z matrix */ | ||
266 | Z11 += p1 * q1; | ||
267 | /* load p and q values */ | ||
268 | p1=ell[10]; | ||
269 | q1=ex[10]; | ||
270 | /* compute outer product and add it to the Z matrix */ | ||
271 | Z11 += p1 * q1; | ||
272 | /* load p and q values */ | ||
273 | p1=ell[11]; | ||
274 | q1=ex[11]; | ||
275 | /* compute outer product and add it to the Z matrix */ | ||
276 | Z11 += p1 * q1; | ||
277 | /* advance pointers */ | ||
278 | ell += 12; | ||
279 | ex += 12; | ||
280 | /* end of inner loop */ | ||
281 | } | ||
282 | /* compute left-over iterations */ | ||
283 | j += 12; | ||
284 | for (; j > 0; j--) { | ||
285 | /* load p and q values */ | ||
286 | p1=ell[0]; | ||
287 | q1=ex[0]; | ||
288 | /* compute outer product and add it to the Z matrix */ | ||
289 | Z11 += p1 * q1; | ||
290 | /* advance pointers */ | ||
291 | ell += 1; | ||
292 | ex += 1; | ||
293 | } | ||
294 | /* finish computing the X(i) block */ | ||
295 | Z11 = ex[0] - Z11; | ||
296 | ex[0] = Z11; | ||
297 | } | ||
298 | } | ||