diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/ode-0.9/ode/src/fastltsolve.c | 199 |
1 files changed, 199 insertions, 0 deletions
diff --git a/libraries/ode-0.9/ode/src/fastltsolve.c b/libraries/ode-0.9/ode/src/fastltsolve.c new file mode 100644 index 0000000..eb950f6 --- /dev/null +++ b/libraries/ode-0.9/ode/src/fastltsolve.c | |||
@@ -0,0 +1,199 @@ | |||
1 | /* generated code, do not edit. */ | ||
2 | |||
3 | #include "ode/matrix.h" | ||
4 | |||
5 | /* solve L^T * x=b, with b containing 1 right hand side. | ||
6 | * L is an n*n lower triangular matrix with ones on the diagonal. | ||
7 | * L is stored by rows and its leading dimension is lskip. | ||
8 | * b is an n*1 matrix that contains the right hand side. | ||
9 | * b is overwritten with x. | ||
10 | * this processes blocks of 4. | ||
11 | */ | ||
12 | |||
13 | void dSolveL1T (const dReal *L, dReal *B, int n, int lskip1) | ||
14 | { | ||
15 | /* declare variables - Z matrix, p and q vectors, etc */ | ||
16 | dReal Z11,m11,Z21,m21,Z31,m31,Z41,m41,p1,q1,p2,p3,p4,*ex; | ||
17 | const dReal *ell; | ||
18 | int lskip2,lskip3,i,j; | ||
19 | /* special handling for L and B because we're solving L1 *transpose* */ | ||
20 | L = L + (n-1)*(lskip1+1); | ||
21 | B = B + n-1; | ||
22 | lskip1 = -lskip1; | ||
23 | /* compute lskip values */ | ||
24 | lskip2 = 2*lskip1; | ||
25 | lskip3 = 3*lskip1; | ||
26 | /* compute all 4 x 1 blocks of X */ | ||
27 | for (i=0; i <= n-4; i+=4) { | ||
28 | /* compute all 4 x 1 block of X, from rows i..i+4-1 */ | ||
29 | /* set the Z matrix to 0 */ | ||
30 | Z11=0; | ||
31 | Z21=0; | ||
32 | Z31=0; | ||
33 | Z41=0; | ||
34 | ell = L - i; | ||
35 | ex = B; | ||
36 | /* the inner loop that computes outer products and adds them to Z */ | ||
37 | for (j=i-4; j >= 0; j -= 4) { | ||
38 | /* load p and q values */ | ||
39 | p1=ell[0]; | ||
40 | q1=ex[0]; | ||
41 | p2=ell[-1]; | ||
42 | p3=ell[-2]; | ||
43 | p4=ell[-3]; | ||
44 | /* compute outer product and add it to the Z matrix */ | ||
45 | m11 = p1 * q1; | ||
46 | m21 = p2 * q1; | ||
47 | m31 = p3 * q1; | ||
48 | m41 = p4 * q1; | ||
49 | ell += lskip1; | ||
50 | Z11 += m11; | ||
51 | Z21 += m21; | ||
52 | Z31 += m31; | ||
53 | Z41 += m41; | ||
54 | /* load p and q values */ | ||
55 | p1=ell[0]; | ||
56 | q1=ex[-1]; | ||
57 | p2=ell[-1]; | ||
58 | p3=ell[-2]; | ||
59 | p4=ell[-3]; | ||
60 | /* compute outer product and add it to the Z matrix */ | ||
61 | m11 = p1 * q1; | ||
62 | m21 = p2 * q1; | ||
63 | m31 = p3 * q1; | ||
64 | m41 = p4 * q1; | ||
65 | ell += lskip1; | ||
66 | Z11 += m11; | ||
67 | Z21 += m21; | ||
68 | Z31 += m31; | ||
69 | Z41 += m41; | ||
70 | /* load p and q values */ | ||
71 | p1=ell[0]; | ||
72 | q1=ex[-2]; | ||
73 | p2=ell[-1]; | ||
74 | p3=ell[-2]; | ||
75 | p4=ell[-3]; | ||
76 | /* compute outer product and add it to the Z matrix */ | ||
77 | m11 = p1 * q1; | ||
78 | m21 = p2 * q1; | ||
79 | m31 = p3 * q1; | ||
80 | m41 = p4 * q1; | ||
81 | ell += lskip1; | ||
82 | Z11 += m11; | ||
83 | Z21 += m21; | ||
84 | Z31 += m31; | ||
85 | Z41 += m41; | ||
86 | /* load p and q values */ | ||
87 | p1=ell[0]; | ||
88 | q1=ex[-3]; | ||
89 | p2=ell[-1]; | ||
90 | p3=ell[-2]; | ||
91 | p4=ell[-3]; | ||
92 | /* compute outer product and add it to the Z matrix */ | ||
93 | m11 = p1 * q1; | ||
94 | m21 = p2 * q1; | ||
95 | m31 = p3 * q1; | ||
96 | m41 = p4 * q1; | ||
97 | ell += lskip1; | ||
98 | ex -= 4; | ||
99 | Z11 += m11; | ||
100 | Z21 += m21; | ||
101 | Z31 += m31; | ||
102 | Z41 += m41; | ||
103 | /* end of inner loop */ | ||
104 | } | ||
105 | /* compute left-over iterations */ | ||
106 | j += 4; | ||
107 | for (; j > 0; j--) { | ||
108 | /* load p and q values */ | ||
109 | p1=ell[0]; | ||
110 | q1=ex[0]; | ||
111 | p2=ell[-1]; | ||
112 | p3=ell[-2]; | ||
113 | p4=ell[-3]; | ||
114 | /* compute outer product and add it to the Z matrix */ | ||
115 | m11 = p1 * q1; | ||
116 | m21 = p2 * q1; | ||
117 | m31 = p3 * q1; | ||
118 | m41 = p4 * q1; | ||
119 | ell += lskip1; | ||
120 | ex -= 1; | ||
121 | Z11 += m11; | ||
122 | Z21 += m21; | ||
123 | Z31 += m31; | ||
124 | Z41 += m41; | ||
125 | } | ||
126 | /* finish computing the X(i) block */ | ||
127 | Z11 = ex[0] - Z11; | ||
128 | ex[0] = Z11; | ||
129 | p1 = ell[-1]; | ||
130 | Z21 = ex[-1] - Z21 - p1*Z11; | ||
131 | ex[-1] = Z21; | ||
132 | p1 = ell[-2]; | ||
133 | p2 = ell[-2+lskip1]; | ||
134 | Z31 = ex[-2] - Z31 - p1*Z11 - p2*Z21; | ||
135 | ex[-2] = Z31; | ||
136 | p1 = ell[-3]; | ||
137 | p2 = ell[-3+lskip1]; | ||
138 | p3 = ell[-3+lskip2]; | ||
139 | Z41 = ex[-3] - Z41 - p1*Z11 - p2*Z21 - p3*Z31; | ||
140 | ex[-3] = Z41; | ||
141 | /* end of outer loop */ | ||
142 | } | ||
143 | /* compute rows at end that are not a multiple of block size */ | ||
144 | for (; i < n; i++) { | ||
145 | /* compute all 1 x 1 block of X, from rows i..i+1-1 */ | ||
146 | /* set the Z matrix to 0 */ | ||
147 | Z11=0; | ||
148 | ell = L - i; | ||
149 | ex = B; | ||
150 | /* the inner loop that computes outer products and adds them to Z */ | ||
151 | for (j=i-4; j >= 0; j -= 4) { | ||
152 | /* load p and q values */ | ||
153 | p1=ell[0]; | ||
154 | q1=ex[0]; | ||
155 | /* compute outer product and add it to the Z matrix */ | ||
156 | m11 = p1 * q1; | ||
157 | ell += lskip1; | ||
158 | Z11 += m11; | ||
159 | /* load p and q values */ | ||
160 | p1=ell[0]; | ||
161 | q1=ex[-1]; | ||
162 | /* compute outer product and add it to the Z matrix */ | ||
163 | m11 = p1 * q1; | ||
164 | ell += lskip1; | ||
165 | Z11 += m11; | ||
166 | /* load p and q values */ | ||
167 | p1=ell[0]; | ||
168 | q1=ex[-2]; | ||
169 | /* compute outer product and add it to the Z matrix */ | ||
170 | m11 = p1 * q1; | ||
171 | ell += lskip1; | ||
172 | Z11 += m11; | ||
173 | /* load p and q values */ | ||
174 | p1=ell[0]; | ||
175 | q1=ex[-3]; | ||
176 | /* compute outer product and add it to the Z matrix */ | ||
177 | m11 = p1 * q1; | ||
178 | ell += lskip1; | ||
179 | ex -= 4; | ||
180 | Z11 += m11; | ||
181 | /* end of inner loop */ | ||
182 | } | ||
183 | /* compute left-over iterations */ | ||
184 | j += 4; | ||
185 | for (; j > 0; j--) { | ||
186 | /* load p and q values */ | ||
187 | p1=ell[0]; | ||
188 | q1=ex[0]; | ||
189 | /* compute outer product and add it to the Z matrix */ | ||
190 | m11 = p1 * q1; | ||
191 | ell += lskip1; | ||
192 | ex -= 1; | ||
193 | Z11 += m11; | ||
194 | } | ||
195 | /* finish computing the X(i) block */ | ||
196 | Z11 = ex[0] - Z11; | ||
197 | ex[0] = Z11; | ||
198 | } | ||
199 | } | ||