diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/ode-0.9/ode/src/quickstep.cpp | 880 |
1 files changed, 880 insertions, 0 deletions
diff --git a/libraries/ode-0.9/ode/src/quickstep.cpp b/libraries/ode-0.9/ode/src/quickstep.cpp new file mode 100644 index 0000000..07aa9f7 --- /dev/null +++ b/libraries/ode-0.9/ode/src/quickstep.cpp | |||
@@ -0,0 +1,880 @@ | |||
1 | /************************************************************************* | ||
2 | * * | ||
3 | * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith. * | ||
4 | * All rights reserved. Email: russ@q12.org Web: www.q12.org * | ||
5 | * * | ||
6 | * This library is free software; you can redistribute it and/or * | ||
7 | * modify it under the terms of EITHER: * | ||
8 | * (1) The GNU Lesser General Public License as published by the Free * | ||
9 | * Software Foundation; either version 2.1 of the License, or (at * | ||
10 | * your option) any later version. The text of the GNU Lesser * | ||
11 | * General Public License is included with this library in the * | ||
12 | * file LICENSE.TXT. * | ||
13 | * (2) The BSD-style license that is included with this library in * | ||
14 | * the file LICENSE-BSD.TXT. * | ||
15 | * * | ||
16 | * This library is distributed in the hope that it will be useful, * | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of * | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files * | ||
19 | * LICENSE.TXT and LICENSE-BSD.TXT for more details. * | ||
20 | * * | ||
21 | *************************************************************************/ | ||
22 | |||
23 | #include "objects.h" | ||
24 | #include "joint.h" | ||
25 | #include <ode/config.h> | ||
26 | #include <ode/odemath.h> | ||
27 | #include <ode/rotation.h> | ||
28 | #include <ode/timer.h> | ||
29 | #include <ode/error.h> | ||
30 | #include <ode/matrix.h> | ||
31 | #include <ode/misc.h> | ||
32 | #include "lcp.h" | ||
33 | #include "util.h" | ||
34 | |||
35 | #define ALLOCA dALLOCA16 | ||
36 | |||
37 | typedef const dReal *dRealPtr; | ||
38 | typedef dReal *dRealMutablePtr; | ||
39 | #define dRealArray(name,n) dReal name[n]; | ||
40 | #define dRealAllocaArray(name,n) dReal *name = (dReal*) ALLOCA ((n)*sizeof(dReal)); | ||
41 | |||
42 | //*************************************************************************** | ||
43 | // configuration | ||
44 | |||
45 | // for the SOR and CG methods: | ||
46 | // uncomment the following line to use warm starting. this definitely | ||
47 | // help for motor-driven joints. unfortunately it appears to hurt | ||
48 | // with high-friction contacts using the SOR method. use with care | ||
49 | |||
50 | //#define WARM_STARTING 1 | ||
51 | |||
52 | |||
53 | // for the SOR method: | ||
54 | // uncomment the following line to determine a new constraint-solving | ||
55 | // order for each iteration. however, the qsort per iteration is expensive, | ||
56 | // and the optimal order is somewhat problem dependent. | ||
57 | // @@@ try the leaf->root ordering. | ||
58 | |||
59 | //#define REORDER_CONSTRAINTS 1 | ||
60 | |||
61 | |||
62 | // for the SOR method: | ||
63 | // uncomment the following line to randomly reorder constraint rows | ||
64 | // during the solution. depending on the situation, this can help a lot | ||
65 | // or hardly at all, but it doesn't seem to hurt. | ||
66 | |||
67 | #define RANDOMLY_REORDER_CONSTRAINTS 1 | ||
68 | |||
69 | //**************************************************************************** | ||
70 | // special matrix multipliers | ||
71 | |||
72 | // multiply block of B matrix (q x 6) with 12 dReal per row with C vektor (q) | ||
73 | static void Multiply1_12q1 (dReal *A, dReal *B, dReal *C, int q) | ||
74 | { | ||
75 | int k; | ||
76 | dReal sum; | ||
77 | dIASSERT (q>0 && A && B && C); | ||
78 | sum = 0; | ||
79 | for (k=0; k<q; k++) sum += B[k*12] * C[k]; | ||
80 | A[0] = sum; | ||
81 | sum = 0; | ||
82 | for (k=0; k<q; k++) sum += B[1+k*12] * C[k]; | ||
83 | A[1] = sum; | ||
84 | sum = 0; | ||
85 | for (k=0; k<q; k++) sum += B[2+k*12] * C[k]; | ||
86 | A[2] = sum; | ||
87 | sum = 0; | ||
88 | for (k=0; k<q; k++) sum += B[3+k*12] * C[k]; | ||
89 | A[3] = sum; | ||
90 | sum = 0; | ||
91 | for (k=0; k<q; k++) sum += B[4+k*12] * C[k]; | ||
92 | A[4] = sum; | ||
93 | sum = 0; | ||
94 | for (k=0; k<q; k++) sum += B[5+k*12] * C[k]; | ||
95 | A[5] = sum; | ||
96 | } | ||
97 | |||
98 | //*************************************************************************** | ||
99 | // testing stuff | ||
100 | |||
101 | #ifdef TIMING | ||
102 | #define IFTIMING(x) x | ||
103 | #else | ||
104 | #define IFTIMING(x) /* */ | ||
105 | #endif | ||
106 | |||
107 | //*************************************************************************** | ||
108 | // various common computations involving the matrix J | ||
109 | |||
110 | // compute iMJ = inv(M)*J' | ||
111 | |||
112 | static void compute_invM_JT (int m, dRealMutablePtr J, dRealMutablePtr iMJ, int *jb, | ||
113 | dxBody * const *body, dRealPtr invI) | ||
114 | { | ||
115 | int i,j; | ||
116 | dRealMutablePtr iMJ_ptr = iMJ; | ||
117 | dRealMutablePtr J_ptr = J; | ||
118 | for (i=0; i<m; i++) { | ||
119 | int b1 = jb[i*2]; | ||
120 | int b2 = jb[i*2+1]; | ||
121 | dReal k = body[b1]->invMass; | ||
122 | for (j=0; j<3; j++) iMJ_ptr[j] = k*J_ptr[j]; | ||
123 | dMULTIPLY0_331 (iMJ_ptr + 3, invI + 12*b1, J_ptr + 3); | ||
124 | if (b2 >= 0) { | ||
125 | k = body[b2]->invMass; | ||
126 | for (j=0; j<3; j++) iMJ_ptr[j+6] = k*J_ptr[j+6]; | ||
127 | dMULTIPLY0_331 (iMJ_ptr + 9, invI + 12*b2, J_ptr + 9); | ||
128 | } | ||
129 | J_ptr += 12; | ||
130 | iMJ_ptr += 12; | ||
131 | } | ||
132 | } | ||
133 | |||
134 | |||
135 | // compute out = inv(M)*J'*in. | ||
136 | |||
137 | static void multiply_invM_JT (int m, int nb, dRealMutablePtr iMJ, int *jb, | ||
138 | dRealMutablePtr in, dRealMutablePtr out) | ||
139 | { | ||
140 | int i,j; | ||
141 | dSetZero (out,6*nb); | ||
142 | dRealPtr iMJ_ptr = iMJ; | ||
143 | for (i=0; i<m; i++) { | ||
144 | int b1 = jb[i*2]; | ||
145 | int b2 = jb[i*2+1]; | ||
146 | dRealMutablePtr out_ptr = out + b1*6; | ||
147 | for (j=0; j<6; j++) out_ptr[j] += iMJ_ptr[j] * in[i]; | ||
148 | iMJ_ptr += 6; | ||
149 | if (b2 >= 0) { | ||
150 | out_ptr = out + b2*6; | ||
151 | for (j=0; j<6; j++) out_ptr[j] += iMJ_ptr[j] * in[i]; | ||
152 | } | ||
153 | iMJ_ptr += 6; | ||
154 | } | ||
155 | } | ||
156 | |||
157 | |||
158 | // compute out = J*in. | ||
159 | |||
160 | static void multiply_J (int m, dRealMutablePtr J, int *jb, | ||
161 | dRealMutablePtr in, dRealMutablePtr out) | ||
162 | { | ||
163 | int i,j; | ||
164 | dRealPtr J_ptr = J; | ||
165 | for (i=0; i<m; i++) { | ||
166 | int b1 = jb[i*2]; | ||
167 | int b2 = jb[i*2+1]; | ||
168 | dReal sum = 0; | ||
169 | dRealMutablePtr in_ptr = in + b1*6; | ||
170 | for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j]; | ||
171 | J_ptr += 6; | ||
172 | if (b2 >= 0) { | ||
173 | in_ptr = in + b2*6; | ||
174 | for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j]; | ||
175 | } | ||
176 | J_ptr += 6; | ||
177 | out[i] = sum; | ||
178 | } | ||
179 | } | ||
180 | |||
181 | |||
182 | // compute out = (J*inv(M)*J' + cfm)*in. | ||
183 | // use z as an nb*6 temporary. | ||
184 | |||
185 | static void multiply_J_invM_JT (int m, int nb, dRealMutablePtr J, dRealMutablePtr iMJ, int *jb, | ||
186 | dRealPtr cfm, dRealMutablePtr z, dRealMutablePtr in, dRealMutablePtr out) | ||
187 | { | ||
188 | multiply_invM_JT (m,nb,iMJ,jb,in,z); | ||
189 | multiply_J (m,J,jb,z,out); | ||
190 | |||
191 | // add cfm | ||
192 | for (int i=0; i<m; i++) out[i] += cfm[i] * in[i]; | ||
193 | } | ||
194 | |||
195 | //*************************************************************************** | ||
196 | // conjugate gradient method with jacobi preconditioner | ||
197 | // THIS IS EXPERIMENTAL CODE that doesn't work too well, so it is ifdefed out. | ||
198 | // | ||
199 | // adding CFM seems to be critically important to this method. | ||
200 | |||
201 | #if 0 | ||
202 | |||
203 | static inline dReal dot (int n, dRealPtr x, dRealPtr y) | ||
204 | { | ||
205 | dReal sum=0; | ||
206 | for (int i=0; i<n; i++) sum += x[i]*y[i]; | ||
207 | return sum; | ||
208 | } | ||
209 | |||
210 | |||
211 | // x = y + z*alpha | ||
212 | |||
213 | static inline void add (int n, dRealMutablePtr x, dRealPtr y, dRealPtr z, dReal alpha) | ||
214 | { | ||
215 | for (int i=0; i<n; i++) x[i] = y[i] + z[i]*alpha; | ||
216 | } | ||
217 | |||
218 | |||
219 | static void CG_LCP (int m, int nb, dRealMutablePtr J, int *jb, dxBody * const *body, | ||
220 | dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr fc, dRealMutablePtr b, | ||
221 | dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex, | ||
222 | dxQuickStepParameters *qs) | ||
223 | { | ||
224 | int i,j; | ||
225 | const int num_iterations = qs->num_iterations; | ||
226 | |||
227 | // precompute iMJ = inv(M)*J' | ||
228 | dRealAllocaArray (iMJ,m*12); | ||
229 | compute_invM_JT (m,J,iMJ,jb,body,invI); | ||
230 | |||
231 | dReal last_rho = 0; | ||
232 | dRealAllocaArray (r,m); | ||
233 | dRealAllocaArray (z,m); | ||
234 | dRealAllocaArray (p,m); | ||
235 | dRealAllocaArray (q,m); | ||
236 | |||
237 | // precompute 1 / diagonals of A | ||
238 | dRealAllocaArray (Ad,m); | ||
239 | dRealPtr iMJ_ptr = iMJ; | ||
240 | dRealPtr J_ptr = J; | ||
241 | for (i=0; i<m; i++) { | ||
242 | dReal sum = 0; | ||
243 | for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j]; | ||
244 | if (jb[i*2+1] >= 0) { | ||
245 | for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j]; | ||
246 | } | ||
247 | iMJ_ptr += 12; | ||
248 | J_ptr += 12; | ||
249 | Ad[i] = REAL(1.0) / (sum + cfm[i]); | ||
250 | } | ||
251 | |||
252 | #ifdef WARM_STARTING | ||
253 | // compute residual r = b - A*lambda | ||
254 | multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r); | ||
255 | for (i=0; i<m; i++) r[i] = b[i] - r[i]; | ||
256 | #else | ||
257 | dSetZero (lambda,m); | ||
258 | memcpy (r,b,m*sizeof(dReal)); // residual r = b - A*lambda | ||
259 | #endif | ||
260 | |||
261 | for (int iteration=0; iteration < num_iterations; iteration++) { | ||
262 | for (i=0; i<m; i++) z[i] = r[i]*Ad[i]; // z = inv(M)*r | ||
263 | dReal rho = dot (m,r,z); // rho = r'*z | ||
264 | |||
265 | // @@@ | ||
266 | // we must check for convergence, otherwise rho will go to 0 if | ||
267 | // we get an exact solution, which will introduce NaNs into the equations. | ||
268 | if (rho < 1e-10) { | ||
269 | printf ("CG returned at iteration %d\n",iteration); | ||
270 | break; | ||
271 | } | ||
272 | |||
273 | if (iteration==0) { | ||
274 | memcpy (p,z,m*sizeof(dReal)); // p = z | ||
275 | } | ||
276 | else { | ||
277 | add (m,p,z,p,rho/last_rho); // p = z + (rho/last_rho)*p | ||
278 | } | ||
279 | |||
280 | // compute q = (J*inv(M)*J')*p | ||
281 | multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,p,q); | ||
282 | |||
283 | dReal alpha = rho/dot (m,p,q); // alpha = rho/(p'*q) | ||
284 | add (m,lambda,lambda,p,alpha); // lambda = lambda + alpha*p | ||
285 | add (m,r,r,q,-alpha); // r = r - alpha*q | ||
286 | last_rho = rho; | ||
287 | } | ||
288 | |||
289 | // compute fc = inv(M)*J'*lambda | ||
290 | multiply_invM_JT (m,nb,iMJ,jb,lambda,fc); | ||
291 | |||
292 | #if 0 | ||
293 | // measure solution error | ||
294 | multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r); | ||
295 | dReal error = 0; | ||
296 | for (i=0; i<m; i++) error += dFabs(r[i] - b[i]); | ||
297 | printf ("lambda error = %10.6e\n",error); | ||
298 | #endif | ||
299 | } | ||
300 | |||
301 | #endif | ||
302 | |||
303 | //*************************************************************************** | ||
304 | // SOR-LCP method | ||
305 | |||
306 | // nb is the number of bodies in the body array. | ||
307 | // J is an m*12 matrix of constraint rows | ||
308 | // jb is an array of first and second body numbers for each constraint row | ||
309 | // invI is the global frame inverse inertia for each body (stacked 3x3 matrices) | ||
310 | // | ||
311 | // this returns lambda and fc (the constraint force). | ||
312 | // note: fc is returned as inv(M)*J'*lambda, the constraint force is actually J'*lambda | ||
313 | // | ||
314 | // b, lo and hi are modified on exit | ||
315 | |||
316 | |||
317 | struct IndexError { | ||
318 | dReal error; // error to sort on | ||
319 | int findex; | ||
320 | int index; // row index | ||
321 | }; | ||
322 | |||
323 | |||
324 | #ifdef REORDER_CONSTRAINTS | ||
325 | |||
326 | static int compare_index_error (const void *a, const void *b) | ||
327 | { | ||
328 | const IndexError *i1 = (IndexError*) a; | ||
329 | const IndexError *i2 = (IndexError*) b; | ||
330 | if (i1->findex < 0 && i2->findex >= 0) return -1; | ||
331 | if (i1->findex >= 0 && i2->findex < 0) return 1; | ||
332 | if (i1->error < i2->error) return -1; | ||
333 | if (i1->error > i2->error) return 1; | ||
334 | return 0; | ||
335 | } | ||
336 | |||
337 | #endif | ||
338 | |||
339 | |||
340 | static void SOR_LCP (int m, int nb, dRealMutablePtr J, int *jb, dxBody * const *body, | ||
341 | dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr fc, dRealMutablePtr b, | ||
342 | dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex, | ||
343 | dxQuickStepParameters *qs) | ||
344 | { | ||
345 | const int num_iterations = qs->num_iterations; | ||
346 | const dReal sor_w = qs->w; // SOR over-relaxation parameter | ||
347 | |||
348 | int i,j; | ||
349 | |||
350 | #ifdef WARM_STARTING | ||
351 | // for warm starting, this seems to be necessary to prevent | ||
352 | // jerkiness in motor-driven joints. i have no idea why this works. | ||
353 | for (i=0; i<m; i++) lambda[i] *= 0.9; | ||
354 | #else | ||
355 | dSetZero (lambda,m); | ||
356 | #endif | ||
357 | |||
358 | #ifdef REORDER_CONSTRAINTS | ||
359 | // the lambda computed at the previous iteration. | ||
360 | // this is used to measure error for when we are reordering the indexes. | ||
361 | dRealAllocaArray (last_lambda,m); | ||
362 | #endif | ||
363 | |||
364 | // a copy of the 'hi' vector in case findex[] is being used | ||
365 | dRealAllocaArray (hicopy,m); | ||
366 | memcpy (hicopy,hi,m*sizeof(dReal)); | ||
367 | |||
368 | // precompute iMJ = inv(M)*J' | ||
369 | dRealAllocaArray (iMJ,m*12); | ||
370 | compute_invM_JT (m,J,iMJ,jb,body,invI); | ||
371 | |||
372 | // compute fc=(inv(M)*J')*lambda. we will incrementally maintain fc | ||
373 | // as we change lambda. | ||
374 | #ifdef WARM_STARTING | ||
375 | multiply_invM_JT (m,nb,iMJ,jb,lambda,fc); | ||
376 | #else | ||
377 | dSetZero (fc,nb*6); | ||
378 | #endif | ||
379 | |||
380 | // precompute 1 / diagonals of A | ||
381 | dRealAllocaArray (Ad,m); | ||
382 | dRealPtr iMJ_ptr = iMJ; | ||
383 | dRealMutablePtr J_ptr = J; | ||
384 | for (i=0; i<m; i++) { | ||
385 | dReal sum = 0; | ||
386 | for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j]; | ||
387 | if (jb[i*2+1] >= 0) { | ||
388 | for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j]; | ||
389 | } | ||
390 | iMJ_ptr += 12; | ||
391 | J_ptr += 12; | ||
392 | Ad[i] = sor_w / (sum + cfm[i]); | ||
393 | } | ||
394 | |||
395 | // scale J and b by Ad | ||
396 | J_ptr = J; | ||
397 | for (i=0; i<m; i++) { | ||
398 | for (j=0; j<12; j++) { | ||
399 | J_ptr[0] *= Ad[i]; | ||
400 | J_ptr++; | ||
401 | } | ||
402 | b[i] *= Ad[i]; | ||
403 | |||
404 | // scale Ad by CFM. N.B. this should be done last since it is used above | ||
405 | Ad[i] *= cfm[i]; | ||
406 | } | ||
407 | |||
408 | // order to solve constraint rows in | ||
409 | IndexError *order = (IndexError*) ALLOCA (m*sizeof(IndexError)); | ||
410 | |||
411 | #ifndef REORDER_CONSTRAINTS | ||
412 | // make sure constraints with findex < 0 come first. | ||
413 | j=0; | ||
414 | int k=1; | ||
415 | |||
416 | // Fill the array from both ends | ||
417 | for (i=0; i<m; i++) | ||
418 | if (findex[i] < 0) | ||
419 | order[j++].index = i; // Place them at the front | ||
420 | else | ||
421 | order[m-k++].index = i; // Place them at the end | ||
422 | |||
423 | dIASSERT ((j+k-1)==m); // -1 since k was started at 1 and not 0 | ||
424 | #endif | ||
425 | |||
426 | for (int iteration=0; iteration < num_iterations; iteration++) { | ||
427 | |||
428 | #ifdef REORDER_CONSTRAINTS | ||
429 | // constraints with findex < 0 always come first. | ||
430 | if (iteration < 2) { | ||
431 | // for the first two iterations, solve the constraints in | ||
432 | // the given order | ||
433 | for (i=0; i<m; i++) { | ||
434 | order[i].error = i; | ||
435 | order[i].findex = findex[i]; | ||
436 | order[i].index = i; | ||
437 | } | ||
438 | } | ||
439 | else { | ||
440 | // sort the constraints so that the ones converging slowest | ||
441 | // get solved last. use the absolute (not relative) error. | ||
442 | for (i=0; i<m; i++) { | ||
443 | dReal v1 = dFabs (lambda[i]); | ||
444 | dReal v2 = dFabs (last_lambda[i]); | ||
445 | dReal max = (v1 > v2) ? v1 : v2; | ||
446 | if (max > 0) { | ||
447 | //@@@ relative error: order[i].error = dFabs(lambda[i]-last_lambda[i])/max; | ||
448 | order[i].error = dFabs(lambda[i]-last_lambda[i]); | ||
449 | } | ||
450 | else { | ||
451 | order[i].error = dInfinity; | ||
452 | } | ||
453 | order[i].findex = findex[i]; | ||
454 | order[i].index = i; | ||
455 | } | ||
456 | } | ||
457 | qsort (order,m,sizeof(IndexError),&compare_index_error); | ||
458 | |||
459 | //@@@ potential optimization: swap lambda and last_lambda pointers rather | ||
460 | // than copying the data. we must make sure lambda is properly | ||
461 | // returned to the caller | ||
462 | memcpy (last_lambda,lambda,m*sizeof(dReal)); | ||
463 | #endif | ||
464 | #ifdef RANDOMLY_REORDER_CONSTRAINTS | ||
465 | if ((iteration & 7) == 0) { | ||
466 | for (i=1; i<m; ++i) { | ||
467 | IndexError tmp = order[i]; | ||
468 | int swapi = dRandInt(i+1); | ||
469 | order[i] = order[swapi]; | ||
470 | order[swapi] = tmp; | ||
471 | } | ||
472 | } | ||
473 | #endif | ||
474 | |||
475 | for (int i=0; i<m; i++) { | ||
476 | // @@@ potential optimization: we could pre-sort J and iMJ, thereby | ||
477 | // linearizing access to those arrays. hmmm, this does not seem | ||
478 | // like a win, but we should think carefully about our memory | ||
479 | // access pattern. | ||
480 | |||
481 | int index = order[i].index; | ||
482 | J_ptr = J + index*12; | ||
483 | iMJ_ptr = iMJ + index*12; | ||
484 | |||
485 | // set the limits for this constraint. note that 'hicopy' is used. | ||
486 | // this is the place where the QuickStep method differs from the | ||
487 | // direct LCP solving method, since that method only performs this | ||
488 | // limit adjustment once per time step, whereas this method performs | ||
489 | // once per iteration per constraint row. | ||
490 | // the constraints are ordered so that all lambda[] values needed have | ||
491 | // already been computed. | ||
492 | if (findex[index] >= 0) { | ||
493 | hi[index] = dFabs (hicopy[index] * lambda[findex[index]]); | ||
494 | lo[index] = -hi[index]; | ||
495 | } | ||
496 | |||
497 | int b1 = jb[index*2]; | ||
498 | int b2 = jb[index*2+1]; | ||
499 | dReal delta = b[index] - lambda[index]*Ad[index]; | ||
500 | dRealMutablePtr fc_ptr = fc + 6*b1; | ||
501 | |||
502 | // @@@ potential optimization: SIMD-ize this and the b2 >= 0 case | ||
503 | delta -=fc_ptr[0] * J_ptr[0] + fc_ptr[1] * J_ptr[1] + | ||
504 | fc_ptr[2] * J_ptr[2] + fc_ptr[3] * J_ptr[3] + | ||
505 | fc_ptr[4] * J_ptr[4] + fc_ptr[5] * J_ptr[5]; | ||
506 | // @@@ potential optimization: handle 1-body constraints in a separate | ||
507 | // loop to avoid the cost of test & jump? | ||
508 | if (b2 >= 0) { | ||
509 | fc_ptr = fc + 6*b2; | ||
510 | delta -=fc_ptr[0] * J_ptr[6] + fc_ptr[1] * J_ptr[7] + | ||
511 | fc_ptr[2] * J_ptr[8] + fc_ptr[3] * J_ptr[9] + | ||
512 | fc_ptr[4] * J_ptr[10] + fc_ptr[5] * J_ptr[11]; | ||
513 | } | ||
514 | |||
515 | // compute lambda and clamp it to [lo,hi]. | ||
516 | // @@@ potential optimization: does SSE have clamping instructions | ||
517 | // to save test+jump penalties here? | ||
518 | dReal new_lambda = lambda[index] + delta; | ||
519 | if (new_lambda < lo[index]) { | ||
520 | delta = lo[index]-lambda[index]; | ||
521 | lambda[index] = lo[index]; | ||
522 | } | ||
523 | else if (new_lambda > hi[index]) { | ||
524 | delta = hi[index]-lambda[index]; | ||
525 | lambda[index] = hi[index]; | ||
526 | } | ||
527 | else { | ||
528 | lambda[index] = new_lambda; | ||
529 | } | ||
530 | |||
531 | //@@@ a trick that may or may not help | ||
532 | //dReal ramp = (1-((dReal)(iteration+1)/(dReal)num_iterations)); | ||
533 | //delta *= ramp; | ||
534 | |||
535 | // update fc. | ||
536 | // @@@ potential optimization: SIMD for this and the b2 >= 0 case | ||
537 | fc_ptr = fc + 6*b1; | ||
538 | fc_ptr[0] += delta * iMJ_ptr[0]; | ||
539 | fc_ptr[1] += delta * iMJ_ptr[1]; | ||
540 | fc_ptr[2] += delta * iMJ_ptr[2]; | ||
541 | fc_ptr[3] += delta * iMJ_ptr[3]; | ||
542 | fc_ptr[4] += delta * iMJ_ptr[4]; | ||
543 | fc_ptr[5] += delta * iMJ_ptr[5]; | ||
544 | // @@@ potential optimization: handle 1-body constraints in a separate | ||
545 | // loop to avoid the cost of test & jump? | ||
546 | if (b2 >= 0) { | ||
547 | fc_ptr = fc + 6*b2; | ||
548 | fc_ptr[0] += delta * iMJ_ptr[6]; | ||
549 | fc_ptr[1] += delta * iMJ_ptr[7]; | ||
550 | fc_ptr[2] += delta * iMJ_ptr[8]; | ||
551 | fc_ptr[3] += delta * iMJ_ptr[9]; | ||
552 | fc_ptr[4] += delta * iMJ_ptr[10]; | ||
553 | fc_ptr[5] += delta * iMJ_ptr[11]; | ||
554 | } | ||
555 | } | ||
556 | } | ||
557 | } | ||
558 | |||
559 | |||
560 | void dxQuickStepper (dxWorld *world, dxBody * const *body, int nb, | ||
561 | dxJoint * const *_joint, int nj, dReal stepsize) | ||
562 | { | ||
563 | int i,j; | ||
564 | IFTIMING(dTimerStart("preprocessing");) | ||
565 | |||
566 | dReal stepsize1 = dRecip(stepsize); | ||
567 | |||
568 | // number all bodies in the body list - set their tag values | ||
569 | for (i=0; i<nb; i++) body[i]->tag = i; | ||
570 | |||
571 | // make a local copy of the joint array, because we might want to modify it. | ||
572 | // (the "dxJoint *const*" declaration says we're allowed to modify the joints | ||
573 | // but not the joint array, because the caller might need it unchanged). | ||
574 | //@@@ do we really need to do this? we'll be sorting constraint rows individually, not joints | ||
575 | dxJoint **joint = (dxJoint**) ALLOCA (nj * sizeof(dxJoint*)); | ||
576 | memcpy (joint,_joint,nj * sizeof(dxJoint*)); | ||
577 | |||
578 | // for all bodies, compute the inertia tensor and its inverse in the global | ||
579 | // frame, and compute the rotational force and add it to the torque | ||
580 | // accumulator. I and invI are a vertical stack of 3x4 matrices, one per body. | ||
581 | //dRealAllocaArray (I,3*4*nb); // need to remember all I's for feedback purposes only | ||
582 | dRealAllocaArray (invI,3*4*nb); | ||
583 | for (i=0; i<nb; i++) { | ||
584 | dMatrix3 tmp; | ||
585 | |||
586 | // compute inverse inertia tensor in global frame | ||
587 | dMULTIPLY2_333 (tmp,body[i]->invI,body[i]->posr.R); | ||
588 | dMULTIPLY0_333 (invI+i*12,body[i]->posr.R,tmp); | ||
589 | #ifdef dGYROSCOPIC | ||
590 | dMatrix3 I; | ||
591 | // compute inertia tensor in global frame | ||
592 | dMULTIPLY2_333 (tmp,body[i]->mass.I,body[i]->posr.R); | ||
593 | //dMULTIPLY0_333 (I+i*12,body[i]->posr.R,tmp); | ||
594 | dMULTIPLY0_333 (I,body[i]->posr.R,tmp); | ||
595 | // compute rotational force | ||
596 | //dMULTIPLY0_331 (tmp,I+i*12,body[i]->avel); | ||
597 | dMULTIPLY0_331 (tmp,I,body[i]->avel); | ||
598 | dCROSS (body[i]->tacc,-=,body[i]->avel,tmp); | ||
599 | #endif | ||
600 | } | ||
601 | |||
602 | // add the gravity force to all bodies | ||
603 | for (i=0; i<nb; i++) { | ||
604 | if ((body[i]->flags & dxBodyNoGravity)==0) { | ||
605 | body[i]->facc[0] += body[i]->mass.mass * world->gravity[0]; | ||
606 | body[i]->facc[1] += body[i]->mass.mass * world->gravity[1]; | ||
607 | body[i]->facc[2] += body[i]->mass.mass * world->gravity[2]; | ||
608 | } | ||
609 | } | ||
610 | |||
611 | // get joint information (m = total constraint dimension, nub = number of unbounded variables). | ||
612 | // joints with m=0 are inactive and are removed from the joints array | ||
613 | // entirely, so that the code that follows does not consider them. | ||
614 | //@@@ do we really need to save all the info1's | ||
615 | dxJoint::Info1 *info = (dxJoint::Info1*) ALLOCA (nj*sizeof(dxJoint::Info1)); | ||
616 | for (i=0, j=0; j<nj; j++) { // i=dest, j=src | ||
617 | joint[j]->vtable->getInfo1 (joint[j],info+i); | ||
618 | dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m); | ||
619 | if (info[i].m > 0) { | ||
620 | joint[i] = joint[j]; | ||
621 | i++; | ||
622 | } | ||
623 | } | ||
624 | nj = i; | ||
625 | |||
626 | // create the row offset array | ||
627 | int m = 0; | ||
628 | int *ofs = (int*) ALLOCA (nj*sizeof(int)); | ||
629 | for (i=0; i<nj; i++) { | ||
630 | ofs[i] = m; | ||
631 | m += info[i].m; | ||
632 | } | ||
633 | |||
634 | // if there are constraints, compute the constraint force | ||
635 | dRealAllocaArray (J,m*12); | ||
636 | int *jb = (int*) ALLOCA (m*2*sizeof(int)); | ||
637 | if (m > 0) { | ||
638 | // create a constraint equation right hand side vector `c', a constraint | ||
639 | // force mixing vector `cfm', and LCP low and high bound vectors, and an | ||
640 | // 'findex' vector. | ||
641 | dRealAllocaArray (c,m); | ||
642 | dRealAllocaArray (cfm,m); | ||
643 | dRealAllocaArray (lo,m); | ||
644 | dRealAllocaArray (hi,m); | ||
645 | int *findex = (int*) ALLOCA (m*sizeof(int)); | ||
646 | dSetZero (c,m); | ||
647 | dSetValue (cfm,m,world->global_cfm); | ||
648 | dSetValue (lo,m,-dInfinity); | ||
649 | dSetValue (hi,m, dInfinity); | ||
650 | for (i=0; i<m; i++) findex[i] = -1; | ||
651 | |||
652 | // get jacobian data from constraints. an m*12 matrix will be created | ||
653 | // to store the two jacobian blocks from each constraint. it has this | ||
654 | // format: | ||
655 | // | ||
656 | // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 \ . | ||
657 | // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 }-- jacobian for joint 0, body 1 and body 2 (3 rows) | ||
658 | // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 / | ||
659 | // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 }--- jacobian for joint 1, body 1 and body 2 (3 rows) | ||
660 | // etc... | ||
661 | // | ||
662 | // (lll) = linear jacobian data | ||
663 | // (aaa) = angular jacobian data | ||
664 | // | ||
665 | IFTIMING (dTimerNow ("create J");) | ||
666 | dSetZero (J,m*12); | ||
667 | dxJoint::Info2 Jinfo; | ||
668 | Jinfo.rowskip = 12; | ||
669 | Jinfo.fps = stepsize1; | ||
670 | Jinfo.erp = world->global_erp; | ||
671 | int mfb = 0; // number of rows of Jacobian we will have to save for joint feedback | ||
672 | for (i=0; i<nj; i++) { | ||
673 | Jinfo.J1l = J + ofs[i]*12; | ||
674 | Jinfo.J1a = Jinfo.J1l + 3; | ||
675 | Jinfo.J2l = Jinfo.J1l + 6; | ||
676 | Jinfo.J2a = Jinfo.J1l + 9; | ||
677 | Jinfo.c = c + ofs[i]; | ||
678 | Jinfo.cfm = cfm + ofs[i]; | ||
679 | Jinfo.lo = lo + ofs[i]; | ||
680 | Jinfo.hi = hi + ofs[i]; | ||
681 | Jinfo.findex = findex + ofs[i]; | ||
682 | joint[i]->vtable->getInfo2 (joint[i],&Jinfo); | ||
683 | // adjust returned findex values for global index numbering | ||
684 | for (j=0; j<info[i].m; j++) { | ||
685 | if (findex[ofs[i] + j] >= 0) findex[ofs[i] + j] += ofs[i]; | ||
686 | } | ||
687 | if (joint[i]->feedback) | ||
688 | mfb += info[i].m; | ||
689 | } | ||
690 | |||
691 | // we need a copy of Jacobian for joint feedbacks | ||
692 | // because it gets destroyed by SOR solver | ||
693 | // instead of saving all Jacobian, we can save just rows | ||
694 | // for joints, that requested feedback (which is normaly much less) | ||
695 | dRealAllocaArray (Jcopy,mfb*12); | ||
696 | if (mfb > 0) { | ||
697 | mfb = 0; | ||
698 | for (i=0; i<nj; i++) | ||
699 | if (joint[i]->feedback) { | ||
700 | memcpy(Jcopy+mfb*12, J+ofs[i]*12, info[i].m*12*sizeof(dReal)); | ||
701 | mfb += info[i].m; | ||
702 | } | ||
703 | } | ||
704 | |||
705 | |||
706 | // create an array of body numbers for each joint row | ||
707 | int *jb_ptr = jb; | ||
708 | for (i=0; i<nj; i++) { | ||
709 | int b1 = (joint[i]->node[0].body) ? (joint[i]->node[0].body->tag) : -1; | ||
710 | int b2 = (joint[i]->node[1].body) ? (joint[i]->node[1].body->tag) : -1; | ||
711 | for (j=0; j<info[i].m; j++) { | ||
712 | jb_ptr[0] = b1; | ||
713 | jb_ptr[1] = b2; | ||
714 | jb_ptr += 2; | ||
715 | } | ||
716 | } | ||
717 | dIASSERT (jb_ptr == jb+2*m); | ||
718 | |||
719 | // compute the right hand side `rhs' | ||
720 | IFTIMING (dTimerNow ("compute rhs");) | ||
721 | dRealAllocaArray (tmp1,nb*6); | ||
722 | // put v/h + invM*fe into tmp1 | ||
723 | for (i=0; i<nb; i++) { | ||
724 | dReal body_invMass = body[i]->invMass; | ||
725 | for (j=0; j<3; j++) tmp1[i*6+j] = body[i]->facc[j] * body_invMass + body[i]->lvel[j] * stepsize1; | ||
726 | dMULTIPLY0_331 (tmp1 + i*6 + 3,invI + i*12,body[i]->tacc); | ||
727 | for (j=0; j<3; j++) tmp1[i*6+3+j] += body[i]->avel[j] * stepsize1; | ||
728 | } | ||
729 | |||
730 | // put J*tmp1 into rhs | ||
731 | dRealAllocaArray (rhs,m); | ||
732 | multiply_J (m,J,jb,tmp1,rhs); | ||
733 | |||
734 | // complete rhs | ||
735 | for (i=0; i<m; i++) rhs[i] = c[i]*stepsize1 - rhs[i]; | ||
736 | |||
737 | // scale CFM | ||
738 | for (i=0; i<m; i++) cfm[i] *= stepsize1; | ||
739 | |||
740 | // load lambda from the value saved on the previous iteration | ||
741 | dRealAllocaArray (lambda,m); | ||
742 | #ifdef WARM_STARTING | ||
743 | dSetZero (lambda,m); //@@@ shouldn't be necessary | ||
744 | for (i=0; i<nj; i++) { | ||
745 | memcpy (lambda+ofs[i],joint[i]->lambda,info[i].m * sizeof(dReal)); | ||
746 | } | ||
747 | #endif | ||
748 | |||
749 | // solve the LCP problem and get lambda and invM*constraint_force | ||
750 | IFTIMING (dTimerNow ("solving LCP problem");) | ||
751 | dRealAllocaArray (cforce,nb*6); | ||
752 | SOR_LCP (m,nb,J,jb,body,invI,lambda,cforce,rhs,lo,hi,cfm,findex,&world->qs); | ||
753 | |||
754 | #ifdef WARM_STARTING | ||
755 | // save lambda for the next iteration | ||
756 | //@@@ note that this doesn't work for contact joints yet, as they are | ||
757 | // recreated every iteration | ||
758 | for (i=0; i<nj; i++) { | ||
759 | memcpy (joint[i]->lambda,lambda+ofs[i],info[i].m * sizeof(dReal)); | ||
760 | } | ||
761 | #endif | ||
762 | |||
763 | // note that the SOR method overwrites rhs and J at this point, so | ||
764 | // they should not be used again. | ||
765 | |||
766 | // add stepsize * cforce to the body velocity | ||
767 | for (i=0; i<nb; i++) { | ||
768 | for (j=0; j<3; j++) body[i]->lvel[j] += stepsize * cforce[i*6+j]; | ||
769 | for (j=0; j<3; j++) body[i]->avel[j] += stepsize * cforce[i*6+3+j]; | ||
770 | } | ||
771 | |||
772 | // if joint feedback is requested, compute the constraint force. | ||
773 | // BUT: cforce is inv(M)*J'*lambda, whereas we want just J'*lambda, | ||
774 | // so we must compute M*cforce. | ||
775 | // @@@ if any joint has a feedback request we compute the entire | ||
776 | // adjusted cforce, which is not the most efficient way to do it. | ||
777 | /*for (j=0; j<nj; j++) { | ||
778 | if (joint[j]->feedback) { | ||
779 | // compute adjusted cforce | ||
780 | for (i=0; i<nb; i++) { | ||
781 | dReal k = body[i]->mass.mass; | ||
782 | cforce [i*6+0] *= k; | ||
783 | cforce [i*6+1] *= k; | ||
784 | cforce [i*6+2] *= k; | ||
785 | dVector3 tmp; | ||
786 | dMULTIPLY0_331 (tmp, I + 12*i, cforce + i*6 + 3); | ||
787 | cforce [i*6+3] = tmp[0]; | ||
788 | cforce [i*6+4] = tmp[1]; | ||
789 | cforce [i*6+5] = tmp[2]; | ||
790 | } | ||
791 | // compute feedback for this and all remaining joints | ||
792 | for (; j<nj; j++) { | ||
793 | dJointFeedback *fb = joint[j]->feedback; | ||
794 | if (fb) { | ||
795 | int b1 = joint[j]->node[0].body->tag; | ||
796 | memcpy (fb->f1,cforce+b1*6,3*sizeof(dReal)); | ||
797 | memcpy (fb->t1,cforce+b1*6+3,3*sizeof(dReal)); | ||
798 | if (joint[j]->node[1].body) { | ||
799 | int b2 = joint[j]->node[1].body->tag; | ||
800 | memcpy (fb->f2,cforce+b2*6,3*sizeof(dReal)); | ||
801 | memcpy (fb->t2,cforce+b2*6+3,3*sizeof(dReal)); | ||
802 | } | ||
803 | } | ||
804 | } | ||
805 | } | ||
806 | }*/ | ||
807 | |||
808 | if (mfb > 0) { | ||
809 | // straightforward computation of joint constraint forces: | ||
810 | // multiply related lambdas with respective J' block for joints | ||
811 | // where feedback was requested | ||
812 | mfb = 0; | ||
813 | for (i=0; i<nj; i++) { | ||
814 | if (joint[i]->feedback) { | ||
815 | dJointFeedback *fb = joint[i]->feedback; | ||
816 | dReal data[6]; | ||
817 | Multiply1_12q1 (data, Jcopy+mfb*12, lambda+ofs[i], info[i].m); | ||
818 | fb->f1[0] = data[0]; | ||
819 | fb->f1[1] = data[1]; | ||
820 | fb->f1[2] = data[2]; | ||
821 | fb->t1[0] = data[3]; | ||
822 | fb->t1[1] = data[4]; | ||
823 | fb->t1[2] = data[5]; | ||
824 | if (joint[i]->node[1].body) | ||
825 | { | ||
826 | Multiply1_12q1 (data, Jcopy+mfb*12+6, lambda+ofs[i], info[i].m); | ||
827 | fb->f2[0] = data[0]; | ||
828 | fb->f2[1] = data[1]; | ||
829 | fb->f2[2] = data[2]; | ||
830 | fb->t2[0] = data[3]; | ||
831 | fb->t2[1] = data[4]; | ||
832 | fb->t2[2] = data[5]; | ||
833 | } | ||
834 | mfb += info[i].m; | ||
835 | } | ||
836 | } | ||
837 | } | ||
838 | } | ||
839 | |||
840 | // compute the velocity update: | ||
841 | // add stepsize * invM * fe to the body velocity | ||
842 | |||
843 | IFTIMING (dTimerNow ("compute velocity update");) | ||
844 | for (i=0; i<nb; i++) { | ||
845 | dReal body_invMass = body[i]->invMass; | ||
846 | for (j=0; j<3; j++) body[i]->lvel[j] += stepsize * body_invMass * body[i]->facc[j]; | ||
847 | for (j=0; j<3; j++) body[i]->tacc[j] *= stepsize; | ||
848 | dMULTIPLYADD0_331 (body[i]->avel,invI + i*12,body[i]->tacc); | ||
849 | } | ||
850 | |||
851 | #if 0 | ||
852 | // check that the updated velocity obeys the constraint (this check needs unmodified J) | ||
853 | dRealAllocaArray (vel,nb*6); | ||
854 | for (i=0; i<nb; i++) { | ||
855 | for (j=0; j<3; j++) vel[i*6+j] = body[i]->lvel[j]; | ||
856 | for (j=0; j<3; j++) vel[i*6+3+j] = body[i]->avel[j]; | ||
857 | } | ||
858 | dRealAllocaArray (tmp,m); | ||
859 | multiply_J (m,J,jb,vel,tmp); | ||
860 | dReal error = 0; | ||
861 | for (i=0; i<m; i++) error += dFabs(tmp[i]); | ||
862 | printf ("velocity error = %10.6e\n",error); | ||
863 | #endif | ||
864 | |||
865 | // update the position and orientation from the new linear/angular velocity | ||
866 | // (over the given timestep) | ||
867 | IFTIMING (dTimerNow ("update position");) | ||
868 | for (i=0; i<nb; i++) dxStepBody (body[i],stepsize); | ||
869 | |||
870 | IFTIMING (dTimerNow ("tidy up");) | ||
871 | |||
872 | // zero all force accumulators | ||
873 | for (i=0; i<nb; i++) { | ||
874 | dSetZero (body[i]->facc,3); | ||
875 | dSetZero (body[i]->tacc,3); | ||
876 | } | ||
877 | |||
878 | IFTIMING (dTimerEnd();) | ||
879 | IFTIMING (if (m > 0) dTimerReport (stdout,1);) | ||
880 | } | ||