aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/ode-0.9/ode/src/quickstep.cpp
diff options
context:
space:
mode:
authordan miller2007-10-19 05:24:38 +0000
committerdan miller2007-10-19 05:24:38 +0000
commitf205de7847da7ae1c10212d82e7042d0100b4ce0 (patch)
tree9acc9608a6880502aaeda43af52c33e278e95b9c /libraries/ode-0.9/ode/src/quickstep.cpp
parenttrying to fix my screwup part deux (diff)
downloadopensim-SC-f205de7847da7ae1c10212d82e7042d0100b4ce0.zip
opensim-SC-f205de7847da7ae1c10212d82e7042d0100b4ce0.tar.gz
opensim-SC-f205de7847da7ae1c10212d82e7042d0100b4ce0.tar.bz2
opensim-SC-f205de7847da7ae1c10212d82e7042d0100b4ce0.tar.xz
from the start... checking in ode-0.9
Diffstat (limited to '')
-rw-r--r--libraries/ode-0.9/ode/src/quickstep.cpp880
1 files changed, 880 insertions, 0 deletions
diff --git a/libraries/ode-0.9/ode/src/quickstep.cpp b/libraries/ode-0.9/ode/src/quickstep.cpp
new file mode 100644
index 0000000..07aa9f7
--- /dev/null
+++ b/libraries/ode-0.9/ode/src/quickstep.cpp
@@ -0,0 +1,880 @@
1/*************************************************************************
2 * *
3 * Open Dynamics Engine, Copyright (C) 2001-2003 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
5 * *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
12 * file LICENSE.TXT. *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
15 * *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20 * *
21 *************************************************************************/
22
23#include "objects.h"
24#include "joint.h"
25#include <ode/config.h>
26#include <ode/odemath.h>
27#include <ode/rotation.h>
28#include <ode/timer.h>
29#include <ode/error.h>
30#include <ode/matrix.h>
31#include <ode/misc.h>
32#include "lcp.h"
33#include "util.h"
34
35#define ALLOCA dALLOCA16
36
37typedef const dReal *dRealPtr;
38typedef dReal *dRealMutablePtr;
39#define dRealArray(name,n) dReal name[n];
40#define dRealAllocaArray(name,n) dReal *name = (dReal*) ALLOCA ((n)*sizeof(dReal));
41
42//***************************************************************************
43// configuration
44
45// for the SOR and CG methods:
46// uncomment the following line to use warm starting. this definitely
47// help for motor-driven joints. unfortunately it appears to hurt
48// with high-friction contacts using the SOR method. use with care
49
50//#define WARM_STARTING 1
51
52
53// for the SOR method:
54// uncomment the following line to determine a new constraint-solving
55// order for each iteration. however, the qsort per iteration is expensive,
56// and the optimal order is somewhat problem dependent.
57// @@@ try the leaf->root ordering.
58
59//#define REORDER_CONSTRAINTS 1
60
61
62// for the SOR method:
63// uncomment the following line to randomly reorder constraint rows
64// during the solution. depending on the situation, this can help a lot
65// or hardly at all, but it doesn't seem to hurt.
66
67#define RANDOMLY_REORDER_CONSTRAINTS 1
68
69//****************************************************************************
70// special matrix multipliers
71
72// multiply block of B matrix (q x 6) with 12 dReal per row with C vektor (q)
73static void Multiply1_12q1 (dReal *A, dReal *B, dReal *C, int q)
74{
75 int k;
76 dReal sum;
77 dIASSERT (q>0 && A && B && C);
78 sum = 0;
79 for (k=0; k<q; k++) sum += B[k*12] * C[k];
80 A[0] = sum;
81 sum = 0;
82 for (k=0; k<q; k++) sum += B[1+k*12] * C[k];
83 A[1] = sum;
84 sum = 0;
85 for (k=0; k<q; k++) sum += B[2+k*12] * C[k];
86 A[2] = sum;
87 sum = 0;
88 for (k=0; k<q; k++) sum += B[3+k*12] * C[k];
89 A[3] = sum;
90 sum = 0;
91 for (k=0; k<q; k++) sum += B[4+k*12] * C[k];
92 A[4] = sum;
93 sum = 0;
94 for (k=0; k<q; k++) sum += B[5+k*12] * C[k];
95 A[5] = sum;
96}
97
98//***************************************************************************
99// testing stuff
100
101#ifdef TIMING
102#define IFTIMING(x) x
103#else
104#define IFTIMING(x) /* */
105#endif
106
107//***************************************************************************
108// various common computations involving the matrix J
109
110// compute iMJ = inv(M)*J'
111
112static void compute_invM_JT (int m, dRealMutablePtr J, dRealMutablePtr iMJ, int *jb,
113 dxBody * const *body, dRealPtr invI)
114{
115 int i,j;
116 dRealMutablePtr iMJ_ptr = iMJ;
117 dRealMutablePtr J_ptr = J;
118 for (i=0; i<m; i++) {
119 int b1 = jb[i*2];
120 int b2 = jb[i*2+1];
121 dReal k = body[b1]->invMass;
122 for (j=0; j<3; j++) iMJ_ptr[j] = k*J_ptr[j];
123 dMULTIPLY0_331 (iMJ_ptr + 3, invI + 12*b1, J_ptr + 3);
124 if (b2 >= 0) {
125 k = body[b2]->invMass;
126 for (j=0; j<3; j++) iMJ_ptr[j+6] = k*J_ptr[j+6];
127 dMULTIPLY0_331 (iMJ_ptr + 9, invI + 12*b2, J_ptr + 9);
128 }
129 J_ptr += 12;
130 iMJ_ptr += 12;
131 }
132}
133
134
135// compute out = inv(M)*J'*in.
136
137static void multiply_invM_JT (int m, int nb, dRealMutablePtr iMJ, int *jb,
138 dRealMutablePtr in, dRealMutablePtr out)
139{
140 int i,j;
141 dSetZero (out,6*nb);
142 dRealPtr iMJ_ptr = iMJ;
143 for (i=0; i<m; i++) {
144 int b1 = jb[i*2];
145 int b2 = jb[i*2+1];
146 dRealMutablePtr out_ptr = out + b1*6;
147 for (j=0; j<6; j++) out_ptr[j] += iMJ_ptr[j] * in[i];
148 iMJ_ptr += 6;
149 if (b2 >= 0) {
150 out_ptr = out + b2*6;
151 for (j=0; j<6; j++) out_ptr[j] += iMJ_ptr[j] * in[i];
152 }
153 iMJ_ptr += 6;
154 }
155}
156
157
158// compute out = J*in.
159
160static void multiply_J (int m, dRealMutablePtr J, int *jb,
161 dRealMutablePtr in, dRealMutablePtr out)
162{
163 int i,j;
164 dRealPtr J_ptr = J;
165 for (i=0; i<m; i++) {
166 int b1 = jb[i*2];
167 int b2 = jb[i*2+1];
168 dReal sum = 0;
169 dRealMutablePtr in_ptr = in + b1*6;
170 for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j];
171 J_ptr += 6;
172 if (b2 >= 0) {
173 in_ptr = in + b2*6;
174 for (j=0; j<6; j++) sum += J_ptr[j] * in_ptr[j];
175 }
176 J_ptr += 6;
177 out[i] = sum;
178 }
179}
180
181
182// compute out = (J*inv(M)*J' + cfm)*in.
183// use z as an nb*6 temporary.
184
185static void multiply_J_invM_JT (int m, int nb, dRealMutablePtr J, dRealMutablePtr iMJ, int *jb,
186 dRealPtr cfm, dRealMutablePtr z, dRealMutablePtr in, dRealMutablePtr out)
187{
188 multiply_invM_JT (m,nb,iMJ,jb,in,z);
189 multiply_J (m,J,jb,z,out);
190
191 // add cfm
192 for (int i=0; i<m; i++) out[i] += cfm[i] * in[i];
193}
194
195//***************************************************************************
196// conjugate gradient method with jacobi preconditioner
197// THIS IS EXPERIMENTAL CODE that doesn't work too well, so it is ifdefed out.
198//
199// adding CFM seems to be critically important to this method.
200
201#if 0
202
203static inline dReal dot (int n, dRealPtr x, dRealPtr y)
204{
205 dReal sum=0;
206 for (int i=0; i<n; i++) sum += x[i]*y[i];
207 return sum;
208}
209
210
211// x = y + z*alpha
212
213static inline void add (int n, dRealMutablePtr x, dRealPtr y, dRealPtr z, dReal alpha)
214{
215 for (int i=0; i<n; i++) x[i] = y[i] + z[i]*alpha;
216}
217
218
219static void CG_LCP (int m, int nb, dRealMutablePtr J, int *jb, dxBody * const *body,
220 dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr fc, dRealMutablePtr b,
221 dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex,
222 dxQuickStepParameters *qs)
223{
224 int i,j;
225 const int num_iterations = qs->num_iterations;
226
227 // precompute iMJ = inv(M)*J'
228 dRealAllocaArray (iMJ,m*12);
229 compute_invM_JT (m,J,iMJ,jb,body,invI);
230
231 dReal last_rho = 0;
232 dRealAllocaArray (r,m);
233 dRealAllocaArray (z,m);
234 dRealAllocaArray (p,m);
235 dRealAllocaArray (q,m);
236
237 // precompute 1 / diagonals of A
238 dRealAllocaArray (Ad,m);
239 dRealPtr iMJ_ptr = iMJ;
240 dRealPtr J_ptr = J;
241 for (i=0; i<m; i++) {
242 dReal sum = 0;
243 for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j];
244 if (jb[i*2+1] >= 0) {
245 for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j];
246 }
247 iMJ_ptr += 12;
248 J_ptr += 12;
249 Ad[i] = REAL(1.0) / (sum + cfm[i]);
250 }
251
252#ifdef WARM_STARTING
253 // compute residual r = b - A*lambda
254 multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r);
255 for (i=0; i<m; i++) r[i] = b[i] - r[i];
256#else
257 dSetZero (lambda,m);
258 memcpy (r,b,m*sizeof(dReal)); // residual r = b - A*lambda
259#endif
260
261 for (int iteration=0; iteration < num_iterations; iteration++) {
262 for (i=0; i<m; i++) z[i] = r[i]*Ad[i]; // z = inv(M)*r
263 dReal rho = dot (m,r,z); // rho = r'*z
264
265 // @@@
266 // we must check for convergence, otherwise rho will go to 0 if
267 // we get an exact solution, which will introduce NaNs into the equations.
268 if (rho < 1e-10) {
269 printf ("CG returned at iteration %d\n",iteration);
270 break;
271 }
272
273 if (iteration==0) {
274 memcpy (p,z,m*sizeof(dReal)); // p = z
275 }
276 else {
277 add (m,p,z,p,rho/last_rho); // p = z + (rho/last_rho)*p
278 }
279
280 // compute q = (J*inv(M)*J')*p
281 multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,p,q);
282
283 dReal alpha = rho/dot (m,p,q); // alpha = rho/(p'*q)
284 add (m,lambda,lambda,p,alpha); // lambda = lambda + alpha*p
285 add (m,r,r,q,-alpha); // r = r - alpha*q
286 last_rho = rho;
287 }
288
289 // compute fc = inv(M)*J'*lambda
290 multiply_invM_JT (m,nb,iMJ,jb,lambda,fc);
291
292#if 0
293 // measure solution error
294 multiply_J_invM_JT (m,nb,J,iMJ,jb,cfm,fc,lambda,r);
295 dReal error = 0;
296 for (i=0; i<m; i++) error += dFabs(r[i] - b[i]);
297 printf ("lambda error = %10.6e\n",error);
298#endif
299}
300
301#endif
302
303//***************************************************************************
304// SOR-LCP method
305
306// nb is the number of bodies in the body array.
307// J is an m*12 matrix of constraint rows
308// jb is an array of first and second body numbers for each constraint row
309// invI is the global frame inverse inertia for each body (stacked 3x3 matrices)
310//
311// this returns lambda and fc (the constraint force).
312// note: fc is returned as inv(M)*J'*lambda, the constraint force is actually J'*lambda
313//
314// b, lo and hi are modified on exit
315
316
317struct IndexError {
318 dReal error; // error to sort on
319 int findex;
320 int index; // row index
321};
322
323
324#ifdef REORDER_CONSTRAINTS
325
326static int compare_index_error (const void *a, const void *b)
327{
328 const IndexError *i1 = (IndexError*) a;
329 const IndexError *i2 = (IndexError*) b;
330 if (i1->findex < 0 && i2->findex >= 0) return -1;
331 if (i1->findex >= 0 && i2->findex < 0) return 1;
332 if (i1->error < i2->error) return -1;
333 if (i1->error > i2->error) return 1;
334 return 0;
335}
336
337#endif
338
339
340static void SOR_LCP (int m, int nb, dRealMutablePtr J, int *jb, dxBody * const *body,
341 dRealPtr invI, dRealMutablePtr lambda, dRealMutablePtr fc, dRealMutablePtr b,
342 dRealMutablePtr lo, dRealMutablePtr hi, dRealPtr cfm, int *findex,
343 dxQuickStepParameters *qs)
344{
345 const int num_iterations = qs->num_iterations;
346 const dReal sor_w = qs->w; // SOR over-relaxation parameter
347
348 int i,j;
349
350#ifdef WARM_STARTING
351 // for warm starting, this seems to be necessary to prevent
352 // jerkiness in motor-driven joints. i have no idea why this works.
353 for (i=0; i<m; i++) lambda[i] *= 0.9;
354#else
355 dSetZero (lambda,m);
356#endif
357
358#ifdef REORDER_CONSTRAINTS
359 // the lambda computed at the previous iteration.
360 // this is used to measure error for when we are reordering the indexes.
361 dRealAllocaArray (last_lambda,m);
362#endif
363
364 // a copy of the 'hi' vector in case findex[] is being used
365 dRealAllocaArray (hicopy,m);
366 memcpy (hicopy,hi,m*sizeof(dReal));
367
368 // precompute iMJ = inv(M)*J'
369 dRealAllocaArray (iMJ,m*12);
370 compute_invM_JT (m,J,iMJ,jb,body,invI);
371
372 // compute fc=(inv(M)*J')*lambda. we will incrementally maintain fc
373 // as we change lambda.
374#ifdef WARM_STARTING
375 multiply_invM_JT (m,nb,iMJ,jb,lambda,fc);
376#else
377 dSetZero (fc,nb*6);
378#endif
379
380 // precompute 1 / diagonals of A
381 dRealAllocaArray (Ad,m);
382 dRealPtr iMJ_ptr = iMJ;
383 dRealMutablePtr J_ptr = J;
384 for (i=0; i<m; i++) {
385 dReal sum = 0;
386 for (j=0; j<6; j++) sum += iMJ_ptr[j] * J_ptr[j];
387 if (jb[i*2+1] >= 0) {
388 for (j=6; j<12; j++) sum += iMJ_ptr[j] * J_ptr[j];
389 }
390 iMJ_ptr += 12;
391 J_ptr += 12;
392 Ad[i] = sor_w / (sum + cfm[i]);
393 }
394
395 // scale J and b by Ad
396 J_ptr = J;
397 for (i=0; i<m; i++) {
398 for (j=0; j<12; j++) {
399 J_ptr[0] *= Ad[i];
400 J_ptr++;
401 }
402 b[i] *= Ad[i];
403
404 // scale Ad by CFM. N.B. this should be done last since it is used above
405 Ad[i] *= cfm[i];
406 }
407
408 // order to solve constraint rows in
409 IndexError *order = (IndexError*) ALLOCA (m*sizeof(IndexError));
410
411#ifndef REORDER_CONSTRAINTS
412 // make sure constraints with findex < 0 come first.
413 j=0;
414 int k=1;
415
416 // Fill the array from both ends
417 for (i=0; i<m; i++)
418 if (findex[i] < 0)
419 order[j++].index = i; // Place them at the front
420 else
421 order[m-k++].index = i; // Place them at the end
422
423 dIASSERT ((j+k-1)==m); // -1 since k was started at 1 and not 0
424#endif
425
426 for (int iteration=0; iteration < num_iterations; iteration++) {
427
428#ifdef REORDER_CONSTRAINTS
429 // constraints with findex < 0 always come first.
430 if (iteration < 2) {
431 // for the first two iterations, solve the constraints in
432 // the given order
433 for (i=0; i<m; i++) {
434 order[i].error = i;
435 order[i].findex = findex[i];
436 order[i].index = i;
437 }
438 }
439 else {
440 // sort the constraints so that the ones converging slowest
441 // get solved last. use the absolute (not relative) error.
442 for (i=0; i<m; i++) {
443 dReal v1 = dFabs (lambda[i]);
444 dReal v2 = dFabs (last_lambda[i]);
445 dReal max = (v1 > v2) ? v1 : v2;
446 if (max > 0) {
447 //@@@ relative error: order[i].error = dFabs(lambda[i]-last_lambda[i])/max;
448 order[i].error = dFabs(lambda[i]-last_lambda[i]);
449 }
450 else {
451 order[i].error = dInfinity;
452 }
453 order[i].findex = findex[i];
454 order[i].index = i;
455 }
456 }
457 qsort (order,m,sizeof(IndexError),&compare_index_error);
458
459 //@@@ potential optimization: swap lambda and last_lambda pointers rather
460 // than copying the data. we must make sure lambda is properly
461 // returned to the caller
462 memcpy (last_lambda,lambda,m*sizeof(dReal));
463#endif
464#ifdef RANDOMLY_REORDER_CONSTRAINTS
465 if ((iteration & 7) == 0) {
466 for (i=1; i<m; ++i) {
467 IndexError tmp = order[i];
468 int swapi = dRandInt(i+1);
469 order[i] = order[swapi];
470 order[swapi] = tmp;
471 }
472 }
473#endif
474
475 for (int i=0; i<m; i++) {
476 // @@@ potential optimization: we could pre-sort J and iMJ, thereby
477 // linearizing access to those arrays. hmmm, this does not seem
478 // like a win, but we should think carefully about our memory
479 // access pattern.
480
481 int index = order[i].index;
482 J_ptr = J + index*12;
483 iMJ_ptr = iMJ + index*12;
484
485 // set the limits for this constraint. note that 'hicopy' is used.
486 // this is the place where the QuickStep method differs from the
487 // direct LCP solving method, since that method only performs this
488 // limit adjustment once per time step, whereas this method performs
489 // once per iteration per constraint row.
490 // the constraints are ordered so that all lambda[] values needed have
491 // already been computed.
492 if (findex[index] >= 0) {
493 hi[index] = dFabs (hicopy[index] * lambda[findex[index]]);
494 lo[index] = -hi[index];
495 }
496
497 int b1 = jb[index*2];
498 int b2 = jb[index*2+1];
499 dReal delta = b[index] - lambda[index]*Ad[index];
500 dRealMutablePtr fc_ptr = fc + 6*b1;
501
502 // @@@ potential optimization: SIMD-ize this and the b2 >= 0 case
503 delta -=fc_ptr[0] * J_ptr[0] + fc_ptr[1] * J_ptr[1] +
504 fc_ptr[2] * J_ptr[2] + fc_ptr[3] * J_ptr[3] +
505 fc_ptr[4] * J_ptr[4] + fc_ptr[5] * J_ptr[5];
506 // @@@ potential optimization: handle 1-body constraints in a separate
507 // loop to avoid the cost of test & jump?
508 if (b2 >= 0) {
509 fc_ptr = fc + 6*b2;
510 delta -=fc_ptr[0] * J_ptr[6] + fc_ptr[1] * J_ptr[7] +
511 fc_ptr[2] * J_ptr[8] + fc_ptr[3] * J_ptr[9] +
512 fc_ptr[4] * J_ptr[10] + fc_ptr[5] * J_ptr[11];
513 }
514
515 // compute lambda and clamp it to [lo,hi].
516 // @@@ potential optimization: does SSE have clamping instructions
517 // to save test+jump penalties here?
518 dReal new_lambda = lambda[index] + delta;
519 if (new_lambda < lo[index]) {
520 delta = lo[index]-lambda[index];
521 lambda[index] = lo[index];
522 }
523 else if (new_lambda > hi[index]) {
524 delta = hi[index]-lambda[index];
525 lambda[index] = hi[index];
526 }
527 else {
528 lambda[index] = new_lambda;
529 }
530
531 //@@@ a trick that may or may not help
532 //dReal ramp = (1-((dReal)(iteration+1)/(dReal)num_iterations));
533 //delta *= ramp;
534
535 // update fc.
536 // @@@ potential optimization: SIMD for this and the b2 >= 0 case
537 fc_ptr = fc + 6*b1;
538 fc_ptr[0] += delta * iMJ_ptr[0];
539 fc_ptr[1] += delta * iMJ_ptr[1];
540 fc_ptr[2] += delta * iMJ_ptr[2];
541 fc_ptr[3] += delta * iMJ_ptr[3];
542 fc_ptr[4] += delta * iMJ_ptr[4];
543 fc_ptr[5] += delta * iMJ_ptr[5];
544 // @@@ potential optimization: handle 1-body constraints in a separate
545 // loop to avoid the cost of test & jump?
546 if (b2 >= 0) {
547 fc_ptr = fc + 6*b2;
548 fc_ptr[0] += delta * iMJ_ptr[6];
549 fc_ptr[1] += delta * iMJ_ptr[7];
550 fc_ptr[2] += delta * iMJ_ptr[8];
551 fc_ptr[3] += delta * iMJ_ptr[9];
552 fc_ptr[4] += delta * iMJ_ptr[10];
553 fc_ptr[5] += delta * iMJ_ptr[11];
554 }
555 }
556 }
557}
558
559
560void dxQuickStepper (dxWorld *world, dxBody * const *body, int nb,
561 dxJoint * const *_joint, int nj, dReal stepsize)
562{
563 int i,j;
564 IFTIMING(dTimerStart("preprocessing");)
565
566 dReal stepsize1 = dRecip(stepsize);
567
568 // number all bodies in the body list - set their tag values
569 for (i=0; i<nb; i++) body[i]->tag = i;
570
571 // make a local copy of the joint array, because we might want to modify it.
572 // (the "dxJoint *const*" declaration says we're allowed to modify the joints
573 // but not the joint array, because the caller might need it unchanged).
574 //@@@ do we really need to do this? we'll be sorting constraint rows individually, not joints
575 dxJoint **joint = (dxJoint**) ALLOCA (nj * sizeof(dxJoint*));
576 memcpy (joint,_joint,nj * sizeof(dxJoint*));
577
578 // for all bodies, compute the inertia tensor and its inverse in the global
579 // frame, and compute the rotational force and add it to the torque
580 // accumulator. I and invI are a vertical stack of 3x4 matrices, one per body.
581 //dRealAllocaArray (I,3*4*nb); // need to remember all I's for feedback purposes only
582 dRealAllocaArray (invI,3*4*nb);
583 for (i=0; i<nb; i++) {
584 dMatrix3 tmp;
585
586 // compute inverse inertia tensor in global frame
587 dMULTIPLY2_333 (tmp,body[i]->invI,body[i]->posr.R);
588 dMULTIPLY0_333 (invI+i*12,body[i]->posr.R,tmp);
589#ifdef dGYROSCOPIC
590 dMatrix3 I;
591 // compute inertia tensor in global frame
592 dMULTIPLY2_333 (tmp,body[i]->mass.I,body[i]->posr.R);
593 //dMULTIPLY0_333 (I+i*12,body[i]->posr.R,tmp);
594 dMULTIPLY0_333 (I,body[i]->posr.R,tmp);
595 // compute rotational force
596 //dMULTIPLY0_331 (tmp,I+i*12,body[i]->avel);
597 dMULTIPLY0_331 (tmp,I,body[i]->avel);
598 dCROSS (body[i]->tacc,-=,body[i]->avel,tmp);
599#endif
600 }
601
602 // add the gravity force to all bodies
603 for (i=0; i<nb; i++) {
604 if ((body[i]->flags & dxBodyNoGravity)==0) {
605 body[i]->facc[0] += body[i]->mass.mass * world->gravity[0];
606 body[i]->facc[1] += body[i]->mass.mass * world->gravity[1];
607 body[i]->facc[2] += body[i]->mass.mass * world->gravity[2];
608 }
609 }
610
611 // get joint information (m = total constraint dimension, nub = number of unbounded variables).
612 // joints with m=0 are inactive and are removed from the joints array
613 // entirely, so that the code that follows does not consider them.
614 //@@@ do we really need to save all the info1's
615 dxJoint::Info1 *info = (dxJoint::Info1*) ALLOCA (nj*sizeof(dxJoint::Info1));
616 for (i=0, j=0; j<nj; j++) { // i=dest, j=src
617 joint[j]->vtable->getInfo1 (joint[j],info+i);
618 dIASSERT (info[i].m >= 0 && info[i].m <= 6 && info[i].nub >= 0 && info[i].nub <= info[i].m);
619 if (info[i].m > 0) {
620 joint[i] = joint[j];
621 i++;
622 }
623 }
624 nj = i;
625
626 // create the row offset array
627 int m = 0;
628 int *ofs = (int*) ALLOCA (nj*sizeof(int));
629 for (i=0; i<nj; i++) {
630 ofs[i] = m;
631 m += info[i].m;
632 }
633
634 // if there are constraints, compute the constraint force
635 dRealAllocaArray (J,m*12);
636 int *jb = (int*) ALLOCA (m*2*sizeof(int));
637 if (m > 0) {
638 // create a constraint equation right hand side vector `c', a constraint
639 // force mixing vector `cfm', and LCP low and high bound vectors, and an
640 // 'findex' vector.
641 dRealAllocaArray (c,m);
642 dRealAllocaArray (cfm,m);
643 dRealAllocaArray (lo,m);
644 dRealAllocaArray (hi,m);
645 int *findex = (int*) ALLOCA (m*sizeof(int));
646 dSetZero (c,m);
647 dSetValue (cfm,m,world->global_cfm);
648 dSetValue (lo,m,-dInfinity);
649 dSetValue (hi,m, dInfinity);
650 for (i=0; i<m; i++) findex[i] = -1;
651
652 // get jacobian data from constraints. an m*12 matrix will be created
653 // to store the two jacobian blocks from each constraint. it has this
654 // format:
655 //
656 // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 \ .
657 // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 }-- jacobian for joint 0, body 1 and body 2 (3 rows)
658 // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 /
659 // l1 l1 l1 a1 a1 a1 l2 l2 l2 a2 a2 a2 }--- jacobian for joint 1, body 1 and body 2 (3 rows)
660 // etc...
661 //
662 // (lll) = linear jacobian data
663 // (aaa) = angular jacobian data
664 //
665 IFTIMING (dTimerNow ("create J");)
666 dSetZero (J,m*12);
667 dxJoint::Info2 Jinfo;
668 Jinfo.rowskip = 12;
669 Jinfo.fps = stepsize1;
670 Jinfo.erp = world->global_erp;
671 int mfb = 0; // number of rows of Jacobian we will have to save for joint feedback
672 for (i=0; i<nj; i++) {
673 Jinfo.J1l = J + ofs[i]*12;
674 Jinfo.J1a = Jinfo.J1l + 3;
675 Jinfo.J2l = Jinfo.J1l + 6;
676 Jinfo.J2a = Jinfo.J1l + 9;
677 Jinfo.c = c + ofs[i];
678 Jinfo.cfm = cfm + ofs[i];
679 Jinfo.lo = lo + ofs[i];
680 Jinfo.hi = hi + ofs[i];
681 Jinfo.findex = findex + ofs[i];
682 joint[i]->vtable->getInfo2 (joint[i],&Jinfo);
683 // adjust returned findex values for global index numbering
684 for (j=0; j<info[i].m; j++) {
685 if (findex[ofs[i] + j] >= 0) findex[ofs[i] + j] += ofs[i];
686 }
687 if (joint[i]->feedback)
688 mfb += info[i].m;
689 }
690
691 // we need a copy of Jacobian for joint feedbacks
692 // because it gets destroyed by SOR solver
693 // instead of saving all Jacobian, we can save just rows
694 // for joints, that requested feedback (which is normaly much less)
695 dRealAllocaArray (Jcopy,mfb*12);
696 if (mfb > 0) {
697 mfb = 0;
698 for (i=0; i<nj; i++)
699 if (joint[i]->feedback) {
700 memcpy(Jcopy+mfb*12, J+ofs[i]*12, info[i].m*12*sizeof(dReal));
701 mfb += info[i].m;
702 }
703 }
704
705
706 // create an array of body numbers for each joint row
707 int *jb_ptr = jb;
708 for (i=0; i<nj; i++) {
709 int b1 = (joint[i]->node[0].body) ? (joint[i]->node[0].body->tag) : -1;
710 int b2 = (joint[i]->node[1].body) ? (joint[i]->node[1].body->tag) : -1;
711 for (j=0; j<info[i].m; j++) {
712 jb_ptr[0] = b1;
713 jb_ptr[1] = b2;
714 jb_ptr += 2;
715 }
716 }
717 dIASSERT (jb_ptr == jb+2*m);
718
719 // compute the right hand side `rhs'
720 IFTIMING (dTimerNow ("compute rhs");)
721 dRealAllocaArray (tmp1,nb*6);
722 // put v/h + invM*fe into tmp1
723 for (i=0; i<nb; i++) {
724 dReal body_invMass = body[i]->invMass;
725 for (j=0; j<3; j++) tmp1[i*6+j] = body[i]->facc[j] * body_invMass + body[i]->lvel[j] * stepsize1;
726 dMULTIPLY0_331 (tmp1 + i*6 + 3,invI + i*12,body[i]->tacc);
727 for (j=0; j<3; j++) tmp1[i*6+3+j] += body[i]->avel[j] * stepsize1;
728 }
729
730 // put J*tmp1 into rhs
731 dRealAllocaArray (rhs,m);
732 multiply_J (m,J,jb,tmp1,rhs);
733
734 // complete rhs
735 for (i=0; i<m; i++) rhs[i] = c[i]*stepsize1 - rhs[i];
736
737 // scale CFM
738 for (i=0; i<m; i++) cfm[i] *= stepsize1;
739
740 // load lambda from the value saved on the previous iteration
741 dRealAllocaArray (lambda,m);
742#ifdef WARM_STARTING
743 dSetZero (lambda,m); //@@@ shouldn't be necessary
744 for (i=0; i<nj; i++) {
745 memcpy (lambda+ofs[i],joint[i]->lambda,info[i].m * sizeof(dReal));
746 }
747#endif
748
749 // solve the LCP problem and get lambda and invM*constraint_force
750 IFTIMING (dTimerNow ("solving LCP problem");)
751 dRealAllocaArray (cforce,nb*6);
752 SOR_LCP (m,nb,J,jb,body,invI,lambda,cforce,rhs,lo,hi,cfm,findex,&world->qs);
753
754#ifdef WARM_STARTING
755 // save lambda for the next iteration
756 //@@@ note that this doesn't work for contact joints yet, as they are
757 // recreated every iteration
758 for (i=0; i<nj; i++) {
759 memcpy (joint[i]->lambda,lambda+ofs[i],info[i].m * sizeof(dReal));
760 }
761#endif
762
763 // note that the SOR method overwrites rhs and J at this point, so
764 // they should not be used again.
765
766 // add stepsize * cforce to the body velocity
767 for (i=0; i<nb; i++) {
768 for (j=0; j<3; j++) body[i]->lvel[j] += stepsize * cforce[i*6+j];
769 for (j=0; j<3; j++) body[i]->avel[j] += stepsize * cforce[i*6+3+j];
770 }
771
772 // if joint feedback is requested, compute the constraint force.
773 // BUT: cforce is inv(M)*J'*lambda, whereas we want just J'*lambda,
774 // so we must compute M*cforce.
775 // @@@ if any joint has a feedback request we compute the entire
776 // adjusted cforce, which is not the most efficient way to do it.
777 /*for (j=0; j<nj; j++) {
778 if (joint[j]->feedback) {
779 // compute adjusted cforce
780 for (i=0; i<nb; i++) {
781 dReal k = body[i]->mass.mass;
782 cforce [i*6+0] *= k;
783 cforce [i*6+1] *= k;
784 cforce [i*6+2] *= k;
785 dVector3 tmp;
786 dMULTIPLY0_331 (tmp, I + 12*i, cforce + i*6 + 3);
787 cforce [i*6+3] = tmp[0];
788 cforce [i*6+4] = tmp[1];
789 cforce [i*6+5] = tmp[2];
790 }
791 // compute feedback for this and all remaining joints
792 for (; j<nj; j++) {
793 dJointFeedback *fb = joint[j]->feedback;
794 if (fb) {
795 int b1 = joint[j]->node[0].body->tag;
796 memcpy (fb->f1,cforce+b1*6,3*sizeof(dReal));
797 memcpy (fb->t1,cforce+b1*6+3,3*sizeof(dReal));
798 if (joint[j]->node[1].body) {
799 int b2 = joint[j]->node[1].body->tag;
800 memcpy (fb->f2,cforce+b2*6,3*sizeof(dReal));
801 memcpy (fb->t2,cforce+b2*6+3,3*sizeof(dReal));
802 }
803 }
804 }
805 }
806 }*/
807
808 if (mfb > 0) {
809 // straightforward computation of joint constraint forces:
810 // multiply related lambdas with respective J' block for joints
811 // where feedback was requested
812 mfb = 0;
813 for (i=0; i<nj; i++) {
814 if (joint[i]->feedback) {
815 dJointFeedback *fb = joint[i]->feedback;
816 dReal data[6];
817 Multiply1_12q1 (data, Jcopy+mfb*12, lambda+ofs[i], info[i].m);
818 fb->f1[0] = data[0];
819 fb->f1[1] = data[1];
820 fb->f1[2] = data[2];
821 fb->t1[0] = data[3];
822 fb->t1[1] = data[4];
823 fb->t1[2] = data[5];
824 if (joint[i]->node[1].body)
825 {
826 Multiply1_12q1 (data, Jcopy+mfb*12+6, lambda+ofs[i], info[i].m);
827 fb->f2[0] = data[0];
828 fb->f2[1] = data[1];
829 fb->f2[2] = data[2];
830 fb->t2[0] = data[3];
831 fb->t2[1] = data[4];
832 fb->t2[2] = data[5];
833 }
834 mfb += info[i].m;
835 }
836 }
837 }
838 }
839
840 // compute the velocity update:
841 // add stepsize * invM * fe to the body velocity
842
843 IFTIMING (dTimerNow ("compute velocity update");)
844 for (i=0; i<nb; i++) {
845 dReal body_invMass = body[i]->invMass;
846 for (j=0; j<3; j++) body[i]->lvel[j] += stepsize * body_invMass * body[i]->facc[j];
847 for (j=0; j<3; j++) body[i]->tacc[j] *= stepsize;
848 dMULTIPLYADD0_331 (body[i]->avel,invI + i*12,body[i]->tacc);
849 }
850
851#if 0
852 // check that the updated velocity obeys the constraint (this check needs unmodified J)
853 dRealAllocaArray (vel,nb*6);
854 for (i=0; i<nb; i++) {
855 for (j=0; j<3; j++) vel[i*6+j] = body[i]->lvel[j];
856 for (j=0; j<3; j++) vel[i*6+3+j] = body[i]->avel[j];
857 }
858 dRealAllocaArray (tmp,m);
859 multiply_J (m,J,jb,vel,tmp);
860 dReal error = 0;
861 for (i=0; i<m; i++) error += dFabs(tmp[i]);
862 printf ("velocity error = %10.6e\n",error);
863#endif
864
865 // update the position and orientation from the new linear/angular velocity
866 // (over the given timestep)
867 IFTIMING (dTimerNow ("update position");)
868 for (i=0; i<nb; i++) dxStepBody (body[i],stepsize);
869
870 IFTIMING (dTimerNow ("tidy up");)
871
872 // zero all force accumulators
873 for (i=0; i<nb; i++) {
874 dSetZero (body[i]->facc,3);
875 dSetZero (body[i]->tacc,3);
876 }
877
878 IFTIMING (dTimerEnd();)
879 IFTIMING (if (m > 0) dTimerReport (stdout,1);)
880}