aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/ode-0.9/ode/src/lcp.cpp
diff options
context:
space:
mode:
authordan miller2007-10-19 05:15:33 +0000
committerdan miller2007-10-19 05:15:33 +0000
commit79eca25c945a535a7a0325999034bae17da92412 (patch)
tree40ff433d94859d629aac933d5ec73b382f62ba1a /libraries/ode-0.9/ode/src/lcp.cpp
parentadding ode source to /libraries (diff)
downloadopensim-SC-79eca25c945a535a7a0325999034bae17da92412.zip
opensim-SC-79eca25c945a535a7a0325999034bae17da92412.tar.gz
opensim-SC-79eca25c945a535a7a0325999034bae17da92412.tar.bz2
opensim-SC-79eca25c945a535a7a0325999034bae17da92412.tar.xz
resubmitting ode
Diffstat (limited to '')
-rw-r--r--libraries/ode-0.9/ode/src/lcp.cpp2007
1 files changed, 2007 insertions, 0 deletions
diff --git a/libraries/ode-0.9/ode/src/lcp.cpp b/libraries/ode-0.9/ode/src/lcp.cpp
new file mode 100644
index 0000000..d03d3e8
--- /dev/null
+++ b/libraries/ode-0.9/ode/src/lcp.cpp
@@ -0,0 +1,2007 @@
1/*************************************************************************
2 * *
3 * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. *
4 * All rights reserved. Email: russ@q12.org Web: www.q12.org *
5 * *
6 * This library is free software; you can redistribute it and/or *
7 * modify it under the terms of EITHER: *
8 * (1) The GNU Lesser General Public License as published by the Free *
9 * Software Foundation; either version 2.1 of the License, or (at *
10 * your option) any later version. The text of the GNU Lesser *
11 * General Public License is included with this library in the *
12 * file LICENSE.TXT. *
13 * (2) The BSD-style license that is included with this library in *
14 * the file LICENSE-BSD.TXT. *
15 * *
16 * This library is distributed in the hope that it will be useful, *
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files *
19 * LICENSE.TXT and LICENSE-BSD.TXT for more details. *
20 * *
21 *************************************************************************/
22
23/*
24
25
26THE ALGORITHM
27-------------
28
29solve A*x = b+w, with x and w subject to certain LCP conditions.
30each x(i),w(i) must lie on one of the three line segments in the following
31diagram. each line segment corresponds to one index set :
32
33 w(i)
34 /|\ | :
35 | | :
36 | |i in N :
37 w>0 | |state[i]=0 :
38 | | :
39 | | : i in C
40 w=0 + +-----------------------+
41 | : |
42 | : |
43 w<0 | : |i in N
44 | : |state[i]=1
45 | : |
46 | : |
47 +-------|-----------|-----------|----------> x(i)
48 lo 0 hi
49
50the Dantzig algorithm proceeds as follows:
51 for i=1:n
52 * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or
53 negative towards the line. as this is done, the other (x(j),w(j))
54 for j<i are constrained to be on the line. if any (x,w) reaches the
55 end of a line segment then it is switched between index sets.
56 * i is added to the appropriate index set depending on what line segment
57 it hits.
58
59we restrict lo(i) <= 0 and hi(i) >= 0. this makes the algorithm a bit
60simpler, because the starting point for x(i),w(i) is always on the dotted
61line x=0 and x will only ever increase in one direction, so it can only hit
62two out of the three line segments.
63
64
65NOTES
66-----
67
68this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m".
69the implementation is split into an LCP problem object (dLCP) and an LCP
70driver function. most optimization occurs in the dLCP object.
71
72a naive implementation of the algorithm requires either a lot of data motion
73or a lot of permutation-array lookup, because we are constantly re-ordering
74rows and columns. to avoid this and make a more optimized algorithm, a
75non-trivial data structure is used to represent the matrix A (this is
76implemented in the fast version of the dLCP object).
77
78during execution of this algorithm, some indexes in A are clamped (set C),
79some are non-clamped (set N), and some are "don't care" (where x=0).
80A,x,b,w (and other problem vectors) are permuted such that the clamped
81indexes are first, the unclamped indexes are next, and the don't-care
82indexes are last. this permutation is recorded in the array `p'.
83initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped,
84the corresponding elements of p are swapped.
85
86because the C and N elements are grouped together in the rows of A, we can do
87lots of work with a fast dot product function. if A,x,etc were not permuted
88and we only had a permutation array, then those dot products would be much
89slower as we would have a permutation array lookup in some inner loops.
90
91A is accessed through an array of row pointers, so that element (i,j) of the
92permuted matrix is A[i][j]. this makes row swapping fast. for column swapping
93we still have to actually move the data.
94
95during execution of this algorithm we maintain an L*D*L' factorization of
96the clamped submatrix of A (call it `AC') which is the top left nC*nC
97submatrix of A. there are two ways we could arrange the rows/columns in AC.
98
99(1) AC is always permuted such that L*D*L' = AC. this causes a problem
100 when a row/column is removed from C, because then all the rows/columns of A
101 between the deleted index and the end of C need to be rotated downward.
102 this results in a lot of data motion and slows things down.
103(2) L*D*L' is actually a factorization of a *permutation* of AC (which is
104 itself a permutation of the underlying A). this is what we do - the
105 permutation is recorded in the vector C. call this permutation A[C,C].
106 when a row/column is removed from C, all we have to do is swap two
107 rows/columns and manipulate C.
108
109*/
110
111#include <ode/common.h>
112#include "lcp.h"
113#include <ode/matrix.h>
114#include <ode/misc.h>
115#include "mat.h" // for testing
116#include <ode/timer.h> // for testing
117
118//***************************************************************************
119// code generation parameters
120
121// LCP debugging (mosty for fast dLCP) - this slows things down a lot
122//#define DEBUG_LCP
123
124//#define dLCP_SLOW // use slow dLCP object
125#define dLCP_FAST // use fast dLCP object
126
127// option 1 : matrix row pointers (less data copying)
128#define ROWPTRS
129#define ATYPE dReal **
130#define AROW(i) (A[i])
131
132// option 2 : no matrix row pointers (slightly faster inner loops)
133//#define NOROWPTRS
134//#define ATYPE dReal *
135//#define AROW(i) (A+(i)*nskip)
136
137// use protected, non-stack memory allocation system
138
139#ifdef dUSE_MALLOC_FOR_ALLOCA
140extern unsigned int dMemoryFlag;
141
142#define ALLOCA(t,v,s) t* v = (t*) malloc(s)
143#define UNALLOCA(t) free(t)
144
145#else
146
147#define ALLOCA(t,v,s) t* v =(t*)dALLOCA16(s)
148#define UNALLOCA(t) /* nothing */
149
150#endif
151
152#define NUB_OPTIMIZATIONS
153
154//***************************************************************************
155
156// swap row/column i1 with i2 in the n*n matrix A. the leading dimension of
157// A is nskip. this only references and swaps the lower triangle.
158// if `do_fast_row_swaps' is nonzero and row pointers are being used, then
159// rows will be swapped by exchanging row pointers. otherwise the data will
160// be copied.
161
162static void swapRowsAndCols (ATYPE A, int n, int i1, int i2, int nskip,
163 int do_fast_row_swaps)
164{
165 int i;
166 dAASSERT (A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n &&
167 nskip >= n && i1 < i2);
168
169# ifdef ROWPTRS
170 for (i=i1+1; i<i2; i++) A[i1][i] = A[i][i1];
171 for (i=i1+1; i<i2; i++) A[i][i1] = A[i2][i];
172 A[i1][i2] = A[i1][i1];
173 A[i1][i1] = A[i2][i1];
174 A[i2][i1] = A[i2][i2];
175 // swap rows, by swapping row pointers
176 if (do_fast_row_swaps) {
177 dReal *tmpp;
178 tmpp = A[i1];
179 A[i1] = A[i2];
180 A[i2] = tmpp;
181 }
182 else {
183 ALLOCA (dReal,tmprow,n * sizeof(dReal));
184
185#ifdef dUSE_MALLOC_FOR_ALLOCA
186 if (tmprow == NULL) {
187 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
188 return;
189 }
190#endif
191
192 memcpy (tmprow,A[i1],n * sizeof(dReal));
193 memcpy (A[i1],A[i2],n * sizeof(dReal));
194 memcpy (A[i2],tmprow,n * sizeof(dReal));
195 UNALLOCA(tmprow);
196 }
197 // swap columns the hard way
198 for (i=i2+1; i<n; i++) {
199 dReal tmp = A[i][i1];
200 A[i][i1] = A[i][i2];
201 A[i][i2] = tmp;
202 }
203# else
204 dReal tmp;
205 ALLOCA (dReal,tmprow,n * sizeof(dReal));
206
207#ifdef dUSE_MALLOC_FOR_ALLOCA
208 if (tmprow == NULL) {
209 return;
210 }
211#endif
212
213 if (i1 > 0) {
214 memcpy (tmprow,A+i1*nskip,i1*sizeof(dReal));
215 memcpy (A+i1*nskip,A+i2*nskip,i1*sizeof(dReal));
216 memcpy (A+i2*nskip,tmprow,i1*sizeof(dReal));
217 }
218 for (i=i1+1; i<i2; i++) {
219 tmp = A[i2*nskip+i];
220 A[i2*nskip+i] = A[i*nskip+i1];
221 A[i*nskip+i1] = tmp;
222 }
223 tmp = A[i1*nskip+i1];
224 A[i1*nskip+i1] = A[i2*nskip+i2];
225 A[i2*nskip+i2] = tmp;
226 for (i=i2+1; i<n; i++) {
227 tmp = A[i*nskip+i1];
228 A[i*nskip+i1] = A[i*nskip+i2];
229 A[i*nskip+i2] = tmp;
230 }
231 UNALLOCA(tmprow);
232# endif
233
234}
235
236
237// swap two indexes in the n*n LCP problem. i1 must be <= i2.
238
239static void swapProblem (ATYPE A, dReal *x, dReal *b, dReal *w, dReal *lo,
240 dReal *hi, int *p, int *state, int *findex,
241 int n, int i1, int i2, int nskip,
242 int do_fast_row_swaps)
243{
244 dReal tmp;
245 int tmpi;
246 dIASSERT (n>0 && i1 >=0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n &&
247 i1 <= i2);
248 if (i1==i2) return;
249 swapRowsAndCols (A,n,i1,i2,nskip,do_fast_row_swaps);
250#ifdef dUSE_MALLOC_FOR_ALLOCA
251 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY)
252 return;
253#endif
254 tmp = x[i1];
255 x[i1] = x[i2];
256 x[i2] = tmp;
257 tmp = b[i1];
258 b[i1] = b[i2];
259 b[i2] = tmp;
260 tmp = w[i1];
261 w[i1] = w[i2];
262 w[i2] = tmp;
263 tmp = lo[i1];
264 lo[i1] = lo[i2];
265 lo[i2] = tmp;
266 tmp = hi[i1];
267 hi[i1] = hi[i2];
268 hi[i2] = tmp;
269 tmpi = p[i1];
270 p[i1] = p[i2];
271 p[i2] = tmpi;
272 tmpi = state[i1];
273 state[i1] = state[i2];
274 state[i2] = tmpi;
275 if (findex) {
276 tmpi = findex[i1];
277 findex[i1] = findex[i2];
278 findex[i2] = tmpi;
279 }
280}
281
282
283// for debugging - check that L,d is the factorization of A[C,C].
284// A[C,C] has size nC*nC and leading dimension nskip.
285// L has size nC*nC and leading dimension nskip.
286// d has size nC.
287
288#ifdef DEBUG_LCP
289
290static void checkFactorization (ATYPE A, dReal *_L, dReal *_d,
291 int nC, int *C, int nskip)
292{
293 int i,j;
294 if (nC==0) return;
295
296 // get A1=A, copy the lower triangle to the upper triangle, get A2=A[C,C]
297 dMatrix A1 (nC,nC);
298 for (i=0; i<nC; i++) {
299 for (j=0; j<=i; j++) A1(i,j) = A1(j,i) = AROW(i)[j];
300 }
301 dMatrix A2 = A1.select (nC,C,nC,C);
302
303 // printf ("A1=\n"); A1.print(); printf ("\n");
304 // printf ("A2=\n"); A2.print(); printf ("\n");
305
306 // compute A3 = L*D*L'
307 dMatrix L (nC,nC,_L,nskip,1);
308 dMatrix D (nC,nC);
309 for (i=0; i<nC; i++) D(i,i) = 1/_d[i];
310 L.clearUpperTriangle();
311 for (i=0; i<nC; i++) L(i,i) = 1;
312 dMatrix A3 = L * D * L.transpose();
313
314 // printf ("L=\n"); L.print(); printf ("\n");
315 // printf ("D=\n"); D.print(); printf ("\n");
316 // printf ("A3=\n"); A2.print(); printf ("\n");
317
318 // compare A2 and A3
319 dReal diff = A2.maxDifference (A3);
320 if (diff > 1e-8)
321 dDebug (0,"L*D*L' check, maximum difference = %.6e\n",diff);
322}
323
324#endif
325
326
327// for debugging
328
329#ifdef DEBUG_LCP
330
331static void checkPermutations (int i, int n, int nC, int nN, int *p, int *C)
332{
333 int j,k;
334 dIASSERT (nC>=0 && nN>=0 && (nC+nN)==i && i < n);
335 for (k=0; k<i; k++) dIASSERT (p[k] >= 0 && p[k] < i);
336 for (k=i; k<n; k++) dIASSERT (p[k] == k);
337 for (j=0; j<nC; j++) {
338 int C_is_bad = 1;
339 for (k=0; k<nC; k++) if (C[k]==j) C_is_bad = 0;
340 dIASSERT (C_is_bad==0);
341 }
342}
343
344#endif
345
346//***************************************************************************
347// dLCP manipulator object. this represents an n*n LCP problem.
348//
349// two index sets C and N are kept. each set holds a subset of
350// the variable indexes 0..n-1. an index can only be in one set.
351// initially both sets are empty.
352//
353// the index set C is special: solutions to A(C,C)\A(C,i) can be generated.
354
355#ifdef dLCP_SLOW
356
357// simple but slow implementation of dLCP, for testing the LCP drivers.
358
359#include "array.h"
360
361struct dLCP {
362 int n,nub,nskip;
363 dReal *Adata,*x,*b,*w,*lo,*hi; // LCP problem data
364 ATYPE A; // A rows
365 dArray<int> C,N; // index sets
366 int last_i_for_solve1; // last i value given to solve1
367
368 dLCP (int _n, int _nub, dReal *_Adata, dReal *_x, dReal *_b, dReal *_w,
369 dReal *_lo, dReal *_hi, dReal *_L, dReal *_d,
370 dReal *_Dell, dReal *_ell, dReal *_tmp,
371 int *_state, int *_findex, int *_p, int *_C, dReal **Arows);
372 // the constructor is given an initial problem description (A,x,b,w) and
373 // space for other working data (which the caller may allocate on the stack).
374 // some of this data is specific to the fast dLCP implementation.
375 // the matrices A and L have size n*n, vectors have size n*1.
376 // A represents a symmetric matrix but only the lower triangle is valid.
377 // `nub' is the number of unbounded indexes at the start. all the indexes
378 // 0..nub-1 will be put into C.
379
380 ~dLCP();
381
382 int getNub() { return nub; }
383 // return the value of `nub'. the constructor may want to change it,
384 // so the caller should find out its new value.
385
386 // transfer functions: transfer index i to the given set (C or N). indexes
387 // less than `nub' can never be given. A,x,b,w,etc may be permuted by these
388 // functions, the caller must be robust to this.
389
390 void transfer_i_to_C (int i);
391 // this assumes C and N span 1:i-1. this also assumes that solve1() has
392 // been recently called for the same i without any other transfer
393 // functions in between (thereby allowing some data reuse for the fast
394 // implementation).
395 void transfer_i_to_N (int i);
396 // this assumes C and N span 1:i-1.
397 void transfer_i_from_N_to_C (int i);
398 void transfer_i_from_C_to_N (int i);
399
400 int numC();
401 int numN();
402 // return the number of indexes in set C/N
403
404 int indexC (int i);
405 int indexN (int i);
406 // return index i in set C/N.
407
408 // accessor and arithmetic functions. Aij translates as A(i,j), etc.
409 // make sure that only the lower triangle of A is ever referenced.
410
411 dReal Aii (int i);
412 dReal AiC_times_qC (int i, dReal *q);
413 dReal AiN_times_qN (int i, dReal *q); // for all Nj
414 void pN_equals_ANC_times_qC (dReal *p, dReal *q); // for all Nj
415 void pN_plusequals_ANi (dReal *p, int i, int sign=1);
416 // for all Nj. sign = +1,-1. assumes i > maximum index in N.
417 void pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q);
418 void pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q); // for all Nj
419 void solve1 (dReal *a, int i, int dir=1, int only_transfer=0);
420 // get a(C) = - dir * A(C,C) \ A(C,i). dir must be +/- 1.
421 // the fast version of this function computes some data that is needed by
422 // transfer_i_to_C(). if only_transfer is nonzero then this function
423 // *only* computes that data, it does not set a(C).
424
425 void unpermute();
426 // call this at the end of the LCP function. if the x/w values have been
427 // permuted then this will unscramble them.
428};
429
430
431dLCP::dLCP (int _n, int _nub, dReal *_Adata, dReal *_x, dReal *_b, dReal *_w,
432 dReal *_lo, dReal *_hi, dReal *_L, dReal *_d,
433 dReal *_Dell, dReal *_ell, dReal *_tmp,
434 int *_state, int *_findex, int *_p, int *_C, dReal **Arows)
435{
436 dUASSERT (_findex==0,"slow dLCP object does not support findex array");
437
438 n = _n;
439 nub = _nub;
440 Adata = _Adata;
441 A = 0;
442 x = _x;
443 b = _b;
444 w = _w;
445 lo = _lo;
446 hi = _hi;
447 nskip = dPAD(n);
448 dSetZero (x,n);
449 last_i_for_solve1 = -1;
450
451 int i,j;
452 C.setSize (n);
453 N.setSize (n);
454 for (i=0; i<n; i++) {
455 C[i] = 0;
456 N[i] = 0;
457 }
458
459# ifdef ROWPTRS
460 // make matrix row pointers
461 A = Arows;
462 for (i=0; i<n; i++) A[i] = Adata + i*nskip;
463# else
464 A = Adata;
465# endif
466
467 // lets make A symmetric
468 for (i=0; i<n; i++) {
469 for (j=i+1; j<n; j++) AROW(i)[j] = AROW(j)[i];
470 }
471
472 // if nub>0, put all indexes 0..nub-1 into C and solve for x
473 if (nub > 0) {
474 for (i=0; i<nub; i++) memcpy (_L+i*nskip,AROW(i),(i+1)*sizeof(dReal));
475 dFactorLDLT (_L,_d,nub,nskip);
476 memcpy (x,b,nub*sizeof(dReal));
477 dSolveLDLT (_L,_d,x,nub,nskip);
478 dSetZero (_w,nub);
479 for (i=0; i<nub; i++) C[i] = 1;
480 }
481}
482
483
484dLCP::~dLCP()
485{
486}
487
488
489void dLCP::transfer_i_to_C (int i)
490{
491 if (i < nub) dDebug (0,"bad i");
492 if (C[i]) dDebug (0,"i already in C");
493 if (N[i]) dDebug (0,"i already in N");
494 for (int k=0; k<i; k++) {
495 if (!(C[k] ^ N[k])) dDebug (0,"assumptions for C and N violated");
496 }
497 for (int k=i; k<n; k++)
498 if (C[k] || N[k]) dDebug (0,"assumptions for C and N violated");
499 if (i != last_i_for_solve1) dDebug (0,"assumptions for i violated");
500 last_i_for_solve1 = -1;
501 C[i] = 1;
502}
503
504
505void dLCP::transfer_i_to_N (int i)
506{
507 if (i < nub) dDebug (0,"bad i");
508 if (C[i]) dDebug (0,"i already in C");
509 if (N[i]) dDebug (0,"i already in N");
510 for (int k=0; k<i; k++)
511 if (!C[k] && !N[k]) dDebug (0,"assumptions for C and N violated");
512 for (int k=i; k<n; k++)
513 if (C[k] || N[k]) dDebug (0,"assumptions for C and N violated");
514 last_i_for_solve1 = -1;
515 N[i] = 1;
516}
517
518
519void dLCP::transfer_i_from_N_to_C (int i)
520{
521 if (i < nub) dDebug (0,"bad i");
522 if (C[i]) dDebug (0,"i already in C");
523 if (!N[i]) dDebug (0,"i not in N");
524 last_i_for_solve1 = -1;
525 N[i] = 0;
526 C[i] = 1;
527}
528
529
530void dLCP::transfer_i_from_C_to_N (int i)
531{
532 if (i < nub) dDebug (0,"bad i");
533 if (N[i]) dDebug (0,"i already in N");
534 if (!C[i]) dDebug (0,"i not in C");
535 last_i_for_solve1 = -1;
536 C[i] = 0;
537 N[i] = 1;
538}
539
540
541int dLCP::numC()
542{
543 int i,count=0;
544 for (i=0; i<n; i++) if (C[i]) count++;
545 return count;
546}
547
548
549int dLCP::numN()
550{
551 int i,count=0;
552 for (i=0; i<n; i++) if (N[i]) count++;
553 return count;
554}
555
556
557int dLCP::indexC (int i)
558{
559 int k,count=0;
560 for (k=0; k<n; k++) {
561 if (C[k]) {
562 if (count==i) return k;
563 count++;
564 }
565 }
566 dDebug (0,"bad index C (%d)",i);
567 return 0;
568}
569
570
571int dLCP::indexN (int i)
572{
573 int k,count=0;
574 for (k=0; k<n; k++) {
575 if (N[k]) {
576 if (count==i) return k;
577 count++;
578 }
579 }
580 dDebug (0,"bad index into N");
581 return 0;
582}
583
584
585dReal dLCP::Aii (int i)
586{
587 return AROW(i)[i];
588}
589
590
591dReal dLCP::AiC_times_qC (int i, dReal *q)
592{
593 dReal sum = 0;
594 for (int k=0; k<n; k++) if (C[k]) sum += AROW(i)[k] * q[k];
595 return sum;
596}
597
598
599dReal dLCP::AiN_times_qN (int i, dReal *q)
600{
601 dReal sum = 0;
602 for (int k=0; k<n; k++) if (N[k]) sum += AROW(i)[k] * q[k];
603 return sum;
604}
605
606
607void dLCP::pN_equals_ANC_times_qC (dReal *p, dReal *q)
608{
609 dReal sum;
610 for (int ii=0; ii<n; ii++) if (N[ii]) {
611 sum = 0;
612 for (int jj=0; jj<n; jj++) if (C[jj]) sum += AROW(ii)[jj] * q[jj];
613 p[ii] = sum;
614 }
615}
616
617
618void dLCP::pN_plusequals_ANi (dReal *p, int i, int sign)
619{
620 int k;
621 for (k=0; k<n; k++) if (N[k] && k >= i) dDebug (0,"N assumption violated");
622 if (sign > 0) {
623 for (k=0; k<n; k++) if (N[k]) p[k] += AROW(i)[k];
624 }
625 else {
626 for (k=0; k<n; k++) if (N[k]) p[k] -= AROW(i)[k];
627 }
628}
629
630
631void dLCP::pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q)
632{
633 for (int k=0; k<n; k++) if (C[k]) p[k] += s*q[k];
634}
635
636
637void dLCP::pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q)
638{
639 for (int k=0; k<n; k++) if (N[k]) p[k] += s*q[k];
640}
641
642
643void dLCP::solve1 (dReal *a, int i, int dir, int only_transfer)
644{
645
646 ALLOCA (dReal,AA,n*nskip*sizeof(dReal));
647#ifdef dUSE_MALLOC_FOR_ALLOCA
648 if (AA == NULL) {
649 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
650 return;
651 }
652#endif
653 ALLOCA (dReal,dd,n*sizeof(dReal));
654#ifdef dUSE_MALLOC_FOR_ALLOCA
655 if (dd == NULL) {
656 UNALLOCA(AA);
657 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
658 return;
659 }
660#endif
661 ALLOCA (dReal,bb,n*sizeof(dReal));
662#ifdef dUSE_MALLOC_FOR_ALLOCA
663 if (bb == NULL) {
664 UNALLOCA(AA);
665 UNALLOCA(dd);
666 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
667 return;
668 }
669#endif
670
671 int ii,jj,AAi,AAj;
672
673 last_i_for_solve1 = i;
674 AAi = 0;
675 for (ii=0; ii<n; ii++) if (C[ii]) {
676 AAj = 0;
677 for (jj=0; jj<n; jj++) if (C[jj]) {
678 AA[AAi*nskip+AAj] = AROW(ii)[jj];
679 AAj++;
680 }
681 bb[AAi] = AROW(i)[ii];
682 AAi++;
683 }
684 if (AAi==0) {
685 UNALLOCA (AA);
686 UNALLOCA (dd);
687 UNALLOCA (bb);
688 return;
689 }
690
691 dFactorLDLT (AA,dd,AAi,nskip);
692 dSolveLDLT (AA,dd,bb,AAi,nskip);
693
694 AAi=0;
695 if (dir > 0) {
696 for (ii=0; ii<n; ii++) if (C[ii]) a[ii] = -bb[AAi++];
697 }
698 else {
699 for (ii=0; ii<n; ii++) if (C[ii]) a[ii] = bb[AAi++];
700 }
701
702 UNALLOCA (AA);
703 UNALLOCA (dd);
704 UNALLOCA (bb);
705}
706
707
708void dLCP::unpermute()
709{
710}
711
712#endif // dLCP_SLOW
713
714//***************************************************************************
715// fast implementation of dLCP. see the above definition of dLCP for
716// interface comments.
717//
718// `p' records the permutation of A,x,b,w,etc. p is initially 1:n and is
719// permuted as the other vectors/matrices are permuted.
720//
721// A,x,b,w,lo,hi,state,findex,p,c are permuted such that sets C,N have
722// contiguous indexes. the don't-care indexes follow N.
723//
724// an L*D*L' factorization is maintained of A(C,C), and whenever indexes are
725// added or removed from the set C the factorization is updated.
726// thus L*D*L'=A[C,C], i.e. a permuted top left nC*nC submatrix of A.
727// the leading dimension of the matrix L is always `nskip'.
728//
729// at the start there may be other indexes that are unbounded but are not
730// included in `nub'. dLCP will permute the matrix so that absolutely all
731// unbounded vectors are at the start. thus there may be some initial
732// permutation.
733//
734// the algorithms here assume certain patterns, particularly with respect to
735// index transfer.
736
737#ifdef dLCP_FAST
738
739struct dLCP {
740 int n,nskip,nub;
741 ATYPE A; // A rows
742 dReal *Adata,*x,*b,*w,*lo,*hi; // permuted LCP problem data
743 dReal *L,*d; // L*D*L' factorization of set C
744 dReal *Dell,*ell,*tmp;
745 int *state,*findex,*p,*C;
746 int nC,nN; // size of each index set
747
748 dLCP (int _n, int _nub, dReal *_Adata, dReal *_x, dReal *_b, dReal *_w,
749 dReal *_lo, dReal *_hi, dReal *_L, dReal *_d,
750 dReal *_Dell, dReal *_ell, dReal *_tmp,
751 int *_state, int *_findex, int *_p, int *_C, dReal **Arows);
752 int getNub() { return nub; }
753 void transfer_i_to_C (int i);
754 void transfer_i_to_N (int i)
755 { nN++; } // because we can assume C and N span 1:i-1
756 void transfer_i_from_N_to_C (int i);
757 void transfer_i_from_C_to_N (int i);
758 int numC() { return nC; }
759 int numN() { return nN; }
760 int indexC (int i) { return i; }
761 int indexN (int i) { return i+nC; }
762 dReal Aii (int i) { return AROW(i)[i]; }
763 dReal AiC_times_qC (int i, dReal *q) { return dDot (AROW(i),q,nC); }
764 dReal AiN_times_qN (int i, dReal *q) { return dDot (AROW(i)+nC,q+nC,nN); }
765 void pN_equals_ANC_times_qC (dReal *p, dReal *q);
766 void pN_plusequals_ANi (dReal *p, int i, int sign=1);
767 void pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q)
768 { for (int i=0; i<nC; i++) p[i] += s*q[i]; }
769 void pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q)
770 { for (int i=0; i<nN; i++) p[i+nC] += s*q[i+nC]; }
771 void solve1 (dReal *a, int i, int dir=1, int only_transfer=0);
772 void unpermute();
773};
774
775
776dLCP::dLCP (int _n, int _nub, dReal *_Adata, dReal *_x, dReal *_b, dReal *_w,
777 dReal *_lo, dReal *_hi, dReal *_L, dReal *_d,
778 dReal *_Dell, dReal *_ell, dReal *_tmp,
779 int *_state, int *_findex, int *_p, int *_C, dReal **Arows)
780{
781 n = _n;
782 nub = _nub;
783 Adata = _Adata;
784 A = 0;
785 x = _x;
786 b = _b;
787 w = _w;
788 lo = _lo;
789 hi = _hi;
790 L = _L;
791 d = _d;
792 Dell = _Dell;
793 ell = _ell;
794 tmp = _tmp;
795 state = _state;
796 findex = _findex;
797 p = _p;
798 C = _C;
799 nskip = dPAD(n);
800 dSetZero (x,n);
801
802 int k;
803
804# ifdef ROWPTRS
805 // make matrix row pointers
806 A = Arows;
807 for (k=0; k<n; k++) A[k] = Adata + k*nskip;
808# else
809 A = Adata;
810# endif
811
812 nC = 0;
813 nN = 0;
814 for (k=0; k<n; k++) p[k]=k; // initially unpermuted
815
816 /*
817 // for testing, we can do some random swaps in the area i > nub
818 if (nub < n) {
819 for (k=0; k<100; k++) {
820 int i1,i2;
821 do {
822 i1 = dRandInt(n-nub)+nub;
823 i2 = dRandInt(n-nub)+nub;
824 }
825 while (i1 > i2);
826 //printf ("--> %d %d\n",i1,i2);
827 swapProblem (A,x,b,w,lo,hi,p,state,findex,n,i1,i2,nskip,0);
828 }
829 }
830 */
831
832 // permute the problem so that *all* the unbounded variables are at the
833 // start, i.e. look for unbounded variables not included in `nub'. we can
834 // potentially push up `nub' this way and get a bigger initial factorization.
835 // note that when we swap rows/cols here we must not just swap row pointers,
836 // as the initial factorization relies on the data being all in one chunk.
837 // variables that have findex >= 0 are *not* considered to be unbounded even
838 // if lo=-inf and hi=inf - this is because these limits may change during the
839 // solution process.
840
841 for (k=nub; k<n; k++) {
842 if (findex && findex[k] >= 0) continue;
843 if (lo[k]==-dInfinity && hi[k]==dInfinity) {
844 swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nub,k,nskip,0);
845 nub++;
846 }
847 }
848
849 // if there are unbounded variables at the start, factorize A up to that
850 // point and solve for x. this puts all indexes 0..nub-1 into C.
851 if (nub > 0) {
852 for (k=0; k<nub; k++) memcpy (L+k*nskip,AROW(k),(k+1)*sizeof(dReal));
853 dFactorLDLT (L,d,nub,nskip);
854 memcpy (x,b,nub*sizeof(dReal));
855 dSolveLDLT (L,d,x,nub,nskip);
856 dSetZero (w,nub);
857 for (k=0; k<nub; k++) C[k] = k;
858 nC = nub;
859 }
860
861 // permute the indexes > nub such that all findex variables are at the end
862 if (findex) {
863 int num_at_end = 0;
864 for (k=n-1; k >= nub; k--) {
865 if (findex[k] >= 0) {
866 swapProblem (A,x,b,w,lo,hi,p,state,findex,n,k,n-1-num_at_end,nskip,1);
867 num_at_end++;
868 }
869 }
870 }
871
872 // print info about indexes
873 /*
874 for (k=0; k<n; k++) {
875 if (k<nub) printf ("C");
876 else if (lo[k]==-dInfinity && hi[k]==dInfinity) printf ("c");
877 else printf (".");
878 }
879 printf ("\n");
880 */
881}
882
883
884void dLCP::transfer_i_to_C (int i)
885{
886 int j;
887 if (nC > 0) {
888 // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C))
889 for (j=0; j<nC; j++) L[nC*nskip+j] = ell[j];
890 d[nC] = dRecip (AROW(i)[i] - dDot(ell,Dell,nC));
891 }
892 else {
893 d[0] = dRecip (AROW(i)[i]);
894 }
895 swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nC,i,nskip,1);
896 C[nC] = nC;
897 nC++;
898
899# ifdef DEBUG_LCP
900 checkFactorization (A,L,d,nC,C,nskip);
901 if (i < (n-1)) checkPermutations (i+1,n,nC,nN,p,C);
902# endif
903}
904
905
906void dLCP::transfer_i_from_N_to_C (int i)
907{
908 int j;
909 if (nC > 0) {
910 dReal *aptr = AROW(i);
911# ifdef NUB_OPTIMIZATIONS
912 // if nub>0, initial part of aptr unpermuted
913 for (j=0; j<nub; j++) Dell[j] = aptr[j];
914 for (j=nub; j<nC; j++) Dell[j] = aptr[C[j]];
915# else
916 for (j=0; j<nC; j++) Dell[j] = aptr[C[j]];
917# endif
918 dSolveL1 (L,Dell,nC,nskip);
919 for (j=0; j<nC; j++) ell[j] = Dell[j] * d[j];
920 for (j=0; j<nC; j++) L[nC*nskip+j] = ell[j];
921 d[nC] = dRecip (AROW(i)[i] - dDot(ell,Dell,nC));
922 }
923 else {
924 d[0] = dRecip (AROW(i)[i]);
925 }
926 swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nC,i,nskip,1);
927 C[nC] = nC;
928 nN--;
929 nC++;
930
931 // @@@ TO DO LATER
932 // if we just finish here then we'll go back and re-solve for
933 // delta_x. but actually we can be more efficient and incrementally
934 // update delta_x here. but if we do this, we wont have ell and Dell
935 // to use in updating the factorization later.
936
937# ifdef DEBUG_LCP
938 checkFactorization (A,L,d,nC,C,nskip);
939# endif
940}
941
942
943void dLCP::transfer_i_from_C_to_N (int i)
944{
945 // remove a row/column from the factorization, and adjust the
946 // indexes (black magic!)
947 int j,k;
948 for (j=0; j<nC; j++) if (C[j]==i) {
949 dLDLTRemove (A,C,L,d,n,nC,j,nskip);
950 for (k=0; k<nC; k++) if (C[k]==nC-1) {
951 C[k] = C[j];
952 if (j < (nC-1)) memmove (C+j,C+j+1,(nC-j-1)*sizeof(int));
953 break;
954 }
955 dIASSERT (k < nC);
956 break;
957 }
958 dIASSERT (j < nC);
959 swapProblem (A,x,b,w,lo,hi,p,state,findex,n,i,nC-1,nskip,1);
960 nC--;
961 nN++;
962
963# ifdef DEBUG_LCP
964 checkFactorization (A,L,d,nC,C,nskip);
965# endif
966}
967
968
969void dLCP::pN_equals_ANC_times_qC (dReal *p, dReal *q)
970{
971 // we could try to make this matrix-vector multiplication faster using
972 // outer product matrix tricks, e.g. with the dMultidotX() functions.
973 // but i tried it and it actually made things slower on random 100x100
974 // problems because of the overhead involved. so we'll stick with the
975 // simple method for now.
976 for (int i=0; i<nN; i++) p[i+nC] = dDot (AROW(i+nC),q,nC);
977}
978
979
980void dLCP::pN_plusequals_ANi (dReal *p, int i, int sign)
981{
982 dReal *aptr = AROW(i)+nC;
983 if (sign > 0) {
984 for (int i=0; i<nN; i++) p[i+nC] += aptr[i];
985 }
986 else {
987 for (int i=0; i<nN; i++) p[i+nC] -= aptr[i];
988 }
989}
990
991
992void dLCP::solve1 (dReal *a, int i, int dir, int only_transfer)
993{
994 // the `Dell' and `ell' that are computed here are saved. if index i is
995 // later added to the factorization then they can be reused.
996 //
997 // @@@ question: do we need to solve for entire delta_x??? yes, but
998 // only if an x goes below 0 during the step.
999
1000 int j;
1001 if (nC > 0) {
1002 dReal *aptr = AROW(i);
1003# ifdef NUB_OPTIMIZATIONS
1004 // if nub>0, initial part of aptr[] is guaranteed unpermuted
1005 for (j=0; j<nub; j++) Dell[j] = aptr[j];
1006 for (j=nub; j<nC; j++) Dell[j] = aptr[C[j]];
1007# else
1008 for (j=0; j<nC; j++) Dell[j] = aptr[C[j]];
1009# endif
1010 dSolveL1 (L,Dell,nC,nskip);
1011 for (j=0; j<nC; j++) ell[j] = Dell[j] * d[j];
1012
1013 if (!only_transfer) {
1014 for (j=0; j<nC; j++) tmp[j] = ell[j];
1015 dSolveL1T (L,tmp,nC,nskip);
1016 if (dir > 0) {
1017 for (j=0; j<nC; j++) a[C[j]] = -tmp[j];
1018 }
1019 else {
1020 for (j=0; j<nC; j++) a[C[j]] = tmp[j];
1021 }
1022 }
1023 }
1024}
1025
1026
1027void dLCP::unpermute()
1028{
1029 // now we have to un-permute x and w
1030 int j;
1031 ALLOCA (dReal,tmp,n*sizeof(dReal));
1032#ifdef dUSE_MALLOC_FOR_ALLOCA
1033 if (tmp == NULL) {
1034 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1035 return;
1036 }
1037#endif
1038 memcpy (tmp,x,n*sizeof(dReal));
1039 for (j=0; j<n; j++) x[p[j]] = tmp[j];
1040 memcpy (tmp,w,n*sizeof(dReal));
1041 for (j=0; j<n; j++) w[p[j]] = tmp[j];
1042
1043 UNALLOCA (tmp);
1044}
1045
1046#endif // dLCP_FAST
1047
1048//***************************************************************************
1049// an unoptimized Dantzig LCP driver routine for the basic LCP problem.
1050// must have lo=0, hi=dInfinity, and nub=0.
1051
1052void dSolveLCPBasic (int n, dReal *A, dReal *x, dReal *b,
1053 dReal *w, int nub, dReal *lo, dReal *hi)
1054{
1055 dAASSERT (n>0 && A && x && b && w && nub == 0);
1056
1057 int i,k;
1058 int nskip = dPAD(n);
1059 ALLOCA (dReal,L,n*nskip*sizeof(dReal));
1060#ifdef dUSE_MALLOC_FOR_ALLOCA
1061 if (L == NULL) {
1062 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1063 return;
1064 }
1065#endif
1066 ALLOCA (dReal,d,n*sizeof(dReal));
1067#ifdef dUSE_MALLOC_FOR_ALLOCA
1068 if (d == NULL) {
1069 UNALLOCA(L);
1070 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1071 return;
1072 }
1073#endif
1074 ALLOCA (dReal,delta_x,n*sizeof(dReal));
1075#ifdef dUSE_MALLOC_FOR_ALLOCA
1076 if (delta_x == NULL) {
1077 UNALLOCA(d);
1078 UNALLOCA(L);
1079 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1080 return;
1081 }
1082#endif
1083 ALLOCA (dReal,delta_w,n*sizeof(dReal));
1084#ifdef dUSE_MALLOC_FOR_ALLOCA
1085 if (delta_w == NULL) {
1086 UNALLOCA(delta_x);
1087 UNALLOCA(d);
1088 UNALLOCA(L);
1089 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1090 return;
1091 }
1092#endif
1093 ALLOCA (dReal,Dell,n*sizeof(dReal));
1094#ifdef dUSE_MALLOC_FOR_ALLOCA
1095 if (Dell == NULL) {
1096 UNALLOCA(delta_w);
1097 UNALLOCA(delta_x);
1098 UNALLOCA(d);
1099 UNALLOCA(L);
1100 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1101 return;
1102 }
1103#endif
1104 ALLOCA (dReal,ell,n*sizeof(dReal));
1105#ifdef dUSE_MALLOC_FOR_ALLOCA
1106 if (ell == NULL) {
1107 UNALLOCA(Dell);
1108 UNALLOCA(delta_w);
1109 UNALLOCA(delta_x);
1110 UNALLOCA(d);
1111 UNALLOCA(L);
1112 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1113 return;
1114 }
1115#endif
1116 ALLOCA (dReal,tmp,n*sizeof(dReal));
1117#ifdef dUSE_MALLOC_FOR_ALLOCA
1118 if (tmp == NULL) {
1119 UNALLOCA(ell);
1120 UNALLOCA(Dell);
1121 UNALLOCA(delta_w);
1122 UNALLOCA(delta_x);
1123 UNALLOCA(d);
1124 UNALLOCA(L);
1125 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1126 return;
1127 }
1128#endif
1129 ALLOCA (dReal*,Arows,n*sizeof(dReal*));
1130#ifdef dUSE_MALLOC_FOR_ALLOCA
1131 if (Arows == NULL) {
1132 UNALLOCA(tmp);
1133 UNALLOCA(ell);
1134 UNALLOCA(Dell);
1135 UNALLOCA(delta_w);
1136 UNALLOCA(delta_x);
1137 UNALLOCA(d);
1138 UNALLOCA(L);
1139 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1140 return;
1141 }
1142#endif
1143 ALLOCA (int,p,n*sizeof(int));
1144#ifdef dUSE_MALLOC_FOR_ALLOCA
1145 if (p == NULL) {
1146 UNALLOCA(Arows);
1147 UNALLOCA(tmp);
1148 UNALLOCA(ell);
1149 UNALLOCA(Dell);
1150 UNALLOCA(delta_w);
1151 UNALLOCA(delta_x);
1152 UNALLOCA(d);
1153 UNALLOCA(L);
1154 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1155 return;
1156 }
1157#endif
1158 ALLOCA (int,C,n*sizeof(int));
1159#ifdef dUSE_MALLOC_FOR_ALLOCA
1160 if (C == NULL) {
1161 UNALLOCA(p);
1162 UNALLOCA(Arows);
1163 UNALLOCA(tmp);
1164 UNALLOCA(ell);
1165 UNALLOCA(Dell);
1166 UNALLOCA(delta_w);
1167 UNALLOCA(delta_x);
1168 UNALLOCA(d);
1169 UNALLOCA(L);
1170 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1171 return;
1172 }
1173#endif
1174 ALLOCA (int,dummy,n*sizeof(int));
1175#ifdef dUSE_MALLOC_FOR_ALLOCA
1176 if (dummy == NULL) {
1177 UNALLOCA(C);
1178 UNALLOCA(p);
1179 UNALLOCA(Arows);
1180 UNALLOCA(tmp);
1181 UNALLOCA(ell);
1182 UNALLOCA(Dell);
1183 UNALLOCA(delta_w);
1184 UNALLOCA(delta_x);
1185 UNALLOCA(d);
1186 UNALLOCA(L);
1187 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1188 return;
1189 }
1190#endif
1191
1192
1193 dLCP lcp (n,0,A,x,b,w,tmp,tmp,L,d,Dell,ell,tmp,dummy,dummy,p,C,Arows);
1194 nub = lcp.getNub();
1195
1196 for (i=0; i<n; i++) {
1197 w[i] = lcp.AiC_times_qC (i,x) - b[i];
1198 if (w[i] >= 0) {
1199 lcp.transfer_i_to_N (i);
1200 }
1201 else {
1202 for (;;) {
1203 // compute: delta_x(C) = -A(C,C)\A(C,i)
1204 dSetZero (delta_x,n);
1205 lcp.solve1 (delta_x,i);
1206#ifdef dUSE_MALLOC_FOR_ALLOCA
1207 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1208 UNALLOCA(dummy);
1209 UNALLOCA(C);
1210 UNALLOCA(p);
1211 UNALLOCA(Arows);
1212 UNALLOCA(tmp);
1213 UNALLOCA(ell);
1214 UNALLOCA(Dell);
1215 UNALLOCA(delta_w);
1216 UNALLOCA(delta_x);
1217 UNALLOCA(d);
1218 UNALLOCA(L);
1219 return;
1220 }
1221#endif
1222 delta_x[i] = 1;
1223
1224 // compute: delta_w = A*delta_x
1225 dSetZero (delta_w,n);
1226 lcp.pN_equals_ANC_times_qC (delta_w,delta_x);
1227 lcp.pN_plusequals_ANi (delta_w,i);
1228 delta_w[i] = lcp.AiC_times_qC (i,delta_x) + lcp.Aii(i);
1229
1230 // find index to switch
1231 int si = i; // si = switch index
1232 int si_in_N = 0; // set to 1 if si in N
1233 dReal s = -w[i]/delta_w[i];
1234
1235 if (s <= 0) {
1236 dMessage (d_ERR_LCP, "LCP internal error, s <= 0 (s=%.4e)",s);
1237 if (i < (n-1)) {
1238 dSetZero (x+i,n-i);
1239 dSetZero (w+i,n-i);
1240 }
1241 goto done;
1242 }
1243
1244 for (k=0; k < lcp.numN(); k++) {
1245 if (delta_w[lcp.indexN(k)] < 0) {
1246 dReal s2 = -w[lcp.indexN(k)] / delta_w[lcp.indexN(k)];
1247 if (s2 < s) {
1248 s = s2;
1249 si = lcp.indexN(k);
1250 si_in_N = 1;
1251 }
1252 }
1253 }
1254 for (k=0; k < lcp.numC(); k++) {
1255 if (delta_x[lcp.indexC(k)] < 0) {
1256 dReal s2 = -x[lcp.indexC(k)] / delta_x[lcp.indexC(k)];
1257 if (s2 < s) {
1258 s = s2;
1259 si = lcp.indexC(k);
1260 si_in_N = 0;
1261 }
1262 }
1263 }
1264
1265 // apply x = x + s * delta_x
1266 lcp.pC_plusequals_s_times_qC (x,s,delta_x);
1267 x[i] += s;
1268 lcp.pN_plusequals_s_times_qN (w,s,delta_w);
1269 w[i] += s * delta_w[i];
1270
1271 // switch indexes between sets if necessary
1272 if (si==i) {
1273 w[i] = 0;
1274 lcp.transfer_i_to_C (i);
1275 break;
1276 }
1277 if (si_in_N) {
1278 w[si] = 0;
1279 lcp.transfer_i_from_N_to_C (si);
1280 }
1281 else {
1282 x[si] = 0;
1283 lcp.transfer_i_from_C_to_N (si);
1284 }
1285 }
1286 }
1287 }
1288
1289 done:
1290 lcp.unpermute();
1291
1292 UNALLOCA (L);
1293 UNALLOCA (d);
1294 UNALLOCA (delta_x);
1295 UNALLOCA (delta_w);
1296 UNALLOCA (Dell);
1297 UNALLOCA (ell);
1298 UNALLOCA (tmp);
1299 UNALLOCA (Arows);
1300 UNALLOCA (p);
1301 UNALLOCA (C);
1302 UNALLOCA (dummy);
1303}
1304
1305//***************************************************************************
1306// an optimized Dantzig LCP driver routine for the lo-hi LCP problem.
1307
1308void dSolveLCP (int n, dReal *A, dReal *x, dReal *b,
1309 dReal *w, int nub, dReal *lo, dReal *hi, int *findex)
1310{
1311 dAASSERT (n>0 && A && x && b && w && lo && hi && nub >= 0 && nub <= n);
1312
1313 int i,k,hit_first_friction_index = 0;
1314 int nskip = dPAD(n);
1315
1316 // if all the variables are unbounded then we can just factor, solve,
1317 // and return
1318 if (nub >= n) {
1319 dFactorLDLT (A,w,n,nskip); // use w for d
1320 dSolveLDLT (A,w,b,n,nskip);
1321 memcpy (x,b,n*sizeof(dReal));
1322 dSetZero (w,n);
1323
1324 return;
1325 }
1326# ifndef dNODEBUG
1327 // check restrictions on lo and hi
1328 for (k=0; k<n; k++) dIASSERT (lo[k] <= 0 && hi[k] >= 0);
1329# endif
1330 ALLOCA (dReal,L,n*nskip*sizeof(dReal));
1331#ifdef dUSE_MALLOC_FOR_ALLOCA
1332 if (L == NULL) {
1333 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1334 return;
1335 }
1336#endif
1337 ALLOCA (dReal,d,n*sizeof(dReal));
1338#ifdef dUSE_MALLOC_FOR_ALLOCA
1339 if (d == NULL) {
1340 UNALLOCA(L);
1341 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1342 return;
1343 }
1344#endif
1345 ALLOCA (dReal,delta_x,n*sizeof(dReal));
1346#ifdef dUSE_MALLOC_FOR_ALLOCA
1347 if (delta_x == NULL) {
1348 UNALLOCA(d);
1349 UNALLOCA(L);
1350 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1351 return;
1352 }
1353#endif
1354 ALLOCA (dReal,delta_w,n*sizeof(dReal));
1355#ifdef dUSE_MALLOC_FOR_ALLOCA
1356 if (delta_w == NULL) {
1357 UNALLOCA(delta_x);
1358 UNALLOCA(d);
1359 UNALLOCA(L);
1360 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1361 return;
1362 }
1363#endif
1364 ALLOCA (dReal,Dell,n*sizeof(dReal));
1365#ifdef dUSE_MALLOC_FOR_ALLOCA
1366 if (Dell == NULL) {
1367 UNALLOCA(delta_w);
1368 UNALLOCA(delta_x);
1369 UNALLOCA(d);
1370 UNALLOCA(L);
1371 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1372 return;
1373 }
1374#endif
1375 ALLOCA (dReal,ell,n*sizeof(dReal));
1376#ifdef dUSE_MALLOC_FOR_ALLOCA
1377 if (ell == NULL) {
1378 UNALLOCA(Dell);
1379 UNALLOCA(delta_w);
1380 UNALLOCA(delta_x);
1381 UNALLOCA(d);
1382 UNALLOCA(L);
1383 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1384 return;
1385 }
1386#endif
1387 ALLOCA (dReal*,Arows,n*sizeof(dReal*));
1388#ifdef dUSE_MALLOC_FOR_ALLOCA
1389 if (Arows == NULL) {
1390 UNALLOCA(ell);
1391 UNALLOCA(Dell);
1392 UNALLOCA(delta_w);
1393 UNALLOCA(delta_x);
1394 UNALLOCA(d);
1395 UNALLOCA(L);
1396 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1397 return;
1398 }
1399#endif
1400 ALLOCA (int,p,n*sizeof(int));
1401#ifdef dUSE_MALLOC_FOR_ALLOCA
1402 if (p == NULL) {
1403 UNALLOCA(Arows);
1404 UNALLOCA(ell);
1405 UNALLOCA(Dell);
1406 UNALLOCA(delta_w);
1407 UNALLOCA(delta_x);
1408 UNALLOCA(d);
1409 UNALLOCA(L);
1410 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1411 return;
1412 }
1413#endif
1414 ALLOCA (int,C,n*sizeof(int));
1415#ifdef dUSE_MALLOC_FOR_ALLOCA
1416 if (C == NULL) {
1417 UNALLOCA(p);
1418 UNALLOCA(Arows);
1419 UNALLOCA(ell);
1420 UNALLOCA(Dell);
1421 UNALLOCA(delta_w);
1422 UNALLOCA(delta_x);
1423 UNALLOCA(d);
1424 UNALLOCA(L);
1425 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1426 return;
1427 }
1428#endif
1429
1430 int dir;
1431 dReal dirf;
1432
1433 // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i)
1434 ALLOCA (int,state,n*sizeof(int));
1435#ifdef dUSE_MALLOC_FOR_ALLOCA
1436 if (state == NULL) {
1437 UNALLOCA(C);
1438 UNALLOCA(p);
1439 UNALLOCA(Arows);
1440 UNALLOCA(ell);
1441 UNALLOCA(Dell);
1442 UNALLOCA(delta_w);
1443 UNALLOCA(delta_x);
1444 UNALLOCA(d);
1445 UNALLOCA(L);
1446 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1447 return;
1448 }
1449#endif
1450
1451 // create LCP object. note that tmp is set to delta_w to save space, this
1452 // optimization relies on knowledge of how tmp is used, so be careful!
1453 dLCP *lcp=new dLCP(n,nub,A,x,b,w,lo,hi,L,d,Dell,ell,delta_w,state,findex,p,C,Arows);
1454 nub = lcp->getNub();
1455
1456 // loop over all indexes nub..n-1. for index i, if x(i),w(i) satisfy the
1457 // LCP conditions then i is added to the appropriate index set. otherwise
1458 // x(i),w(i) is driven either +ve or -ve to force it to the valid region.
1459 // as we drive x(i), x(C) is also adjusted to keep w(C) at zero.
1460 // while driving x(i) we maintain the LCP conditions on the other variables
1461 // 0..i-1. we do this by watching out for other x(i),w(i) values going
1462 // outside the valid region, and then switching them between index sets
1463 // when that happens.
1464
1465 for (i=nub; i<n; i++) {
1466 // the index i is the driving index and indexes i+1..n-1 are "dont care",
1467 // i.e. when we make changes to the system those x's will be zero and we
1468 // don't care what happens to those w's. in other words, we only consider
1469 // an (i+1)*(i+1) sub-problem of A*x=b+w.
1470
1471 // if we've hit the first friction index, we have to compute the lo and
1472 // hi values based on the values of x already computed. we have been
1473 // permuting the indexes, so the values stored in the findex vector are
1474 // no longer valid. thus we have to temporarily unpermute the x vector.
1475 // for the purposes of this computation, 0*infinity = 0 ... so if the
1476 // contact constraint's normal force is 0, there should be no tangential
1477 // force applied.
1478
1479 if (hit_first_friction_index == 0 && findex && findex[i] >= 0) {
1480 // un-permute x into delta_w, which is not being used at the moment
1481 for (k=0; k<n; k++) delta_w[p[k]] = x[k];
1482
1483 // set lo and hi values
1484 for (k=i; k<n; k++) {
1485 dReal wfk = delta_w[findex[k]];
1486 if (wfk == 0) {
1487 hi[k] = 0;
1488 lo[k] = 0;
1489 }
1490 else {
1491 hi[k] = dFabs (hi[k] * wfk);
1492 lo[k] = -hi[k];
1493 }
1494 }
1495 hit_first_friction_index = 1;
1496 }
1497
1498 // thus far we have not even been computing the w values for indexes
1499 // greater than i, so compute w[i] now.
1500 w[i] = lcp->AiC_times_qC (i,x) + lcp->AiN_times_qN (i,x) - b[i];
1501
1502 // if lo=hi=0 (which can happen for tangential friction when normals are
1503 // 0) then the index will be assigned to set N with some state. however,
1504 // set C's line has zero size, so the index will always remain in set N.
1505 // with the "normal" switching logic, if w changed sign then the index
1506 // would have to switch to set C and then back to set N with an inverted
1507 // state. this is pointless, and also computationally expensive. to
1508 // prevent this from happening, we use the rule that indexes with lo=hi=0
1509 // will never be checked for set changes. this means that the state for
1510 // these indexes may be incorrect, but that doesn't matter.
1511
1512 // see if x(i),w(i) is in a valid region
1513 if (lo[i]==0 && w[i] >= 0) {
1514 lcp->transfer_i_to_N (i);
1515 state[i] = 0;
1516 }
1517 else if (hi[i]==0 && w[i] <= 0) {
1518 lcp->transfer_i_to_N (i);
1519 state[i] = 1;
1520 }
1521 else if (w[i]==0) {
1522 // this is a degenerate case. by the time we get to this test we know
1523 // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve,
1524 // and similarly that hi > 0. this means that the line segment
1525 // corresponding to set C is at least finite in extent, and we are on it.
1526 // NOTE: we must call lcp->solve1() before lcp->transfer_i_to_C()
1527 lcp->solve1 (delta_x,i,0,1);
1528
1529#ifdef dUSE_MALLOC_FOR_ALLOCA
1530 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1531 UNALLOCA(state);
1532 UNALLOCA(C);
1533 UNALLOCA(p);
1534 UNALLOCA(Arows);
1535 UNALLOCA(ell);
1536 UNALLOCA(Dell);
1537 UNALLOCA(delta_w);
1538 UNALLOCA(delta_x);
1539 UNALLOCA(d);
1540 UNALLOCA(L);
1541 return;
1542 }
1543#endif
1544
1545 lcp->transfer_i_to_C (i);
1546 }
1547 else {
1548 // we must push x(i) and w(i)
1549 for (;;) {
1550 // find direction to push on x(i)
1551 if (w[i] <= 0) {
1552 dir = 1;
1553 dirf = REAL(1.0);
1554 }
1555 else {
1556 dir = -1;
1557 dirf = REAL(-1.0);
1558 }
1559
1560 // compute: delta_x(C) = -dir*A(C,C)\A(C,i)
1561 lcp->solve1 (delta_x,i,dir);
1562
1563#ifdef dUSE_MALLOC_FOR_ALLOCA
1564 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1565 UNALLOCA(state);
1566 UNALLOCA(C);
1567 UNALLOCA(p);
1568 UNALLOCA(Arows);
1569 UNALLOCA(ell);
1570 UNALLOCA(Dell);
1571 UNALLOCA(delta_w);
1572 UNALLOCA(delta_x);
1573 UNALLOCA(d);
1574 UNALLOCA(L);
1575 return;
1576 }
1577#endif
1578
1579 // note that delta_x[i] = dirf, but we wont bother to set it
1580
1581 // compute: delta_w = A*delta_x ... note we only care about
1582 // delta_w(N) and delta_w(i), the rest is ignored
1583 lcp->pN_equals_ANC_times_qC (delta_w,delta_x);
1584 lcp->pN_plusequals_ANi (delta_w,i,dir);
1585 delta_w[i] = lcp->AiC_times_qC (i,delta_x) + lcp->Aii(i)*dirf;
1586
1587 // find largest step we can take (size=s), either to drive x(i),w(i)
1588 // to the valid LCP region or to drive an already-valid variable
1589 // outside the valid region.
1590
1591 int cmd = 1; // index switching command
1592 int si = 0; // si = index to switch if cmd>3
1593 dReal s = -w[i]/delta_w[i];
1594 if (dir > 0) {
1595 if (hi[i] < dInfinity) {
1596 dReal s2 = (hi[i]-x[i])/dirf; // step to x(i)=hi(i)
1597 if (s2 < s) {
1598 s = s2;
1599 cmd = 3;
1600 }
1601 }
1602 }
1603 else {
1604 if (lo[i] > -dInfinity) {
1605 dReal s2 = (lo[i]-x[i])/dirf; // step to x(i)=lo(i)
1606 if (s2 < s) {
1607 s = s2;
1608 cmd = 2;
1609 }
1610 }
1611 }
1612
1613 for (k=0; k < lcp->numN(); k++) {
1614 if ((state[lcp->indexN(k)]==0 && delta_w[lcp->indexN(k)] < 0) ||
1615 (state[lcp->indexN(k)]!=0 && delta_w[lcp->indexN(k)] > 0)) {
1616 // don't bother checking if lo=hi=0
1617 if (lo[lcp->indexN(k)] == 0 && hi[lcp->indexN(k)] == 0) continue;
1618 dReal s2 = -w[lcp->indexN(k)] / delta_w[lcp->indexN(k)];
1619 if (s2 < s) {
1620 s = s2;
1621 cmd = 4;
1622 si = lcp->indexN(k);
1623 }
1624 }
1625 }
1626
1627 for (k=nub; k < lcp->numC(); k++) {
1628 if (delta_x[lcp->indexC(k)] < 0 && lo[lcp->indexC(k)] > -dInfinity) {
1629 dReal s2 = (lo[lcp->indexC(k)]-x[lcp->indexC(k)]) /
1630 delta_x[lcp->indexC(k)];
1631 if (s2 < s) {
1632 s = s2;
1633 cmd = 5;
1634 si = lcp->indexC(k);
1635 }
1636 }
1637 if (delta_x[lcp->indexC(k)] > 0 && hi[lcp->indexC(k)] < dInfinity) {
1638 dReal s2 = (hi[lcp->indexC(k)]-x[lcp->indexC(k)]) /
1639 delta_x[lcp->indexC(k)];
1640 if (s2 < s) {
1641 s = s2;
1642 cmd = 6;
1643 si = lcp->indexC(k);
1644 }
1645 }
1646 }
1647
1648 //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C",
1649 // "C->NL","C->NH"};
1650 //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i);
1651
1652 // if s <= 0 then we've got a problem. if we just keep going then
1653 // we're going to get stuck in an infinite loop. instead, just cross
1654 // our fingers and exit with the current solution.
1655 if (s <= 0) {
1656 dMessage (d_ERR_LCP, "LCP internal error, s <= 0 (s=%.4e)",s);
1657 if (i < (n-1)) {
1658 dSetZero (x+i,n-i);
1659 dSetZero (w+i,n-i);
1660 }
1661 goto done;
1662 }
1663
1664 // apply x = x + s * delta_x
1665 lcp->pC_plusequals_s_times_qC (x,s,delta_x);
1666 x[i] += s * dirf;
1667
1668 // apply w = w + s * delta_w
1669 lcp->pN_plusequals_s_times_qN (w,s,delta_w);
1670 w[i] += s * delta_w[i];
1671
1672 // switch indexes between sets if necessary
1673 switch (cmd) {
1674 case 1: // done
1675 w[i] = 0;
1676 lcp->transfer_i_to_C (i);
1677 break;
1678 case 2: // done
1679 x[i] = lo[i];
1680 state[i] = 0;
1681 lcp->transfer_i_to_N (i);
1682 break;
1683 case 3: // done
1684 x[i] = hi[i];
1685 state[i] = 1;
1686 lcp->transfer_i_to_N (i);
1687 break;
1688 case 4: // keep going
1689 w[si] = 0;
1690 lcp->transfer_i_from_N_to_C (si);
1691 break;
1692 case 5: // keep going
1693 x[si] = lo[si];
1694 state[si] = 0;
1695 lcp->transfer_i_from_C_to_N (si);
1696 break;
1697 case 6: // keep going
1698 x[si] = hi[si];
1699 state[si] = 1;
1700 lcp->transfer_i_from_C_to_N (si);
1701 break;
1702 }
1703
1704 if (cmd <= 3) break;
1705 }
1706 }
1707 }
1708
1709 done:
1710 lcp->unpermute();
1711 delete lcp;
1712
1713 UNALLOCA (L);
1714 UNALLOCA (d);
1715 UNALLOCA (delta_x);
1716 UNALLOCA (delta_w);
1717 UNALLOCA (Dell);
1718 UNALLOCA (ell);
1719 UNALLOCA (Arows);
1720 UNALLOCA (p);
1721 UNALLOCA (C);
1722 UNALLOCA (state);
1723}
1724
1725//***************************************************************************
1726// accuracy and timing test
1727
1728extern "C" ODE_API void dTestSolveLCP()
1729{
1730 int n = 100;
1731 int i,nskip = dPAD(n);
1732#ifdef dDOUBLE
1733 const dReal tol = REAL(1e-9);
1734#endif
1735#ifdef dSINGLE
1736 const dReal tol = REAL(1e-4);
1737#endif
1738 printf ("dTestSolveLCP()\n");
1739
1740 ALLOCA (dReal,A,n*nskip*sizeof(dReal));
1741#ifdef dUSE_MALLOC_FOR_ALLOCA
1742 if (A == NULL) {
1743 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1744 return;
1745 }
1746#endif
1747 ALLOCA (dReal,x,n*sizeof(dReal));
1748#ifdef dUSE_MALLOC_FOR_ALLOCA
1749 if (x == NULL) {
1750 UNALLOCA (A);
1751 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1752 return;
1753 }
1754#endif
1755 ALLOCA (dReal,b,n*sizeof(dReal));
1756#ifdef dUSE_MALLOC_FOR_ALLOCA
1757 if (b == NULL) {
1758 UNALLOCA (x);
1759 UNALLOCA (A);
1760 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1761 return;
1762 }
1763#endif
1764 ALLOCA (dReal,w,n*sizeof(dReal));
1765#ifdef dUSE_MALLOC_FOR_ALLOCA
1766 if (w == NULL) {
1767 UNALLOCA (b);
1768 UNALLOCA (x);
1769 UNALLOCA (A);
1770 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1771 return;
1772 }
1773#endif
1774 ALLOCA (dReal,lo,n*sizeof(dReal));
1775#ifdef dUSE_MALLOC_FOR_ALLOCA
1776 if (lo == NULL) {
1777 UNALLOCA (w);
1778 UNALLOCA (b);
1779 UNALLOCA (x);
1780 UNALLOCA (A);
1781 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1782 return;
1783 }
1784#endif
1785 ALLOCA (dReal,hi,n*sizeof(dReal));
1786#ifdef dUSE_MALLOC_FOR_ALLOCA
1787 if (hi == NULL) {
1788 UNALLOCA (lo);
1789 UNALLOCA (w);
1790 UNALLOCA (b);
1791 UNALLOCA (x);
1792 UNALLOCA (A);
1793 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1794 return;
1795 }
1796#endif
1797
1798 ALLOCA (dReal,A2,n*nskip*sizeof(dReal));
1799#ifdef dUSE_MALLOC_FOR_ALLOCA
1800 if (A2 == NULL) {
1801 UNALLOCA (hi);
1802 UNALLOCA (lo);
1803 UNALLOCA (w);
1804 UNALLOCA (b);
1805 UNALLOCA (x);
1806 UNALLOCA (A);
1807 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1808 return;
1809 }
1810#endif
1811 ALLOCA (dReal,b2,n*sizeof(dReal));
1812#ifdef dUSE_MALLOC_FOR_ALLOCA
1813 if (b2 == NULL) {
1814 UNALLOCA (A2);
1815 UNALLOCA (hi);
1816 UNALLOCA (lo);
1817 UNALLOCA (w);
1818 UNALLOCA (b);
1819 UNALLOCA (x);
1820 UNALLOCA (A);
1821 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1822 return;
1823 }
1824#endif
1825 ALLOCA (dReal,lo2,n*sizeof(dReal));
1826#ifdef dUSE_MALLOC_FOR_ALLOCA
1827 if (lo2 == NULL) {
1828 UNALLOCA (b2);
1829 UNALLOCA (A2);
1830 UNALLOCA (hi);
1831 UNALLOCA (lo);
1832 UNALLOCA (w);
1833 UNALLOCA (b);
1834 UNALLOCA (x);
1835 UNALLOCA (A);
1836 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1837 return;
1838 }
1839#endif
1840 ALLOCA (dReal,hi2,n*sizeof(dReal));
1841#ifdef dUSE_MALLOC_FOR_ALLOCA
1842 if (hi2 == NULL) {
1843 UNALLOCA (lo2);
1844 UNALLOCA (b2);
1845 UNALLOCA (A2);
1846 UNALLOCA (hi);
1847 UNALLOCA (lo);
1848 UNALLOCA (w);
1849 UNALLOCA (b);
1850 UNALLOCA (x);
1851 UNALLOCA (A);
1852 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1853 return;
1854 }
1855#endif
1856 ALLOCA (dReal,tmp1,n*sizeof(dReal));
1857#ifdef dUSE_MALLOC_FOR_ALLOCA
1858 if (tmp1 == NULL) {
1859 UNALLOCA (hi2);
1860 UNALLOCA (lo2);
1861 UNALLOCA (b2);
1862 UNALLOCA (A2);
1863 UNALLOCA (hi);
1864 UNALLOCA (lo);
1865 UNALLOCA (w);
1866 UNALLOCA (b);
1867 UNALLOCA (x);
1868 UNALLOCA (A);
1869 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1870 return;
1871 }
1872#endif
1873 ALLOCA (dReal,tmp2,n*sizeof(dReal));
1874#ifdef dUSE_MALLOC_FOR_ALLOCA
1875 if (tmp2 == NULL) {
1876 UNALLOCA (tmp1);
1877 UNALLOCA (hi2);
1878 UNALLOCA (lo2);
1879 UNALLOCA (b2);
1880 UNALLOCA (A2);
1881 UNALLOCA (hi);
1882 UNALLOCA (lo);
1883 UNALLOCA (w);
1884 UNALLOCA (b);
1885 UNALLOCA (x);
1886 UNALLOCA (A);
1887 dMemoryFlag = d_MEMORY_OUT_OF_MEMORY;
1888 return;
1889 }
1890#endif
1891
1892 double total_time = 0;
1893 for (int count=0; count < 1000; count++) {
1894
1895 // form (A,b) = a random positive definite LCP problem
1896 dMakeRandomMatrix (A2,n,n,1.0);
1897 dMultiply2 (A,A2,A2,n,n,n);
1898 dMakeRandomMatrix (x,n,1,1.0);
1899 dMultiply0 (b,A,x,n,n,1);
1900 for (i=0; i<n; i++) b[i] += (dRandReal()*REAL(0.2))-REAL(0.1);
1901
1902 // choose `nub' in the range 0..n-1
1903 int nub = 50; //dRandInt (n);
1904
1905 // make limits
1906 for (i=0; i<nub; i++) lo[i] = -dInfinity;
1907 for (i=0; i<nub; i++) hi[i] = dInfinity;
1908 //for (i=nub; i<n; i++) lo[i] = 0;
1909 //for (i=nub; i<n; i++) hi[i] = dInfinity;
1910 //for (i=nub; i<n; i++) lo[i] = -dInfinity;
1911 //for (i=nub; i<n; i++) hi[i] = 0;
1912 for (i=nub; i<n; i++) lo[i] = -(dRandReal()*REAL(1.0))-REAL(0.01);
1913 for (i=nub; i<n; i++) hi[i] = (dRandReal()*REAL(1.0))+REAL(0.01);
1914
1915 // set a few limits to lo=hi=0
1916 /*
1917 for (i=0; i<10; i++) {
1918 int j = dRandInt (n-nub) + nub;
1919 lo[j] = 0;
1920 hi[j] = 0;
1921 }
1922 */
1923
1924 // solve the LCP. we must make copy of A,b,lo,hi (A2,b2,lo2,hi2) for
1925 // SolveLCP() to permute. also, we'll clear the upper triangle of A2 to
1926 // ensure that it doesn't get referenced (if it does, the answer will be
1927 // wrong).
1928
1929 memcpy (A2,A,n*nskip*sizeof(dReal));
1930 dClearUpperTriangle (A2,n);
1931 memcpy (b2,b,n*sizeof(dReal));
1932 memcpy (lo2,lo,n*sizeof(dReal));
1933 memcpy (hi2,hi,n*sizeof(dReal));
1934 dSetZero (x,n);
1935 dSetZero (w,n);
1936
1937 dStopwatch sw;
1938 dStopwatchReset (&sw);
1939 dStopwatchStart (&sw);
1940
1941 dSolveLCP (n,A2,x,b2,w,nub,lo2,hi2,0);
1942#ifdef dUSE_MALLOC_FOR_ALLOCA
1943 if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) {
1944 UNALLOCA (tmp2);
1945 UNALLOCA (tmp1);
1946 UNALLOCA (hi2);
1947 UNALLOCA (lo2);
1948 UNALLOCA (b2);
1949 UNALLOCA (A2);
1950 UNALLOCA (hi);
1951 UNALLOCA (lo);
1952 UNALLOCA (w);
1953 UNALLOCA (b);
1954 UNALLOCA (x);
1955 UNALLOCA (A);
1956 return;
1957 }
1958#endif
1959
1960 dStopwatchStop (&sw);
1961 double time = dStopwatchTime(&sw);
1962 total_time += time;
1963 double average = total_time / double(count+1) * 1000.0;
1964
1965 // check the solution
1966
1967 dMultiply0 (tmp1,A,x,n,n,1);
1968 for (i=0; i<n; i++) tmp2[i] = b[i] + w[i];
1969 dReal diff = dMaxDifference (tmp1,tmp2,n,1);
1970 // printf ("\tA*x = b+w, maximum difference = %.6e - %s (1)\n",diff,
1971 // diff > tol ? "FAILED" : "passed");
1972 if (diff > tol) dDebug (0,"A*x = b+w, maximum difference = %.6e",diff);
1973 int n1=0,n2=0,n3=0;
1974 for (i=0; i<n; i++) {
1975 if (x[i]==lo[i] && w[i] >= 0) {
1976 n1++; // ok
1977 }
1978 else if (x[i]==hi[i] && w[i] <= 0) {
1979 n2++; // ok
1980 }
1981 else if (x[i] >= lo[i] && x[i] <= hi[i] && w[i] == 0) {
1982 n3++; // ok
1983 }
1984 else {
1985 dDebug (0,"FAILED: i=%d x=%.4e w=%.4e lo=%.4e hi=%.4e",i,
1986 x[i],w[i],lo[i],hi[i]);
1987 }
1988 }
1989
1990 // pacifier
1991 printf ("passed: NL=%3d NH=%3d C=%3d ",n1,n2,n3);
1992 printf ("time=%10.3f ms avg=%10.4f\n",time * 1000.0,average);
1993 }
1994
1995 UNALLOCA (A);
1996 UNALLOCA (x);
1997 UNALLOCA (b);
1998 UNALLOCA (w);
1999 UNALLOCA (lo);
2000 UNALLOCA (hi);
2001 UNALLOCA (A2);
2002 UNALLOCA (b2);
2003 UNALLOCA (lo2);
2004 UNALLOCA (hi2);
2005 UNALLOCA (tmp1);
2006 UNALLOCA (tmp2);
2007}