From 79eca25c945a535a7a0325999034bae17da92412 Mon Sep 17 00:00:00 2001 From: dan miller Date: Fri, 19 Oct 2007 05:15:33 +0000 Subject: resubmitting ode --- libraries/ode-0.9/ode/src/lcp.cpp | 2007 +++++++++++++++++++++++++++++++++++++ 1 file changed, 2007 insertions(+) create mode 100644 libraries/ode-0.9/ode/src/lcp.cpp (limited to 'libraries/ode-0.9/ode/src/lcp.cpp') diff --git a/libraries/ode-0.9/ode/src/lcp.cpp b/libraries/ode-0.9/ode/src/lcp.cpp new file mode 100644 index 0000000..d03d3e8 --- /dev/null +++ b/libraries/ode-0.9/ode/src/lcp.cpp @@ -0,0 +1,2007 @@ +/************************************************************************* + * * + * Open Dynamics Engine, Copyright (C) 2001,2002 Russell L. Smith. * + * All rights reserved. Email: russ@q12.org Web: www.q12.org * + * * + * This library is free software; you can redistribute it and/or * + * modify it under the terms of EITHER: * + * (1) The GNU Lesser General Public License as published by the Free * + * Software Foundation; either version 2.1 of the License, or (at * + * your option) any later version. The text of the GNU Lesser * + * General Public License is included with this library in the * + * file LICENSE.TXT. * + * (2) The BSD-style license that is included with this library in * + * the file LICENSE-BSD.TXT. * + * * + * This library is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the files * + * LICENSE.TXT and LICENSE-BSD.TXT for more details. * + * * + *************************************************************************/ + +/* + + +THE ALGORITHM +------------- + +solve A*x = b+w, with x and w subject to certain LCP conditions. +each x(i),w(i) must lie on one of the three line segments in the following +diagram. each line segment corresponds to one index set : + + w(i) + /|\ | : + | | : + | |i in N : + w>0 | |state[i]=0 : + | | : + | | : i in C + w=0 + +-----------------------+ + | : | + | : | + w<0 | : |i in N + | : |state[i]=1 + | : | + | : | + +-------|-----------|-----------|----------> x(i) + lo 0 hi + +the Dantzig algorithm proceeds as follows: + for i=1:n + * if (x(i),w(i)) is not on the line, push x(i) and w(i) positive or + negative towards the line. as this is done, the other (x(j),w(j)) + for j= 0. this makes the algorithm a bit +simpler, because the starting point for x(i),w(i) is always on the dotted +line x=0 and x will only ever increase in one direction, so it can only hit +two out of the three line segments. + + +NOTES +----- + +this is an implementation of "lcp_dantzig2_ldlt.m" and "lcp_dantzig_lohi.m". +the implementation is split into an LCP problem object (dLCP) and an LCP +driver function. most optimization occurs in the dLCP object. + +a naive implementation of the algorithm requires either a lot of data motion +or a lot of permutation-array lookup, because we are constantly re-ordering +rows and columns. to avoid this and make a more optimized algorithm, a +non-trivial data structure is used to represent the matrix A (this is +implemented in the fast version of the dLCP object). + +during execution of this algorithm, some indexes in A are clamped (set C), +some are non-clamped (set N), and some are "don't care" (where x=0). +A,x,b,w (and other problem vectors) are permuted such that the clamped +indexes are first, the unclamped indexes are next, and the don't-care +indexes are last. this permutation is recorded in the array `p'. +initially p = 0..n-1, and as the rows and columns of A,x,b,w are swapped, +the corresponding elements of p are swapped. + +because the C and N elements are grouped together in the rows of A, we can do +lots of work with a fast dot product function. if A,x,etc were not permuted +and we only had a permutation array, then those dot products would be much +slower as we would have a permutation array lookup in some inner loops. + +A is accessed through an array of row pointers, so that element (i,j) of the +permuted matrix is A[i][j]. this makes row swapping fast. for column swapping +we still have to actually move the data. + +during execution of this algorithm we maintain an L*D*L' factorization of +the clamped submatrix of A (call it `AC') which is the top left nC*nC +submatrix of A. there are two ways we could arrange the rows/columns in AC. + +(1) AC is always permuted such that L*D*L' = AC. this causes a problem + when a row/column is removed from C, because then all the rows/columns of A + between the deleted index and the end of C need to be rotated downward. + this results in a lot of data motion and slows things down. +(2) L*D*L' is actually a factorization of a *permutation* of AC (which is + itself a permutation of the underlying A). this is what we do - the + permutation is recorded in the vector C. call this permutation A[C,C]. + when a row/column is removed from C, all we have to do is swap two + rows/columns and manipulate C. + +*/ + +#include +#include "lcp.h" +#include +#include +#include "mat.h" // for testing +#include // for testing + +//*************************************************************************** +// code generation parameters + +// LCP debugging (mosty for fast dLCP) - this slows things down a lot +//#define DEBUG_LCP + +//#define dLCP_SLOW // use slow dLCP object +#define dLCP_FAST // use fast dLCP object + +// option 1 : matrix row pointers (less data copying) +#define ROWPTRS +#define ATYPE dReal ** +#define AROW(i) (A[i]) + +// option 2 : no matrix row pointers (slightly faster inner loops) +//#define NOROWPTRS +//#define ATYPE dReal * +//#define AROW(i) (A+(i)*nskip) + +// use protected, non-stack memory allocation system + +#ifdef dUSE_MALLOC_FOR_ALLOCA +extern unsigned int dMemoryFlag; + +#define ALLOCA(t,v,s) t* v = (t*) malloc(s) +#define UNALLOCA(t) free(t) + +#else + +#define ALLOCA(t,v,s) t* v =(t*)dALLOCA16(s) +#define UNALLOCA(t) /* nothing */ + +#endif + +#define NUB_OPTIMIZATIONS + +//*************************************************************************** + +// swap row/column i1 with i2 in the n*n matrix A. the leading dimension of +// A is nskip. this only references and swaps the lower triangle. +// if `do_fast_row_swaps' is nonzero and row pointers are being used, then +// rows will be swapped by exchanging row pointers. otherwise the data will +// be copied. + +static void swapRowsAndCols (ATYPE A, int n, int i1, int i2, int nskip, + int do_fast_row_swaps) +{ + int i; + dAASSERT (A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n && + nskip >= n && i1 < i2); + +# ifdef ROWPTRS + for (i=i1+1; i 0) { + memcpy (tmprow,A+i1*nskip,i1*sizeof(dReal)); + memcpy (A+i1*nskip,A+i2*nskip,i1*sizeof(dReal)); + memcpy (A+i2*nskip,tmprow,i1*sizeof(dReal)); + } + for (i=i1+1; i0 && i1 >=0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n && + i1 <= i2); + if (i1==i2) return; + swapRowsAndCols (A,n,i1,i2,nskip,do_fast_row_swaps); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) + return; +#endif + tmp = x[i1]; + x[i1] = x[i2]; + x[i2] = tmp; + tmp = b[i1]; + b[i1] = b[i2]; + b[i2] = tmp; + tmp = w[i1]; + w[i1] = w[i2]; + w[i2] = tmp; + tmp = lo[i1]; + lo[i1] = lo[i2]; + lo[i2] = tmp; + tmp = hi[i1]; + hi[i1] = hi[i2]; + hi[i2] = tmp; + tmpi = p[i1]; + p[i1] = p[i2]; + p[i2] = tmpi; + tmpi = state[i1]; + state[i1] = state[i2]; + state[i2] = tmpi; + if (findex) { + tmpi = findex[i1]; + findex[i1] = findex[i2]; + findex[i2] = tmpi; + } +} + + +// for debugging - check that L,d is the factorization of A[C,C]. +// A[C,C] has size nC*nC and leading dimension nskip. +// L has size nC*nC and leading dimension nskip. +// d has size nC. + +#ifdef DEBUG_LCP + +static void checkFactorization (ATYPE A, dReal *_L, dReal *_d, + int nC, int *C, int nskip) +{ + int i,j; + if (nC==0) return; + + // get A1=A, copy the lower triangle to the upper triangle, get A2=A[C,C] + dMatrix A1 (nC,nC); + for (i=0; i 1e-8) + dDebug (0,"L*D*L' check, maximum difference = %.6e\n",diff); +} + +#endif + + +// for debugging + +#ifdef DEBUG_LCP + +static void checkPermutations (int i, int n, int nC, int nN, int *p, int *C) +{ + int j,k; + dIASSERT (nC>=0 && nN>=0 && (nC+nN)==i && i < n); + for (k=0; k= 0 && p[k] < i); + for (k=i; k C,N; // index sets + int last_i_for_solve1; // last i value given to solve1 + + dLCP (int _n, int _nub, dReal *_Adata, dReal *_x, dReal *_b, dReal *_w, + dReal *_lo, dReal *_hi, dReal *_L, dReal *_d, + dReal *_Dell, dReal *_ell, dReal *_tmp, + int *_state, int *_findex, int *_p, int *_C, dReal **Arows); + // the constructor is given an initial problem description (A,x,b,w) and + // space for other working data (which the caller may allocate on the stack). + // some of this data is specific to the fast dLCP implementation. + // the matrices A and L have size n*n, vectors have size n*1. + // A represents a symmetric matrix but only the lower triangle is valid. + // `nub' is the number of unbounded indexes at the start. all the indexes + // 0..nub-1 will be put into C. + + ~dLCP(); + + int getNub() { return nub; } + // return the value of `nub'. the constructor may want to change it, + // so the caller should find out its new value. + + // transfer functions: transfer index i to the given set (C or N). indexes + // less than `nub' can never be given. A,x,b,w,etc may be permuted by these + // functions, the caller must be robust to this. + + void transfer_i_to_C (int i); + // this assumes C and N span 1:i-1. this also assumes that solve1() has + // been recently called for the same i without any other transfer + // functions in between (thereby allowing some data reuse for the fast + // implementation). + void transfer_i_to_N (int i); + // this assumes C and N span 1:i-1. + void transfer_i_from_N_to_C (int i); + void transfer_i_from_C_to_N (int i); + + int numC(); + int numN(); + // return the number of indexes in set C/N + + int indexC (int i); + int indexN (int i); + // return index i in set C/N. + + // accessor and arithmetic functions. Aij translates as A(i,j), etc. + // make sure that only the lower triangle of A is ever referenced. + + dReal Aii (int i); + dReal AiC_times_qC (int i, dReal *q); + dReal AiN_times_qN (int i, dReal *q); // for all Nj + void pN_equals_ANC_times_qC (dReal *p, dReal *q); // for all Nj + void pN_plusequals_ANi (dReal *p, int i, int sign=1); + // for all Nj. sign = +1,-1. assumes i > maximum index in N. + void pC_plusequals_s_times_qC (dReal *p, dReal s, dReal *q); + void pN_plusequals_s_times_qN (dReal *p, dReal s, dReal *q); // for all Nj + void solve1 (dReal *a, int i, int dir=1, int only_transfer=0); + // get a(C) = - dir * A(C,C) \ A(C,i). dir must be +/- 1. + // the fast version of this function computes some data that is needed by + // transfer_i_to_C(). if only_transfer is nonzero then this function + // *only* computes that data, it does not set a(C). + + void unpermute(); + // call this at the end of the LCP function. if the x/w values have been + // permuted then this will unscramble them. +}; + + +dLCP::dLCP (int _n, int _nub, dReal *_Adata, dReal *_x, dReal *_b, dReal *_w, + dReal *_lo, dReal *_hi, dReal *_L, dReal *_d, + dReal *_Dell, dReal *_ell, dReal *_tmp, + int *_state, int *_findex, int *_p, int *_C, dReal **Arows) +{ + dUASSERT (_findex==0,"slow dLCP object does not support findex array"); + + n = _n; + nub = _nub; + Adata = _Adata; + A = 0; + x = _x; + b = _b; + w = _w; + lo = _lo; + hi = _hi; + nskip = dPAD(n); + dSetZero (x,n); + last_i_for_solve1 = -1; + + int i,j; + C.setSize (n); + N.setSize (n); + for (i=0; i0, put all indexes 0..nub-1 into C and solve for x + if (nub > 0) { + for (i=0; i= i) dDebug (0,"N assumption violated"); + if (sign > 0) { + for (k=0; k 0) { + for (ii=0; ii nub + if (nub < n) { + for (k=0; k<100; k++) { + int i1,i2; + do { + i1 = dRandInt(n-nub)+nub; + i2 = dRandInt(n-nub)+nub; + } + while (i1 > i2); + //printf ("--> %d %d\n",i1,i2); + swapProblem (A,x,b,w,lo,hi,p,state,findex,n,i1,i2,nskip,0); + } + } + */ + + // permute the problem so that *all* the unbounded variables are at the + // start, i.e. look for unbounded variables not included in `nub'. we can + // potentially push up `nub' this way and get a bigger initial factorization. + // note that when we swap rows/cols here we must not just swap row pointers, + // as the initial factorization relies on the data being all in one chunk. + // variables that have findex >= 0 are *not* considered to be unbounded even + // if lo=-inf and hi=inf - this is because these limits may change during the + // solution process. + + for (k=nub; k= 0) continue; + if (lo[k]==-dInfinity && hi[k]==dInfinity) { + swapProblem (A,x,b,w,lo,hi,p,state,findex,n,nub,k,nskip,0); + nub++; + } + } + + // if there are unbounded variables at the start, factorize A up to that + // point and solve for x. this puts all indexes 0..nub-1 into C. + if (nub > 0) { + for (k=0; k nub such that all findex variables are at the end + if (findex) { + int num_at_end = 0; + for (k=n-1; k >= nub; k--) { + if (findex[k] >= 0) { + swapProblem (A,x,b,w,lo,hi,p,state,findex,n,k,n-1-num_at_end,nskip,1); + num_at_end++; + } + } + } + + // print info about indexes + /* + for (k=0; k 0) { + // ell,Dell were computed by solve1(). note, ell = D \ L1solve (L,A(i,C)) + for (j=0; j 0) { + dReal *aptr = AROW(i); +# ifdef NUB_OPTIMIZATIONS + // if nub>0, initial part of aptr unpermuted + for (j=0; j 0) { + for (int i=0; i 0) { + dReal *aptr = AROW(i); +# ifdef NUB_OPTIMIZATIONS + // if nub>0, initial part of aptr[] is guaranteed unpermuted + for (j=0; j 0) { + for (j=0; j0 && A && x && b && w && nub == 0); + + int i,k; + int nskip = dPAD(n); + ALLOCA (dReal,L,n*nskip*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (L == NULL) { + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,d,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (d == NULL) { + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,delta_x,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (delta_x == NULL) { + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,delta_w,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (delta_w == NULL) { + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,Dell,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (Dell == NULL) { + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,ell,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (ell == NULL) { + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,tmp,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (tmp == NULL) { + UNALLOCA(ell); + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal*,Arows,n*sizeof(dReal*)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (Arows == NULL) { + UNALLOCA(tmp); + UNALLOCA(ell); + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (int,p,n*sizeof(int)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (p == NULL) { + UNALLOCA(Arows); + UNALLOCA(tmp); + UNALLOCA(ell); + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (int,C,n*sizeof(int)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (C == NULL) { + UNALLOCA(p); + UNALLOCA(Arows); + UNALLOCA(tmp); + UNALLOCA(ell); + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (int,dummy,n*sizeof(int)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (dummy == NULL) { + UNALLOCA(C); + UNALLOCA(p); + UNALLOCA(Arows); + UNALLOCA(tmp); + UNALLOCA(ell); + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + + + dLCP lcp (n,0,A,x,b,w,tmp,tmp,L,d,Dell,ell,tmp,dummy,dummy,p,C,Arows); + nub = lcp.getNub(); + + for (i=0; i= 0) { + lcp.transfer_i_to_N (i); + } + else { + for (;;) { + // compute: delta_x(C) = -A(C,C)\A(C,i) + dSetZero (delta_x,n); + lcp.solve1 (delta_x,i); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) { + UNALLOCA(dummy); + UNALLOCA(C); + UNALLOCA(p); + UNALLOCA(Arows); + UNALLOCA(tmp); + UNALLOCA(ell); + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + return; + } +#endif + delta_x[i] = 1; + + // compute: delta_w = A*delta_x + dSetZero (delta_w,n); + lcp.pN_equals_ANC_times_qC (delta_w,delta_x); + lcp.pN_plusequals_ANi (delta_w,i); + delta_w[i] = lcp.AiC_times_qC (i,delta_x) + lcp.Aii(i); + + // find index to switch + int si = i; // si = switch index + int si_in_N = 0; // set to 1 if si in N + dReal s = -w[i]/delta_w[i]; + + if (s <= 0) { + dMessage (d_ERR_LCP, "LCP internal error, s <= 0 (s=%.4e)",s); + if (i < (n-1)) { + dSetZero (x+i,n-i); + dSetZero (w+i,n-i); + } + goto done; + } + + for (k=0; k < lcp.numN(); k++) { + if (delta_w[lcp.indexN(k)] < 0) { + dReal s2 = -w[lcp.indexN(k)] / delta_w[lcp.indexN(k)]; + if (s2 < s) { + s = s2; + si = lcp.indexN(k); + si_in_N = 1; + } + } + } + for (k=0; k < lcp.numC(); k++) { + if (delta_x[lcp.indexC(k)] < 0) { + dReal s2 = -x[lcp.indexC(k)] / delta_x[lcp.indexC(k)]; + if (s2 < s) { + s = s2; + si = lcp.indexC(k); + si_in_N = 0; + } + } + } + + // apply x = x + s * delta_x + lcp.pC_plusequals_s_times_qC (x,s,delta_x); + x[i] += s; + lcp.pN_plusequals_s_times_qN (w,s,delta_w); + w[i] += s * delta_w[i]; + + // switch indexes between sets if necessary + if (si==i) { + w[i] = 0; + lcp.transfer_i_to_C (i); + break; + } + if (si_in_N) { + w[si] = 0; + lcp.transfer_i_from_N_to_C (si); + } + else { + x[si] = 0; + lcp.transfer_i_from_C_to_N (si); + } + } + } + } + + done: + lcp.unpermute(); + + UNALLOCA (L); + UNALLOCA (d); + UNALLOCA (delta_x); + UNALLOCA (delta_w); + UNALLOCA (Dell); + UNALLOCA (ell); + UNALLOCA (tmp); + UNALLOCA (Arows); + UNALLOCA (p); + UNALLOCA (C); + UNALLOCA (dummy); +} + +//*************************************************************************** +// an optimized Dantzig LCP driver routine for the lo-hi LCP problem. + +void dSolveLCP (int n, dReal *A, dReal *x, dReal *b, + dReal *w, int nub, dReal *lo, dReal *hi, int *findex) +{ + dAASSERT (n>0 && A && x && b && w && lo && hi && nub >= 0 && nub <= n); + + int i,k,hit_first_friction_index = 0; + int nskip = dPAD(n); + + // if all the variables are unbounded then we can just factor, solve, + // and return + if (nub >= n) { + dFactorLDLT (A,w,n,nskip); // use w for d + dSolveLDLT (A,w,b,n,nskip); + memcpy (x,b,n*sizeof(dReal)); + dSetZero (w,n); + + return; + } +# ifndef dNODEBUG + // check restrictions on lo and hi + for (k=0; k= 0); +# endif + ALLOCA (dReal,L,n*nskip*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (L == NULL) { + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,d,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (d == NULL) { + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,delta_x,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (delta_x == NULL) { + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,delta_w,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (delta_w == NULL) { + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,Dell,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (Dell == NULL) { + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,ell,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (ell == NULL) { + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal*,Arows,n*sizeof(dReal*)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (Arows == NULL) { + UNALLOCA(ell); + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (int,p,n*sizeof(int)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (p == NULL) { + UNALLOCA(Arows); + UNALLOCA(ell); + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (int,C,n*sizeof(int)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (C == NULL) { + UNALLOCA(p); + UNALLOCA(Arows); + UNALLOCA(ell); + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + + int dir; + dReal dirf; + + // for i in N, state[i] is 0 if x(i)==lo(i) or 1 if x(i)==hi(i) + ALLOCA (int,state,n*sizeof(int)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (state == NULL) { + UNALLOCA(C); + UNALLOCA(p); + UNALLOCA(Arows); + UNALLOCA(ell); + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + + // create LCP object. note that tmp is set to delta_w to save space, this + // optimization relies on knowledge of how tmp is used, so be careful! + dLCP *lcp=new dLCP(n,nub,A,x,b,w,lo,hi,L,d,Dell,ell,delta_w,state,findex,p,C,Arows); + nub = lcp->getNub(); + + // loop over all indexes nub..n-1. for index i, if x(i),w(i) satisfy the + // LCP conditions then i is added to the appropriate index set. otherwise + // x(i),w(i) is driven either +ve or -ve to force it to the valid region. + // as we drive x(i), x(C) is also adjusted to keep w(C) at zero. + // while driving x(i) we maintain the LCP conditions on the other variables + // 0..i-1. we do this by watching out for other x(i),w(i) values going + // outside the valid region, and then switching them between index sets + // when that happens. + + for (i=nub; i= 0) { + // un-permute x into delta_w, which is not being used at the moment + for (k=0; kAiC_times_qC (i,x) + lcp->AiN_times_qN (i,x) - b[i]; + + // if lo=hi=0 (which can happen for tangential friction when normals are + // 0) then the index will be assigned to set N with some state. however, + // set C's line has zero size, so the index will always remain in set N. + // with the "normal" switching logic, if w changed sign then the index + // would have to switch to set C and then back to set N with an inverted + // state. this is pointless, and also computationally expensive. to + // prevent this from happening, we use the rule that indexes with lo=hi=0 + // will never be checked for set changes. this means that the state for + // these indexes may be incorrect, but that doesn't matter. + + // see if x(i),w(i) is in a valid region + if (lo[i]==0 && w[i] >= 0) { + lcp->transfer_i_to_N (i); + state[i] = 0; + } + else if (hi[i]==0 && w[i] <= 0) { + lcp->transfer_i_to_N (i); + state[i] = 1; + } + else if (w[i]==0) { + // this is a degenerate case. by the time we get to this test we know + // that lo != 0, which means that lo < 0 as lo is not allowed to be +ve, + // and similarly that hi > 0. this means that the line segment + // corresponding to set C is at least finite in extent, and we are on it. + // NOTE: we must call lcp->solve1() before lcp->transfer_i_to_C() + lcp->solve1 (delta_x,i,0,1); + +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) { + UNALLOCA(state); + UNALLOCA(C); + UNALLOCA(p); + UNALLOCA(Arows); + UNALLOCA(ell); + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + return; + } +#endif + + lcp->transfer_i_to_C (i); + } + else { + // we must push x(i) and w(i) + for (;;) { + // find direction to push on x(i) + if (w[i] <= 0) { + dir = 1; + dirf = REAL(1.0); + } + else { + dir = -1; + dirf = REAL(-1.0); + } + + // compute: delta_x(C) = -dir*A(C,C)\A(C,i) + lcp->solve1 (delta_x,i,dir); + +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (dMemoryFlag == d_MEMORY_OUT_OF_MEMORY) { + UNALLOCA(state); + UNALLOCA(C); + UNALLOCA(p); + UNALLOCA(Arows); + UNALLOCA(ell); + UNALLOCA(Dell); + UNALLOCA(delta_w); + UNALLOCA(delta_x); + UNALLOCA(d); + UNALLOCA(L); + return; + } +#endif + + // note that delta_x[i] = dirf, but we wont bother to set it + + // compute: delta_w = A*delta_x ... note we only care about + // delta_w(N) and delta_w(i), the rest is ignored + lcp->pN_equals_ANC_times_qC (delta_w,delta_x); + lcp->pN_plusequals_ANi (delta_w,i,dir); + delta_w[i] = lcp->AiC_times_qC (i,delta_x) + lcp->Aii(i)*dirf; + + // find largest step we can take (size=s), either to drive x(i),w(i) + // to the valid LCP region or to drive an already-valid variable + // outside the valid region. + + int cmd = 1; // index switching command + int si = 0; // si = index to switch if cmd>3 + dReal s = -w[i]/delta_w[i]; + if (dir > 0) { + if (hi[i] < dInfinity) { + dReal s2 = (hi[i]-x[i])/dirf; // step to x(i)=hi(i) + if (s2 < s) { + s = s2; + cmd = 3; + } + } + } + else { + if (lo[i] > -dInfinity) { + dReal s2 = (lo[i]-x[i])/dirf; // step to x(i)=lo(i) + if (s2 < s) { + s = s2; + cmd = 2; + } + } + } + + for (k=0; k < lcp->numN(); k++) { + if ((state[lcp->indexN(k)]==0 && delta_w[lcp->indexN(k)] < 0) || + (state[lcp->indexN(k)]!=0 && delta_w[lcp->indexN(k)] > 0)) { + // don't bother checking if lo=hi=0 + if (lo[lcp->indexN(k)] == 0 && hi[lcp->indexN(k)] == 0) continue; + dReal s2 = -w[lcp->indexN(k)] / delta_w[lcp->indexN(k)]; + if (s2 < s) { + s = s2; + cmd = 4; + si = lcp->indexN(k); + } + } + } + + for (k=nub; k < lcp->numC(); k++) { + if (delta_x[lcp->indexC(k)] < 0 && lo[lcp->indexC(k)] > -dInfinity) { + dReal s2 = (lo[lcp->indexC(k)]-x[lcp->indexC(k)]) / + delta_x[lcp->indexC(k)]; + if (s2 < s) { + s = s2; + cmd = 5; + si = lcp->indexC(k); + } + } + if (delta_x[lcp->indexC(k)] > 0 && hi[lcp->indexC(k)] < dInfinity) { + dReal s2 = (hi[lcp->indexC(k)]-x[lcp->indexC(k)]) / + delta_x[lcp->indexC(k)]; + if (s2 < s) { + s = s2; + cmd = 6; + si = lcp->indexC(k); + } + } + } + + //static char* cmdstring[8] = {0,"->C","->NL","->NH","N->C", + // "C->NL","C->NH"}; + //printf ("cmd=%d (%s), si=%d\n",cmd,cmdstring[cmd],(cmd>3) ? si : i); + + // if s <= 0 then we've got a problem. if we just keep going then + // we're going to get stuck in an infinite loop. instead, just cross + // our fingers and exit with the current solution. + if (s <= 0) { + dMessage (d_ERR_LCP, "LCP internal error, s <= 0 (s=%.4e)",s); + if (i < (n-1)) { + dSetZero (x+i,n-i); + dSetZero (w+i,n-i); + } + goto done; + } + + // apply x = x + s * delta_x + lcp->pC_plusequals_s_times_qC (x,s,delta_x); + x[i] += s * dirf; + + // apply w = w + s * delta_w + lcp->pN_plusequals_s_times_qN (w,s,delta_w); + w[i] += s * delta_w[i]; + + // switch indexes between sets if necessary + switch (cmd) { + case 1: // done + w[i] = 0; + lcp->transfer_i_to_C (i); + break; + case 2: // done + x[i] = lo[i]; + state[i] = 0; + lcp->transfer_i_to_N (i); + break; + case 3: // done + x[i] = hi[i]; + state[i] = 1; + lcp->transfer_i_to_N (i); + break; + case 4: // keep going + w[si] = 0; + lcp->transfer_i_from_N_to_C (si); + break; + case 5: // keep going + x[si] = lo[si]; + state[si] = 0; + lcp->transfer_i_from_C_to_N (si); + break; + case 6: // keep going + x[si] = hi[si]; + state[si] = 1; + lcp->transfer_i_from_C_to_N (si); + break; + } + + if (cmd <= 3) break; + } + } + } + + done: + lcp->unpermute(); + delete lcp; + + UNALLOCA (L); + UNALLOCA (d); + UNALLOCA (delta_x); + UNALLOCA (delta_w); + UNALLOCA (Dell); + UNALLOCA (ell); + UNALLOCA (Arows); + UNALLOCA (p); + UNALLOCA (C); + UNALLOCA (state); +} + +//*************************************************************************** +// accuracy and timing test + +extern "C" ODE_API void dTestSolveLCP() +{ + int n = 100; + int i,nskip = dPAD(n); +#ifdef dDOUBLE + const dReal tol = REAL(1e-9); +#endif +#ifdef dSINGLE + const dReal tol = REAL(1e-4); +#endif + printf ("dTestSolveLCP()\n"); + + ALLOCA (dReal,A,n*nskip*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (A == NULL) { + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,x,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (x == NULL) { + UNALLOCA (A); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,b,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (b == NULL) { + UNALLOCA (x); + UNALLOCA (A); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,w,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (w == NULL) { + UNALLOCA (b); + UNALLOCA (x); + UNALLOCA (A); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,lo,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (lo == NULL) { + UNALLOCA (w); + UNALLOCA (b); + UNALLOCA (x); + UNALLOCA (A); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,hi,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (hi == NULL) { + UNALLOCA (lo); + UNALLOCA (w); + UNALLOCA (b); + UNALLOCA (x); + UNALLOCA (A); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + + ALLOCA (dReal,A2,n*nskip*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (A2 == NULL) { + UNALLOCA (hi); + UNALLOCA (lo); + UNALLOCA (w); + UNALLOCA (b); + UNALLOCA (x); + UNALLOCA (A); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,b2,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (b2 == NULL) { + UNALLOCA (A2); + UNALLOCA (hi); + UNALLOCA (lo); + UNALLOCA (w); + UNALLOCA (b); + UNALLOCA (x); + UNALLOCA (A); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,lo2,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (lo2 == NULL) { + UNALLOCA (b2); + UNALLOCA (A2); + UNALLOCA (hi); + UNALLOCA (lo); + UNALLOCA (w); + UNALLOCA (b); + UNALLOCA (x); + UNALLOCA (A); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,hi2,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (hi2 == NULL) { + UNALLOCA (lo2); + UNALLOCA (b2); + UNALLOCA (A2); + UNALLOCA (hi); + UNALLOCA (lo); + UNALLOCA (w); + UNALLOCA (b); + UNALLOCA (x); + UNALLOCA (A); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,tmp1,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (tmp1 == NULL) { + UNALLOCA (hi2); + UNALLOCA (lo2); + UNALLOCA (b2); + UNALLOCA (A2); + UNALLOCA (hi); + UNALLOCA (lo); + UNALLOCA (w); + UNALLOCA (b); + UNALLOCA (x); + UNALLOCA (A); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + ALLOCA (dReal,tmp2,n*sizeof(dReal)); +#ifdef dUSE_MALLOC_FOR_ALLOCA + if (tmp2 == NULL) { + UNALLOCA (tmp1); + UNALLOCA (hi2); + UNALLOCA (lo2); + UNALLOCA (b2); + UNALLOCA (A2); + UNALLOCA (hi); + UNALLOCA (lo); + UNALLOCA (w); + UNALLOCA (b); + UNALLOCA (x); + UNALLOCA (A); + dMemoryFlag = d_MEMORY_OUT_OF_MEMORY; + return; + } +#endif + + double total_time = 0; + for (int count=0; count < 1000; count++) { + + // form (A,b) = a random positive definite LCP problem + dMakeRandomMatrix (A2,n,n,1.0); + dMultiply2 (A,A2,A2,n,n,n); + dMakeRandomMatrix (x,n,1,1.0); + dMultiply0 (b,A,x,n,n,1); + for (i=0; i tol ? "FAILED" : "passed"); + if (diff > tol) dDebug (0,"A*x = b+w, maximum difference = %.6e",diff); + int n1=0,n2=0,n3=0; + for (i=0; i= 0) { + n1++; // ok + } + else if (x[i]==hi[i] && w[i] <= 0) { + n2++; // ok + } + else if (x[i] >= lo[i] && x[i] <= hi[i] && w[i] == 0) { + n3++; // ok + } + else { + dDebug (0,"FAILED: i=%d x=%.4e w=%.4e lo=%.4e hi=%.4e",i, + x[i],w[i],lo[i],hi[i]); + } + } + + // pacifier + printf ("passed: NL=%3d NH=%3d C=%3d ",n1,n2,n3); + printf ("time=%10.3f ms avg=%10.4f\n",time * 1000.0,average); + } + + UNALLOCA (A); + UNALLOCA (x); + UNALLOCA (b); + UNALLOCA (w); + UNALLOCA (lo); + UNALLOCA (hi); + UNALLOCA (A2); + UNALLOCA (b2); + UNALLOCA (lo2); + UNALLOCA (hi2); + UNALLOCA (tmp1); + UNALLOCA (tmp2); +} -- cgit v1.1