diff options
author | dan miller | 2007-10-19 05:20:48 +0000 |
---|---|---|
committer | dan miller | 2007-10-19 05:20:48 +0000 |
commit | d48ea5bb797037069d641da41da0f195f0124491 (patch) | |
tree | 40ff433d94859d629aac933d5ec73b382f62ba1a /libraries/ode-0.9/OPCODE/Ice/IceRevisitedRadix.cpp | |
parent | dont ask (diff) | |
download | opensim-SC_OLD-d48ea5bb797037069d641da41da0f195f0124491.zip opensim-SC_OLD-d48ea5bb797037069d641da41da0f195f0124491.tar.gz opensim-SC_OLD-d48ea5bb797037069d641da41da0f195f0124491.tar.bz2 opensim-SC_OLD-d48ea5bb797037069d641da41da0f195f0124491.tar.xz |
one more for the gipper
Diffstat (limited to 'libraries/ode-0.9/OPCODE/Ice/IceRevisitedRadix.cpp')
-rw-r--r-- | libraries/ode-0.9/OPCODE/Ice/IceRevisitedRadix.cpp | 520 |
1 files changed, 520 insertions, 0 deletions
diff --git a/libraries/ode-0.9/OPCODE/Ice/IceRevisitedRadix.cpp b/libraries/ode-0.9/OPCODE/Ice/IceRevisitedRadix.cpp new file mode 100644 index 0000000..3e351da --- /dev/null +++ b/libraries/ode-0.9/OPCODE/Ice/IceRevisitedRadix.cpp | |||
@@ -0,0 +1,520 @@ | |||
1 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
2 | /** | ||
3 | * Contains source code from the article "Radix Sort Revisited". | ||
4 | * \file IceRevisitedRadix.cpp | ||
5 | * \author Pierre Terdiman | ||
6 | * \date April, 4, 2000 | ||
7 | */ | ||
8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
9 | |||
10 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
11 | /** | ||
12 | * Revisited Radix Sort. | ||
13 | * This is my new radix routine: | ||
14 | * - it uses indices and doesn't recopy the values anymore, hence wasting less ram | ||
15 | * - it creates all the histograms in one run instead of four | ||
16 | * - it sorts words faster than dwords and bytes faster than words | ||
17 | * - it correctly sorts negative floating-point values by patching the offsets | ||
18 | * - it automatically takes advantage of temporal coherence | ||
19 | * - multiple keys support is a side effect of temporal coherence | ||
20 | * - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway] | ||
21 | * | ||
22 | * History: | ||
23 | * - 08.15.98: very first version | ||
24 | * - 04.04.00: recoded for the radix article | ||
25 | * - 12.xx.00: code lifting | ||
26 | * - 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here) | ||
27 | * - 10.11.01: added local ram support | ||
28 | * - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting...... | ||
29 | * - 01.02.02: - "mIndices" renamed => "mRanks". That's a rank sorter after all. | ||
30 | * - ranks are not "reset" anymore, but implicit on first calls | ||
31 | * - 07.05.02: - offsets rewritten with one less indirection. | ||
32 | * - 11.03.02: - "bool" replaced with RadixHint enum | ||
33 | * | ||
34 | * \class RadixSort | ||
35 | * \author Pierre Terdiman | ||
36 | * \version 1.4 | ||
37 | * \date August, 15, 1998 | ||
38 | */ | ||
39 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
40 | |||
41 | /* | ||
42 | To do: | ||
43 | - add an offset parameter between two input values (avoid some data recopy sometimes) | ||
44 | - unroll ? asm ? | ||
45 | - 11 bits trick & 3 passes as Michael did | ||
46 | - prefetch stuff the day I have a P3 | ||
47 | - make a version with 16-bits indices ? | ||
48 | */ | ||
49 | |||
50 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
51 | // Precompiled Header | ||
52 | #include "Stdafx.h" | ||
53 | |||
54 | using namespace IceCore; | ||
55 | |||
56 | #define INVALIDATE_RANKS mCurrentSize|=0x80000000 | ||
57 | #define VALIDATE_RANKS mCurrentSize&=0x7fffffff | ||
58 | #define CURRENT_SIZE (mCurrentSize&0x7fffffff) | ||
59 | #define INVALID_RANKS (mCurrentSize&0x80000000) | ||
60 | |||
61 | #define CHECK_RESIZE(n) \ | ||
62 | if(n!=mPreviousSize) \ | ||
63 | { \ | ||
64 | if(n>mCurrentSize) Resize(n); \ | ||
65 | else ResetRanks(); \ | ||
66 | mPreviousSize = n; \ | ||
67 | } | ||
68 | |||
69 | #define CREATE_HISTOGRAMS(type, buffer) \ | ||
70 | /* Clear counters/histograms */ \ | ||
71 | ZeroMemory(mHistogram, 256*4*sizeof(udword)); \ | ||
72 | \ | ||
73 | /* Prepare to count */ \ | ||
74 | ubyte* p = (ubyte*)input; \ | ||
75 | ubyte* pe = &p[nb*4]; \ | ||
76 | udword* h0= &mHistogram[0]; /* Histogram for first pass (LSB) */ \ | ||
77 | udword* h1= &mHistogram[256]; /* Histogram for second pass */ \ | ||
78 | udword* h2= &mHistogram[512]; /* Histogram for third pass */ \ | ||
79 | udword* h3= &mHistogram[768]; /* Histogram for last pass (MSB) */ \ | ||
80 | \ | ||
81 | bool AlreadySorted = true; /* Optimism... */ \ | ||
82 | \ | ||
83 | if(INVALID_RANKS) \ | ||
84 | { \ | ||
85 | /* Prepare for temporal coherence */ \ | ||
86 | type* Running = (type*)buffer; \ | ||
87 | type PrevVal = *Running; \ | ||
88 | \ | ||
89 | while(p!=pe) \ | ||
90 | { \ | ||
91 | /* Read input buffer in previous sorted order */ \ | ||
92 | type Val = *Running++; \ | ||
93 | /* Check whether already sorted or not */ \ | ||
94 | if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */ \ | ||
95 | /* Update for next iteration */ \ | ||
96 | PrevVal = Val; \ | ||
97 | \ | ||
98 | /* Create histograms */ \ | ||
99 | h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \ | ||
100 | } \ | ||
101 | \ | ||
102 | /* If all input values are already sorted, we just have to return and leave the */ \ | ||
103 | /* previous list unchanged. That way the routine may take advantage of temporal */ \ | ||
104 | /* coherence, for example when used to sort transparent faces. */ \ | ||
105 | if(AlreadySorted) \ | ||
106 | { \ | ||
107 | mNbHits++; \ | ||
108 | for(udword i=0;i<nb;i++) mRanks[i] = i; \ | ||
109 | return *this; \ | ||
110 | } \ | ||
111 | } \ | ||
112 | else \ | ||
113 | { \ | ||
114 | /* Prepare for temporal coherence */ \ | ||
115 | udword* Indices = mRanks; \ | ||
116 | type PrevVal = (type)buffer[*Indices]; \ | ||
117 | \ | ||
118 | while(p!=pe) \ | ||
119 | { \ | ||
120 | /* Read input buffer in previous sorted order */ \ | ||
121 | type Val = (type)buffer[*Indices++]; \ | ||
122 | /* Check whether already sorted or not */ \ | ||
123 | if(Val<PrevVal) { AlreadySorted = false; break; } /* Early out */ \ | ||
124 | /* Update for next iteration */ \ | ||
125 | PrevVal = Val; \ | ||
126 | \ | ||
127 | /* Create histograms */ \ | ||
128 | h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \ | ||
129 | } \ | ||
130 | \ | ||
131 | /* If all input values are already sorted, we just have to return and leave the */ \ | ||
132 | /* previous list unchanged. That way the routine may take advantage of temporal */ \ | ||
133 | /* coherence, for example when used to sort transparent faces. */ \ | ||
134 | if(AlreadySorted) { mNbHits++; return *this; } \ | ||
135 | } \ | ||
136 | \ | ||
137 | /* Else there has been an early out and we must finish computing the histograms */ \ | ||
138 | while(p!=pe) \ | ||
139 | { \ | ||
140 | /* Create histograms without the previous overhead */ \ | ||
141 | h0[*p++]++; h1[*p++]++; h2[*p++]++; h3[*p++]++; \ | ||
142 | } | ||
143 | |||
144 | #define CHECK_PASS_VALIDITY(pass) \ | ||
145 | /* Shortcut to current counters */ \ | ||
146 | udword* CurCount = &mHistogram[pass<<8]; \ | ||
147 | \ | ||
148 | /* Reset flag. The sorting pass is supposed to be performed. (default) */ \ | ||
149 | bool PerformPass = true; \ | ||
150 | \ | ||
151 | /* Check pass validity */ \ | ||
152 | \ | ||
153 | /* If all values have the same byte, sorting is useless. */ \ | ||
154 | /* It may happen when sorting bytes or words instead of dwords. */ \ | ||
155 | /* This routine actually sorts words faster than dwords, and bytes */ \ | ||
156 | /* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */ \ | ||
157 | /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \ | ||
158 | \ | ||
159 | /* Get first byte */ \ | ||
160 | ubyte UniqueVal = *(((ubyte*)input)+pass); \ | ||
161 | \ | ||
162 | /* Check that byte's counter */ \ | ||
163 | if(CurCount[UniqueVal]==nb) PerformPass=false; | ||
164 | |||
165 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
166 | /** | ||
167 | * Constructor. | ||
168 | */ | ||
169 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
170 | RadixSort::RadixSort() : mRanks(null), mRanks2(null), mCurrentSize(0), mTotalCalls(0), mNbHits(0) | ||
171 | { | ||
172 | #ifndef RADIX_LOCAL_RAM | ||
173 | // Allocate input-independent ram | ||
174 | mHistogram = new udword[256*4]; | ||
175 | mOffset = new udword[256]; | ||
176 | #endif | ||
177 | // Initialize indices | ||
178 | INVALIDATE_RANKS; | ||
179 | } | ||
180 | |||
181 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
182 | /** | ||
183 | * Destructor. | ||
184 | */ | ||
185 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
186 | RadixSort::~RadixSort() | ||
187 | { | ||
188 | // Release everything | ||
189 | #ifndef RADIX_LOCAL_RAM | ||
190 | DELETEARRAY(mOffset); | ||
191 | DELETEARRAY(mHistogram); | ||
192 | #endif | ||
193 | DELETEARRAY(mRanks2); | ||
194 | DELETEARRAY(mRanks); | ||
195 | } | ||
196 | |||
197 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
198 | /** | ||
199 | * Resizes the inner lists. | ||
200 | * \param nb [in] new size (number of dwords) | ||
201 | * \return true if success | ||
202 | */ | ||
203 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
204 | bool RadixSort::Resize(udword nb) | ||
205 | { | ||
206 | // Free previously used ram | ||
207 | DELETEARRAY(mRanks2); | ||
208 | DELETEARRAY(mRanks); | ||
209 | |||
210 | // Get some fresh one | ||
211 | mRanks = new udword[nb]; CHECKALLOC(mRanks); | ||
212 | mRanks2 = new udword[nb]; CHECKALLOC(mRanks2); | ||
213 | |||
214 | return true; | ||
215 | } | ||
216 | |||
217 | inline_ void RadixSort::CheckResize(udword nb) | ||
218 | { | ||
219 | udword CurSize = CURRENT_SIZE; | ||
220 | if(nb!=CurSize) | ||
221 | { | ||
222 | if(nb>CurSize) Resize(nb); | ||
223 | mCurrentSize = nb; | ||
224 | INVALIDATE_RANKS; | ||
225 | } | ||
226 | } | ||
227 | |||
228 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
229 | /** | ||
230 | * Main sort routine. | ||
231 | * This one is for integer values. After the call, mRanks contains a list of indices in sorted order, i.e. in the order you may process your data. | ||
232 | * \param input [in] a list of integer values to sort | ||
233 | * \param nb [in] number of values to sort, must be < 2^31 | ||
234 | * \param hint [in] RADIX_SIGNED to handle negative values, RADIX_UNSIGNED if you know your input buffer only contains positive values | ||
235 | * \return Self-Reference | ||
236 | */ | ||
237 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
238 | RadixSort& RadixSort::Sort(const udword* input, udword nb, RadixHint hint) | ||
239 | { | ||
240 | // Checkings | ||
241 | if(!input || !nb || nb&0x80000000) return *this; | ||
242 | |||
243 | // Stats | ||
244 | mTotalCalls++; | ||
245 | |||
246 | // Resize lists if needed | ||
247 | CheckResize(nb); | ||
248 | |||
249 | #ifdef RADIX_LOCAL_RAM | ||
250 | // Allocate histograms & offsets on the stack | ||
251 | udword mHistogram[256*4]; | ||
252 | // udword mOffset[256]; | ||
253 | udword* mLink[256]; | ||
254 | #endif | ||
255 | |||
256 | // Create histograms (counters). Counters for all passes are created in one run. | ||
257 | // Pros: read input buffer once instead of four times | ||
258 | // Cons: mHistogram is 4Kb instead of 1Kb | ||
259 | // We must take care of signed/unsigned values for temporal coherence.... I just | ||
260 | // have 2 code paths even if just a single opcode changes. Self-modifying code, someone? | ||
261 | if(hint==RADIX_UNSIGNED) { CREATE_HISTOGRAMS(udword, input); } | ||
262 | else { CREATE_HISTOGRAMS(sdword, input); } | ||
263 | |||
264 | // Compute #negative values involved if needed | ||
265 | udword NbNegativeValues = 0; | ||
266 | if(hint==RADIX_SIGNED) | ||
267 | { | ||
268 | // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128 | ||
269 | // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte, | ||
270 | // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers. | ||
271 | udword* h3= &mHistogram[768]; | ||
272 | for(udword i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part | ||
273 | } | ||
274 | |||
275 | // Radix sort, j is the pass number (0=LSB, 3=MSB) | ||
276 | for(udword j=0;j<4;j++) | ||
277 | { | ||
278 | CHECK_PASS_VALIDITY(j); | ||
279 | |||
280 | // Sometimes the fourth (negative) pass is skipped because all numbers are negative and the MSB is 0xFF (for example). This is | ||
281 | // not a problem, numbers are correctly sorted anyway. | ||
282 | if(PerformPass) | ||
283 | { | ||
284 | // Should we care about negative values? | ||
285 | if(j!=3 || hint==RADIX_UNSIGNED) | ||
286 | { | ||
287 | // Here we deal with positive values only | ||
288 | |||
289 | // Create offsets | ||
290 | // mOffset[0] = 0; | ||
291 | // for(udword i=1;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; | ||
292 | mLink[0] = mRanks2; | ||
293 | for(udword i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; | ||
294 | } | ||
295 | else | ||
296 | { | ||
297 | // This is a special case to correctly handle negative integers. They're sorted in the right order but at the wrong place. | ||
298 | |||
299 | // Create biased offsets, in order for negative numbers to be sorted as well | ||
300 | // mOffset[0] = NbNegativeValues; // First positive number takes place after the negative ones | ||
301 | mLink[0] = &mRanks2[NbNegativeValues]; // First positive number takes place after the negative ones | ||
302 | // for(udword i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers | ||
303 | for(udword i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers | ||
304 | |||
305 | // Fixing the wrong place for negative values | ||
306 | // mOffset[128] = 0; | ||
307 | mLink[128] = mRanks2; | ||
308 | // for(i=129;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; | ||
309 | for(udword i=129;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; | ||
310 | } | ||
311 | |||
312 | // Perform Radix Sort | ||
313 | ubyte* InputBytes = (ubyte*)input; | ||
314 | InputBytes += j; | ||
315 | if(INVALID_RANKS) | ||
316 | { | ||
317 | // for(udword i=0;i<nb;i++) mRanks2[mOffset[InputBytes[i<<2]]++] = i; | ||
318 | for(udword i=0;i<nb;i++) *mLink[InputBytes[i<<2]]++ = i; | ||
319 | VALIDATE_RANKS; | ||
320 | } | ||
321 | else | ||
322 | { | ||
323 | udword* Indices = mRanks; | ||
324 | udword* IndicesEnd = &mRanks[nb]; | ||
325 | while(Indices!=IndicesEnd) | ||
326 | { | ||
327 | udword id = *Indices++; | ||
328 | // mRanks2[mOffset[InputBytes[id<<2]]++] = id; | ||
329 | *mLink[InputBytes[id<<2]]++ = id; | ||
330 | } | ||
331 | } | ||
332 | |||
333 | // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap. | ||
334 | udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp; | ||
335 | } | ||
336 | } | ||
337 | return *this; | ||
338 | } | ||
339 | |||
340 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
341 | /** | ||
342 | * Main sort routine. | ||
343 | * This one is for floating-point values. After the call, mRanks contains a list of indices in sorted order, i.e. in the order you may process your data. | ||
344 | * \param input [in] a list of floating-point values to sort | ||
345 | * \param nb [in] number of values to sort, must be < 2^31 | ||
346 | * \return Self-Reference | ||
347 | * \warning only sorts IEEE floating-point values | ||
348 | */ | ||
349 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
350 | RadixSort& RadixSort::Sort(const float* input2, udword nb) | ||
351 | { | ||
352 | // Checkings | ||
353 | if(!input2 || !nb || nb&0x80000000) return *this; | ||
354 | |||
355 | // Stats | ||
356 | mTotalCalls++; | ||
357 | |||
358 | udword* input = (udword*)input2; | ||
359 | |||
360 | // Resize lists if needed | ||
361 | CheckResize(nb); | ||
362 | |||
363 | #ifdef RADIX_LOCAL_RAM | ||
364 | // Allocate histograms & offsets on the stack | ||
365 | udword mHistogram[256*4]; | ||
366 | // udword mOffset[256]; | ||
367 | udword* mLink[256]; | ||
368 | #endif | ||
369 | |||
370 | // Create histograms (counters). Counters for all passes are created in one run. | ||
371 | // Pros: read input buffer once instead of four times | ||
372 | // Cons: mHistogram is 4Kb instead of 1Kb | ||
373 | // Floating-point values are always supposed to be signed values, so there's only one code path there. | ||
374 | // Please note the floating point comparison needed for temporal coherence! Although the resulting asm code | ||
375 | // is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first | ||
376 | // generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just | ||
377 | // wouldn't work with mixed positive/negative values.... | ||
378 | { CREATE_HISTOGRAMS(float, input2); } | ||
379 | |||
380 | // Compute #negative values involved if needed | ||
381 | udword NbNegativeValues = 0; | ||
382 | // An efficient way to compute the number of negatives values we'll have to deal with is simply to sum the 128 | ||
383 | // last values of the last histogram. Last histogram because that's the one for the Most Significant Byte, | ||
384 | // responsible for the sign. 128 last values because the 128 first ones are related to positive numbers. | ||
385 | udword* h3= &mHistogram[768]; | ||
386 | for(udword i=128;i<256;i++) NbNegativeValues += h3[i]; // 768 for last histogram, 128 for negative part | ||
387 | |||
388 | // Radix sort, j is the pass number (0=LSB, 3=MSB) | ||
389 | for(udword j=0;j<4;j++) | ||
390 | { | ||
391 | // Should we care about negative values? | ||
392 | if(j!=3) | ||
393 | { | ||
394 | // Here we deal with positive values only | ||
395 | CHECK_PASS_VALIDITY(j); | ||
396 | |||
397 | if(PerformPass) | ||
398 | { | ||
399 | // Create offsets | ||
400 | // mOffset[0] = 0; | ||
401 | mLink[0] = mRanks2; | ||
402 | // for(udword i=1;i<256;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; | ||
403 | for(udword i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; | ||
404 | |||
405 | // Perform Radix Sort | ||
406 | ubyte* InputBytes = (ubyte*)input; | ||
407 | InputBytes += j; | ||
408 | if(INVALID_RANKS) | ||
409 | { | ||
410 | // for(i=0;i<nb;i++) mRanks2[mOffset[InputBytes[i<<2]]++] = i; | ||
411 | for(udword i=0;i<nb;i++) *mLink[InputBytes[i<<2]]++ = i; | ||
412 | VALIDATE_RANKS; | ||
413 | } | ||
414 | else | ||
415 | { | ||
416 | udword* Indices = mRanks; | ||
417 | udword* IndicesEnd = &mRanks[nb]; | ||
418 | while(Indices!=IndicesEnd) | ||
419 | { | ||
420 | udword id = *Indices++; | ||
421 | // mRanks2[mOffset[InputBytes[id<<2]]++] = id; | ||
422 | *mLink[InputBytes[id<<2]]++ = id; | ||
423 | } | ||
424 | } | ||
425 | |||
426 | // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap. | ||
427 | udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp; | ||
428 | } | ||
429 | } | ||
430 | else | ||
431 | { | ||
432 | // This is a special case to correctly handle negative values | ||
433 | CHECK_PASS_VALIDITY(j); | ||
434 | |||
435 | if(PerformPass) | ||
436 | { | ||
437 | // Create biased offsets, in order for negative numbers to be sorted as well | ||
438 | // mOffset[0] = NbNegativeValues; // First positive number takes place after the negative ones | ||
439 | mLink[0] = &mRanks2[NbNegativeValues]; // First positive number takes place after the negative ones | ||
440 | // for(udword i=1;i<128;i++) mOffset[i] = mOffset[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers | ||
441 | for(udword i=1;i<128;i++) mLink[i] = mLink[i-1] + CurCount[i-1]; // 1 to 128 for positive numbers | ||
442 | |||
443 | // We must reverse the sorting order for negative numbers! | ||
444 | // mOffset[255] = 0; | ||
445 | mLink[255] = mRanks2; | ||
446 | // for(i=0;i<127;i++) mOffset[254-i] = mOffset[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values | ||
447 | for(udword i=0;i<127;i++) mLink[254-i] = mLink[255-i] + CurCount[255-i]; // Fixing the wrong order for negative values | ||
448 | // for(i=128;i<256;i++) mOffset[i] += CurCount[i]; // Fixing the wrong place for negative values | ||
449 | for(udword i=128;i<256;i++) mLink[i] += CurCount[i]; // Fixing the wrong place for negative values | ||
450 | |||
451 | // Perform Radix Sort | ||
452 | if(INVALID_RANKS) | ||
453 | { | ||
454 | for(udword i=0;i<nb;i++) | ||
455 | { | ||
456 | udword Radix = input[i]>>24; // Radix byte, same as above. AND is useless here (udword). | ||
457 | // ### cmp to be killed. Not good. Later. | ||
458 | // if(Radix<128) mRanks2[mOffset[Radix]++] = i; // Number is positive, same as above | ||
459 | // else mRanks2[--mOffset[Radix]] = i; // Number is negative, flip the sorting order | ||
460 | if(Radix<128) *mLink[Radix]++ = i; // Number is positive, same as above | ||
461 | else *(--mLink[Radix]) = i; // Number is negative, flip the sorting order | ||
462 | } | ||
463 | VALIDATE_RANKS; | ||
464 | } | ||
465 | else | ||
466 | { | ||
467 | for(udword i=0;i<nb;i++) | ||
468 | { | ||
469 | udword Radix = input[mRanks[i]]>>24; // Radix byte, same as above. AND is useless here (udword). | ||
470 | // ### cmp to be killed. Not good. Later. | ||
471 | // if(Radix<128) mRanks2[mOffset[Radix]++] = mRanks[i]; // Number is positive, same as above | ||
472 | // else mRanks2[--mOffset[Radix]] = mRanks[i]; // Number is negative, flip the sorting order | ||
473 | if(Radix<128) *mLink[Radix]++ = mRanks[i]; // Number is positive, same as above | ||
474 | else *(--mLink[Radix]) = mRanks[i]; // Number is negative, flip the sorting order | ||
475 | } | ||
476 | } | ||
477 | // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap. | ||
478 | udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp; | ||
479 | } | ||
480 | else | ||
481 | { | ||
482 | // The pass is useless, yet we still have to reverse the order of current list if all values are negative. | ||
483 | if(UniqueVal>=128) | ||
484 | { | ||
485 | if(INVALID_RANKS) | ||
486 | { | ||
487 | // ###Possible? | ||
488 | for(udword i=0;i<nb;i++) mRanks2[i] = nb-i-1; | ||
489 | VALIDATE_RANKS; | ||
490 | } | ||
491 | else | ||
492 | { | ||
493 | for(udword i=0;i<nb;i++) mRanks2[i] = mRanks[nb-i-1]; | ||
494 | } | ||
495 | |||
496 | // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap. | ||
497 | udword* Tmp = mRanks; mRanks = mRanks2; mRanks2 = Tmp; | ||
498 | } | ||
499 | } | ||
500 | } | ||
501 | } | ||
502 | return *this; | ||
503 | } | ||
504 | |||
505 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
506 | /** | ||
507 | * Gets the ram used. | ||
508 | * \return memory used in bytes | ||
509 | */ | ||
510 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||
511 | udword RadixSort::GetUsedRam() const | ||
512 | { | ||
513 | udword UsedRam = sizeof(RadixSort); | ||
514 | #ifndef RADIX_LOCAL_RAM | ||
515 | UsedRam += 256*4*sizeof(udword); // Histograms | ||
516 | UsedRam += 256*sizeof(udword); // Offsets | ||
517 | #endif | ||
518 | UsedRam += 2*CURRENT_SIZE*sizeof(udword); // 2 lists of indices | ||
519 | return UsedRam; | ||
520 | } | ||