diff options
Diffstat (limited to 'linden/indra/llmessage/patch_idct.cpp')
-rw-r--r-- | linden/indra/llmessage/patch_idct.cpp | 685 |
1 files changed, 685 insertions, 0 deletions
diff --git a/linden/indra/llmessage/patch_idct.cpp b/linden/indra/llmessage/patch_idct.cpp new file mode 100644 index 0000000..a20641f --- /dev/null +++ b/linden/indra/llmessage/patch_idct.cpp | |||
@@ -0,0 +1,685 @@ | |||
1 | /** | ||
2 | * @file patch_idct.cpp | ||
3 | * @brief IDCT patch. | ||
4 | * | ||
5 | * Copyright (c) 2000-2007, Linden Research, Inc. | ||
6 | * | ||
7 | * The source code in this file ("Source Code") is provided by Linden Lab | ||
8 | * to you under the terms of the GNU General Public License, version 2.0 | ||
9 | * ("GPL"), unless you have obtained a separate licensing agreement | ||
10 | * ("Other License"), formally executed by you and Linden Lab. Terms of | ||
11 | * the GPL can be found in doc/GPL-license.txt in this distribution, or | ||
12 | * online at http://secondlife.com/developers/opensource/gplv2 | ||
13 | * | ||
14 | * There are special exceptions to the terms and conditions of the GPL as | ||
15 | * it is applied to this Source Code. View the full text of the exception | ||
16 | * in the file doc/FLOSS-exception.txt in this software distribution, or | ||
17 | * online at http://secondlife.com/developers/opensource/flossexception | ||
18 | * | ||
19 | * By copying, modifying or distributing this software, you acknowledge | ||
20 | * that you have read and understood your obligations described above, | ||
21 | * and agree to abide by those obligations. | ||
22 | * | ||
23 | * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO | ||
24 | * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, | ||
25 | * COMPLETENESS OR PERFORMANCE. | ||
26 | */ | ||
27 | |||
28 | #include "linden_common.h" | ||
29 | |||
30 | #include "llmath.h" | ||
31 | //#include "vmath.h" | ||
32 | #include "v3math.h" | ||
33 | #include "patch_dct.h" | ||
34 | |||
35 | LLGroupHeader *gGOPP; | ||
36 | |||
37 | void set_group_of_patch_header(LLGroupHeader *gopp) | ||
38 | { | ||
39 | gGOPP = gopp; | ||
40 | } | ||
41 | |||
42 | F32 gPatchDequantizeTable[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE]; | ||
43 | void build_patch_dequantize_table(S32 size) | ||
44 | { | ||
45 | S32 i, j; | ||
46 | for (j = 0; j < size; j++) | ||
47 | { | ||
48 | for (i = 0; i < size; i++) | ||
49 | { | ||
50 | gPatchDequantizeTable[j*size + i] = (1.f + 2.f*(i+j)); | ||
51 | } | ||
52 | } | ||
53 | } | ||
54 | |||
55 | S32 gCurrentDeSize = 0; | ||
56 | |||
57 | F32 gPatchICosines[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE]; | ||
58 | |||
59 | void setup_patch_icosines(S32 size) | ||
60 | { | ||
61 | S32 n, u; | ||
62 | F32 oosob = F_PI*0.5f/size; | ||
63 | |||
64 | for (u = 0; u < size; u++) | ||
65 | { | ||
66 | for (n = 0; n < size; n++) | ||
67 | { | ||
68 | gPatchICosines[u*size+n] = cosf((2.f*n+1.f)*u*oosob); | ||
69 | } | ||
70 | } | ||
71 | } | ||
72 | |||
73 | S32 gDeCopyMatrix[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE]; | ||
74 | |||
75 | void build_decopy_matrix(S32 size) | ||
76 | { | ||
77 | S32 i, j, count; | ||
78 | BOOL b_diag = FALSE; | ||
79 | BOOL b_right = TRUE; | ||
80 | |||
81 | i = 0; | ||
82 | j = 0; | ||
83 | count = 0; | ||
84 | |||
85 | while ( (i < size) | ||
86 | &&(j < size)) | ||
87 | { | ||
88 | gDeCopyMatrix[j*size + i] = count; | ||
89 | |||
90 | count++; | ||
91 | |||
92 | if (!b_diag) | ||
93 | { | ||
94 | if (b_right) | ||
95 | { | ||
96 | if (i < size - 1) | ||
97 | i++; | ||
98 | else | ||
99 | j++; | ||
100 | b_right = FALSE; | ||
101 | b_diag = TRUE; | ||
102 | } | ||
103 | else | ||
104 | { | ||
105 | if (j < size - 1) | ||
106 | j++; | ||
107 | else | ||
108 | i++; | ||
109 | b_right = TRUE; | ||
110 | b_diag = TRUE; | ||
111 | } | ||
112 | } | ||
113 | else | ||
114 | { | ||
115 | if (b_right) | ||
116 | { | ||
117 | i++; | ||
118 | j--; | ||
119 | if ( (i == size - 1) | ||
120 | ||(j == 0)) | ||
121 | { | ||
122 | b_diag = FALSE; | ||
123 | } | ||
124 | } | ||
125 | else | ||
126 | { | ||
127 | i--; | ||
128 | j++; | ||
129 | if ( (i == 0) | ||
130 | ||(j == size - 1)) | ||
131 | { | ||
132 | b_diag = FALSE; | ||
133 | } | ||
134 | } | ||
135 | } | ||
136 | } | ||
137 | } | ||
138 | |||
139 | void init_patch_decompressor(S32 size) | ||
140 | { | ||
141 | if (size != gCurrentDeSize) | ||
142 | { | ||
143 | gCurrentDeSize = size; | ||
144 | build_patch_dequantize_table(size); | ||
145 | setup_patch_icosines(size); | ||
146 | build_decopy_matrix(size); | ||
147 | } | ||
148 | } | ||
149 | |||
150 | inline void idct_line(F32 *linein, F32 *lineout, S32 line) | ||
151 | { | ||
152 | S32 n; | ||
153 | F32 total; | ||
154 | F32 *pcp = gPatchICosines; | ||
155 | |||
156 | #ifdef _PATCH_SIZE_16_AND_32_ONLY | ||
157 | F32 oosob = 2.f/16.f; | ||
158 | S32 line_size = line*NORMAL_PATCH_SIZE; | ||
159 | F32 *tlinein, *tpcp; | ||
160 | |||
161 | |||
162 | for (n = 0; n < NORMAL_PATCH_SIZE; n++) | ||
163 | { | ||
164 | tpcp = pcp + n; | ||
165 | tlinein = linein + line_size; | ||
166 | |||
167 | total = OO_SQRT2*(*(tlinein++)); | ||
168 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
169 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
170 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
171 | |||
172 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
173 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
174 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
175 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
176 | |||
177 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
178 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
179 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
180 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
181 | |||
182 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
183 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
184 | total += *(tlinein++)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
185 | total += *(tlinein)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
186 | |||
187 | *(lineout + line_size + n) = total*oosob; | ||
188 | } | ||
189 | #else | ||
190 | F32 oosob = 2.f/size; | ||
191 | S32 size = gGOPP->patch_size; | ||
192 | S32 line_size = line*size; | ||
193 | S32 u; | ||
194 | for (n = 0; n < size; n++) | ||
195 | { | ||
196 | total = OO_SQRT2*linein[line_size]; | ||
197 | for (u = 1; u < size; u++) | ||
198 | { | ||
199 | total += linein[line_size + u]*pcp[u*size+n]; | ||
200 | } | ||
201 | lineout[line_size + n] = total*oosob; | ||
202 | } | ||
203 | #endif | ||
204 | } | ||
205 | |||
206 | inline void idct_line_large_slow(F32 *linein, F32 *lineout, S32 line) | ||
207 | { | ||
208 | S32 n; | ||
209 | F32 total; | ||
210 | F32 *pcp = gPatchICosines; | ||
211 | |||
212 | F32 oosob = 2.f/32.f; | ||
213 | S32 line_size = line*LARGE_PATCH_SIZE; | ||
214 | F32 *tlinein, *tpcp; | ||
215 | |||
216 | |||
217 | for (n = 0; n < LARGE_PATCH_SIZE; n++) | ||
218 | { | ||
219 | tpcp = pcp + n; | ||
220 | tlinein = linein + line_size; | ||
221 | |||
222 | total = OO_SQRT2*(*(tlinein++)); | ||
223 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
224 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
225 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
226 | |||
227 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
228 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
229 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
230 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
231 | |||
232 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
233 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
234 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
235 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
236 | |||
237 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
238 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
239 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
240 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
241 | |||
242 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
243 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
244 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
245 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
246 | |||
247 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
248 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
249 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
250 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
251 | |||
252 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
253 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
254 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
255 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
256 | |||
257 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
258 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
259 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
260 | total += *(tlinein)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
261 | |||
262 | *(lineout + line_size + n) = total*oosob; | ||
263 | } | ||
264 | } | ||
265 | |||
266 | // Nota Bene: assumes that coefficients beyond 128 are 0! | ||
267 | |||
268 | void idct_line_large(F32 *linein, F32 *lineout, S32 line) | ||
269 | { | ||
270 | S32 n; | ||
271 | F32 total; | ||
272 | F32 *pcp = gPatchICosines; | ||
273 | |||
274 | F32 oosob = 2.f/32.f; | ||
275 | S32 line_size = line*LARGE_PATCH_SIZE; | ||
276 | F32 *tlinein, *tpcp; | ||
277 | F32 *baselinein = linein + line_size; | ||
278 | F32 *baselineout = lineout + line_size; | ||
279 | |||
280 | |||
281 | for (n = 0; n < LARGE_PATCH_SIZE; n++) | ||
282 | { | ||
283 | tpcp = pcp++; | ||
284 | tlinein = baselinein; | ||
285 | |||
286 | total = OO_SQRT2*(*(tlinein++)); | ||
287 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
288 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
289 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
290 | |||
291 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
292 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
293 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
294 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
295 | |||
296 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
297 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
298 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
299 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
300 | |||
301 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
302 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
303 | total += *(tlinein++)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
304 | total += *(tlinein)*(*(tpcp)); | ||
305 | |||
306 | *baselineout++ = total*oosob; | ||
307 | } | ||
308 | } | ||
309 | |||
310 | inline void idct_column(F32 *linein, F32 *lineout, S32 column) | ||
311 | { | ||
312 | S32 n; | ||
313 | F32 total; | ||
314 | F32 *pcp = gPatchICosines; | ||
315 | |||
316 | #ifdef _PATCH_SIZE_16_AND_32_ONLY | ||
317 | F32 *tlinein, *tpcp; | ||
318 | |||
319 | for (n = 0; n < NORMAL_PATCH_SIZE; n++) | ||
320 | { | ||
321 | tpcp = pcp + n; | ||
322 | tlinein = linein + column; | ||
323 | |||
324 | total = OO_SQRT2*(*tlinein); | ||
325 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
326 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
327 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
328 | |||
329 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
330 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
331 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
332 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
333 | |||
334 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
335 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
336 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
337 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
338 | |||
339 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
340 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
341 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
342 | total += *(tlinein += NORMAL_PATCH_SIZE)*(*(tpcp += NORMAL_PATCH_SIZE)); | ||
343 | |||
344 | *(lineout + (n<<4) + column) = total; | ||
345 | } | ||
346 | |||
347 | #else | ||
348 | S32 size = gGOPP->patch_size; | ||
349 | S32 u; | ||
350 | S32 u_size; | ||
351 | |||
352 | for (n = 0; n < size; n++) | ||
353 | { | ||
354 | total = OO_SQRT2*linein[column]; | ||
355 | for (u = 1; u < size; u++) | ||
356 | { | ||
357 | u_size = u*size; | ||
358 | total += linein[u_size + column]*pcp[u_size+n]; | ||
359 | } | ||
360 | lineout[size*n + column] = total; | ||
361 | } | ||
362 | #endif | ||
363 | } | ||
364 | |||
365 | inline void idct_column_large_slow(F32 *linein, F32 *lineout, S32 column) | ||
366 | { | ||
367 | S32 n; | ||
368 | F32 total; | ||
369 | F32 *pcp = gPatchICosines; | ||
370 | |||
371 | F32 *tlinein, *tpcp; | ||
372 | |||
373 | for (n = 0; n < LARGE_PATCH_SIZE; n++) | ||
374 | { | ||
375 | tpcp = pcp + n; | ||
376 | tlinein = linein + column; | ||
377 | |||
378 | total = OO_SQRT2*(*tlinein); | ||
379 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
380 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
381 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
382 | |||
383 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
384 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
385 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
386 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
387 | |||
388 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
389 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
390 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
391 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
392 | |||
393 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
394 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
395 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
396 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
397 | |||
398 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
399 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
400 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
401 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
402 | |||
403 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
404 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
405 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
406 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
407 | |||
408 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
409 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
410 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
411 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
412 | |||
413 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
414 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
415 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
416 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
417 | |||
418 | *(lineout + (n<<5) + column) = total; | ||
419 | } | ||
420 | } | ||
421 | |||
422 | // Nota Bene: assumes that coefficients beyond 128 are 0! | ||
423 | |||
424 | void idct_column_large(F32 *linein, F32 *lineout, S32 column) | ||
425 | { | ||
426 | S32 n, m; | ||
427 | F32 total; | ||
428 | F32 *pcp = gPatchICosines; | ||
429 | |||
430 | F32 *tlinein, *tpcp; | ||
431 | F32 *baselinein = linein + column; | ||
432 | F32 *baselineout = lineout + column; | ||
433 | |||
434 | for (n = 0; n < LARGE_PATCH_SIZE; n++) | ||
435 | { | ||
436 | tpcp = pcp++; | ||
437 | tlinein = baselinein; | ||
438 | |||
439 | total = OO_SQRT2*(*tlinein); | ||
440 | for (m = 1; m < NORMAL_PATCH_SIZE; m++) | ||
441 | total += *(tlinein += LARGE_PATCH_SIZE)*(*(tpcp += LARGE_PATCH_SIZE)); | ||
442 | |||
443 | *(baselineout + (n<<5)) = total; | ||
444 | } | ||
445 | } | ||
446 | |||
447 | inline void idct_patch(F32 *block) | ||
448 | { | ||
449 | F32 temp[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE]; | ||
450 | |||
451 | #ifdef _PATCH_SIZE_16_AND_32_ONLY | ||
452 | idct_column(block, temp, 0); | ||
453 | idct_column(block, temp, 1); | ||
454 | idct_column(block, temp, 2); | ||
455 | idct_column(block, temp, 3); | ||
456 | |||
457 | idct_column(block, temp, 4); | ||
458 | idct_column(block, temp, 5); | ||
459 | idct_column(block, temp, 6); | ||
460 | idct_column(block, temp, 7); | ||
461 | |||
462 | idct_column(block, temp, 8); | ||
463 | idct_column(block, temp, 9); | ||
464 | idct_column(block, temp, 10); | ||
465 | idct_column(block, temp, 11); | ||
466 | |||
467 | idct_column(block, temp, 12); | ||
468 | idct_column(block, temp, 13); | ||
469 | idct_column(block, temp, 14); | ||
470 | idct_column(block, temp, 15); | ||
471 | |||
472 | idct_line(temp, block, 0); | ||
473 | idct_line(temp, block, 1); | ||
474 | idct_line(temp, block, 2); | ||
475 | idct_line(temp, block, 3); | ||
476 | |||
477 | idct_line(temp, block, 4); | ||
478 | idct_line(temp, block, 5); | ||
479 | idct_line(temp, block, 6); | ||
480 | idct_line(temp, block, 7); | ||
481 | |||
482 | idct_line(temp, block, 8); | ||
483 | idct_line(temp, block, 9); | ||
484 | idct_line(temp, block, 10); | ||
485 | idct_line(temp, block, 11); | ||
486 | |||
487 | idct_line(temp, block, 12); | ||
488 | idct_line(temp, block, 13); | ||
489 | idct_line(temp, block, 14); | ||
490 | idct_line(temp, block, 15); | ||
491 | #else | ||
492 | S32 i; | ||
493 | S32 size = gGOPP->patch_size; | ||
494 | for (i = 0; i < size; i++) | ||
495 | { | ||
496 | idct_column(block, temp, i); | ||
497 | } | ||
498 | for (i = 0; i < size; i++) | ||
499 | { | ||
500 | idct_line(temp, block, i); | ||
501 | } | ||
502 | #endif | ||
503 | } | ||
504 | |||
505 | inline void idct_patch_large(F32 *block) | ||
506 | { | ||
507 | F32 temp[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE]; | ||
508 | |||
509 | idct_column_large_slow(block, temp, 0); | ||
510 | idct_column_large_slow(block, temp, 1); | ||
511 | idct_column_large_slow(block, temp, 2); | ||
512 | idct_column_large_slow(block, temp, 3); | ||
513 | |||
514 | idct_column_large_slow(block, temp, 4); | ||
515 | idct_column_large_slow(block, temp, 5); | ||
516 | idct_column_large_slow(block, temp, 6); | ||
517 | idct_column_large_slow(block, temp, 7); | ||
518 | |||
519 | idct_column_large_slow(block, temp, 8); | ||
520 | idct_column_large_slow(block, temp, 9); | ||
521 | idct_column_large_slow(block, temp, 10); | ||
522 | idct_column_large_slow(block, temp, 11); | ||
523 | |||
524 | idct_column_large_slow(block, temp, 12); | ||
525 | idct_column_large_slow(block, temp, 13); | ||
526 | idct_column_large_slow(block, temp, 14); | ||
527 | idct_column_large_slow(block, temp, 15); | ||
528 | |||
529 | idct_column_large_slow(block, temp, 16); | ||
530 | idct_column_large_slow(block, temp, 17); | ||
531 | idct_column_large_slow(block, temp, 18); | ||
532 | idct_column_large_slow(block, temp, 19); | ||
533 | |||
534 | idct_column_large_slow(block, temp, 20); | ||
535 | idct_column_large_slow(block, temp, 21); | ||
536 | idct_column_large_slow(block, temp, 22); | ||
537 | idct_column_large_slow(block, temp, 23); | ||
538 | |||
539 | idct_column_large_slow(block, temp, 24); | ||
540 | idct_column_large_slow(block, temp, 25); | ||
541 | idct_column_large_slow(block, temp, 26); | ||
542 | idct_column_large_slow(block, temp, 27); | ||
543 | |||
544 | idct_column_large_slow(block, temp, 28); | ||
545 | idct_column_large_slow(block, temp, 29); | ||
546 | idct_column_large_slow(block, temp, 30); | ||
547 | idct_column_large_slow(block, temp, 31); | ||
548 | |||
549 | idct_line_large_slow(temp, block, 0); | ||
550 | idct_line_large_slow(temp, block, 1); | ||
551 | idct_line_large_slow(temp, block, 2); | ||
552 | idct_line_large_slow(temp, block, 3); | ||
553 | |||
554 | idct_line_large_slow(temp, block, 4); | ||
555 | idct_line_large_slow(temp, block, 5); | ||
556 | idct_line_large_slow(temp, block, 6); | ||
557 | idct_line_large_slow(temp, block, 7); | ||
558 | |||
559 | idct_line_large_slow(temp, block, 8); | ||
560 | idct_line_large_slow(temp, block, 9); | ||
561 | idct_line_large_slow(temp, block, 10); | ||
562 | idct_line_large_slow(temp, block, 11); | ||
563 | |||
564 | idct_line_large_slow(temp, block, 12); | ||
565 | idct_line_large_slow(temp, block, 13); | ||
566 | idct_line_large_slow(temp, block, 14); | ||
567 | idct_line_large_slow(temp, block, 15); | ||
568 | |||
569 | idct_line_large_slow(temp, block, 16); | ||
570 | idct_line_large_slow(temp, block, 17); | ||
571 | idct_line_large_slow(temp, block, 18); | ||
572 | idct_line_large_slow(temp, block, 19); | ||
573 | |||
574 | idct_line_large_slow(temp, block, 20); | ||
575 | idct_line_large_slow(temp, block, 21); | ||
576 | idct_line_large_slow(temp, block, 22); | ||
577 | idct_line_large_slow(temp, block, 23); | ||
578 | |||
579 | idct_line_large_slow(temp, block, 24); | ||
580 | idct_line_large_slow(temp, block, 25); | ||
581 | idct_line_large_slow(temp, block, 26); | ||
582 | idct_line_large_slow(temp, block, 27); | ||
583 | |||
584 | idct_line_large_slow(temp, block, 28); | ||
585 | idct_line_large_slow(temp, block, 29); | ||
586 | idct_line_large_slow(temp, block, 30); | ||
587 | idct_line_large_slow(temp, block, 31); | ||
588 | } | ||
589 | |||
590 | S32 gDitherNoise = 128; | ||
591 | |||
592 | void decompress_patch(F32 *patch, S32 *cpatch, LLPatchHeader *ph) | ||
593 | { | ||
594 | S32 i, j; | ||
595 | |||
596 | F32 block[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE], *tblock = block; | ||
597 | F32 *tpatch; | ||
598 | |||
599 | LLGroupHeader *gopp = gGOPP; | ||
600 | S32 size = gopp->patch_size; | ||
601 | F32 range = ph->range; | ||
602 | S32 prequant = (ph->quant_wbits >> 4) + 2; | ||
603 | S32 quantize = 1<<prequant; | ||
604 | F32 hmin = ph->dc_offset; | ||
605 | S32 stride = gopp->stride; | ||
606 | |||
607 | F32 ooq = 1.f/(F32)quantize; | ||
608 | F32 *dq = gPatchDequantizeTable; | ||
609 | S32 *decopy_matrix = gDeCopyMatrix; | ||
610 | |||
611 | F32 mult = ooq*range; | ||
612 | F32 addval = mult*(F32)(1<<(prequant - 1))+hmin; | ||
613 | |||
614 | for (i = 0; i < size*size; i++) | ||
615 | { | ||
616 | *(tblock++) = *(cpatch + *(decopy_matrix++))*(*dq++); | ||
617 | } | ||
618 | |||
619 | if (size == 16) | ||
620 | { | ||
621 | idct_patch(block); | ||
622 | } | ||
623 | else | ||
624 | { | ||
625 | idct_patch_large(block); | ||
626 | } | ||
627 | |||
628 | for (j = 0; j < size; j++) | ||
629 | { | ||
630 | tpatch = patch + j*stride; | ||
631 | tblock = block + j*size; | ||
632 | for (i = 0; i < size; i++) | ||
633 | { | ||
634 | *(tpatch++) = *(tblock++)*mult+addval; | ||
635 | } | ||
636 | } | ||
637 | } | ||
638 | |||
639 | |||
640 | void decompress_patchv(LLVector3 *v, S32 *cpatch, LLPatchHeader *ph) | ||
641 | { | ||
642 | S32 i, j; | ||
643 | |||
644 | F32 block[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE], *tblock = block; | ||
645 | LLVector3 *tvec; | ||
646 | |||
647 | LLGroupHeader *gopp = gGOPP; | ||
648 | S32 size = gopp->patch_size; | ||
649 | F32 range = ph->range; | ||
650 | S32 prequant = (ph->quant_wbits >> 4) + 2; | ||
651 | S32 quantize = 1<<prequant; | ||
652 | F32 hmin = ph->dc_offset; | ||
653 | S32 stride = gopp->stride; | ||
654 | |||
655 | F32 ooq = 1.f/(F32)quantize; | ||
656 | F32 *dq = gPatchDequantizeTable; | ||
657 | S32 *decopy_matrix = gDeCopyMatrix; | ||
658 | |||
659 | F32 mult = ooq*range; | ||
660 | F32 addval = mult*(F32)(1<<(prequant - 1))+hmin; | ||
661 | |||
662 | // BOOL b_diag = FALSE; | ||
663 | // BOOL b_right = TRUE; | ||
664 | |||
665 | for (i = 0; i < size*size; i++) | ||
666 | { | ||
667 | *(tblock++) = *(cpatch + *(decopy_matrix++))*(*dq++); | ||
668 | } | ||
669 | |||
670 | if (size == 16) | ||
671 | idct_patch(block); | ||
672 | else | ||
673 | idct_patch_large(block); | ||
674 | |||
675 | for (j = 0; j < size; j++) | ||
676 | { | ||
677 | tvec = v + j*stride; | ||
678 | tblock = block + j*size; | ||
679 | for (i = 0; i < size; i++) | ||
680 | { | ||
681 | (*tvec++).mV[VZ] = *(tblock++)*mult+addval; | ||
682 | } | ||
683 | } | ||
684 | } | ||
685 | |||