diff options
Diffstat (limited to 'libraries/irrlicht-1.8/source/Irrlicht/jpeglib/jfdctint.c')
-rw-r--r-- | libraries/irrlicht-1.8/source/Irrlicht/jpeglib/jfdctint.c | 4348 |
1 files changed, 4348 insertions, 0 deletions
diff --git a/libraries/irrlicht-1.8/source/Irrlicht/jpeglib/jfdctint.c b/libraries/irrlicht-1.8/source/Irrlicht/jpeglib/jfdctint.c new file mode 100644 index 0000000..529eaf8 --- /dev/null +++ b/libraries/irrlicht-1.8/source/Irrlicht/jpeglib/jfdctint.c | |||
@@ -0,0 +1,4348 @@ | |||
1 | /* | ||
2 | * jfdctint.c | ||
3 | * | ||
4 | * Copyright (C) 1991-1996, Thomas G. Lane. | ||
5 | * Modification developed 2003-2009 by Guido Vollbeding. | ||
6 | * This file is part of the Independent JPEG Group's software. | ||
7 | * For conditions of distribution and use, see the accompanying README file. | ||
8 | * | ||
9 | * This file contains a slow-but-accurate integer implementation of the | ||
10 | * forward DCT (Discrete Cosine Transform). | ||
11 | * | ||
12 | * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT | ||
13 | * on each column. Direct algorithms are also available, but they are | ||
14 | * much more complex and seem not to be any faster when reduced to code. | ||
15 | * | ||
16 | * This implementation is based on an algorithm described in | ||
17 | * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT | ||
18 | * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, | ||
19 | * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. | ||
20 | * The primary algorithm described there uses 11 multiplies and 29 adds. | ||
21 | * We use their alternate method with 12 multiplies and 32 adds. | ||
22 | * The advantage of this method is that no data path contains more than one | ||
23 | * multiplication; this allows a very simple and accurate implementation in | ||
24 | * scaled fixed-point arithmetic, with a minimal number of shifts. | ||
25 | * | ||
26 | * We also provide FDCT routines with various input sample block sizes for | ||
27 | * direct resolution reduction or enlargement and for directly resolving the | ||
28 | * common 2x1 and 1x2 subsampling cases without additional resampling: NxN | ||
29 | * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block. | ||
30 | * | ||
31 | * For N<8 we fill the remaining block coefficients with zero. | ||
32 | * For N>8 we apply a partial N-point FDCT on the input samples, computing | ||
33 | * just the lower 8 frequency coefficients and discarding the rest. | ||
34 | * | ||
35 | * We must scale the output coefficients of the N-point FDCT appropriately | ||
36 | * to the standard 8-point FDCT level by 8/N per 1-D pass. This scaling | ||
37 | * is folded into the constant multipliers (pass 2) and/or final/initial | ||
38 | * shifting. | ||
39 | * | ||
40 | * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases | ||
41 | * since there would be too many additional constants to pre-calculate. | ||
42 | */ | ||
43 | |||
44 | #define JPEG_INTERNALS | ||
45 | #include "jinclude.h" | ||
46 | #include "jpeglib.h" | ||
47 | #include "jdct.h" /* Private declarations for DCT subsystem */ | ||
48 | |||
49 | #ifdef DCT_ISLOW_SUPPORTED | ||
50 | |||
51 | |||
52 | /* | ||
53 | * This module is specialized to the case DCTSIZE = 8. | ||
54 | */ | ||
55 | |||
56 | #if DCTSIZE != 8 | ||
57 | Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ | ||
58 | #endif | ||
59 | |||
60 | |||
61 | /* | ||
62 | * The rationale behind this scaling is as follows: | ||
63 | * | ||
64 | * Each 1-D DCT step produces outputs which are a factor of sqrt(N) | ||
65 | * larger than the true DCT outputs. The final outputs are therefore | ||
66 | * a factor of N larger than desired; since N=8 this can be cured by | ||
67 | * a simple right shift at the end of the algorithm. The advantage of | ||
68 | * this arrangement is that we save two multiplications per 1-D DCT, | ||
69 | * because the y0 and y4 outputs need not be divided by sqrt(N). | ||
70 | * In the IJG code, this factor of 8 is removed by the quantization step | ||
71 | * (in jcdctmgr.c), NOT in this module. | ||
72 | * | ||
73 | * We have to do addition and subtraction of the integer inputs, which | ||
74 | * is no problem, and multiplication by fractional constants, which is | ||
75 | * a problem to do in integer arithmetic. We multiply all the constants | ||
76 | * by CONST_SCALE and convert them to integer constants (thus retaining | ||
77 | * CONST_BITS bits of precision in the constants). After doing a | ||
78 | * multiplication we have to divide the product by CONST_SCALE, with proper | ||
79 | * rounding, to produce the correct output. This division can be done | ||
80 | * cheaply as a right shift of CONST_BITS bits. We postpone shifting | ||
81 | * as long as possible so that partial sums can be added together with | ||
82 | * full fractional precision. | ||
83 | * | ||
84 | * The outputs of the first pass are scaled up by PASS1_BITS bits so that | ||
85 | * they are represented to better-than-integral precision. These outputs | ||
86 | * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word | ||
87 | * with the recommended scaling. (For 12-bit sample data, the intermediate | ||
88 | * array is INT32 anyway.) | ||
89 | * | ||
90 | * To avoid overflow of the 32-bit intermediate results in pass 2, we must | ||
91 | * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis | ||
92 | * shows that the values given below are the most effective. | ||
93 | */ | ||
94 | |||
/* Fixed-point precision settings.
 * CONST_BITS is the same for every sample depth; only PASS1_BITS
 * depends on BITS_IN_JSAMPLE.
 */
#define CONST_BITS  13

#if BITS_IN_JSAMPLE == 8
#define PASS1_BITS  2
#else
#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
#endif
102 | |||
/* Fixed-point multiplier constants.
 *
 * For the default CONST_BITS of 13 the values are written out as literal
 * integers, because some C compilers fail to fold "FIX(constant)" at
 * compile time and would otherwise perform useless floating-point work
 * at run time.  For any other CONST_BITS we simply fall back on the FIX()
 * macro; add a pre-calculated table here if that matters for your compiler.
 */

#if CONST_BITS != 13

#define FIX_0_298631336  FIX(0.298631336)
#define FIX_0_390180644  FIX(0.390180644)
#define FIX_0_541196100  FIX(0.541196100)
#define FIX_0_765366865  FIX(0.765366865)
#define FIX_0_899976223  FIX(0.899976223)
#define FIX_1_175875602  FIX(1.175875602)
#define FIX_1_501321110  FIX(1.501321110)
#define FIX_1_847759065  FIX(1.847759065)
#define FIX_1_961570560  FIX(1.961570560)
#define FIX_2_053119869  FIX(2.053119869)
#define FIX_2_562915447  FIX(2.562915447)
#define FIX_3_072711026  FIX(3.072711026)

#else /* CONST_BITS == 13: pre-calculated values */

#define FIX_0_298631336  ((INT32)  2446)        /* FIX(0.298631336) */
#define FIX_0_390180644  ((INT32)  3196)        /* FIX(0.390180644) */
#define FIX_0_541196100  ((INT32)  4433)        /* FIX(0.541196100) */
#define FIX_0_765366865  ((INT32)  6270)        /* FIX(0.765366865) */
#define FIX_0_899976223  ((INT32)  7373)        /* FIX(0.899976223) */
#define FIX_1_175875602  ((INT32)  9633)        /* FIX(1.175875602) */
#define FIX_1_501321110  ((INT32)  12299)       /* FIX(1.501321110) */
#define FIX_1_847759065  ((INT32)  15137)       /* FIX(1.847759065) */
#define FIX_1_961570560  ((INT32)  16069)       /* FIX(1.961570560) */
#define FIX_2_053119869  ((INT32)  16819)       /* FIX(2.053119869) */
#define FIX_2_562915447  ((INT32)  20995)       /* FIX(2.562915447) */
#define FIX_3_072711026  ((INT32)  25172)       /* FIX(3.072711026) */

#endif
137 | |||
138 | |||
/* MULTIPLY(var,coef): multiply an INT32 variable by an INT32 constant,
 * yielding an INT32 result.
 * With 12-bit samples a full 32-bit multiplication is required.
 * With 8-bit samples and the recommended scaling, both operands are no
 * more than 16 bits wide, so the cheaper 16x16->32 bit multiply
 * (MULTIPLY16C16) can be used instead.
 */

#if BITS_IN_JSAMPLE != 8
#define MULTIPLY(var,coef)  ((var) * (coef))
#else
#define MULTIPLY(var,coef)  MULTIPLY16C16(var,coef)
#endif
151 | |||
152 | |||
153 | /* | ||
154 | * Perform the forward DCT on one block of samples. | ||
155 | */ | ||
156 | |||
157 | GLOBAL(void) | ||
158 | jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
159 | { | ||
160 | INT32 tmp0, tmp1, tmp2, tmp3; | ||
161 | INT32 tmp10, tmp11, tmp12, tmp13; | ||
162 | INT32 z1; | ||
163 | DCTELEM *dataptr; | ||
164 | JSAMPROW elemptr; | ||
165 | int ctr; | ||
166 | SHIFT_TEMPS | ||
167 | |||
168 | /* Pass 1: process rows. */ | ||
169 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
170 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
171 | |||
172 | dataptr = data; | ||
173 | for (ctr = 0; ctr < DCTSIZE; ctr++) { | ||
174 | elemptr = sample_data[ctr] + start_col; | ||
175 | |||
176 | /* Even part per LL&M figure 1 --- note that published figure is faulty; | ||
177 | * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". | ||
178 | */ | ||
179 | |||
180 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); | ||
181 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); | ||
182 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); | ||
183 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]); | ||
184 | |||
185 | tmp10 = tmp0 + tmp3; | ||
186 | tmp12 = tmp0 - tmp3; | ||
187 | tmp11 = tmp1 + tmp2; | ||
188 | tmp13 = tmp1 - tmp2; | ||
189 | |||
190 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); | ||
191 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); | ||
192 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); | ||
193 | tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); | ||
194 | |||
195 | /* Apply unsigned->signed conversion */ | ||
196 | dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); | ||
197 | dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); | ||
198 | |||
199 | z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | ||
200 | /* Add fudge factor here for final descale. */ | ||
201 | z1 += ONE << (CONST_BITS-PASS1_BITS-1); | ||
202 | dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), | ||
203 | CONST_BITS-PASS1_BITS); | ||
204 | dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), | ||
205 | CONST_BITS-PASS1_BITS); | ||
206 | |||
207 | /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | ||
208 | * cK represents sqrt(2) * cos(K*pi/16). | ||
209 | * i0..i3 in the paper are tmp0..tmp3 here. | ||
210 | */ | ||
211 | |||
212 | tmp10 = tmp0 + tmp3; | ||
213 | tmp11 = tmp1 + tmp2; | ||
214 | tmp12 = tmp0 + tmp2; | ||
215 | tmp13 = tmp1 + tmp3; | ||
216 | z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ | ||
217 | /* Add fudge factor here for final descale. */ | ||
218 | z1 += ONE << (CONST_BITS-PASS1_BITS-1); | ||
219 | |||
220 | tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ | ||
221 | tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ | ||
222 | tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ | ||
223 | tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ | ||
224 | tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ | ||
225 | tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ | ||
226 | tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ | ||
227 | tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ | ||
228 | |||
229 | tmp12 += z1; | ||
230 | tmp13 += z1; | ||
231 | |||
232 | dataptr[1] = (DCTELEM) | ||
233 | RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS); | ||
234 | dataptr[3] = (DCTELEM) | ||
235 | RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS); | ||
236 | dataptr[5] = (DCTELEM) | ||
237 | RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS); | ||
238 | dataptr[7] = (DCTELEM) | ||
239 | RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS); | ||
240 | |||
241 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
242 | } | ||
243 | |||
244 | /* Pass 2: process columns. | ||
245 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
246 | * by an overall factor of 8. | ||
247 | */ | ||
248 | |||
249 | dataptr = data; | ||
250 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
251 | /* Even part per LL&M figure 1 --- note that published figure is faulty; | ||
252 | * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". | ||
253 | */ | ||
254 | |||
255 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; | ||
256 | tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; | ||
257 | tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; | ||
258 | tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; | ||
259 | |||
260 | /* Add fudge factor here for final descale. */ | ||
261 | tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1)); | ||
262 | tmp12 = tmp0 - tmp3; | ||
263 | tmp11 = tmp1 + tmp2; | ||
264 | tmp13 = tmp1 - tmp2; | ||
265 | |||
266 | tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; | ||
267 | tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; | ||
268 | tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; | ||
269 | tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; | ||
270 | |||
271 | dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); | ||
272 | dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); | ||
273 | |||
274 | z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | ||
275 | /* Add fudge factor here for final descale. */ | ||
276 | z1 += ONE << (CONST_BITS+PASS1_BITS-1); | ||
277 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
278 | RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS); | ||
279 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
280 | RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS); | ||
281 | |||
282 | /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | ||
283 | * cK represents sqrt(2) * cos(K*pi/16). | ||
284 | * i0..i3 in the paper are tmp0..tmp3 here. | ||
285 | */ | ||
286 | |||
287 | tmp10 = tmp0 + tmp3; | ||
288 | tmp11 = tmp1 + tmp2; | ||
289 | tmp12 = tmp0 + tmp2; | ||
290 | tmp13 = tmp1 + tmp3; | ||
291 | z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ | ||
292 | /* Add fudge factor here for final descale. */ | ||
293 | z1 += ONE << (CONST_BITS+PASS1_BITS-1); | ||
294 | |||
295 | tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ | ||
296 | tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ | ||
297 | tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ | ||
298 | tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ | ||
299 | tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ | ||
300 | tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ | ||
301 | tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ | ||
302 | tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ | ||
303 | |||
304 | tmp12 += z1; | ||
305 | tmp13 += z1; | ||
306 | |||
307 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
308 | RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS); | ||
309 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
310 | RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS); | ||
311 | dataptr[DCTSIZE*5] = (DCTELEM) | ||
312 | RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS); | ||
313 | dataptr[DCTSIZE*7] = (DCTELEM) | ||
314 | RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS); | ||
315 | |||
316 | dataptr++; /* advance pointer to next column */ | ||
317 | } | ||
318 | } | ||
319 | |||
320 | #ifdef DCT_SCALING_SUPPORTED | ||
321 | |||
322 | |||
323 | /* | ||
324 | * Perform the forward DCT on a 7x7 sample block. | ||
325 | */ | ||
326 | |||
327 | GLOBAL(void) | ||
328 | jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
329 | { | ||
330 | INT32 tmp0, tmp1, tmp2, tmp3; | ||
331 | INT32 tmp10, tmp11, tmp12; | ||
332 | INT32 z1, z2, z3; | ||
333 | DCTELEM *dataptr; | ||
334 | JSAMPROW elemptr; | ||
335 | int ctr; | ||
336 | SHIFT_TEMPS | ||
337 | |||
338 | /* Pre-zero output coefficient block. */ | ||
339 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
340 | |||
341 | /* Pass 1: process rows. */ | ||
342 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
343 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
344 | /* cK represents sqrt(2) * cos(K*pi/14). */ | ||
345 | |||
346 | dataptr = data; | ||
347 | for (ctr = 0; ctr < 7; ctr++) { | ||
348 | elemptr = sample_data[ctr] + start_col; | ||
349 | |||
350 | /* Even part */ | ||
351 | |||
352 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]); | ||
353 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]); | ||
354 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]); | ||
355 | tmp3 = GETJSAMPLE(elemptr[3]); | ||
356 | |||
357 | tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]); | ||
358 | tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]); | ||
359 | tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]); | ||
360 | |||
361 | z1 = tmp0 + tmp2; | ||
362 | /* Apply unsigned->signed conversion */ | ||
363 | dataptr[0] = (DCTELEM) | ||
364 | ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS); | ||
365 | tmp3 += tmp3; | ||
366 | z1 -= tmp3; | ||
367 | z1 -= tmp3; | ||
368 | z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */ | ||
369 | z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */ | ||
370 | z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */ | ||
371 | dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS); | ||
372 | z1 -= z2; | ||
373 | z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */ | ||
374 | dataptr[4] = (DCTELEM) | ||
375 | DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */ | ||
376 | CONST_BITS-PASS1_BITS); | ||
377 | dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS); | ||
378 | |||
379 | /* Odd part */ | ||
380 | |||
381 | tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */ | ||
382 | tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */ | ||
383 | tmp0 = tmp1 - tmp2; | ||
384 | tmp1 += tmp2; | ||
385 | tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */ | ||
386 | tmp1 += tmp2; | ||
387 | tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */ | ||
388 | tmp0 += tmp3; | ||
389 | tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */ | ||
390 | |||
391 | dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS); | ||
392 | dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS); | ||
393 | dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS); | ||
394 | |||
395 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
396 | } | ||
397 | |||
398 | /* Pass 2: process columns. | ||
399 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
400 | * by an overall factor of 8. | ||
401 | * We must also scale the output by (8/7)**2 = 64/49, which we fold | ||
402 | * into the constant multipliers: | ||
403 | * cK now represents sqrt(2) * cos(K*pi/14) * 64/49. | ||
404 | */ | ||
405 | |||
406 | dataptr = data; | ||
407 | for (ctr = 0; ctr < 7; ctr++) { | ||
408 | /* Even part */ | ||
409 | |||
410 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6]; | ||
411 | tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5]; | ||
412 | tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4]; | ||
413 | tmp3 = dataptr[DCTSIZE*3]; | ||
414 | |||
415 | tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6]; | ||
416 | tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5]; | ||
417 | tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4]; | ||
418 | |||
419 | z1 = tmp0 + tmp2; | ||
420 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
421 | DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */ | ||
422 | CONST_BITS+PASS1_BITS); | ||
423 | tmp3 += tmp3; | ||
424 | z1 -= tmp3; | ||
425 | z1 -= tmp3; | ||
426 | z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */ | ||
427 | z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */ | ||
428 | z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */ | ||
429 | dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS); | ||
430 | z1 -= z2; | ||
431 | z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */ | ||
432 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
433 | DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */ | ||
434 | CONST_BITS+PASS1_BITS); | ||
435 | dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS); | ||
436 | |||
437 | /* Odd part */ | ||
438 | |||
439 | tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */ | ||
440 | tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */ | ||
441 | tmp0 = tmp1 - tmp2; | ||
442 | tmp1 += tmp2; | ||
443 | tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */ | ||
444 | tmp1 += tmp2; | ||
445 | tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */ | ||
446 | tmp0 += tmp3; | ||
447 | tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */ | ||
448 | |||
449 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS); | ||
450 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS); | ||
451 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS); | ||
452 | |||
453 | dataptr++; /* advance pointer to next column */ | ||
454 | } | ||
455 | } | ||
456 | |||
457 | |||
458 | /* | ||
459 | * Perform the forward DCT on a 6x6 sample block. | ||
460 | */ | ||
461 | |||
462 | GLOBAL(void) | ||
463 | jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
464 | { | ||
465 | INT32 tmp0, tmp1, tmp2; | ||
466 | INT32 tmp10, tmp11, tmp12; | ||
467 | DCTELEM *dataptr; | ||
468 | JSAMPROW elemptr; | ||
469 | int ctr; | ||
470 | SHIFT_TEMPS | ||
471 | |||
472 | /* Pre-zero output coefficient block. */ | ||
473 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
474 | |||
475 | /* Pass 1: process rows. */ | ||
476 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
477 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
478 | /* cK represents sqrt(2) * cos(K*pi/12). */ | ||
479 | |||
480 | dataptr = data; | ||
481 | for (ctr = 0; ctr < 6; ctr++) { | ||
482 | elemptr = sample_data[ctr] + start_col; | ||
483 | |||
484 | /* Even part */ | ||
485 | |||
486 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]); | ||
487 | tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]); | ||
488 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]); | ||
489 | |||
490 | tmp10 = tmp0 + tmp2; | ||
491 | tmp12 = tmp0 - tmp2; | ||
492 | |||
493 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); | ||
494 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); | ||
495 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); | ||
496 | |||
497 | /* Apply unsigned->signed conversion */ | ||
498 | dataptr[0] = (DCTELEM) | ||
499 | ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS); | ||
500 | dataptr[2] = (DCTELEM) | ||
501 | DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ | ||
502 | CONST_BITS-PASS1_BITS); | ||
503 | dataptr[4] = (DCTELEM) | ||
504 | DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */ | ||
505 | CONST_BITS-PASS1_BITS); | ||
506 | |||
507 | /* Odd part */ | ||
508 | |||
509 | tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */ | ||
510 | CONST_BITS-PASS1_BITS); | ||
511 | |||
512 | dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS)); | ||
513 | dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS); | ||
514 | dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS)); | ||
515 | |||
516 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
517 | } | ||
518 | |||
519 | /* Pass 2: process columns. | ||
520 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
521 | * by an overall factor of 8. | ||
522 | * We must also scale the output by (8/6)**2 = 16/9, which we fold | ||
523 | * into the constant multipliers: | ||
524 | * cK now represents sqrt(2) * cos(K*pi/12) * 16/9. | ||
525 | */ | ||
526 | |||
527 | dataptr = data; | ||
528 | for (ctr = 0; ctr < 6; ctr++) { | ||
529 | /* Even part */ | ||
530 | |||
531 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5]; | ||
532 | tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4]; | ||
533 | tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; | ||
534 | |||
535 | tmp10 = tmp0 + tmp2; | ||
536 | tmp12 = tmp0 - tmp2; | ||
537 | |||
538 | tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5]; | ||
539 | tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4]; | ||
540 | tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; | ||
541 | |||
542 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
543 | DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */ | ||
544 | CONST_BITS+PASS1_BITS); | ||
545 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
546 | DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */ | ||
547 | CONST_BITS+PASS1_BITS); | ||
548 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
549 | DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */ | ||
550 | CONST_BITS+PASS1_BITS); | ||
551 | |||
552 | /* Odd part */ | ||
553 | |||
554 | tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */ | ||
555 | |||
556 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
557 | DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ | ||
558 | CONST_BITS+PASS1_BITS); | ||
559 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
560 | DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */ | ||
561 | CONST_BITS+PASS1_BITS); | ||
562 | dataptr[DCTSIZE*5] = (DCTELEM) | ||
563 | DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */ | ||
564 | CONST_BITS+PASS1_BITS); | ||
565 | |||
566 | dataptr++; /* advance pointer to next column */ | ||
567 | } | ||
568 | } | ||
569 | |||
570 | |||
571 | /* | ||
572 | * Perform the forward DCT on a 5x5 sample block. | ||
573 | */ | ||
574 | |||
575 | GLOBAL(void) | ||
576 | jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
577 | { | ||
578 | INT32 tmp0, tmp1, tmp2; | ||
579 | INT32 tmp10, tmp11; | ||
580 | DCTELEM *dataptr; | ||
581 | JSAMPROW elemptr; | ||
582 | int ctr; | ||
583 | SHIFT_TEMPS | ||
584 | |||
585 | /* Pre-zero output coefficient block. */ | ||
586 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
587 | |||
588 | /* Pass 1: process rows. */ | ||
589 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
590 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
591 | /* We scale the results further by 2 as part of output adaption */ | ||
592 | /* scaling for different DCT size. */ | ||
593 | /* cK represents sqrt(2) * cos(K*pi/10). */ | ||
594 | |||
595 | dataptr = data; | ||
596 | for (ctr = 0; ctr < 5; ctr++) { | ||
597 | elemptr = sample_data[ctr] + start_col; | ||
598 | |||
599 | /* Even part */ | ||
600 | |||
601 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]); | ||
602 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]); | ||
603 | tmp2 = GETJSAMPLE(elemptr[2]); | ||
604 | |||
605 | tmp10 = tmp0 + tmp1; | ||
606 | tmp11 = tmp0 - tmp1; | ||
607 | |||
608 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]); | ||
609 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]); | ||
610 | |||
611 | /* Apply unsigned->signed conversion */ | ||
612 | dataptr[0] = (DCTELEM) | ||
613 | ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1)); | ||
614 | tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */ | ||
615 | tmp10 -= tmp2 << 2; | ||
616 | tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */ | ||
617 | dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1); | ||
618 | dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1); | ||
619 | |||
620 | /* Odd part */ | ||
621 | |||
622 | tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */ | ||
623 | |||
624 | dataptr[1] = (DCTELEM) | ||
625 | DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */ | ||
626 | CONST_BITS-PASS1_BITS-1); | ||
627 | dataptr[3] = (DCTELEM) | ||
628 | DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */ | ||
629 | CONST_BITS-PASS1_BITS-1); | ||
630 | |||
631 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
632 | } | ||
633 | |||
634 | /* Pass 2: process columns. | ||
635 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
636 | * by an overall factor of 8. | ||
637 | * We must also scale the output by (8/5)**2 = 64/25, which we partially | ||
638 | * fold into the constant multipliers (other part was done in pass 1): | ||
639 | * cK now represents sqrt(2) * cos(K*pi/10) * 32/25. | ||
640 | */ | ||
641 | |||
642 | dataptr = data; | ||
643 | for (ctr = 0; ctr < 5; ctr++) { | ||
644 | /* Even part */ | ||
645 | |||
646 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4]; | ||
647 | tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3]; | ||
648 | tmp2 = dataptr[DCTSIZE*2]; | ||
649 | |||
650 | tmp10 = tmp0 + tmp1; | ||
651 | tmp11 = tmp0 - tmp1; | ||
652 | |||
653 | tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4]; | ||
654 | tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3]; | ||
655 | |||
656 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
657 | DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */ | ||
658 | CONST_BITS+PASS1_BITS); | ||
659 | tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */ | ||
660 | tmp10 -= tmp2 << 2; | ||
661 | tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */ | ||
662 | dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS); | ||
663 | dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS); | ||
664 | |||
665 | /* Odd part */ | ||
666 | |||
667 | tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */ | ||
668 | |||
669 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
670 | DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */ | ||
671 | CONST_BITS+PASS1_BITS); | ||
672 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
673 | DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */ | ||
674 | CONST_BITS+PASS1_BITS); | ||
675 | |||
676 | dataptr++; /* advance pointer to next column */ | ||
677 | } | ||
678 | } | ||
679 | |||
680 | |||
681 | /* | ||
682 | * Perform the forward DCT on a 4x4 sample block. | ||
 *
 * Input:  rows sample_data[0..3], columns start_col..start_col+3.
 * Output: the whole DCTSIZE2-element block at data is zeroed first; the
 *         16 coefficients are then written to rows 0..3, columns 0..3
 *         of data (row stride DCTSIZE).
 *
 * The transform is separable: pass 1 transforms each input row into a
 * row of data, pass 2 transforms each of the four columns in place.
683 | */ | ||
684 | |||
685 | GLOBAL(void) | ||
686 | jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
687 | { | ||
688 | INT32 tmp0, tmp1; | ||
689 | INT32 tmp10, tmp11; | ||
690 | DCTELEM *dataptr; | ||
691 | JSAMPROW elemptr; | ||
692 | int ctr; | ||
693 | SHIFT_TEMPS | ||
694 | |||
695 | /* Pre-zero output coefficient block (only the 4x4 corner is written below). */ | ||
696 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
697 | |||
698 | /* Pass 1: process rows. */ | ||
699 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
700 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
701 | /* We must also scale the output by (8/4)**2 = 2**2, which we add here. */ | ||
702 | /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ | ||
703 | |||
704 | dataptr = data; | ||
705 | for (ctr = 0; ctr < 4; ctr++) { | ||
706 | elemptr = sample_data[ctr] + start_col; | ||
707 | |||
708 | /* Even part: sums (tmp0, tmp1) and differences (tmp10, tmp11) of mirrored samples */ | ||
709 | |||
710 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]); | ||
711 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); | ||
712 | |||
713 | tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); | ||
714 | tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); | ||
715 | |||
716 | /* Apply unsigned->signed conversion; only the DC term needs it, the offset cancels in every difference. */ | ||
717 | dataptr[0] = (DCTELEM) | ||
718 | ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2)); | ||
719 | dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2)); | ||
720 | |||
721 | /* Odd part */ | ||
722 | |||
723 | tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ | ||
724 | /* Add fudge factor here for final descale: half of 2**(CONST_BITS-PASS1_BITS-2), the amount shifted out below. */ | ||
725 | tmp0 += ONE << (CONST_BITS-PASS1_BITS-3); | ||
726 | |||
727 | dataptr[1] = (DCTELEM) | ||
728 | RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ | ||
729 | CONST_BITS-PASS1_BITS-2); | ||
730 | dataptr[3] = (DCTELEM) | ||
731 | RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ | ||
732 | CONST_BITS-PASS1_BITS-2); | ||
733 | |||
734 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
735 | } | ||
736 | |||
737 | /* Pass 2: process columns. | ||
738 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
739 | * by an overall factor of 8. | ||
 * Each of the four columns written by pass 1 is transformed in place.
740 | */ | ||
741 | |||
742 | dataptr = data; | ||
743 | for (ctr = 0; ctr < 4; ctr++) { | ||
744 | /* Even part */ | ||
745 | |||
746 | /* Add fudge factor here for final descale; being part of tmp0 it rounds both tmp0+tmp1 and tmp0-tmp1. */ | ||
747 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1)); | ||
748 | tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2]; | ||
749 | |||
750 | tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3]; | ||
751 | tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2]; | ||
752 | |||
753 | dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS); | ||
754 | dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS); | ||
755 | |||
756 | /* Odd part */ | ||
757 | |||
758 | tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ | ||
759 | /* Add fudge factor here for final descale: half of 2**(CONST_BITS+PASS1_BITS). */ | ||
760 | tmp0 += ONE << (CONST_BITS+PASS1_BITS-1); | ||
761 | |||
762 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
763 | RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ | ||
764 | CONST_BITS+PASS1_BITS); | ||
765 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
766 | RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ | ||
767 | CONST_BITS+PASS1_BITS); | ||
768 | |||
769 | dataptr++; /* advance pointer to next column */ | ||
770 | } | ||
771 | } | ||
772 | |||
773 | |||
774 | /* | ||
775 | * Perform the forward DCT on a 3x3 sample block. | ||
 *
 * Input:  rows sample_data[0..2], columns start_col..start_col+2.
 * Output: the whole DCTSIZE2-element block at data is zeroed first; the
 *         9 coefficients are then written to rows 0..2, columns 0..2
 *         of data (row stride DCTSIZE).
 *
 * Pass 1 transforms each input row into a row of data; pass 2 transforms
 * each of the three columns in place.
776 | */ | ||
777 | |||
778 | GLOBAL(void) | ||
779 | jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
780 | { | ||
781 | INT32 tmp0, tmp1, tmp2; | ||
782 | DCTELEM *dataptr; | ||
783 | JSAMPROW elemptr; | ||
784 | int ctr; | ||
785 | SHIFT_TEMPS | ||
786 | |||
787 | /* Pre-zero output coefficient block (only the 3x3 corner is written below). */ | ||
788 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
789 | |||
790 | /* Pass 1: process rows. */ | ||
791 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
792 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
793 | /* We scale the results further by 2**2 as part of output adaptation */ | ||
794 | /* scaling for different DCT size. */ | ||
795 | /* cK represents sqrt(2) * cos(K*pi/6). */ | ||
796 | |||
797 | dataptr = data; | ||
798 | for (ctr = 0; ctr < 3; ctr++) { | ||
799 | elemptr = sample_data[ctr] + start_col; | ||
800 | |||
801 | /* Even part: tmp0 = outer pair sum, tmp1 = centre sample, tmp2 = outer pair difference */ | ||
802 | |||
803 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]); | ||
804 | tmp1 = GETJSAMPLE(elemptr[1]); | ||
805 | |||
806 | tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]); | ||
807 | |||
808 | /* Apply unsigned->signed conversion; only the DC term needs it, the offset cancels in the other two outputs. */ | ||
809 | dataptr[0] = (DCTELEM) | ||
810 | ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2)); | ||
811 | dataptr[2] = (DCTELEM) | ||
812 | DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */ | ||
813 | CONST_BITS-PASS1_BITS-2); | ||
814 | |||
815 | /* Odd part */ | ||
816 | |||
817 | dataptr[1] = (DCTELEM) | ||
818 | DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */ | ||
819 | CONST_BITS-PASS1_BITS-2); | ||
820 | |||
821 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
822 | } | ||
823 | |||
824 | /* Pass 2: process columns. | ||
825 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
826 | * by an overall factor of 8. | ||
827 | * We must also scale the output by (8/3)**2 = 64/9, which we partially | ||
828 | * fold into the constant multipliers (other part was done in pass 1): | ||
829 | * cK now represents sqrt(2) * cos(K*pi/6) * 16/9. | ||
 * (64/9 = 2**2 * 16/9: the 2**2 was applied in pass 1, the 16/9 is in
 * the constants below.)
830 | */ | ||
831 | |||
832 | dataptr = data; | ||
833 | for (ctr = 0; ctr < 3; ctr++) { | ||
834 | /* Even part */ | ||
835 | |||
836 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2]; | ||
837 | tmp1 = dataptr[DCTSIZE*1]; | ||
838 | |||
839 | tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2]; | ||
840 | |||
841 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
842 | DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ | ||
843 | CONST_BITS+PASS1_BITS); | ||
844 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
845 | DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */ | ||
846 | CONST_BITS+PASS1_BITS); | ||
847 | |||
848 | /* Odd part */ | ||
849 | |||
850 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
851 | DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */ | ||
852 | CONST_BITS+PASS1_BITS); | ||
853 | |||
854 | dataptr++; /* advance pointer to next column */ | ||
855 | } | ||
856 | } | ||
857 | |||
858 | |||
859 | /* | ||
860 | * Perform the forward DCT on a 2x2 sample block. | ||
 *
 * Input:  rows sample_data[0..1], columns start_col..start_col+1.
 * Output: the whole DCTSIZE2-element block at data is zeroed first; the
 *         four coefficients are then written to data[0], data[1],
 *         data[DCTSIZE] and data[DCTSIZE+1].
 *
 * Both passes are fully unrolled: a 2-point DCT is just a sum and a
 * difference, so no multiplications are needed.
861 | */ | ||
862 | |||
863 | GLOBAL(void) | ||
864 | jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
865 | { | ||
866 | INT32 tmp0, tmp1, tmp2, tmp3; | ||
867 | JSAMPROW elemptr; | ||
868 | |||
869 | /* Pre-zero output coefficient block (only the 2x2 corner is written below). */ | ||
870 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
871 | |||
872 | /* Pass 1: process rows. */ | ||
873 | /* Note results are scaled up by sqrt(8) compared to a true DCT. */ | ||
874 | |||
875 | /* Row 0: tmp0 = sum, tmp1 = difference */ | ||
876 | elemptr = sample_data[0] + start_col; | ||
877 | |||
878 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]); | ||
879 | tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]); | ||
880 | |||
881 | /* Row 1: tmp2 = sum, tmp3 = difference */ | ||
882 | elemptr = sample_data[1] + start_col; | ||
883 | |||
884 | tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]); | ||
885 | tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]); | ||
886 | |||
887 | /* Pass 2: process columns. | ||
888 | * We leave the results scaled up by an overall factor of 8. | ||
889 | * We must also scale the output by (8/2)**2 = 2**4. | ||
890 | */ | ||
891 | |||
892 | /* Column 0 (built from the two row sums) */ | ||
893 | /* Apply unsigned->signed conversion: four samples contribute to the DC term, hence 4 * CENTERJSAMPLE. */ | ||
894 | data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4); | ||
895 | data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp2) << 4); | ||
896 | |||
897 | /* Column 1 (built from the two row differences) */ | ||
898 | data[DCTSIZE*0+1] = (DCTELEM) ((tmp1 + tmp3) << 4); | ||
899 | data[DCTSIZE*1+1] = (DCTELEM) ((tmp1 - tmp3) << 4); | ||
900 | } | ||
901 | |||
902 | |||
903 | /* | ||
904 | * Perform the forward DCT on a 1x1 sample block. | ||
 *
 * Input:  the single sample sample_data[0][start_col].
 * Output: the whole DCTSIZE2-element block at data is zeroed, then
 *         data[0] receives the level-shifted sample scaled by 2**6.
905 | */ | ||
906 | |||
907 | GLOBAL(void) | ||
908 | jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
909 | { | ||
910 | /* Pre-zero output coefficient block. */ | ||
911 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
912 | |||
913 | /* We leave the result scaled up by an overall factor of 8. */ | ||
914 | /* We must also scale the output by (8/1)**2 = 2**6. */ | ||
915 | /* Apply unsigned->signed conversion */ | ||
  /* NOTE(review): if the sample is below CENTERJSAMPLE the shifted operand is
   * negative, and left-shifting a negative value is undefined in ISO C;
   * presumably this relies on the usual two's-complement compiler
   * behaviour -- confirm. */
916 | data[0] = (DCTELEM) | ||
917 | ((GETJSAMPLE(sample_data[0][start_col]) - CENTERJSAMPLE) << 6); | ||
918 | } | ||
919 | |||
920 | |||
921 | /* | ||
922 | * Perform the forward DCT on a 9x9 sample block. | ||
 *
 * Input:  rows sample_data[0..8], columns start_col..start_col+8.
 * Output: only coefficients 0..7 are produced in each direction, so the
 *         result occupies rows 0..7 of data with 8 entries per row
 *         (row stride DCTSIZE).  No pre-zeroing is done here.
 *
 * Pass 1 produces one output row per input row.  The first DCTSIZE rows
 * go straight into data; the remaining row is kept in a local workspace
 * (declared with a literal 8 -- presumably DCTSIZE == 8, confirm against
 * the headers) and is read back by pass 2 through wsptr.
923 | */ | ||
924 | |||
925 | GLOBAL(void) | ||
926 | jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
927 | { | ||
928 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4; | ||
929 | INT32 tmp10, tmp11, tmp12, tmp13; | ||
930 | INT32 z1, z2; | ||
931 | DCTELEM workspace[8]; | ||
932 | DCTELEM *dataptr; | ||
933 | DCTELEM *wsptr; | ||
934 | JSAMPROW elemptr; | ||
935 | int ctr; | ||
936 | SHIFT_TEMPS | ||
937 | |||
938 | /* Pass 1: process rows. */ | ||
939 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
940 | /* we scale the results further by 2 as part of output adaptation */ | ||
941 | /* scaling for different DCT size. */ | ||
942 | /* cK represents sqrt(2) * cos(K*pi/18). */ | ||
943 | |||
944 | dataptr = data; | ||
945 | ctr = 0; | ||
946 | for (;;) { | ||
947 | elemptr = sample_data[ctr] + start_col; | ||
948 | |||
949 | /* Even part: tmp0..tmp3 = sums of mirrored samples, tmp4 = centre sample */ | ||
950 | |||
951 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]); | ||
952 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]); | ||
953 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]); | ||
954 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]); | ||
955 | tmp4 = GETJSAMPLE(elemptr[4]); | ||
956 | |||
957 | tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]); | ||
958 | tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]); | ||
959 | tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]); | ||
960 | tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]); | ||
961 | |||
962 | z1 = tmp0 + tmp2 + tmp3; | ||
963 | z2 = tmp1 + tmp4; | ||
964 | /* Apply unsigned->signed conversion; the << 1 is the extra factor of 2 mentioned above. */ | ||
965 | dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1); | ||
966 | dataptr[6] = (DCTELEM) | ||
967 | DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)), /* c6 */ | ||
968 | CONST_BITS-1); | ||
969 | z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049)); /* c2 */ | ||
970 | z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */ | ||
971 | dataptr[2] = (DCTELEM) | ||
972 | DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441)) /* c4 */ | ||
973 | + z1 + z2, CONST_BITS-1); | ||
974 | dataptr[4] = (DCTELEM) | ||
975 | DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608)) /* c8 */ | ||
976 | + z1 - z2, CONST_BITS-1); | ||
977 | |||
978 | /* Odd part */ | ||
979 | |||
980 | dataptr[3] = (DCTELEM) | ||
981 | DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */ | ||
982 | CONST_BITS-1); | ||
983 | |||
  /* From here on tmp11, tmp0, tmp1 and tmp2 are reused for scaled products. */
984 | tmp11 = MULTIPLY(tmp11, FIX(1.224744871)); /* c3 */ | ||
985 | tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */ | ||
986 | tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */ | ||
987 | |||
988 | dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1); | ||
989 | |||
990 | tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */ | ||
991 | |||
992 | dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1); | ||
993 | dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-1); | ||
994 | |||
995 | ctr++; | ||
996 | |||
  /* The first DCTSIZE rows are written into data; when ctr reaches DCTSIZE
   * the output pointer is redirected to workspace for the remaining row,
   * and the loop ends after row 8 (ctr == 9). */
997 | if (ctr != DCTSIZE) { | ||
998 | if (ctr == 9) | ||
999 | break; /* Done. */ | ||
1000 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
1001 | } else | ||
1002 | dataptr = workspace; /* switch pointer to extended workspace */ | ||
1003 | } | ||
1004 | |||
1005 | /* Pass 2: process columns. | ||
1006 | * We leave the results scaled up by an overall factor of 8. | ||
1007 | * We must also scale the output by (8/9)**2 = 64/81, which we partially | ||
1008 | * fold into the constant multipliers and final/initial shifting: | ||
1009 | * cK now represents sqrt(2) * cos(K*pi/18) * 128/81. | ||
 * Column element 8 is read from the workspace (wsptr[DCTSIZE*0]); the
 * other eight elements come from data.  Results overwrite rows 0..7.
1010 | */ | ||
1011 | |||
1012 | dataptr = data; | ||
1013 | wsptr = workspace; | ||
1014 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
1015 | /* Even part */ | ||
1016 | |||
1017 | tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0]; | ||
1018 | tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7]; | ||
1019 | tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6]; | ||
1020 | tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5]; | ||
1021 | tmp4 = dataptr[DCTSIZE*4]; | ||
1022 | |||
1023 | tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0]; | ||
1024 | tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7]; | ||
1025 | tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6]; | ||
1026 | tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5]; | ||
1027 | |||
1028 | z1 = tmp0 + tmp2 + tmp3; | ||
1029 | z2 = tmp1 + tmp4; | ||
1030 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
1031 | DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)), /* 128/81 */ | ||
1032 | CONST_BITS+2); | ||
1033 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
1034 | DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)), /* c6 */ | ||
1035 | CONST_BITS+2); | ||
1036 | z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287)); /* c2 */ | ||
1037 | z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */ | ||
1038 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
1039 | DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190)) /* c4 */ | ||
1040 | + z1 + z2, CONST_BITS+2); | ||
1041 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
1042 | DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096)) /* c8 */ | ||
1043 | + z1 - z2, CONST_BITS+2); | ||
1044 | |||
1045 | /* Odd part */ | ||
1046 | |||
1047 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
1048 | DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */ | ||
1049 | CONST_BITS+2); | ||
1050 | |||
1051 | tmp11 = MULTIPLY(tmp11, FIX(1.935399303)); /* c3 */ | ||
1052 | tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */ | ||
1053 | tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */ | ||
1054 | |||
1055 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
1056 | DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2); | ||
1057 | |||
1058 | tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */ | ||
1059 | |||
1060 | dataptr[DCTSIZE*5] = (DCTELEM) | ||
1061 | DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2); | ||
1062 | dataptr[DCTSIZE*7] = (DCTELEM) | ||
1063 | DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2); | ||
1064 | |||
1065 | dataptr++; /* advance pointer to next column */ | ||
1066 | wsptr++; /* advance pointer to next column */ | ||
1067 | } | ||
1068 | } | ||
1069 | |||
1070 | |||
1071 | /* | ||
1072 | * Perform the forward DCT on a 10x10 sample block. | ||
 *
 * Input:  rows sample_data[0..9], columns start_col..start_col+9.
 * Output: only coefficients 0..7 are produced in each direction, so the
 *         result occupies rows 0..7 of data with 8 entries per row
 *         (row stride DCTSIZE).  No pre-zeroing is done here.
 *
 * Pass 1 produces one output row per input row.  The first DCTSIZE rows
 * go straight into data; the remaining rows are kept in a local
 * workspace (declared as 8*2 entries -- presumably DCTSIZE == 8, confirm
 * against the headers) and are read back by pass 2 through wsptr.
1073 | */ | ||
1074 | |||
1075 | GLOBAL(void) | ||
1076 | jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
1077 | { | ||
1078 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4; | ||
1079 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14; | ||
1080 | DCTELEM workspace[8*2]; | ||
1081 | DCTELEM *dataptr; | ||
1082 | DCTELEM *wsptr; | ||
1083 | JSAMPROW elemptr; | ||
1084 | int ctr; | ||
1085 | SHIFT_TEMPS | ||
1086 | |||
1087 | /* Pass 1: process rows. */ | ||
1088 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
1089 | /* we scale the results further by 2 as part of output adaptation */ | ||
1090 | /* scaling for different DCT size. */ | ||
1091 | /* cK represents sqrt(2) * cos(K*pi/20). */ | ||
1092 | |||
1093 | dataptr = data; | ||
1094 | ctr = 0; | ||
1095 | for (;;) { | ||
1096 | elemptr = sample_data[ctr] + start_col; | ||
1097 | |||
1098 | /* Even part: sums of mirrored samples (the middle pair sum goes straight into tmp12) */ | ||
1099 | |||
1100 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]); | ||
1101 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]); | ||
1102 | tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]); | ||
1103 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]); | ||
1104 | tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]); | ||
1105 | |||
1106 | tmp10 = tmp0 + tmp4; | ||
1107 | tmp13 = tmp0 - tmp4; | ||
1108 | tmp11 = tmp1 + tmp3; | ||
1109 | tmp14 = tmp1 - tmp3; | ||
1110 | |||
  /* tmp0..tmp4 are now reused for the differences of mirrored samples (odd part inputs). */
1111 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]); | ||
1112 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]); | ||
1113 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]); | ||
1114 | tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]); | ||
1115 | tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]); | ||
1116 | |||
1117 | /* Apply unsigned->signed conversion; the << 1 is the extra factor of 2 mentioned above. */ | ||
1118 | dataptr[0] = (DCTELEM) | ||
1119 | ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1); | ||
1120 | tmp12 += tmp12; | ||
1121 | dataptr[4] = (DCTELEM) | ||
1122 | DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */ | ||
1123 | MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */ | ||
1124 | CONST_BITS-1); | ||
1125 | tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */ | ||
1126 | dataptr[2] = (DCTELEM) | ||
1127 | DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */ | ||
1128 | CONST_BITS-1); | ||
1129 | dataptr[6] = (DCTELEM) | ||
1130 | DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */ | ||
1131 | CONST_BITS-1); | ||
1132 | |||
1133 | /* Odd part */ | ||
1134 | |||
1135 | tmp10 = tmp0 + tmp4; | ||
1136 | tmp11 = tmp1 - tmp3; | ||
  /* Output 5 needs no multiply: c5 = sqrt(2) * cos(5*pi/20) = 1, so only the factor 2 remains. */
1137 | dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1); | ||
  /* Bring tmp2 (implicit multiplier c5 = 1) to the same fixed-point scale as the MULTIPLY results. */
1138 | tmp2 <<= CONST_BITS; | ||
1139 | dataptr[1] = (DCTELEM) | ||
1140 | DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */ | ||
1141 | MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */ | ||
1142 | MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */ | ||
1143 | MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */ | ||
1144 | CONST_BITS-1); | ||
1145 | tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */ | ||
1146 | MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */ | ||
  /* (tmp11 << (CONST_BITS - 1)) is tmp11 times 0.5 in the same fixed-point scale. */
1147 | tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */ | ||
1148 | (tmp11 << (CONST_BITS - 1)) - tmp2; | ||
1149 | dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1); | ||
1150 | dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1); | ||
1151 | |||
1152 | ctr++; | ||
1153 | |||
  /* The first DCTSIZE rows are written into data; when ctr reaches DCTSIZE
   * the output pointer is redirected to workspace for the remaining rows,
   * and the loop ends after row 9 (ctr == 10). */
1154 | if (ctr != DCTSIZE) { | ||
1155 | if (ctr == 10) | ||
1156 | break; /* Done. */ | ||
1157 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
1158 | } else | ||
1159 | dataptr = workspace; /* switch pointer to extended workspace */ | ||
1160 | } | ||
1161 | |||
1162 | /* Pass 2: process columns. | ||
1163 | * We leave the results scaled up by an overall factor of 8. | ||
1164 | * We must also scale the output by (8/10)**2 = 16/25, which we partially | ||
1165 | * fold into the constant multipliers and final/initial shifting: | ||
1166 | * cK now represents sqrt(2) * cos(K*pi/20) * 32/25. | ||
 * Column elements 8 and 9 are read from the workspace (wsptr[DCTSIZE*0]
 * and wsptr[DCTSIZE*1]); the other eight come from data.  Results
 * overwrite rows 0..7.
1167 | */ | ||
1168 | |||
1169 | dataptr = data; | ||
1170 | wsptr = workspace; | ||
1171 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
1172 | /* Even part */ | ||
1173 | |||
1174 | tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1]; | ||
1175 | tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0]; | ||
1176 | tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7]; | ||
1177 | tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6]; | ||
1178 | tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; | ||
1179 | |||
1180 | tmp10 = tmp0 + tmp4; | ||
1181 | tmp13 = tmp0 - tmp4; | ||
1182 | tmp11 = tmp1 + tmp3; | ||
1183 | tmp14 = tmp1 - tmp3; | ||
1184 | |||
1185 | tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1]; | ||
1186 | tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0]; | ||
1187 | tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7]; | ||
1188 | tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6]; | ||
1189 | tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; | ||
1190 | |||
1191 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
1192 | DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */ | ||
1193 | CONST_BITS+2); | ||
1194 | tmp12 += tmp12; | ||
1195 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
1196 | DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */ | ||
1197 | MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */ | ||
1198 | CONST_BITS+2); | ||
1199 | tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */ | ||
1200 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
1201 | DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */ | ||
1202 | CONST_BITS+2); | ||
1203 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
1204 | DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */ | ||
1205 | CONST_BITS+2); | ||
1206 | |||
1207 | /* Odd part */ | ||
1208 | |||
  /* Same structure as pass 1, but the implicit multipliers 1 and 0.5 become
   * explicit constants 32/25 and 16/25 because of the folded-in scaling. */
1209 | tmp10 = tmp0 + tmp4; | ||
1210 | tmp11 = tmp1 - tmp3; | ||
1211 | dataptr[DCTSIZE*5] = (DCTELEM) | ||
1212 | DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */ | ||
1213 | CONST_BITS+2); | ||
1214 | tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */ | ||
1215 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
1216 | DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */ | ||
1217 | MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */ | ||
1218 | MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */ | ||
1219 | MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */ | ||
1220 | CONST_BITS+2); | ||
1221 | tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */ | ||
1222 | MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */ | ||
1223 | tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */ | ||
1224 | MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */ | ||
1225 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2); | ||
1226 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2); | ||
1227 | |||
1228 | dataptr++; /* advance pointer to next column */ | ||
1229 | wsptr++; /* advance pointer to next column */ | ||
1230 | } | ||
1231 | } | ||
1232 | |||
1233 | |||
1234 | /* | ||
1235 | * Perform the forward DCT on an 11x11 sample block. | ||
 *
 * Input:  rows sample_data[0..10], columns start_col..start_col+10.
 * Output: only coefficients 0..7 are produced in each direction, so the
 *         result occupies rows 0..7 of data with 8 entries per row
 *         (row stride DCTSIZE).  No pre-zeroing is done here.
 *
 * Pass 1 produces one output row per input row.  The first DCTSIZE rows
 * go straight into data; the remaining rows are kept in a local
 * workspace (declared as 8*3 entries -- presumably DCTSIZE == 8, confirm
 * against the headers) and are read back by pass 2 through wsptr.
1236 | */ | ||
1237 | |||
1238 | GLOBAL(void) | ||
1239 | jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
1240 | { | ||
1241 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; | ||
1242 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14; | ||
1243 | INT32 z1, z2, z3; | ||
1244 | DCTELEM workspace[8*3]; | ||
1245 | DCTELEM *dataptr; | ||
1246 | DCTELEM *wsptr; | ||
1247 | JSAMPROW elemptr; | ||
1248 | int ctr; | ||
1249 | SHIFT_TEMPS | ||
1250 | |||
1251 | /* Pass 1: process rows. */ | ||
1252 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
1253 | /* we scale the results further by 2 as part of output adaptation */ | ||
1254 | /* scaling for different DCT size. */ | ||
1255 | /* cK represents sqrt(2) * cos(K*pi/22). */ | ||
1256 | |||
1257 | dataptr = data; | ||
1258 | ctr = 0; | ||
1259 | for (;;) { | ||
1260 | elemptr = sample_data[ctr] + start_col; | ||
1261 | |||
1262 | /* Even part: tmp0..tmp4 = sums of mirrored samples, tmp5 = centre sample */ | ||
1263 | |||
1264 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]); | ||
1265 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]); | ||
1266 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]); | ||
1267 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]); | ||
1268 | tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]); | ||
1269 | tmp5 = GETJSAMPLE(elemptr[5]); | ||
1270 | |||
1271 | tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]); | ||
1272 | tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]); | ||
1273 | tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]); | ||
1274 | tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]); | ||
1275 | tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]); | ||
1276 | |||
1277 | /* Apply unsigned->signed conversion; the << 1 is the extra factor of 2 mentioned above. */ | ||
1278 | dataptr[0] = (DCTELEM) | ||
1279 | ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1); | ||
  /* Subtract twice the centre sample from every pair sum before forming the remaining even outputs. */
1280 | tmp5 += tmp5; | ||
1281 | tmp0 -= tmp5; | ||
1282 | tmp1 -= tmp5; | ||
1283 | tmp2 -= tmp5; | ||
1284 | tmp3 -= tmp5; | ||
1285 | tmp4 -= tmp5; | ||
1286 | z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) + /* c2 */ | ||
1287 | MULTIPLY(tmp2 + tmp4, FIX(0.201263574)); /* c10 */ | ||
1288 | z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931)); /* c6 */ | ||
1289 | z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156)); /* c4 */ | ||
1290 | dataptr[2] = (DCTELEM) | ||
1291 | DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */ | ||
1292 | - MULTIPLY(tmp4, FIX(1.390975730)), /* c4+c10 */ | ||
1293 | CONST_BITS-1); | ||
1294 | dataptr[4] = (DCTELEM) | ||
1295 | DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */ | ||
1296 | - MULTIPLY(tmp2, FIX(1.356927976)) /* c2 */ | ||
1297 | + MULTIPLY(tmp4, FIX(0.587485545)), /* c8 */ | ||
1298 | CONST_BITS-1); | ||
1299 | dataptr[6] = (DCTELEM) | ||
1300 | DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */ | ||
1301 | - MULTIPLY(tmp2, FIX(0.788749120)), /* c8+c10 */ | ||
1302 | CONST_BITS-1); | ||
1303 | |||
1304 | /* Odd part */ | ||
1305 | |||
  /* tmp0..tmp5 and tmp10 are reused below for scaled products; statement order matters. */
1306 | tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905)); /* c3 */ | ||
1307 | tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298)); /* c5 */ | ||
1308 | tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576)); /* c7 */ | ||
1309 | tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */ | ||
1310 | + MULTIPLY(tmp14, FIX(0.398430003)); /* c9 */ | ||
1311 | tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576)); /* -c7 */ | ||
1312 | tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907)); /* -c1 */ | ||
1313 | tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */ | ||
1314 | - MULTIPLY(tmp14, FIX(1.068791298)); /* c5 */ | ||
1315 | tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003)); /* c9 */ | ||
1316 | tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */ | ||
1317 | + MULTIPLY(tmp14, FIX(1.399818907)); /* c1 */ | ||
1318 | tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */ | ||
1319 | - MULTIPLY(tmp14, FIX(1.286413905)); /* c3 */ | ||
1320 | |||
1321 | dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1); | ||
1322 | dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1); | ||
1323 | dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1); | ||
1324 | dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1); | ||
1325 | |||
1326 | ctr++; | ||
1327 | |||
  /* The first DCTSIZE rows are written into data; when ctr reaches DCTSIZE
   * the output pointer is redirected to workspace for the remaining rows,
   * and the loop ends after row 10 (ctr == 11). */
1328 | if (ctr != DCTSIZE) { | ||
1329 | if (ctr == 11) | ||
1330 | break; /* Done. */ | ||
1331 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
1332 | } else | ||
1333 | dataptr = workspace; /* switch pointer to extended workspace */ | ||
1334 | } | ||
1335 | |||
1336 | /* Pass 2: process columns. | ||
1337 | * We leave the results scaled up by an overall factor of 8. | ||
1338 | * We must also scale the output by (8/11)**2 = 64/121, which we partially | ||
1339 | * fold into the constant multipliers and final/initial shifting: | ||
1340 | * cK now represents sqrt(2) * cos(K*pi/22) * 128/121. | ||
 * Column elements 8, 9 and 10 are read from the workspace
 * (wsptr[DCTSIZE*0], wsptr[DCTSIZE*1], wsptr[DCTSIZE*2]); the other
 * eight come from data.  Results overwrite rows 0..7.
1341 | */ | ||
1342 | |||
1343 | dataptr = data; | ||
1344 | wsptr = workspace; | ||
1345 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
1346 | /* Even part */ | ||
1347 | |||
1348 | tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2]; | ||
1349 | tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1]; | ||
1350 | tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0]; | ||
1351 | tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7]; | ||
1352 | tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6]; | ||
1353 | tmp5 = dataptr[DCTSIZE*5]; | ||
1354 | |||
1355 | tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2]; | ||
1356 | tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1]; | ||
1357 | tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0]; | ||
1358 | tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7]; | ||
1359 | tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6]; | ||
1360 | |||
1361 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
1362 | DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5, | ||
1363 | FIX(1.057851240)), /* 128/121 */ | ||
1364 | CONST_BITS+2); | ||
1365 | tmp5 += tmp5; | ||
1366 | tmp0 -= tmp5; | ||
1367 | tmp1 -= tmp5; | ||
1368 | tmp2 -= tmp5; | ||
1369 | tmp3 -= tmp5; | ||
1370 | tmp4 -= tmp5; | ||
1371 | z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) + /* c2 */ | ||
1372 | MULTIPLY(tmp2 + tmp4, FIX(0.212906922)); /* c10 */ | ||
1373 | z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713)); /* c6 */ | ||
1374 | z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479)); /* c4 */ | ||
1375 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
1376 | DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */ | ||
1377 | - MULTIPLY(tmp4, FIX(1.471445400)), /* c4+c10 */ | ||
1378 | CONST_BITS+2); | ||
1379 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
1380 | DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */ | ||
1381 | - MULTIPLY(tmp2, FIX(1.435427942)) /* c2 */ | ||
1382 | + MULTIPLY(tmp4, FIX(0.621472312)), /* c8 */ | ||
1383 | CONST_BITS+2); | ||
1384 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
1385 | DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */ | ||
1386 | - MULTIPLY(tmp2, FIX(0.834379234)), /* c8+c10 */ | ||
1387 | CONST_BITS+2); | ||
1388 | |||
1389 | /* Odd part */ | ||
1390 | |||
1391 | tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544)); /* c3 */ | ||
1392 | tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199)); /* c5 */ | ||
1393 | tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568)); /* c7 */ | ||
1394 | tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */ | ||
1395 | + MULTIPLY(tmp14, FIX(0.421479672)); /* c9 */ | ||
1396 | tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568)); /* -c7 */ | ||
1397 | tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167)); /* -c1 */ | ||
1398 | tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */ | ||
1399 | - MULTIPLY(tmp14, FIX(1.130622199)); /* c5 */ | ||
1400 | tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672)); /* c9 */ | ||
1401 | tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */ | ||
1402 | + MULTIPLY(tmp14, FIX(1.480800167)); /* c1 */ | ||
1403 | tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */ | ||
1404 | - MULTIPLY(tmp14, FIX(1.360834544)); /* c3 */ | ||
1405 | |||
1406 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2); | ||
1407 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2); | ||
1408 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2); | ||
1409 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2); | ||
1410 | |||
1411 | dataptr++; /* advance pointer to next column */ | ||
1412 | wsptr++; /* advance pointer to next column */ | ||
1413 | } | ||
1414 | } | ||
1415 | |||
1416 | |||
1417 | /* | ||
1418 | * Perform the forward DCT on a 12x12 sample block. | ||
1419 | */ | ||
1420 | |||
1421 | GLOBAL(void) | ||
1422 | jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
1423 | { | ||
1424 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; | ||
1425 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; | ||
1426 | DCTELEM workspace[8*4]; | ||
1427 | DCTELEM *dataptr; | ||
1428 | DCTELEM *wsptr; | ||
1429 | JSAMPROW elemptr; | ||
1430 | int ctr; | ||
1431 | SHIFT_TEMPS | ||
1432 | |||
1433 | /* Pass 1: process rows. */ | ||
1434 | /* Note results are scaled up by sqrt(8) compared to a true DCT. */ | ||
1435 | /* cK represents sqrt(2) * cos(K*pi/24). */ | ||
1436 | |||
1437 | dataptr = data; | ||
1438 | ctr = 0; | ||
1439 | for (;;) { | ||
1440 | elemptr = sample_data[ctr] + start_col; | ||
1441 | |||
1442 | /* Even part */ | ||
1443 | |||
1444 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]); | ||
1445 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]); | ||
1446 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]); | ||
1447 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]); | ||
1448 | tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]); | ||
1449 | tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]); | ||
1450 | |||
1451 | tmp10 = tmp0 + tmp5; | ||
1452 | tmp13 = tmp0 - tmp5; | ||
1453 | tmp11 = tmp1 + tmp4; | ||
1454 | tmp14 = tmp1 - tmp4; | ||
1455 | tmp12 = tmp2 + tmp3; | ||
1456 | tmp15 = tmp2 - tmp3; | ||
1457 | |||
1458 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]); | ||
1459 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]); | ||
1460 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]); | ||
1461 | tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]); | ||
1462 | tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]); | ||
1463 | tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]); | ||
1464 | |||
1465 | /* Apply unsigned->signed conversion */ | ||
1466 | dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE); | ||
1467 | dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15); | ||
1468 | dataptr[4] = (DCTELEM) | ||
1469 | DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */ | ||
1470 | CONST_BITS); | ||
1471 | dataptr[2] = (DCTELEM) | ||
1472 | DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */ | ||
1473 | CONST_BITS); | ||
1474 | |||
1475 | /* Odd part */ | ||
1476 | |||
1477 | tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */ | ||
1478 | tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */ | ||
1479 | tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */ | ||
1480 | tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */ | ||
1481 | tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */ | ||
1482 | tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */ | ||
1483 | + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */ | ||
1484 | tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */ | ||
1485 | tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */ | ||
1486 | + MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */ | ||
1487 | tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */ | ||
1488 | - MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */ | ||
1489 | tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */ | ||
1490 | - MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */ | ||
1491 | |||
1492 | dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS); | ||
1493 | dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS); | ||
1494 | dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS); | ||
1495 | dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS); | ||
1496 | |||
1497 | ctr++; | ||
1498 | |||
1499 | if (ctr != DCTSIZE) { | ||
1500 | if (ctr == 12) | ||
1501 | break; /* Done. */ | ||
1502 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
1503 | } else | ||
1504 | dataptr = workspace; /* switch pointer to extended workspace */ | ||
1505 | } | ||
1506 | |||
1507 | /* Pass 2: process columns. | ||
1508 | * We leave the results scaled up by an overall factor of 8. | ||
1509 | * We must also scale the output by (8/12)**2 = 4/9, which we partially | ||
1510 | * fold into the constant multipliers and final shifting: | ||
1511 | * cK now represents sqrt(2) * cos(K*pi/24) * 8/9. | ||
1512 | */ | ||
1513 | |||
1514 | dataptr = data; | ||
1515 | wsptr = workspace; | ||
1516 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
1517 | /* Even part */ | ||
1518 | |||
1519 | tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3]; | ||
1520 | tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2]; | ||
1521 | tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1]; | ||
1522 | tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0]; | ||
1523 | tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7]; | ||
1524 | tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6]; | ||
1525 | |||
1526 | tmp10 = tmp0 + tmp5; | ||
1527 | tmp13 = tmp0 - tmp5; | ||
1528 | tmp11 = tmp1 + tmp4; | ||
1529 | tmp14 = tmp1 - tmp4; | ||
1530 | tmp12 = tmp2 + tmp3; | ||
1531 | tmp15 = tmp2 - tmp3; | ||
1532 | |||
1533 | tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3]; | ||
1534 | tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2]; | ||
1535 | tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1]; | ||
1536 | tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0]; | ||
1537 | tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7]; | ||
1538 | tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6]; | ||
1539 | |||
1540 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
1541 | DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */ | ||
1542 | CONST_BITS+1); | ||
1543 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
1544 | DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */ | ||
1545 | CONST_BITS+1); | ||
1546 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
1547 | DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */ | ||
1548 | CONST_BITS+1); | ||
1549 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
1550 | DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */ | ||
1551 | MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */ | ||
1552 | CONST_BITS+1); | ||
1553 | |||
1554 | /* Odd part */ | ||
1555 | |||
1556 | tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */ | ||
1557 | tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */ | ||
1558 | tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */ | ||
1559 | tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */ | ||
1560 | tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */ | ||
1561 | tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */ | ||
1562 | + MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */ | ||
1563 | tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */ | ||
1564 | tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */ | ||
1565 | + MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */ | ||
1566 | tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */ | ||
1567 | - MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */ | ||
1568 | tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */ | ||
1569 | - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */ | ||
1570 | |||
1571 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1); | ||
1572 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1); | ||
1573 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1); | ||
1574 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1); | ||
1575 | |||
1576 | dataptr++; /* advance pointer to next column */ | ||
1577 | wsptr++; /* advance pointer to next column */ | ||
1578 | } | ||
1579 | } | ||
1580 | |||
1581 | |||
1582 | /* | ||
1583 | * Perform the forward DCT on a 13x13 sample block. | ||
1584 | */ | ||
1585 | |||
1586 | GLOBAL(void) | ||
1587 | jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
1588 | { | ||
1589 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; | ||
1590 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; | ||
1591 | INT32 z1, z2; | ||
1592 | DCTELEM workspace[8*5]; | ||
1593 | DCTELEM *dataptr; | ||
1594 | DCTELEM *wsptr; | ||
1595 | JSAMPROW elemptr; | ||
1596 | int ctr; | ||
1597 | SHIFT_TEMPS | ||
1598 | |||
1599 | /* Pass 1: process rows. */ | ||
1600 | /* Note results are scaled up by sqrt(8) compared to a true DCT. */ | ||
1601 | /* cK represents sqrt(2) * cos(K*pi/26). */ | ||
1602 | |||
1603 | dataptr = data; | ||
1604 | ctr = 0; | ||
1605 | for (;;) { | ||
1606 | elemptr = sample_data[ctr] + start_col; | ||
1607 | |||
1608 | /* Even part */ | ||
1609 | |||
1610 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]); | ||
1611 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]); | ||
1612 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]); | ||
1613 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]); | ||
1614 | tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]); | ||
1615 | tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]); | ||
1616 | tmp6 = GETJSAMPLE(elemptr[6]); | ||
1617 | |||
1618 | tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]); | ||
1619 | tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]); | ||
1620 | tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]); | ||
1621 | tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]); | ||
1622 | tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]); | ||
1623 | tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]); | ||
1624 | |||
1625 | /* Apply unsigned->signed conversion */ | ||
1626 | dataptr[0] = (DCTELEM) | ||
1627 | (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE); | ||
1628 | tmp6 += tmp6; | ||
1629 | tmp0 -= tmp6; | ||
1630 | tmp1 -= tmp6; | ||
1631 | tmp2 -= tmp6; | ||
1632 | tmp3 -= tmp6; | ||
1633 | tmp4 -= tmp6; | ||
1634 | tmp5 -= tmp6; | ||
1635 | dataptr[2] = (DCTELEM) | ||
1636 | DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) + /* c2 */ | ||
1637 | MULTIPLY(tmp1, FIX(1.058554052)) + /* c6 */ | ||
1638 | MULTIPLY(tmp2, FIX(0.501487041)) - /* c10 */ | ||
1639 | MULTIPLY(tmp3, FIX(0.170464608)) - /* c12 */ | ||
1640 | MULTIPLY(tmp4, FIX(0.803364869)) - /* c8 */ | ||
1641 | MULTIPLY(tmp5, FIX(1.252223920)), /* c4 */ | ||
1642 | CONST_BITS); | ||
1643 | z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */ | ||
1644 | MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */ | ||
1645 | MULTIPLY(tmp1 - tmp5, FIX(0.316450131)); /* (c8-c12)/2 */ | ||
1646 | z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */ | ||
1647 | MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */ | ||
1648 | MULTIPLY(tmp1 + tmp5, FIX(0.486914739)); /* (c8+c12)/2 */ | ||
1649 | |||
1650 | dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS); | ||
1651 | dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS); | ||
1652 | |||
1653 | /* Odd part */ | ||
1654 | |||
1655 | tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651)); /* c3 */ | ||
1656 | tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945)); /* c5 */ | ||
1657 | tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) + /* c7 */ | ||
1658 | MULTIPLY(tmp14 + tmp15, FIX(0.338443458)); /* c11 */ | ||
1659 | tmp0 = tmp1 + tmp2 + tmp3 - | ||
1660 | MULTIPLY(tmp10, FIX(2.020082300)) + /* c3+c5+c7-c1 */ | ||
1661 | MULTIPLY(tmp14, FIX(0.318774355)); /* c9-c11 */ | ||
1662 | tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) - /* c7 */ | ||
1663 | MULTIPLY(tmp11 + tmp12, FIX(0.338443458)); /* c11 */ | ||
1664 | tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */ | ||
1665 | tmp1 += tmp4 + tmp5 + | ||
1666 | MULTIPLY(tmp11, FIX(0.837223564)) - /* c5+c9+c11-c3 */ | ||
1667 | MULTIPLY(tmp14, FIX(2.341699410)); /* c1+c7 */ | ||
1668 | tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */ | ||
1669 | tmp2 += tmp4 + tmp6 - | ||
1670 | MULTIPLY(tmp12, FIX(1.572116027)) + /* c1+c5-c9-c11 */ | ||
1671 | MULTIPLY(tmp15, FIX(2.260109708)); /* c3+c7 */ | ||
1672 | tmp3 += tmp5 + tmp6 + | ||
1673 | MULTIPLY(tmp13, FIX(2.205608352)) - /* c3+c5+c9-c7 */ | ||
1674 | MULTIPLY(tmp15, FIX(1.742345811)); /* c1+c11 */ | ||
1675 | |||
1676 | dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS); | ||
1677 | dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS); | ||
1678 | dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS); | ||
1679 | dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS); | ||
1680 | |||
1681 | ctr++; | ||
1682 | |||
1683 | if (ctr != DCTSIZE) { | ||
1684 | if (ctr == 13) | ||
1685 | break; /* Done. */ | ||
1686 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
1687 | } else | ||
1688 | dataptr = workspace; /* switch pointer to extended workspace */ | ||
1689 | } | ||
1690 | |||
1691 | /* Pass 2: process columns. | ||
1692 | * We leave the results scaled up by an overall factor of 8. | ||
1693 | * We must also scale the output by (8/13)**2 = 64/169, which we partially | ||
1694 | * fold into the constant multipliers and final shifting: | ||
1695 | * cK now represents sqrt(2) * cos(K*pi/26) * 128/169. | ||
1696 | */ | ||
1697 | |||
1698 | dataptr = data; | ||
1699 | wsptr = workspace; | ||
1700 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
1701 | /* Even part */ | ||
1702 | |||
1703 | tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4]; | ||
1704 | tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3]; | ||
1705 | tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2]; | ||
1706 | tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1]; | ||
1707 | tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0]; | ||
1708 | tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7]; | ||
1709 | tmp6 = dataptr[DCTSIZE*6]; | ||
1710 | |||
1711 | tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4]; | ||
1712 | tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3]; | ||
1713 | tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2]; | ||
1714 | tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1]; | ||
1715 | tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0]; | ||
1716 | tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7]; | ||
1717 | |||
1718 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
1719 | DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6, | ||
1720 | FIX(0.757396450)), /* 128/169 */ | ||
1721 | CONST_BITS+1); | ||
1722 | tmp6 += tmp6; | ||
1723 | tmp0 -= tmp6; | ||
1724 | tmp1 -= tmp6; | ||
1725 | tmp2 -= tmp6; | ||
1726 | tmp3 -= tmp6; | ||
1727 | tmp4 -= tmp6; | ||
1728 | tmp5 -= tmp6; | ||
1729 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
1730 | DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) + /* c2 */ | ||
1731 | MULTIPLY(tmp1, FIX(0.801745081)) + /* c6 */ | ||
1732 | MULTIPLY(tmp2, FIX(0.379824504)) - /* c10 */ | ||
1733 | MULTIPLY(tmp3, FIX(0.129109289)) - /* c12 */ | ||
1734 | MULTIPLY(tmp4, FIX(0.608465700)) - /* c8 */ | ||
1735 | MULTIPLY(tmp5, FIX(0.948429952)), /* c4 */ | ||
1736 | CONST_BITS+1); | ||
1737 | z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */ | ||
1738 | MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */ | ||
1739 | MULTIPLY(tmp1 - tmp5, FIX(0.239678205)); /* (c8-c12)/2 */ | ||
1740 | z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */ | ||
1741 | MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */ | ||
1742 | MULTIPLY(tmp1 + tmp5, FIX(0.368787494)); /* (c8+c12)/2 */ | ||
1743 | |||
1744 | dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1); | ||
1745 | dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1); | ||
1746 | |||
1747 | /* Odd part */ | ||
1748 | |||
1749 | tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908)); /* c3 */ | ||
1750 | tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751)); /* c5 */ | ||
1751 | tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) + /* c7 */ | ||
1752 | MULTIPLY(tmp14 + tmp15, FIX(0.256335874)); /* c11 */ | ||
1753 | tmp0 = tmp1 + tmp2 + tmp3 - | ||
1754 | MULTIPLY(tmp10, FIX(1.530003162)) + /* c3+c5+c7-c1 */ | ||
1755 | MULTIPLY(tmp14, FIX(0.241438564)); /* c9-c11 */ | ||
1756 | tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) - /* c7 */ | ||
1757 | MULTIPLY(tmp11 + tmp12, FIX(0.256335874)); /* c11 */ | ||
1758 | tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */ | ||
1759 | tmp1 += tmp4 + tmp5 + | ||
1760 | MULTIPLY(tmp11, FIX(0.634110155)) - /* c5+c9+c11-c3 */ | ||
1761 | MULTIPLY(tmp14, FIX(1.773594819)); /* c1+c7 */ | ||
1762 | tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */ | ||
1763 | tmp2 += tmp4 + tmp6 - | ||
1764 | MULTIPLY(tmp12, FIX(1.190715098)) + /* c1+c5-c9-c11 */ | ||
1765 | MULTIPLY(tmp15, FIX(1.711799069)); /* c3+c7 */ | ||
1766 | tmp3 += tmp5 + tmp6 + | ||
1767 | MULTIPLY(tmp13, FIX(1.670519935)) - /* c3+c5+c9-c7 */ | ||
1768 | MULTIPLY(tmp15, FIX(1.319646532)); /* c1+c11 */ | ||
1769 | |||
1770 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1); | ||
1771 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1); | ||
1772 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1); | ||
1773 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1); | ||
1774 | |||
1775 | dataptr++; /* advance pointer to next column */ | ||
1776 | wsptr++; /* advance pointer to next column */ | ||
1777 | } | ||
1778 | } | ||
1779 | |||
1780 | |||
1781 | /* | ||
1782 | * Perform the forward DCT on a 14x14 sample block. | ||
1783 | */ | ||
1784 | |||
1785 | GLOBAL(void) | ||
1786 | jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
1787 | { | ||
1788 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; | ||
1789 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; | ||
1790 | DCTELEM workspace[8*6]; | ||
1791 | DCTELEM *dataptr; | ||
1792 | DCTELEM *wsptr; | ||
1793 | JSAMPROW elemptr; | ||
1794 | int ctr; | ||
1795 | SHIFT_TEMPS | ||
1796 | |||
1797 | /* Pass 1: process rows. */ | ||
1798 | /* Note results are scaled up by sqrt(8) compared to a true DCT. */ | ||
1799 | /* cK represents sqrt(2) * cos(K*pi/28). */ | ||
1800 | |||
1801 | dataptr = data; | ||
1802 | ctr = 0; | ||
1803 | for (;;) { | ||
1804 | elemptr = sample_data[ctr] + start_col; | ||
1805 | |||
1806 | /* Even part */ | ||
1807 | |||
1808 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]); | ||
1809 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]); | ||
1810 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]); | ||
1811 | tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]); | ||
1812 | tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]); | ||
1813 | tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]); | ||
1814 | tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]); | ||
1815 | |||
1816 | tmp10 = tmp0 + tmp6; | ||
1817 | tmp14 = tmp0 - tmp6; | ||
1818 | tmp11 = tmp1 + tmp5; | ||
1819 | tmp15 = tmp1 - tmp5; | ||
1820 | tmp12 = tmp2 + tmp4; | ||
1821 | tmp16 = tmp2 - tmp4; | ||
1822 | |||
1823 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]); | ||
1824 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]); | ||
1825 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]); | ||
1826 | tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]); | ||
1827 | tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]); | ||
1828 | tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]); | ||
1829 | tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]); | ||
1830 | |||
1831 | /* Apply unsigned->signed conversion */ | ||
1832 | dataptr[0] = (DCTELEM) | ||
1833 | (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE); | ||
1834 | tmp13 += tmp13; | ||
1835 | dataptr[4] = (DCTELEM) | ||
1836 | DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */ | ||
1837 | MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */ | ||
1838 | MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */ | ||
1839 | CONST_BITS); | ||
1840 | |||
1841 | tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */ | ||
1842 | |||
1843 | dataptr[2] = (DCTELEM) | ||
1844 | DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */ | ||
1845 | + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */ | ||
1846 | CONST_BITS); | ||
1847 | dataptr[6] = (DCTELEM) | ||
1848 | DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */ | ||
1849 | - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */ | ||
1850 | CONST_BITS); | ||
1851 | |||
1852 | /* Odd part */ | ||
1853 | |||
1854 | tmp10 = tmp1 + tmp2; | ||
1855 | tmp11 = tmp5 - tmp4; | ||
1856 | dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6); | ||
1857 | tmp3 <<= CONST_BITS; | ||
1858 | tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */ | ||
1859 | tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */ | ||
1860 | tmp10 += tmp11 - tmp3; | ||
1861 | tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */ | ||
1862 | MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */ | ||
1863 | dataptr[5] = (DCTELEM) | ||
1864 | DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */ | ||
1865 | + MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */ | ||
1866 | CONST_BITS); | ||
1867 | tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */ | ||
1868 | MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */ | ||
1869 | dataptr[3] = (DCTELEM) | ||
1870 | DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */ | ||
1871 | - MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */ | ||
1872 | CONST_BITS); | ||
1873 | dataptr[1] = (DCTELEM) | ||
1874 | DESCALE(tmp11 + tmp12 + tmp3 + tmp6 - | ||
1875 | MULTIPLY(tmp0 + tmp6, FIX(1.126980169)), /* c3+c5-c1 */ | ||
1876 | CONST_BITS); | ||
1877 | |||
1878 | ctr++; | ||
1879 | |||
1880 | if (ctr != DCTSIZE) { | ||
1881 | if (ctr == 14) | ||
1882 | break; /* Done. */ | ||
1883 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
1884 | } else | ||
1885 | dataptr = workspace; /* switch pointer to extended workspace */ | ||
1886 | } | ||
1887 | |||
1888 | /* Pass 2: process columns. | ||
1889 | * We leave the results scaled up by an overall factor of 8. | ||
1890 | * We must also scale the output by (8/14)**2 = 16/49, which we partially | ||
1891 | * fold into the constant multipliers and final shifting: | ||
1892 | * cK now represents sqrt(2) * cos(K*pi/28) * 32/49. | ||
1893 | */ | ||
1894 | |||
1895 | dataptr = data; | ||
1896 | wsptr = workspace; | ||
1897 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
1898 | /* Even part */ | ||
1899 | |||
1900 | tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5]; | ||
1901 | tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4]; | ||
1902 | tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3]; | ||
1903 | tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2]; | ||
1904 | tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1]; | ||
1905 | tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0]; | ||
1906 | tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; | ||
1907 | |||
1908 | tmp10 = tmp0 + tmp6; | ||
1909 | tmp14 = tmp0 - tmp6; | ||
1910 | tmp11 = tmp1 + tmp5; | ||
1911 | tmp15 = tmp1 - tmp5; | ||
1912 | tmp12 = tmp2 + tmp4; | ||
1913 | tmp16 = tmp2 - tmp4; | ||
1914 | |||
1915 | tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5]; | ||
1916 | tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4]; | ||
1917 | tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3]; | ||
1918 | tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2]; | ||
1919 | tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1]; | ||
1920 | tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0]; | ||
1921 | tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; | ||
1922 | |||
1923 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
1924 | DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13, | ||
1925 | FIX(0.653061224)), /* 32/49 */ | ||
1926 | CONST_BITS+1); | ||
1927 | tmp13 += tmp13; | ||
1928 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
1929 | DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */ | ||
1930 | MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */ | ||
1931 | MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */ | ||
1932 | CONST_BITS+1); | ||
1933 | |||
1934 | tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */ | ||
1935 | |||
1936 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
1937 | DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */ | ||
1938 | + MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */ | ||
1939 | CONST_BITS+1); | ||
1940 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
1941 | DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */ | ||
1942 | - MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */ | ||
1943 | CONST_BITS+1); | ||
1944 | |||
1945 | /* Odd part */ | ||
1946 | |||
1947 | tmp10 = tmp1 + tmp2; | ||
1948 | tmp11 = tmp5 - tmp4; | ||
1949 | dataptr[DCTSIZE*7] = (DCTELEM) | ||
1950 | DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6, | ||
1951 | FIX(0.653061224)), /* 32/49 */ | ||
1952 | CONST_BITS+1); | ||
1953 | tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */ | ||
1954 | tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */ | ||
1955 | tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */ | ||
1956 | tmp10 += tmp11 - tmp3; | ||
1957 | tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */ | ||
1958 | MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */ | ||
1959 | dataptr[DCTSIZE*5] = (DCTELEM) | ||
1960 | DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */ | ||
1961 | + MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */ | ||
1962 | CONST_BITS+1); | ||
1963 | tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */ | ||
1964 | MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */ | ||
1965 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
1966 | DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */ | ||
1967 | - MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */ | ||
1968 | CONST_BITS+1); | ||
1969 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
1970 | DESCALE(tmp11 + tmp12 + tmp3 | ||
1971 | - MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */ | ||
1972 | - MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */ | ||
1973 | CONST_BITS+1); | ||
1974 | |||
1975 | dataptr++; /* advance pointer to next column */ | ||
1976 | wsptr++; /* advance pointer to next column */ | ||
1977 | } | ||
1978 | } | ||
1979 | |||
1980 | |||
1981 | /* | ||
1982 | * Perform the forward DCT on a 15x15 sample block. | ||
1983 | */ | ||
1984 | |||
1985 | GLOBAL(void) | ||
1986 | jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
1987 | { | ||
1988 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | ||
1989 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; | ||
1990 | INT32 z1, z2, z3; | ||
1991 | DCTELEM workspace[8*7]; | ||
1992 | DCTELEM *dataptr; | ||
1993 | DCTELEM *wsptr; | ||
1994 | JSAMPROW elemptr; | ||
1995 | int ctr; | ||
1996 | SHIFT_TEMPS | ||
1997 | |||
1998 | /* Pass 1: process rows. */ | ||
1999 | /* Note results are scaled up by sqrt(8) compared to a true DCT. */ | ||
2000 | /* cK represents sqrt(2) * cos(K*pi/30). */ | ||
2001 | |||
2002 | dataptr = data; | ||
2003 | ctr = 0; | ||
2004 | for (;;) { | ||
2005 | elemptr = sample_data[ctr] + start_col; | ||
2006 | |||
2007 | /* Even part */ | ||
2008 | |||
2009 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]); | ||
2010 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]); | ||
2011 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]); | ||
2012 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]); | ||
2013 | tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]); | ||
2014 | tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]); | ||
2015 | tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]); | ||
2016 | tmp7 = GETJSAMPLE(elemptr[7]); | ||
2017 | |||
2018 | tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]); | ||
2019 | tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]); | ||
2020 | tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]); | ||
2021 | tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]); | ||
2022 | tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]); | ||
2023 | tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]); | ||
2024 | tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]); | ||
2025 | |||
2026 | z1 = tmp0 + tmp4 + tmp5; | ||
2027 | z2 = tmp1 + tmp3 + tmp6; | ||
2028 | z3 = tmp2 + tmp7; | ||
2029 | /* Apply unsigned->signed conversion */ | ||
2030 | dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE); | ||
2031 | z3 += z3; | ||
2032 | dataptr[6] = (DCTELEM) | ||
2033 | DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */ | ||
2034 | MULTIPLY(z2 - z3, FIX(0.437016024)), /* c12 */ | ||
2035 | CONST_BITS); | ||
2036 | tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7; | ||
2037 | z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) - /* c2+c14 */ | ||
2038 | MULTIPLY(tmp6 - tmp2, FIX(2.238241955)); /* c4+c8 */ | ||
2039 | z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) - /* c8-c14 */ | ||
2040 | MULTIPLY(tmp0 - tmp2, FIX(0.091361227)); /* c2-c4 */ | ||
2041 | z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) + /* c2 */ | ||
2042 | MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) + /* c8 */ | ||
2043 | MULTIPLY(tmp1 - tmp4, FIX(0.790569415)); /* (c6+c12)/2 */ | ||
2044 | |||
2045 | dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS); | ||
2046 | dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS); | ||
2047 | |||
2048 | /* Odd part */ | ||
2049 | |||
2050 | tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16, | ||
2051 | FIX(1.224744871)); /* c5 */ | ||
2052 | tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */ | ||
2053 | MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876)); /* c9 */ | ||
2054 | tmp12 = MULTIPLY(tmp12, FIX(1.224744871)); /* c5 */ | ||
2055 | tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) + /* c1 */ | ||
2056 | MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) + /* c3 */ | ||
2057 | MULTIPLY(tmp13 + tmp15, FIX(0.575212477)); /* c11 */ | ||
2058 | tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) - /* c7-c11 */ | ||
2059 | MULTIPLY(tmp14, FIX(0.513743148)) + /* c3-c9 */ | ||
2060 | MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12; /* c1+c13 */ | ||
2061 | tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) - /* -(c1-c7) */ | ||
2062 | MULTIPLY(tmp11, FIX(2.176250899)) - /* c3+c9 */ | ||
2063 | MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12; /* c11+c13 */ | ||
2064 | |||
2065 | dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS); | ||
2066 | dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS); | ||
2067 | dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS); | ||
2068 | dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS); | ||
2069 | |||
2070 | ctr++; | ||
2071 | |||
2072 | if (ctr != DCTSIZE) { | ||
2073 | if (ctr == 15) | ||
2074 | break; /* Done. */ | ||
2075 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
2076 | } else | ||
2077 | dataptr = workspace; /* switch pointer to extended workspace */ | ||
2078 | } | ||
2079 | |||
2080 | /* Pass 2: process columns. | ||
2081 | * We leave the results scaled up by an overall factor of 8. | ||
2082 | * We must also scale the output by (8/15)**2 = 64/225, which we partially | ||
2083 | * fold into the constant multipliers and final shifting: | ||
2084 | * cK now represents sqrt(2) * cos(K*pi/30) * 256/225. | ||
2085 | */ | ||
2086 | |||
2087 | dataptr = data; | ||
2088 | wsptr = workspace; | ||
2089 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
2090 | /* Even part */ | ||
2091 | |||
2092 | tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6]; | ||
2093 | tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5]; | ||
2094 | tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4]; | ||
2095 | tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3]; | ||
2096 | tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2]; | ||
2097 | tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1]; | ||
2098 | tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0]; | ||
2099 | tmp7 = dataptr[DCTSIZE*7]; | ||
2100 | |||
2101 | tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6]; | ||
2102 | tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5]; | ||
2103 | tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4]; | ||
2104 | tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3]; | ||
2105 | tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2]; | ||
2106 | tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1]; | ||
2107 | tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0]; | ||
2108 | |||
2109 | z1 = tmp0 + tmp4 + tmp5; | ||
2110 | z2 = tmp1 + tmp3 + tmp6; | ||
2111 | z3 = tmp2 + tmp7; | ||
2112 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
2113 | DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */ | ||
2114 | CONST_BITS+2); | ||
2115 | z3 += z3; | ||
2116 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
2117 | DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */ | ||
2118 | MULTIPLY(z2 - z3, FIX(0.497227121)), /* c12 */ | ||
2119 | CONST_BITS+2); | ||
2120 | tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7; | ||
2121 | z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) - /* c2+c14 */ | ||
2122 | MULTIPLY(tmp6 - tmp2, FIX(2.546621957)); /* c4+c8 */ | ||
2123 | z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) - /* c8-c14 */ | ||
2124 | MULTIPLY(tmp0 - tmp2, FIX(0.103948774)); /* c2-c4 */ | ||
2125 | z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) + /* c2 */ | ||
2126 | MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) + /* c8 */ | ||
2127 | MULTIPLY(tmp1 - tmp4, FIX(0.899492312)); /* (c6+c12)/2 */ | ||
2128 | |||
2129 | dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2); | ||
2130 | dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2); | ||
2131 | |||
2132 | /* Odd part */ | ||
2133 | |||
2134 | tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16, | ||
2135 | FIX(1.393487498)); /* c5 */ | ||
2136 | tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */ | ||
2137 | MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187)); /* c9 */ | ||
2138 | tmp12 = MULTIPLY(tmp12, FIX(1.393487498)); /* c5 */ | ||
2139 | tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) + /* c1 */ | ||
2140 | MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) + /* c3 */ | ||
2141 | MULTIPLY(tmp13 + tmp15, FIX(0.654463974)); /* c11 */ | ||
2142 | tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) - /* c7-c11 */ | ||
2143 | MULTIPLY(tmp14, FIX(0.584525538)) + /* c3-c9 */ | ||
2144 | MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12; /* c1+c13 */ | ||
2145 | tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) - /* -(c1-c7) */ | ||
2146 | MULTIPLY(tmp11, FIX(2.476089912)) - /* c3+c9 */ | ||
2147 | MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12; /* c11+c13 */ | ||
2148 | |||
2149 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2); | ||
2150 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2); | ||
2151 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2); | ||
2152 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2); | ||
2153 | |||
2154 | dataptr++; /* advance pointer to next column */ | ||
2155 | wsptr++; /* advance pointer to next column */ | ||
2156 | } | ||
2157 | } | ||
2158 | |||
2159 | |||
2160 | /* | ||
2161 | * Perform the forward DCT on a 16x16 sample block. | ||
2162 | */ | ||
2163 | |||
2164 | GLOBAL(void) | ||
2165 | jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
2166 | { | ||
2167 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | ||
2168 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17; | ||
2169 | DCTELEM workspace[DCTSIZE2]; | ||
2170 | DCTELEM *dataptr; | ||
2171 | DCTELEM *wsptr; | ||
2172 | JSAMPROW elemptr; | ||
2173 | int ctr; | ||
2174 | SHIFT_TEMPS | ||
2175 | |||
2176 | /* Pass 1: process rows. */ | ||
2177 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
2178 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
2179 | /* cK represents sqrt(2) * cos(K*pi/32). */ | ||
2180 | |||
2181 | dataptr = data; | ||
2182 | ctr = 0; | ||
2183 | for (;;) { | ||
2184 | elemptr = sample_data[ctr] + start_col; | ||
2185 | |||
2186 | /* Even part */ | ||
2187 | |||
2188 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]); | ||
2189 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]); | ||
2190 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]); | ||
2191 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]); | ||
2192 | tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]); | ||
2193 | tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]); | ||
2194 | tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]); | ||
2195 | tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]); | ||
2196 | |||
2197 | tmp10 = tmp0 + tmp7; | ||
2198 | tmp14 = tmp0 - tmp7; | ||
2199 | tmp11 = tmp1 + tmp6; | ||
2200 | tmp15 = tmp1 - tmp6; | ||
2201 | tmp12 = tmp2 + tmp5; | ||
2202 | tmp16 = tmp2 - tmp5; | ||
2203 | tmp13 = tmp3 + tmp4; | ||
2204 | tmp17 = tmp3 - tmp4; | ||
2205 | |||
2206 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]); | ||
2207 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]); | ||
2208 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]); | ||
2209 | tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]); | ||
2210 | tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]); | ||
2211 | tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]); | ||
2212 | tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]); | ||
2213 | tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]); | ||
2214 | |||
2215 | /* Apply unsigned->signed conversion */ | ||
2216 | dataptr[0] = (DCTELEM) | ||
2217 | ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS); | ||
2218 | dataptr[4] = (DCTELEM) | ||
2219 | DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ | ||
2220 | MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ | ||
2221 | CONST_BITS-PASS1_BITS); | ||
2222 | |||
2223 | tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */ | ||
2224 | MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */ | ||
2225 | |||
2226 | dataptr[2] = (DCTELEM) | ||
2227 | DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */ | ||
2228 | + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */ | ||
2229 | CONST_BITS-PASS1_BITS); | ||
2230 | dataptr[6] = (DCTELEM) | ||
2231 | DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */ | ||
2232 | - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */ | ||
2233 | CONST_BITS-PASS1_BITS); | ||
2234 | |||
2235 | /* Odd part */ | ||
2236 | |||
2237 | tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */ | ||
2238 | MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */ | ||
2239 | tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */ | ||
2240 | MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */ | ||
2241 | tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */ | ||
2242 | MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */ | ||
2243 | tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */ | ||
2244 | MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */ | ||
2245 | tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */ | ||
2246 | MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */ | ||
2247 | tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */ | ||
2248 | MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */ | ||
2249 | tmp10 = tmp11 + tmp12 + tmp13 - | ||
2250 | MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */ | ||
2251 | MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */ | ||
2252 | tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */ | ||
2253 | - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */ | ||
2254 | tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */ | ||
2255 | + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */ | ||
2256 | tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */ | ||
2257 | + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */ | ||
2258 | |||
2259 | dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); | ||
2260 | dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); | ||
2261 | dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); | ||
2262 | dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); | ||
2263 | |||
2264 | ctr++; | ||
2265 | |||
2266 | if (ctr != DCTSIZE) { | ||
2267 | if (ctr == DCTSIZE * 2) | ||
2268 | break; /* Done. */ | ||
2269 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
2270 | } else | ||
2271 | dataptr = workspace; /* switch pointer to extended workspace */ | ||
2272 | } | ||
2273 | |||
2274 | /* Pass 2: process columns. | ||
2275 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
2276 | * by an overall factor of 8. | ||
2277 | * We must also scale the output by (8/16)**2 = 1/2**2. | ||
2278 | */ | ||
2279 | |||
2280 | dataptr = data; | ||
2281 | wsptr = workspace; | ||
2282 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
2283 | /* Even part */ | ||
2284 | |||
2285 | tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7]; | ||
2286 | tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6]; | ||
2287 | tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5]; | ||
2288 | tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4]; | ||
2289 | tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3]; | ||
2290 | tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2]; | ||
2291 | tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1]; | ||
2292 | tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0]; | ||
2293 | |||
2294 | tmp10 = tmp0 + tmp7; | ||
2295 | tmp14 = tmp0 - tmp7; | ||
2296 | tmp11 = tmp1 + tmp6; | ||
2297 | tmp15 = tmp1 - tmp6; | ||
2298 | tmp12 = tmp2 + tmp5; | ||
2299 | tmp16 = tmp2 - tmp5; | ||
2300 | tmp13 = tmp3 + tmp4; | ||
2301 | tmp17 = tmp3 - tmp4; | ||
2302 | |||
2303 | tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7]; | ||
2304 | tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6]; | ||
2305 | tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5]; | ||
2306 | tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4]; | ||
2307 | tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3]; | ||
2308 | tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2]; | ||
2309 | tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1]; | ||
2310 | tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0]; | ||
2311 | |||
2312 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
2313 | DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2); | ||
2314 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
2315 | DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ | ||
2316 | MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ | ||
2317 | CONST_BITS+PASS1_BITS+2); | ||
2318 | |||
2319 | tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */ | ||
2320 | MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */ | ||
2321 | |||
2322 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
2323 | DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */ | ||
2324 | + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+10 */ | ||
2325 | CONST_BITS+PASS1_BITS+2); | ||
2326 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
2327 | DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */ | ||
2328 | - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */ | ||
2329 | CONST_BITS+PASS1_BITS+2); | ||
2330 | |||
2331 | /* Odd part */ | ||
2332 | |||
2333 | tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */ | ||
2334 | MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */ | ||
2335 | tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */ | ||
2336 | MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */ | ||
2337 | tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */ | ||
2338 | MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */ | ||
2339 | tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */ | ||
2340 | MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */ | ||
2341 | tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */ | ||
2342 | MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */ | ||
2343 | tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */ | ||
2344 | MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */ | ||
2345 | tmp10 = tmp11 + tmp12 + tmp13 - | ||
2346 | MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */ | ||
2347 | MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */ | ||
2348 | tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */ | ||
2349 | - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */ | ||
2350 | tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */ | ||
2351 | + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */ | ||
2352 | tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */ | ||
2353 | + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */ | ||
2354 | |||
2355 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2); | ||
2356 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2); | ||
2357 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2); | ||
2358 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2); | ||
2359 | |||
2360 | dataptr++; /* advance pointer to next column */ | ||
2361 | wsptr++; /* advance pointer to next column */ | ||
2362 | } | ||
2363 | } | ||
2364 | |||
2365 | |||
2366 | /* | ||
2367 | * Perform the forward DCT on a 16x8 sample block. | ||
2368 | * | ||
2369 | * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns). | ||
2370 | */ | ||
2371 | |||
2372 | GLOBAL(void) | ||
2373 | jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
2374 | { | ||
2375 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | ||
2376 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17; | ||
2377 | INT32 z1; | ||
2378 | DCTELEM *dataptr; | ||
2379 | JSAMPROW elemptr; | ||
2380 | int ctr; | ||
2381 | SHIFT_TEMPS | ||
2382 | |||
2383 | /* Pass 1: process rows. */ | ||
2384 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
2385 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
2386 | /* 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). */ | ||
2387 | |||
2388 | dataptr = data; | ||
2389 | ctr = 0; | ||
2390 | for (ctr = 0; ctr < DCTSIZE; ctr++) { | ||
2391 | elemptr = sample_data[ctr] + start_col; | ||
2392 | |||
2393 | /* Even part */ | ||
2394 | |||
2395 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]); | ||
2396 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]); | ||
2397 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]); | ||
2398 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]); | ||
2399 | tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]); | ||
2400 | tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]); | ||
2401 | tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]); | ||
2402 | tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]); | ||
2403 | |||
2404 | tmp10 = tmp0 + tmp7; | ||
2405 | tmp14 = tmp0 - tmp7; | ||
2406 | tmp11 = tmp1 + tmp6; | ||
2407 | tmp15 = tmp1 - tmp6; | ||
2408 | tmp12 = tmp2 + tmp5; | ||
2409 | tmp16 = tmp2 - tmp5; | ||
2410 | tmp13 = tmp3 + tmp4; | ||
2411 | tmp17 = tmp3 - tmp4; | ||
2412 | |||
2413 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]); | ||
2414 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]); | ||
2415 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]); | ||
2416 | tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]); | ||
2417 | tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]); | ||
2418 | tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]); | ||
2419 | tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]); | ||
2420 | tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]); | ||
2421 | |||
2422 | /* Apply unsigned->signed conversion */ | ||
2423 | dataptr[0] = (DCTELEM) | ||
2424 | ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS); | ||
2425 | dataptr[4] = (DCTELEM) | ||
2426 | DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ | ||
2427 | MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ | ||
2428 | CONST_BITS-PASS1_BITS); | ||
2429 | |||
2430 | tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */ | ||
2431 | MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */ | ||
2432 | |||
2433 | dataptr[2] = (DCTELEM) | ||
2434 | DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */ | ||
2435 | + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */ | ||
2436 | CONST_BITS-PASS1_BITS); | ||
2437 | dataptr[6] = (DCTELEM) | ||
2438 | DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */ | ||
2439 | - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */ | ||
2440 | CONST_BITS-PASS1_BITS); | ||
2441 | |||
2442 | /* Odd part */ | ||
2443 | |||
2444 | tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */ | ||
2445 | MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */ | ||
2446 | tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */ | ||
2447 | MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */ | ||
2448 | tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */ | ||
2449 | MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */ | ||
2450 | tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */ | ||
2451 | MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */ | ||
2452 | tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */ | ||
2453 | MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */ | ||
2454 | tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */ | ||
2455 | MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */ | ||
2456 | tmp10 = tmp11 + tmp12 + tmp13 - | ||
2457 | MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */ | ||
2458 | MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */ | ||
2459 | tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */ | ||
2460 | - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */ | ||
2461 | tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */ | ||
2462 | + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */ | ||
2463 | tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */ | ||
2464 | + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */ | ||
2465 | |||
2466 | dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); | ||
2467 | dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); | ||
2468 | dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); | ||
2469 | dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); | ||
2470 | |||
2471 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
2472 | } | ||
2473 | |||
2474 | /* Pass 2: process columns. | ||
2475 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
2476 | * by an overall factor of 8. | ||
2477 | * We must also scale the output by 8/16 = 1/2. | ||
2478 | */ | ||
2479 | |||
2480 | dataptr = data; | ||
2481 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
2482 | /* Even part per LL&M figure 1 --- note that published figure is faulty; | ||
2483 | * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". | ||
2484 | */ | ||
2485 | |||
2486 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; | ||
2487 | tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; | ||
2488 | tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; | ||
2489 | tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; | ||
2490 | |||
2491 | tmp10 = tmp0 + tmp3; | ||
2492 | tmp12 = tmp0 - tmp3; | ||
2493 | tmp11 = tmp1 + tmp2; | ||
2494 | tmp13 = tmp1 - tmp2; | ||
2495 | |||
2496 | tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; | ||
2497 | tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; | ||
2498 | tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; | ||
2499 | tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; | ||
2500 | |||
2501 | dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1); | ||
2502 | dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1); | ||
2503 | |||
2504 | z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | ||
2505 | dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), | ||
2506 | CONST_BITS+PASS1_BITS+1); | ||
2507 | dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), | ||
2508 | CONST_BITS+PASS1_BITS+1); | ||
2509 | |||
2510 | /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | ||
2511 | * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). | ||
2512 | * i0..i3 in the paper are tmp0..tmp3 here. | ||
2513 | */ | ||
2514 | |||
2515 | tmp10 = tmp0 + tmp3; | ||
2516 | tmp11 = tmp1 + tmp2; | ||
2517 | tmp12 = tmp0 + tmp2; | ||
2518 | tmp13 = tmp1 + tmp3; | ||
2519 | z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ | ||
2520 | |||
2521 | tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ | ||
2522 | tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ | ||
2523 | tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ | ||
2524 | tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ | ||
2525 | tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ | ||
2526 | tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ | ||
2527 | tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ | ||
2528 | tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ | ||
2529 | |||
2530 | tmp12 += z1; | ||
2531 | tmp13 += z1; | ||
2532 | |||
2533 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, | ||
2534 | CONST_BITS+PASS1_BITS+1); | ||
2535 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, | ||
2536 | CONST_BITS+PASS1_BITS+1); | ||
2537 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, | ||
2538 | CONST_BITS+PASS1_BITS+1); | ||
2539 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, | ||
2540 | CONST_BITS+PASS1_BITS+1); | ||
2541 | |||
2542 | dataptr++; /* advance pointer to next column */ | ||
2543 | } | ||
2544 | } | ||
2545 | |||
2546 | |||
2547 | /* | ||
2548 | * Perform the forward DCT on a 14x7 sample block. | ||
2549 | * | ||
2550 | * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns). | ||
2551 | */ | ||
2552 | |||
2553 | GLOBAL(void) | ||
2554 | jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
2555 | { | ||
2556 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; | ||
2557 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; | ||
2558 | INT32 z1, z2, z3; | ||
2559 | DCTELEM *dataptr; | ||
2560 | JSAMPROW elemptr; | ||
2561 | int ctr; | ||
2562 | SHIFT_TEMPS | ||
2563 | |||
2564 | /* Zero bottom row of output coefficient block. */ | ||
2565 | MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE); | ||
2566 | |||
2567 | /* Pass 1: process rows. */ | ||
2568 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
2569 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
2570 | /* 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28). */ | ||
2571 | |||
2572 | dataptr = data; | ||
2573 | for (ctr = 0; ctr < 7; ctr++) { | ||
2574 | elemptr = sample_data[ctr] + start_col; | ||
2575 | |||
2576 | /* Even part */ | ||
2577 | |||
2578 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]); | ||
2579 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]); | ||
2580 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]); | ||
2581 | tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]); | ||
2582 | tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]); | ||
2583 | tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]); | ||
2584 | tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]); | ||
2585 | |||
2586 | tmp10 = tmp0 + tmp6; | ||
2587 | tmp14 = tmp0 - tmp6; | ||
2588 | tmp11 = tmp1 + tmp5; | ||
2589 | tmp15 = tmp1 - tmp5; | ||
2590 | tmp12 = tmp2 + tmp4; | ||
2591 | tmp16 = tmp2 - tmp4; | ||
2592 | |||
2593 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]); | ||
2594 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]); | ||
2595 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]); | ||
2596 | tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]); | ||
2597 | tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]); | ||
2598 | tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]); | ||
2599 | tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]); | ||
2600 | |||
2601 | /* Apply unsigned->signed conversion */ | ||
2602 | dataptr[0] = (DCTELEM) | ||
2603 | ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS); | ||
2604 | tmp13 += tmp13; | ||
2605 | dataptr[4] = (DCTELEM) | ||
2606 | DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */ | ||
2607 | MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */ | ||
2608 | MULTIPLY(tmp12 - tmp13, FIX(0.881747734)), /* c8 */ | ||
2609 | CONST_BITS-PASS1_BITS); | ||
2610 | |||
2611 | tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686)); /* c6 */ | ||
2612 | |||
2613 | dataptr[2] = (DCTELEM) | ||
2614 | DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590)) /* c2-c6 */ | ||
2615 | + MULTIPLY(tmp16, FIX(0.613604268)), /* c10 */ | ||
2616 | CONST_BITS-PASS1_BITS); | ||
2617 | dataptr[6] = (DCTELEM) | ||
2618 | DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954)) /* c6+c10 */ | ||
2619 | - MULTIPLY(tmp16, FIX(1.378756276)), /* c2 */ | ||
2620 | CONST_BITS-PASS1_BITS); | ||
2621 | |||
2622 | /* Odd part */ | ||
2623 | |||
2624 | tmp10 = tmp1 + tmp2; | ||
2625 | tmp11 = tmp5 - tmp4; | ||
2626 | dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS); | ||
2627 | tmp3 <<= CONST_BITS; | ||
2628 | tmp10 = MULTIPLY(tmp10, - FIX(0.158341681)); /* -c13 */ | ||
2629 | tmp11 = MULTIPLY(tmp11, FIX(1.405321284)); /* c1 */ | ||
2630 | tmp10 += tmp11 - tmp3; | ||
2631 | tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) + /* c5 */ | ||
2632 | MULTIPLY(tmp4 + tmp6, FIX(0.752406978)); /* c9 */ | ||
2633 | dataptr[5] = (DCTELEM) | ||
2634 | DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */ | ||
2635 | + MULTIPLY(tmp4, FIX(1.119999435)), /* c1+c11-c9 */ | ||
2636 | CONST_BITS-PASS1_BITS); | ||
2637 | tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) + /* c3 */ | ||
2638 | MULTIPLY(tmp5 - tmp6, FIX(0.467085129)); /* c11 */ | ||
2639 | dataptr[3] = (DCTELEM) | ||
2640 | DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */ | ||
2641 | - MULTIPLY(tmp5, FIX(3.069855259)), /* c1+c5+c11 */ | ||
2642 | CONST_BITS-PASS1_BITS); | ||
2643 | dataptr[1] = (DCTELEM) | ||
2644 | DESCALE(tmp11 + tmp12 + tmp3 + tmp6 - | ||
2645 | MULTIPLY(tmp0 + tmp6, FIX(1.126980169)), /* c3+c5-c1 */ | ||
2646 | CONST_BITS-PASS1_BITS); | ||
2647 | |||
2648 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
2649 | } | ||
2650 | |||
2651 | /* Pass 2: process columns. | ||
2652 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
2653 | * by an overall factor of 8. | ||
2654 | * We must also scale the output by (8/14)*(8/7) = 32/49, which we | ||
2655 | * partially fold into the constant multipliers and final shifting: | ||
2656 | * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49. | ||
2657 | */ | ||
2658 | |||
2659 | dataptr = data; | ||
2660 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
2661 | /* Even part */ | ||
2662 | |||
2663 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6]; | ||
2664 | tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5]; | ||
2665 | tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4]; | ||
2666 | tmp3 = dataptr[DCTSIZE*3]; | ||
2667 | |||
2668 | tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6]; | ||
2669 | tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5]; | ||
2670 | tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4]; | ||
2671 | |||
2672 | z1 = tmp0 + tmp2; | ||
2673 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
2674 | DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */ | ||
2675 | CONST_BITS+PASS1_BITS+1); | ||
2676 | tmp3 += tmp3; | ||
2677 | z1 -= tmp3; | ||
2678 | z1 -= tmp3; | ||
2679 | z1 = MULTIPLY(z1, FIX(0.461784020)); /* (c2+c6-c4)/2 */ | ||
2680 | z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084)); /* (c2+c4-c6)/2 */ | ||
2681 | z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446)); /* c6 */ | ||
2682 | dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1); | ||
2683 | z1 -= z2; | ||
2684 | z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509)); /* c4 */ | ||
2685 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
2686 | DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */ | ||
2687 | CONST_BITS+PASS1_BITS+1); | ||
2688 | dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1); | ||
2689 | |||
2690 | /* Odd part */ | ||
2691 | |||
2692 | tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677)); /* (c3+c1-c5)/2 */ | ||
2693 | tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464)); /* (c3+c5-c1)/2 */ | ||
2694 | tmp0 = tmp1 - tmp2; | ||
2695 | tmp1 += tmp2; | ||
2696 | tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */ | ||
2697 | tmp1 += tmp2; | ||
2698 | tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310)); /* c5 */ | ||
2699 | tmp0 += tmp3; | ||
2700 | tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355)); /* c3+c1-c5 */ | ||
2701 | |||
2702 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1); | ||
2703 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1); | ||
2704 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1); | ||
2705 | |||
2706 | dataptr++; /* advance pointer to next column */ | ||
2707 | } | ||
2708 | } | ||
2709 | |||
2710 | |||
2711 | /* | ||
2712 | * Perform the forward DCT on a 12x6 sample block. | ||
2713 | * | ||
2714 | * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns). | ||
2715 | */ | ||
2716 | |||
2717 | GLOBAL(void) | ||
2718 | jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
2719 | { | ||
2720 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; | ||
2721 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; | ||
2722 | DCTELEM *dataptr; | ||
2723 | JSAMPROW elemptr; | ||
2724 | int ctr; | ||
2725 | SHIFT_TEMPS | ||
2726 | |||
2727 | /* Zero 2 bottom rows of output coefficient block. */ | ||
2728 | MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2); | ||
2729 | |||
2730 | /* Pass 1: process rows. */ | ||
2731 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
2732 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
2733 | /* 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24). */ | ||
2734 | |||
2735 | dataptr = data; | ||
2736 | for (ctr = 0; ctr < 6; ctr++) { | ||
2737 | elemptr = sample_data[ctr] + start_col; | ||
2738 | |||
2739 | /* Even part */ | ||
2740 | |||
2741 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]); | ||
2742 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]); | ||
2743 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]); | ||
2744 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]); | ||
2745 | tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]); | ||
2746 | tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]); | ||
2747 | |||
2748 | tmp10 = tmp0 + tmp5; | ||
2749 | tmp13 = tmp0 - tmp5; | ||
2750 | tmp11 = tmp1 + tmp4; | ||
2751 | tmp14 = tmp1 - tmp4; | ||
2752 | tmp12 = tmp2 + tmp3; | ||
2753 | tmp15 = tmp2 - tmp3; | ||
2754 | |||
2755 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]); | ||
2756 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]); | ||
2757 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]); | ||
2758 | tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]); | ||
2759 | tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]); | ||
2760 | tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]); | ||
2761 | |||
2762 | /* Apply unsigned->signed conversion */ | ||
2763 | dataptr[0] = (DCTELEM) | ||
2764 | ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS); | ||
2765 | dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS); | ||
2766 | dataptr[4] = (DCTELEM) | ||
2767 | DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */ | ||
2768 | CONST_BITS-PASS1_BITS); | ||
2769 | dataptr[2] = (DCTELEM) | ||
2770 | DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */ | ||
2771 | CONST_BITS-PASS1_BITS); | ||
2772 | |||
2773 | /* Odd part */ | ||
2774 | |||
2775 | tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100); /* c9 */ | ||
2776 | tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865); /* c3-c9 */ | ||
2777 | tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065); /* c3+c9 */ | ||
2778 | tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054)); /* c5 */ | ||
2779 | tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669)); /* c7 */ | ||
2780 | tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */ | ||
2781 | + MULTIPLY(tmp5, FIX(0.184591911)); /* c11 */ | ||
2782 | tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */ | ||
2783 | tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */ | ||
2784 | + MULTIPLY(tmp5, FIX(0.860918669)); /* c7 */ | ||
2785 | tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */ | ||
2786 | - MULTIPLY(tmp5, FIX(1.121971054)); /* c5 */ | ||
2787 | tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */ | ||
2788 | - MULTIPLY(tmp2 + tmp5, FIX_0_541196100); /* c9 */ | ||
2789 | |||
2790 | dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS); | ||
2791 | dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS); | ||
2792 | dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS); | ||
2793 | dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS); | ||
2794 | |||
2795 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
2796 | } | ||
2797 | |||
2798 | /* Pass 2: process columns. | ||
2799 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
2800 | * by an overall factor of 8. | ||
2801 | * We must also scale the output by (8/12)*(8/6) = 8/9, which we | ||
2802 | * partially fold into the constant multipliers and final shifting: | ||
2803 | * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9. | ||
2804 | */ | ||
2805 | |||
2806 | dataptr = data; | ||
2807 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
2808 | /* Even part */ | ||
2809 | |||
2810 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5]; | ||
2811 | tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4]; | ||
2812 | tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; | ||
2813 | |||
2814 | tmp10 = tmp0 + tmp2; | ||
2815 | tmp12 = tmp0 - tmp2; | ||
2816 | |||
2817 | tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5]; | ||
2818 | tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4]; | ||
2819 | tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; | ||
2820 | |||
2821 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
2822 | DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */ | ||
2823 | CONST_BITS+PASS1_BITS+1); | ||
2824 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
2825 | DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */ | ||
2826 | CONST_BITS+PASS1_BITS+1); | ||
2827 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
2828 | DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */ | ||
2829 | CONST_BITS+PASS1_BITS+1); | ||
2830 | |||
2831 | /* Odd part */ | ||
2832 | |||
2833 | tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */ | ||
2834 | |||
2835 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
2836 | DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ | ||
2837 | CONST_BITS+PASS1_BITS+1); | ||
2838 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
2839 | DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */ | ||
2840 | CONST_BITS+PASS1_BITS+1); | ||
2841 | dataptr[DCTSIZE*5] = (DCTELEM) | ||
2842 | DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */ | ||
2843 | CONST_BITS+PASS1_BITS+1); | ||
2844 | |||
2845 | dataptr++; /* advance pointer to next column */ | ||
2846 | } | ||
2847 | } | ||
2848 | |||
2849 | |||
2850 | /* | ||
2851 | * Perform the forward DCT on a 10x5 sample block. | ||
2852 | * | ||
2853 | * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns). | ||
2854 | */ | ||
2855 | |||
2856 | GLOBAL(void) | ||
2857 | jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
2858 | { | ||
2859 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4; | ||
2860 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14; | ||
2861 | DCTELEM *dataptr; | ||
2862 | JSAMPROW elemptr; | ||
2863 | int ctr; | ||
2864 | SHIFT_TEMPS | ||
2865 | |||
2866 | /* Zero 3 bottom rows of output coefficient block. */ | ||
2867 | MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3); | ||
2868 | |||
2869 | /* Pass 1: process rows. */ | ||
2870 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
2871 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
2872 | /* 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20). */ | ||
2873 | |||
2874 | dataptr = data; | ||
2875 | for (ctr = 0; ctr < 5; ctr++) { | ||
2876 | elemptr = sample_data[ctr] + start_col; | ||
2877 | |||
2878 | /* Even part */ | ||
2879 | |||
2880 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]); | ||
2881 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]); | ||
2882 | tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]); | ||
2883 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]); | ||
2884 | tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]); | ||
2885 | |||
2886 | tmp10 = tmp0 + tmp4; | ||
2887 | tmp13 = tmp0 - tmp4; | ||
2888 | tmp11 = tmp1 + tmp3; | ||
2889 | tmp14 = tmp1 - tmp3; | ||
2890 | |||
2891 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]); | ||
2892 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]); | ||
2893 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]); | ||
2894 | tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]); | ||
2895 | tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]); | ||
2896 | |||
2897 | /* Apply unsigned->signed conversion */ | ||
2898 | dataptr[0] = (DCTELEM) | ||
2899 | ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS); | ||
2900 | tmp12 += tmp12; | ||
2901 | dataptr[4] = (DCTELEM) | ||
2902 | DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */ | ||
2903 | MULTIPLY(tmp11 - tmp12, FIX(0.437016024)), /* c8 */ | ||
2904 | CONST_BITS-PASS1_BITS); | ||
2905 | tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876)); /* c6 */ | ||
2906 | dataptr[2] = (DCTELEM) | ||
2907 | DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)), /* c2-c6 */ | ||
2908 | CONST_BITS-PASS1_BITS); | ||
2909 | dataptr[6] = (DCTELEM) | ||
2910 | DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)), /* c2+c6 */ | ||
2911 | CONST_BITS-PASS1_BITS); | ||
2912 | |||
2913 | /* Odd part */ | ||
2914 | |||
2915 | tmp10 = tmp0 + tmp4; | ||
2916 | tmp11 = tmp1 - tmp3; | ||
2917 | dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS); | ||
2918 | tmp2 <<= CONST_BITS; | ||
2919 | dataptr[1] = (DCTELEM) | ||
2920 | DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) + /* c1 */ | ||
2921 | MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 + /* c3 */ | ||
2922 | MULTIPLY(tmp3, FIX(0.642039522)) + /* c7 */ | ||
2923 | MULTIPLY(tmp4, FIX(0.221231742)), /* c9 */ | ||
2924 | CONST_BITS-PASS1_BITS); | ||
2925 | tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) - /* (c3+c7)/2 */ | ||
2926 | MULTIPLY(tmp1 + tmp3, FIX(0.587785252)); /* (c1-c9)/2 */ | ||
2927 | tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) + /* (c3-c7)/2 */ | ||
2928 | (tmp11 << (CONST_BITS - 1)) - tmp2; | ||
2929 | dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS); | ||
2930 | dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS); | ||
2931 | |||
2932 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
2933 | } | ||
2934 | |||
2935 | /* Pass 2: process columns. | ||
2936 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
2937 | * by an overall factor of 8. | ||
2938 | * We must also scale the output by (8/10)*(8/5) = 32/25, which we | ||
2939 | * fold into the constant multipliers: | ||
2940 | * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25. | ||
2941 | */ | ||
2942 | |||
2943 | dataptr = data; | ||
2944 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
2945 | /* Even part */ | ||
2946 | |||
2947 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4]; | ||
2948 | tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3]; | ||
2949 | tmp2 = dataptr[DCTSIZE*2]; | ||
2950 | |||
2951 | tmp10 = tmp0 + tmp1; | ||
2952 | tmp11 = tmp0 - tmp1; | ||
2953 | |||
2954 | tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4]; | ||
2955 | tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3]; | ||
2956 | |||
2957 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
2958 | DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)), /* 32/25 */ | ||
2959 | CONST_BITS+PASS1_BITS); | ||
2960 | tmp11 = MULTIPLY(tmp11, FIX(1.011928851)); /* (c2+c4)/2 */ | ||
2961 | tmp10 -= tmp2 << 2; | ||
2962 | tmp10 = MULTIPLY(tmp10, FIX(0.452548340)); /* (c2-c4)/2 */ | ||
2963 | dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS); | ||
2964 | dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS); | ||
2965 | |||
2966 | /* Odd part */ | ||
2967 | |||
2968 | tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961)); /* c3 */ | ||
2969 | |||
2970 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
2971 | DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */ | ||
2972 | CONST_BITS+PASS1_BITS); | ||
2973 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
2974 | DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */ | ||
2975 | CONST_BITS+PASS1_BITS); | ||
2976 | |||
2977 | dataptr++; /* advance pointer to next column */ | ||
2978 | } | ||
2979 | } | ||
2980 | |||
2981 | |||
2982 | /* | ||
2983 | * Perform the forward DCT on an 8x4 sample block. | ||
2984 | * | ||
2985 | * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns). | ||
2986 | */ | ||
2987 | |||
2988 | GLOBAL(void) | ||
2989 | jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
2990 | { | ||
2991 | INT32 tmp0, tmp1, tmp2, tmp3; | ||
2992 | INT32 tmp10, tmp11, tmp12, tmp13; | ||
2993 | INT32 z1; | ||
2994 | DCTELEM *dataptr; | ||
2995 | JSAMPROW elemptr; | ||
2996 | int ctr; | ||
2997 | SHIFT_TEMPS | ||
2998 | |||
2999 | /* Zero 4 bottom rows of output coefficient block. */ | ||
3000 | MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4); | ||
3001 | |||
3002 | /* Pass 1: process rows. */ | ||
3003 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
3004 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
3005 | /* We must also scale the output by 8/4 = 2, which we add here. */ | ||
3006 | |||
3007 | dataptr = data; | ||
3008 | for (ctr = 0; ctr < 4; ctr++) { | ||
3009 | elemptr = sample_data[ctr] + start_col; | ||
3010 | |||
3011 | /* Even part per LL&M figure 1 --- note that published figure is faulty; | ||
3012 | * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". | ||
3013 | */ | ||
3014 | |||
3015 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); | ||
3016 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); | ||
3017 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); | ||
3018 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]); | ||
3019 | |||
3020 | tmp10 = tmp0 + tmp3; | ||
3021 | tmp12 = tmp0 - tmp3; | ||
3022 | tmp11 = tmp1 + tmp2; | ||
3023 | tmp13 = tmp1 - tmp2; | ||
3024 | |||
3025 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); | ||
3026 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); | ||
3027 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); | ||
3028 | tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); | ||
3029 | |||
3030 | /* Apply unsigned->signed conversion */ | ||
3031 | dataptr[0] = (DCTELEM) | ||
3032 | ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1)); | ||
3033 | dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1)); | ||
3034 | |||
3035 | z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | ||
3036 | /* Add fudge factor here for final descale. */ | ||
3037 | z1 += ONE << (CONST_BITS-PASS1_BITS-2); | ||
3038 | dataptr[2] = (DCTELEM) RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), | ||
3039 | CONST_BITS-PASS1_BITS-1); | ||
3040 | dataptr[6] = (DCTELEM) RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), | ||
3041 | CONST_BITS-PASS1_BITS-1); | ||
3042 | |||
3043 | /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | ||
3044 | * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). | ||
3045 | * i0..i3 in the paper are tmp0..tmp3 here. | ||
3046 | */ | ||
3047 | |||
3048 | tmp10 = tmp0 + tmp3; | ||
3049 | tmp11 = tmp1 + tmp2; | ||
3050 | tmp12 = tmp0 + tmp2; | ||
3051 | tmp13 = tmp1 + tmp3; | ||
3052 | z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ | ||
3053 | /* Add fudge factor here for final descale. */ | ||
3054 | z1 += ONE << (CONST_BITS-PASS1_BITS-2); | ||
3055 | |||
3056 | tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ | ||
3057 | tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ | ||
3058 | tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ | ||
3059 | tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ | ||
3060 | tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ | ||
3061 | tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ | ||
3062 | tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ | ||
3063 | tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ | ||
3064 | |||
3065 | tmp12 += z1; | ||
3066 | tmp13 += z1; | ||
3067 | |||
3068 | dataptr[1] = (DCTELEM) | ||
3069 | RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS-1); | ||
3070 | dataptr[3] = (DCTELEM) | ||
3071 | RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS-1); | ||
3072 | dataptr[5] = (DCTELEM) | ||
3073 | RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS-1); | ||
3074 | dataptr[7] = (DCTELEM) | ||
3075 | RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS-1); | ||
3076 | |||
3077 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
3078 | } | ||
3079 | |||
3080 | /* Pass 2: process columns. | ||
3081 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
3082 | * by an overall factor of 8. | ||
3083 | * 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). | ||
3084 | */ | ||
3085 | |||
3086 | dataptr = data; | ||
3087 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
3088 | /* Even part */ | ||
3089 | |||
3090 | /* Add fudge factor here for final descale. */ | ||
3091 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1)); | ||
3092 | tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2]; | ||
3093 | |||
3094 | tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3]; | ||
3095 | tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2]; | ||
3096 | |||
3097 | dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS); | ||
3098 | dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS); | ||
3099 | |||
3100 | /* Odd part */ | ||
3101 | |||
3102 | tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ | ||
3103 | /* Add fudge factor here for final descale. */ | ||
3104 | tmp0 += ONE << (CONST_BITS+PASS1_BITS-1); | ||
3105 | |||
3106 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
3107 | RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ | ||
3108 | CONST_BITS+PASS1_BITS); | ||
3109 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
3110 | RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ | ||
3111 | CONST_BITS+PASS1_BITS); | ||
3112 | |||
3113 | dataptr++; /* advance pointer to next column */ | ||
3114 | } | ||
3115 | } | ||
3116 | |||
3117 | |||
3118 | /* | ||
3119 | * Perform the forward DCT on a 6x3 sample block. | ||
3120 | * | ||
3121 | * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns). | ||
3122 | */ | ||
3123 | |||
3124 | GLOBAL(void) | ||
3125 | jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
3126 | { | ||
3127 | INT32 tmp0, tmp1, tmp2; | ||
3128 | INT32 tmp10, tmp11, tmp12; | ||
3129 | DCTELEM *dataptr; | ||
3130 | JSAMPROW elemptr; | ||
3131 | int ctr; | ||
3132 | SHIFT_TEMPS | ||
3133 | |||
3134 | /* Pre-zero output coefficient block. */ | ||
3135 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
3136 | |||
3137 | /* Pass 1: process rows. */ | ||
3138 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
3139 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
3140 | /* We scale the results further by 2 as part of output adaption */ | ||
3141 | /* scaling for different DCT size. */ | ||
3142 | /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ | ||
3143 | |||
3144 | dataptr = data; | ||
3145 | for (ctr = 0; ctr < 3; ctr++) { | ||
3146 | elemptr = sample_data[ctr] + start_col; | ||
3147 | |||
3148 | /* Even part */ | ||
3149 | |||
3150 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]); | ||
3151 | tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]); | ||
3152 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]); | ||
3153 | |||
3154 | tmp10 = tmp0 + tmp2; | ||
3155 | tmp12 = tmp0 - tmp2; | ||
3156 | |||
3157 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); | ||
3158 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); | ||
3159 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); | ||
3160 | |||
3161 | /* Apply unsigned->signed conversion */ | ||
3162 | dataptr[0] = (DCTELEM) | ||
3163 | ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1)); | ||
3164 | dataptr[2] = (DCTELEM) | ||
3165 | DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ | ||
3166 | CONST_BITS-PASS1_BITS-1); | ||
3167 | dataptr[4] = (DCTELEM) | ||
3168 | DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */ | ||
3169 | CONST_BITS-PASS1_BITS-1); | ||
3170 | |||
3171 | /* Odd part */ | ||
3172 | |||
3173 | tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */ | ||
3174 | CONST_BITS-PASS1_BITS-1); | ||
3175 | |||
3176 | dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1))); | ||
3177 | dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1)); | ||
3178 | dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1))); | ||
3179 | |||
3180 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
3181 | } | ||
3182 | |||
3183 | /* Pass 2: process columns. | ||
3184 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
3185 | * by an overall factor of 8. | ||
3186 | * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially | ||
3187 | * fold into the constant multipliers (other part was done in pass 1): | ||
3188 | * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9. | ||
3189 | */ | ||
3190 | |||
3191 | dataptr = data; | ||
3192 | for (ctr = 0; ctr < 6; ctr++) { | ||
3193 | /* Even part */ | ||
3194 | |||
3195 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2]; | ||
3196 | tmp1 = dataptr[DCTSIZE*1]; | ||
3197 | |||
3198 | tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2]; | ||
3199 | |||
3200 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
3201 | DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ | ||
3202 | CONST_BITS+PASS1_BITS); | ||
3203 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
3204 | DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */ | ||
3205 | CONST_BITS+PASS1_BITS); | ||
3206 | |||
3207 | /* Odd part */ | ||
3208 | |||
3209 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
3210 | DESCALE(MULTIPLY(tmp2, FIX(2.177324216)), /* c1 */ | ||
3211 | CONST_BITS+PASS1_BITS); | ||
3212 | |||
3213 | dataptr++; /* advance pointer to next column */ | ||
3214 | } | ||
3215 | } | ||
3216 | |||
3217 | |||
3218 | /* | ||
3219 | * Perform the forward DCT on a 4x2 sample block. | ||
3220 | * | ||
3221 | * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns). | ||
3222 | */ | ||
3223 | |||
3224 | GLOBAL(void) | ||
3225 | jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
3226 | { | ||
3227 | INT32 tmp0, tmp1; | ||
3228 | INT32 tmp10, tmp11; | ||
3229 | DCTELEM *dataptr; | ||
3230 | JSAMPROW elemptr; | ||
3231 | int ctr; | ||
3232 | SHIFT_TEMPS | ||
3233 | |||
3234 | /* Pre-zero output coefficient block. */ | ||
3235 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
3236 | |||
3237 | /* Pass 1: process rows. */ | ||
3238 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
3239 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
3240 | /* We must also scale the output by (8/4)*(8/2) = 2**3, which we add here. */ | ||
3241 | /* 4-point FDCT kernel, */ | ||
3242 | /* cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. */ | ||
3243 | |||
3244 | dataptr = data; | ||
3245 | for (ctr = 0; ctr < 2; ctr++) { | ||
3246 | elemptr = sample_data[ctr] + start_col; | ||
3247 | |||
3248 | /* Even part */ | ||
3249 | |||
3250 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]); | ||
3251 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); | ||
3252 | |||
3253 | tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); | ||
3254 | tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); | ||
3255 | |||
3256 | /* Apply unsigned->signed conversion */ | ||
3257 | dataptr[0] = (DCTELEM) | ||
3258 | ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+3)); | ||
3259 | dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+3)); | ||
3260 | |||
3261 | /* Odd part */ | ||
3262 | |||
3263 | tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ | ||
3264 | /* Add fudge factor here for final descale. */ | ||
3265 | tmp0 += ONE << (CONST_BITS-PASS1_BITS-4); | ||
3266 | |||
3267 | dataptr[1] = (DCTELEM) | ||
3268 | RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ | ||
3269 | CONST_BITS-PASS1_BITS-3); | ||
3270 | dataptr[3] = (DCTELEM) | ||
3271 | RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ | ||
3272 | CONST_BITS-PASS1_BITS-3); | ||
3273 | |||
3274 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
3275 | } | ||
3276 | |||
3277 | /* Pass 2: process columns. | ||
3278 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
3279 | * by an overall factor of 8. | ||
3280 | */ | ||
3281 | |||
3282 | dataptr = data; | ||
3283 | for (ctr = 0; ctr < 4; ctr++) { | ||
3284 | /* Even part */ | ||
3285 | |||
3286 | /* Add fudge factor here for final descale. */ | ||
3287 | tmp0 = dataptr[DCTSIZE*0] + (ONE << (PASS1_BITS-1)); | ||
3288 | tmp1 = dataptr[DCTSIZE*1]; | ||
3289 | |||
3290 | dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS); | ||
3291 | |||
3292 | /* Odd part */ | ||
3293 | |||
3294 | dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS); | ||
3295 | |||
3296 | dataptr++; /* advance pointer to next column */ | ||
3297 | } | ||
3298 | } | ||
3299 | |||
3300 | |||
3301 | /* | ||
3302 | * Perform the forward DCT on a 2x1 sample block. | ||
3303 | * | ||
3304 | * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns). | ||
3305 | */ | ||
3306 | |||
3307 | GLOBAL(void) | ||
3308 | jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
3309 | { | ||
3310 | INT32 tmp0, tmp1; | ||
3311 | JSAMPROW elemptr; | ||
3312 | |||
3313 | /* Pre-zero output coefficient block. */ | ||
3314 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
3315 | |||
3316 | elemptr = sample_data[0] + start_col; | ||
3317 | |||
3318 | tmp0 = GETJSAMPLE(elemptr[0]); | ||
3319 | tmp1 = GETJSAMPLE(elemptr[1]); | ||
3320 | |||
3321 | /* We leave the results scaled up by an overall factor of 8. | ||
3322 | * We must also scale the output by (8/2)*(8/1) = 2**5. | ||
3323 | */ | ||
3324 | |||
3325 | /* Even part */ | ||
3326 | /* Apply unsigned->signed conversion */ | ||
3327 | data[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5); | ||
3328 | |||
3329 | /* Odd part */ | ||
3330 | data[1] = (DCTELEM) ((tmp0 - tmp1) << 5); | ||
3331 | } | ||
3332 | |||
3333 | |||
3334 | /* | ||
3335 | * Perform the forward DCT on an 8x16 sample block. | ||
3336 | * | ||
3337 | * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns). | ||
3338 | */ | ||
3339 | |||
3340 | GLOBAL(void) | ||
3341 | jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
3342 | { | ||
3343 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | ||
3344 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17; | ||
3345 | INT32 z1; | ||
3346 | DCTELEM workspace[DCTSIZE2]; | ||
3347 | DCTELEM *dataptr; | ||
3348 | DCTELEM *wsptr; | ||
3349 | JSAMPROW elemptr; | ||
3350 | int ctr; | ||
3351 | SHIFT_TEMPS | ||
3352 | |||
3353 | /* Pass 1: process rows. */ | ||
3354 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
3355 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
3356 | |||
3357 | dataptr = data; | ||
3358 | ctr = 0; | ||
3359 | for (;;) { | ||
3360 | elemptr = sample_data[ctr] + start_col; | ||
3361 | |||
3362 | /* Even part per LL&M figure 1 --- note that published figure is faulty; | ||
3363 | * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". | ||
3364 | */ | ||
3365 | |||
3366 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]); | ||
3367 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]); | ||
3368 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]); | ||
3369 | tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]); | ||
3370 | |||
3371 | tmp10 = tmp0 + tmp3; | ||
3372 | tmp12 = tmp0 - tmp3; | ||
3373 | tmp11 = tmp1 + tmp2; | ||
3374 | tmp13 = tmp1 - tmp2; | ||
3375 | |||
3376 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]); | ||
3377 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]); | ||
3378 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]); | ||
3379 | tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]); | ||
3380 | |||
3381 | /* Apply unsigned->signed conversion */ | ||
3382 | dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS); | ||
3383 | dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS); | ||
3384 | |||
3385 | z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | ||
3386 | dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), | ||
3387 | CONST_BITS-PASS1_BITS); | ||
3388 | dataptr[6] = (DCTELEM) DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), | ||
3389 | CONST_BITS-PASS1_BITS); | ||
3390 | |||
3391 | /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | ||
3392 | * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). | ||
3393 | * i0..i3 in the paper are tmp0..tmp3 here. | ||
3394 | */ | ||
3395 | |||
3396 | tmp10 = tmp0 + tmp3; | ||
3397 | tmp11 = tmp1 + tmp2; | ||
3398 | tmp12 = tmp0 + tmp2; | ||
3399 | tmp13 = tmp1 + tmp3; | ||
3400 | z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ | ||
3401 | |||
3402 | tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ | ||
3403 | tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ | ||
3404 | tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ | ||
3405 | tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ | ||
3406 | tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ | ||
3407 | tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ | ||
3408 | tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ | ||
3409 | tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ | ||
3410 | |||
3411 | tmp12 += z1; | ||
3412 | tmp13 += z1; | ||
3413 | |||
3414 | dataptr[1] = (DCTELEM) DESCALE(tmp0 + tmp10 + tmp12, CONST_BITS-PASS1_BITS); | ||
3415 | dataptr[3] = (DCTELEM) DESCALE(tmp1 + tmp11 + tmp13, CONST_BITS-PASS1_BITS); | ||
3416 | dataptr[5] = (DCTELEM) DESCALE(tmp2 + tmp11 + tmp12, CONST_BITS-PASS1_BITS); | ||
3417 | dataptr[7] = (DCTELEM) DESCALE(tmp3 + tmp10 + tmp13, CONST_BITS-PASS1_BITS); | ||
3418 | |||
3419 | ctr++; | ||
3420 | |||
3421 | if (ctr != DCTSIZE) { | ||
3422 | if (ctr == DCTSIZE * 2) | ||
3423 | break; /* Done. */ | ||
3424 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
3425 | } else | ||
3426 | dataptr = workspace; /* switch pointer to extended workspace */ | ||
3427 | } | ||
3428 | |||
3429 | /* Pass 2: process columns. | ||
3430 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
3431 | * by an overall factor of 8. | ||
3432 | * We must also scale the output by 8/16 = 1/2. | ||
3433 | * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32). | ||
3434 | */ | ||
3435 | |||
3436 | dataptr = data; | ||
3437 | wsptr = workspace; | ||
3438 | for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { | ||
3439 | /* Even part */ | ||
3440 | |||
3441 | tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7]; | ||
3442 | tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6]; | ||
3443 | tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5]; | ||
3444 | tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4]; | ||
3445 | tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3]; | ||
3446 | tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2]; | ||
3447 | tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1]; | ||
3448 | tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0]; | ||
3449 | |||
3450 | tmp10 = tmp0 + tmp7; | ||
3451 | tmp14 = tmp0 - tmp7; | ||
3452 | tmp11 = tmp1 + tmp6; | ||
3453 | tmp15 = tmp1 - tmp6; | ||
3454 | tmp12 = tmp2 + tmp5; | ||
3455 | tmp16 = tmp2 - tmp5; | ||
3456 | tmp13 = tmp3 + tmp4; | ||
3457 | tmp17 = tmp3 - tmp4; | ||
3458 | |||
3459 | tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7]; | ||
3460 | tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6]; | ||
3461 | tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5]; | ||
3462 | tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4]; | ||
3463 | tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3]; | ||
3464 | tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2]; | ||
3465 | tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1]; | ||
3466 | tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0]; | ||
3467 | |||
3468 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
3469 | DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1); | ||
3470 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
3471 | DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */ | ||
3472 | MULTIPLY(tmp11 - tmp12, FIX_0_541196100), /* c12[16] = c6[8] */ | ||
3473 | CONST_BITS+PASS1_BITS+1); | ||
3474 | |||
3475 | tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) + /* c14[16] = c7[8] */ | ||
3476 | MULTIPLY(tmp14 - tmp16, FIX(1.387039845)); /* c2[16] = c1[8] */ | ||
3477 | |||
3478 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
3479 | DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982)) /* c6+c14 */ | ||
3480 | + MULTIPLY(tmp16, FIX(2.172734804)), /* c2+c10 */ | ||
3481 | CONST_BITS+PASS1_BITS+1); | ||
3482 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
3483 | DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243)) /* c2-c6 */ | ||
3484 | - MULTIPLY(tmp17, FIX(1.061594338)), /* c10+c14 */ | ||
3485 | CONST_BITS+PASS1_BITS+1); | ||
3486 | |||
3487 | /* Odd part */ | ||
3488 | |||
3489 | tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) + /* c3 */ | ||
3490 | MULTIPLY(tmp6 - tmp7, FIX(0.410524528)); /* c13 */ | ||
3491 | tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) + /* c5 */ | ||
3492 | MULTIPLY(tmp5 + tmp7, FIX(0.666655658)); /* c11 */ | ||
3493 | tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) + /* c7 */ | ||
3494 | MULTIPLY(tmp4 - tmp7, FIX(0.897167586)); /* c9 */ | ||
3495 | tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) + /* c15 */ | ||
3496 | MULTIPLY(tmp6 - tmp5, FIX(1.407403738)); /* c1 */ | ||
3497 | tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) + /* -c11 */ | ||
3498 | MULTIPLY(tmp4 + tmp6, - FIX(1.247225013)); /* -c5 */ | ||
3499 | tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) + /* -c3 */ | ||
3500 | MULTIPLY(tmp5 - tmp4, FIX(0.410524528)); /* c13 */ | ||
3501 | tmp10 = tmp11 + tmp12 + tmp13 - | ||
3502 | MULTIPLY(tmp0, FIX(2.286341144)) + /* c7+c5+c3-c1 */ | ||
3503 | MULTIPLY(tmp7, FIX(0.779653625)); /* c15+c13-c11+c9 */ | ||
3504 | tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */ | ||
3505 | - MULTIPLY(tmp6, FIX(1.663905119)); /* c7+c13+c1-c5 */ | ||
3506 | tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */ | ||
3507 | + MULTIPLY(tmp5, FIX(1.227391138)); /* c9-c11+c1-c13 */ | ||
3508 | tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */ | ||
3509 | + MULTIPLY(tmp4, FIX(2.167985692)); /* c1+c13+c5-c9 */ | ||
3510 | |||
3511 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1); | ||
3512 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1); | ||
3513 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1); | ||
3514 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1); | ||
3515 | |||
3516 | dataptr++; /* advance pointer to next column */ | ||
3517 | wsptr++; /* advance pointer to next column */ | ||
3518 | } | ||
3519 | } | ||
3520 | |||
3521 | |||
3522 | /* | ||
3523 | * Perform the forward DCT on a 7x14 sample block. | ||
3524 | * | ||
3525 | * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns). | ||
3526 | */ | ||
3527 | |||
3528 | GLOBAL(void) | ||
3529 | jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
3530 | { | ||
3531 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; | ||
3532 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; | ||
3533 | INT32 z1, z2, z3; | ||
3534 | DCTELEM workspace[8*6]; | ||
3535 | DCTELEM *dataptr; | ||
3536 | DCTELEM *wsptr; | ||
3537 | JSAMPROW elemptr; | ||
3538 | int ctr; | ||
3539 | SHIFT_TEMPS | ||
3540 | |||
3541 | /* Pre-zero output coefficient block. */ | ||
3542 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
3543 | |||
3544 | /* Pass 1: process rows. */ | ||
3545 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
3546 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
3547 | /* 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14). */ | ||
3548 | |||
3549 | dataptr = data; | ||
3550 | ctr = 0; | ||
3551 | for (;;) { | ||
3552 | elemptr = sample_data[ctr] + start_col; | ||
3553 | |||
3554 | /* Even part */ | ||
3555 | |||
3556 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]); | ||
3557 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]); | ||
3558 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]); | ||
3559 | tmp3 = GETJSAMPLE(elemptr[3]); | ||
3560 | |||
3561 | tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]); | ||
3562 | tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]); | ||
3563 | tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]); | ||
3564 | |||
3565 | z1 = tmp0 + tmp2; | ||
3566 | /* Apply unsigned->signed conversion */ | ||
3567 | dataptr[0] = (DCTELEM) | ||
3568 | ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS); | ||
3569 | tmp3 += tmp3; | ||
3570 | z1 -= tmp3; | ||
3571 | z1 -= tmp3; | ||
3572 | z1 = MULTIPLY(z1, FIX(0.353553391)); /* (c2+c6-c4)/2 */ | ||
3573 | z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002)); /* (c2+c4-c6)/2 */ | ||
3574 | z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123)); /* c6 */ | ||
3575 | dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS); | ||
3576 | z1 -= z2; | ||
3577 | z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734)); /* c4 */ | ||
3578 | dataptr[4] = (DCTELEM) | ||
3579 | DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */ | ||
3580 | CONST_BITS-PASS1_BITS); | ||
3581 | dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS); | ||
3582 | |||
3583 | /* Odd part */ | ||
3584 | |||
3585 | tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347)); /* (c3+c1-c5)/2 */ | ||
3586 | tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339)); /* (c3+c5-c1)/2 */ | ||
3587 | tmp0 = tmp1 - tmp2; | ||
3588 | tmp1 += tmp2; | ||
3589 | tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */ | ||
3590 | tmp1 += tmp2; | ||
3591 | tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268)); /* c5 */ | ||
3592 | tmp0 += tmp3; | ||
3593 | tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693)); /* c3+c1-c5 */ | ||
3594 | |||
3595 | dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS); | ||
3596 | dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS); | ||
3597 | dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS); | ||
3598 | |||
3599 | ctr++; | ||
3600 | |||
3601 | if (ctr != DCTSIZE) { | ||
3602 | if (ctr == 14) | ||
3603 | break; /* Done. */ | ||
3604 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
3605 | } else | ||
3606 | dataptr = workspace; /* switch pointer to extended workspace */ | ||
3607 | } | ||
3608 | |||
3609 | /* Pass 2: process columns. | ||
3610 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
3611 | * by an overall factor of 8. | ||
3612 | * We must also scale the output by (8/7)*(8/14) = 32/49, which we | ||
3613 | * fold into the constant multipliers: | ||
3614 | * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49. | ||
3615 | */ | ||
3616 | |||
3617 | dataptr = data; | ||
3618 | wsptr = workspace; | ||
3619 | for (ctr = 0; ctr < 7; ctr++) { | ||
3620 | /* Even part */ | ||
3621 | |||
3622 | tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5]; | ||
3623 | tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4]; | ||
3624 | tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3]; | ||
3625 | tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2]; | ||
3626 | tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1]; | ||
3627 | tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0]; | ||
3628 | tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; | ||
3629 | |||
3630 | tmp10 = tmp0 + tmp6; | ||
3631 | tmp14 = tmp0 - tmp6; | ||
3632 | tmp11 = tmp1 + tmp5; | ||
3633 | tmp15 = tmp1 - tmp5; | ||
3634 | tmp12 = tmp2 + tmp4; | ||
3635 | tmp16 = tmp2 - tmp4; | ||
3636 | |||
3637 | tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5]; | ||
3638 | tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4]; | ||
3639 | tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3]; | ||
3640 | tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2]; | ||
3641 | tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1]; | ||
3642 | tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0]; | ||
3643 | tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; | ||
3644 | |||
3645 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
3646 | DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13, | ||
3647 | FIX(0.653061224)), /* 32/49 */ | ||
3648 | CONST_BITS+PASS1_BITS); | ||
3649 | tmp13 += tmp13; | ||
3650 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
3651 | DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */ | ||
3652 | MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */ | ||
3653 | MULTIPLY(tmp12 - tmp13, FIX(0.575835255)), /* c8 */ | ||
3654 | CONST_BITS+PASS1_BITS); | ||
3655 | |||
3656 | tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570)); /* c6 */ | ||
3657 | |||
3658 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
3659 | DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691)) /* c2-c6 */ | ||
3660 | + MULTIPLY(tmp16, FIX(0.400721155)), /* c10 */ | ||
3661 | CONST_BITS+PASS1_BITS); | ||
3662 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
3663 | DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725)) /* c6+c10 */ | ||
3664 | - MULTIPLY(tmp16, FIX(0.900412262)), /* c2 */ | ||
3665 | CONST_BITS+PASS1_BITS); | ||
3666 | |||
3667 | /* Odd part */ | ||
3668 | |||
3669 | tmp10 = tmp1 + tmp2; | ||
3670 | tmp11 = tmp5 - tmp4; | ||
3671 | dataptr[DCTSIZE*7] = (DCTELEM) | ||
3672 | DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6, | ||
3673 | FIX(0.653061224)), /* 32/49 */ | ||
3674 | CONST_BITS+PASS1_BITS); | ||
3675 | tmp3 = MULTIPLY(tmp3 , FIX(0.653061224)); /* 32/49 */ | ||
3676 | tmp10 = MULTIPLY(tmp10, - FIX(0.103406812)); /* -c13 */ | ||
3677 | tmp11 = MULTIPLY(tmp11, FIX(0.917760839)); /* c1 */ | ||
3678 | tmp10 += tmp11 - tmp3; | ||
3679 | tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) + /* c5 */ | ||
3680 | MULTIPLY(tmp4 + tmp6, FIX(0.491367823)); /* c9 */ | ||
3681 | dataptr[DCTSIZE*5] = (DCTELEM) | ||
3682 | DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */ | ||
3683 | + MULTIPLY(tmp4, FIX(0.731428202)), /* c1+c11-c9 */ | ||
3684 | CONST_BITS+PASS1_BITS); | ||
3685 | tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) + /* c3 */ | ||
3686 | MULTIPLY(tmp5 - tmp6, FIX(0.305035186)); /* c11 */ | ||
3687 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
3688 | DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */ | ||
3689 | - MULTIPLY(tmp5, FIX(2.004803435)), /* c1+c5+c11 */ | ||
3690 | CONST_BITS+PASS1_BITS); | ||
3691 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
3692 | DESCALE(tmp11 + tmp12 + tmp3 | ||
3693 | - MULTIPLY(tmp0, FIX(0.735987049)) /* c3+c5-c1 */ | ||
3694 | - MULTIPLY(tmp6, FIX(0.082925825)), /* c9-c11-c13 */ | ||
3695 | CONST_BITS+PASS1_BITS); | ||
3696 | |||
3697 | dataptr++; /* advance pointer to next column */ | ||
3698 | wsptr++; /* advance pointer to next column */ | ||
3699 | } | ||
3700 | } | ||
3701 | |||
3702 | |||
3703 | /* | ||
3704 | * Perform the forward DCT on a 6x12 sample block. | ||
3705 | * | ||
3706 | * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns). | ||
 *
 * data:        output coefficient block.  All DCTSIZE2 entries are zeroed
 *              first; the final results occupy columns 0..5 of the rows at
 *              offsets DCTSIZE*0 .. DCTSIZE*7.
 * sample_data: array of sample row pointers; twelve rows (index 0..11)
 *              are read.
 * start_col:   offset added to every row pointer; the six samples starting
 *              at that offset are read from each row.
 *
 * Nothing is returned; the result is delivered through data.
3707 | */ | ||
3708 | |||
3709 | GLOBAL(void) | ||
3710 | jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
3711 | { | ||
3712 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; | ||
3713 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; | ||
3714 | DCTELEM workspace[8*4]; | ||
3715 | DCTELEM *dataptr; | ||
3716 | DCTELEM *wsptr; | ||
3717 | JSAMPROW elemptr; | ||
3718 | int ctr; | ||
3719 | SHIFT_TEMPS | ||
3720 | |||
3721 | /* Pre-zero output coefficient block.
 * Both passes only ever store into columns 0..5 of data, so every other
 * entry keeps the zero written here. */ | ||
3722 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
3723 | |||
3724 | /* Pass 1: process rows.
 * The loop below handles twelve rows (ctr = 0..11).  The first DCTSIZE rows
 * are written straight into data; when ctr reaches DCTSIZE the output
 * pointer is switched to the local workspace, which takes the remaining
 * rows (four of them, assuming DCTSIZE is 8 as the workspace[8*4] sizing
 * suggests). */ | ||
3725 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
3726 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
3727 | /* 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12). */ | ||
3728 | |||
3729 | dataptr = data; | ||
3730 | ctr = 0; | ||
3731 | for (;;) { | ||
3732 | elemptr = sample_data[ctr] + start_col; | ||
3733 | |||
3734 | /* Even part.
 * tmp0, tmp11 and tmp2 take the sums of the mirrored sample pairs
 * (0,5), (1,4) and (2,3).  After tmp10/tmp12 have been formed, tmp0..tmp2
 * are reused for the matching differences, which feed the odd part. */ | ||
3735 | |||
3736 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]); | ||
3737 | tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]); | ||
3738 | tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]); | ||
3739 | |||
3740 | tmp10 = tmp0 + tmp2; | ||
3741 | tmp12 = tmp0 - tmp2; | ||
3742 | |||
3743 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]); | ||
3744 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]); | ||
3745 | tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]); | ||
3746 | |||
3747 | /* Apply unsigned->signed conversion: 6 * CENTERJSAMPLE is taken off the
 * sum of the six samples.  This is the only output of the row in which
 * the offset is subtracted. */ | ||
3748 | dataptr[0] = (DCTELEM) | ||
3749 | ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS); | ||
3750 | dataptr[2] = (DCTELEM) | ||
3751 | DESCALE(MULTIPLY(tmp12, FIX(1.224744871)), /* c2 */ | ||
3752 | CONST_BITS-PASS1_BITS); | ||
3753 | dataptr[4] = (DCTELEM) | ||
3754 | DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */ | ||
3755 | CONST_BITS-PASS1_BITS); | ||
3756 | |||
3757 | /* Odd part.
 * tmp10 has already been passed through DESCALE(..., CONST_BITS-PASS1_BITS)
 * here, so the unmultiplied sums and differences combined with it are
 * shifted left by PASS1_BITS before being added. */ | ||
3758 | |||
3759 | tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)), /* c5 */ | ||
3760 | CONST_BITS-PASS1_BITS); | ||
3761 | |||
3762 | dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS)); | ||
3763 | dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS); | ||
3764 | dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS)); | ||
3765 | |||
3766 | ctr++; | ||
3767 | |||
3768 | if (ctr != DCTSIZE) { | ||
3769 | if (ctr == 12) | ||
3770 | break; /* Done: all twelve rows have been processed. */ | ||
3771 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
3772 | } else | ||
3773 | dataptr = workspace; /* switch pointer to extended workspace */ | ||
3774 | } | ||
3775 | |||
3776 | /* Pass 2: process columns. | ||
3777 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
3778 | * by an overall factor of 8. | ||
3779 | * We must also scale the output by (8/6)*(8/12) = 8/9, which we | ||
3780 | * fold into the constant multipliers: | ||
3781 | * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9. | ||
 *
 * Each of the 6 columns reads twelve pass-1 values: eight through dataptr
 * (DCTSIZE*0 .. DCTSIZE*7) and four through wsptr (DCTSIZE*0 .. DCTSIZE*3).
 * Eight results are stored back through dataptr (DCTSIZE*0 .. DCTSIZE*7).
3782 | */ | ||
3783 | |||
3784 | dataptr = data; | ||
3785 | wsptr = workspace; | ||
3786 | for (ctr = 0; ctr < 6; ctr++) { | ||
3787 | /* Even part.
 * Inputs are paired from the outside in: dataptr rows 0..3 with wsptr
 * rows 3..0, and dataptr rows 4,5 with dataptr rows 7,6.  The pair sums
 * are combined into tmp10..tmp15; tmp0..tmp5 are then reloaded with the
 * pair differences for the odd part. */ | ||
3788 | |||
3789 | tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3]; | ||
3790 | tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2]; | ||
3791 | tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1]; | ||
3792 | tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0]; | ||
3793 | tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7]; | ||
3794 | tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6]; | ||
3795 | |||
3796 | tmp10 = tmp0 + tmp5; | ||
3797 | tmp13 = tmp0 - tmp5; | ||
3798 | tmp11 = tmp1 + tmp4; | ||
3799 | tmp14 = tmp1 - tmp4; | ||
3800 | tmp12 = tmp2 + tmp3; | ||
3801 | tmp15 = tmp2 - tmp3; | ||
3802 | |||
3803 | tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3]; | ||
3804 | tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2]; | ||
3805 | tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1]; | ||
3806 | tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0]; | ||
3807 | tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7]; | ||
3808 | tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6]; | ||
3809 | |||
3810 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
3811 | DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */ | ||
3812 | CONST_BITS+PASS1_BITS); | ||
3813 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
3814 | DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */ | ||
3815 | CONST_BITS+PASS1_BITS); | ||
3816 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
3817 | DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)), /* c4 */ | ||
3818 | CONST_BITS+PASS1_BITS); | ||
3819 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
3820 | DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) + /* 8/9 */ | ||
3821 | MULTIPLY(tmp13 + tmp15, FIX(1.214244803)), /* c2 */ | ||
3822 | CONST_BITS+PASS1_BITS); | ||
3823 | |||
3824 | /* Odd part.
 * Works on the pair differences held in tmp0..tmp5; tmp10..tmp15 are
 * reused as scratch and end up holding the four odd outputs, which are
 * descaled and stored at rows 1, 3, 5 and 7 below. */ | ||
3825 | |||
3826 | tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200)); /* c9 */ | ||
3827 | tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102)); /* c3-c9 */ | ||
3828 | tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502)); /* c3+c9 */ | ||
3829 | tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603)); /* c5 */ | ||
3830 | tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039)); /* c7 */ | ||
3831 | tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */ | ||
3832 | + MULTIPLY(tmp5, FIX(0.164081699)); /* c11 */ | ||
3833 | tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */ | ||
3834 | tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */ | ||
3835 | + MULTIPLY(tmp5, FIX(0.765261039)); /* c7 */ | ||
3836 | tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */ | ||
3837 | - MULTIPLY(tmp5, FIX(0.997307603)); /* c5 */ | ||
3838 | tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */ | ||
3839 | - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */ | ||
3840 | |||
3841 | dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS); | ||
3842 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS); | ||
3843 | dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS); | ||
3844 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS); | ||
3845 | |||
3846 | dataptr++; /* advance pointer to next column */ | ||
3847 | wsptr++; /* advance pointer to next column */ | ||
3848 | } | ||
3849 | } | ||
3850 | |||
3851 | |||
3852 | /* | ||
3853 | * Perform the forward DCT on a 5x10 sample block. | ||
3854 | * | ||
3855 | * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns). | ||
 *
 * data:        output coefficient block.  All DCTSIZE2 entries are zeroed
 *              first; the final results occupy columns 0..4 of the rows at
 *              offsets DCTSIZE*0 .. DCTSIZE*7.
 * sample_data: array of sample row pointers; ten rows (index 0..9) are read.
 * start_col:   offset added to every row pointer; the five samples starting
 *              at that offset are read from each row.
 *
 * Nothing is returned; the result is delivered through data.
3856 | */ | ||
3857 | |||
3858 | GLOBAL(void) | ||
3859 | jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
3860 | { | ||
3861 | INT32 tmp0, tmp1, tmp2, tmp3, tmp4; | ||
3862 | INT32 tmp10, tmp11, tmp12, tmp13, tmp14; | ||
3863 | DCTELEM workspace[8*2]; | ||
3864 | DCTELEM *dataptr; | ||
3865 | DCTELEM *wsptr; | ||
3866 | JSAMPROW elemptr; | ||
3867 | int ctr; | ||
3868 | SHIFT_TEMPS | ||
3869 | |||
3870 | /* Pre-zero output coefficient block.
 * Both passes only ever store into columns 0..4 of data, so every other
 * entry keeps the zero written here. */ | ||
3871 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
3872 | |||
3873 | /* Pass 1: process rows.
 * The loop below handles ten rows (ctr = 0..9).  The first DCTSIZE rows are
 * written straight into data; when ctr reaches DCTSIZE the output pointer
 * is switched to the local workspace, which takes the remaining rows (two
 * of them, assuming DCTSIZE is 8 as the workspace[8*2] sizing suggests). */ | ||
3874 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
3875 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
3876 | /* 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10). */ | ||
3877 | |||
3878 | dataptr = data; | ||
3879 | ctr = 0; | ||
3880 | for (;;) { | ||
3881 | elemptr = sample_data[ctr] + start_col; | ||
3882 | |||
3883 | /* Even part.
 * tmp0 and tmp1 take the sums of the mirrored sample pairs (0,4) and
 * (1,3); tmp2 is the unpaired middle sample.  After tmp10/tmp11 have
 * been formed, tmp0 and tmp1 are reused for the matching differences,
 * which feed the odd part. */ | ||
3884 | |||
3885 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]); | ||
3886 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]); | ||
3887 | tmp2 = GETJSAMPLE(elemptr[2]); | ||
3888 | |||
3889 | tmp10 = tmp0 + tmp1; | ||
3890 | tmp11 = tmp0 - tmp1; | ||
3891 | |||
3892 | tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]); | ||
3893 | tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]); | ||
3894 | |||
3895 | /* Apply unsigned->signed conversion: 5 * CENTERJSAMPLE is taken off the
 * sum of the five samples.  This is the only output of the row in which
 * the offset is subtracted.  Afterwards tmp10 is turned into the pair
 * sum minus four times the middle sample before being multiplied. */ | ||
3896 | dataptr[0] = (DCTELEM) | ||
3897 | ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS); | ||
3898 | tmp11 = MULTIPLY(tmp11, FIX(0.790569415)); /* (c2+c4)/2 */ | ||
3899 | tmp10 -= tmp2 << 2; | ||
3900 | tmp10 = MULTIPLY(tmp10, FIX(0.353553391)); /* (c2-c4)/2 */ | ||
3901 | dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS); | ||
3902 | dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS); | ||
3903 | |||
3904 | /* Odd part.
 * Uses the pair differences in tmp0 and tmp1; the shared product tmp10
 * is computed once and used by both odd outputs. */ | ||
3905 | |||
3906 | tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876)); /* c3 */ | ||
3907 | |||
3908 | dataptr[1] = (DCTELEM) | ||
3909 | DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */ | ||
3910 | CONST_BITS-PASS1_BITS); | ||
3911 | dataptr[3] = (DCTELEM) | ||
3912 | DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */ | ||
3913 | CONST_BITS-PASS1_BITS); | ||
3914 | |||
3915 | ctr++; | ||
3916 | |||
3917 | if (ctr != DCTSIZE) { | ||
3918 | if (ctr == 10) | ||
3919 | break; /* Done: all ten rows have been processed. */ | ||
3920 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
3921 | } else | ||
3922 | dataptr = workspace; /* switch pointer to extended workspace */ | ||
3923 | } | ||
3924 | |||
3925 | /* Pass 2: process columns. | ||
3926 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
3927 | * by an overall factor of 8. | ||
3928 | * We must also scale the output by (8/5)*(8/10) = 32/25, which we | ||
3929 | * fold into the constant multipliers: | ||
3930 | * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25. | ||
 *
 * Each of the 5 columns reads ten pass-1 values: eight through dataptr
 * (DCTSIZE*0 .. DCTSIZE*7) and two through wsptr (DCTSIZE*0, DCTSIZE*1).
 * Eight results are stored back through dataptr (DCTSIZE*0 .. DCTSIZE*7).
3931 | */ | ||
3932 | |||
3933 | dataptr = data; | ||
3934 | wsptr = workspace; | ||
3935 | for (ctr = 0; ctr < 5; ctr++) { | ||
3936 | /* Even part.
 * Inputs are paired from the outside in: dataptr rows 0,1 with wsptr
 * rows 1,0, and dataptr rows 2,3,4 with dataptr rows 7,6,5.  The sum of
 * the middle pair lands directly in tmp12; tmp0..tmp4 are then reloaded
 * with the pair differences for the odd part. */ | ||
3937 | |||
3938 | tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1]; | ||
3939 | tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0]; | ||
3940 | tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7]; | ||
3941 | tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6]; | ||
3942 | tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; | ||
3943 | |||
3944 | tmp10 = tmp0 + tmp4; | ||
3945 | tmp13 = tmp0 - tmp4; | ||
3946 | tmp11 = tmp1 + tmp3; | ||
3947 | tmp14 = tmp1 - tmp3; | ||
3948 | |||
3949 | tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1]; | ||
3950 | tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0]; | ||
3951 | tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7]; | ||
3952 | tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6]; | ||
3953 | tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; | ||
3954 | |||
3955 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
3956 | DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */ | ||
3957 | CONST_BITS+PASS1_BITS); | ||
3958 | tmp12 += tmp12; | ||
3959 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
3960 | DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */ | ||
3961 | MULTIPLY(tmp11 - tmp12, FIX(0.559380511)), /* c8 */ | ||
3962 | CONST_BITS+PASS1_BITS); | ||
3963 | tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961)); /* c6 */ | ||
3964 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
3965 | DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)), /* c2-c6 */ | ||
3966 | CONST_BITS+PASS1_BITS); | ||
3967 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
3968 | DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)), /* c2+c6 */ | ||
3969 | CONST_BITS+PASS1_BITS); | ||
3970 | |||
3971 | /* Odd part.
 * Works on the pair differences held in tmp0..tmp4.  Note that tmp2 is
 * replaced by its scaled value after row 5 has been stored, and that
 * scaled value is what rows 1, 3 and 7 use. */ | ||
3972 | |||
3973 | tmp10 = tmp0 + tmp4; | ||
3974 | tmp11 = tmp1 - tmp3; | ||
3975 | dataptr[DCTSIZE*5] = (DCTELEM) | ||
3976 | DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)), /* 32/25 */ | ||
3977 | CONST_BITS+PASS1_BITS); | ||
3978 | tmp2 = MULTIPLY(tmp2, FIX(1.28)); /* 32/25 */ | ||
3979 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
3980 | DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) + /* c1 */ | ||
3981 | MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 + /* c3 */ | ||
3982 | MULTIPLY(tmp3, FIX(0.821810588)) + /* c7 */ | ||
3983 | MULTIPLY(tmp4, FIX(0.283176630)), /* c9 */ | ||
3984 | CONST_BITS+PASS1_BITS); | ||
3985 | tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) - /* (c3+c7)/2 */ | ||
3986 | MULTIPLY(tmp1 + tmp3, FIX(0.752365123)); /* (c1-c9)/2 */ | ||
3987 | tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) + /* (c3-c7)/2 */ | ||
3988 | MULTIPLY(tmp11, FIX(0.64)) - tmp2; /* 16/25 */ | ||
3989 | dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS); | ||
3990 | dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS); | ||
3991 | |||
3992 | dataptr++; /* advance pointer to next column */ | ||
3993 | wsptr++; /* advance pointer to next column */ | ||
3994 | } | ||
3995 | } | ||
3996 | |||
3997 | |||
3998 | /* | ||
3999 | * Perform the forward DCT on a 4x8 sample block. | ||
4000 | * | ||
4001 | * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns). | ||
 *
 * data:        output coefficient block.  All DCTSIZE2 entries are zeroed
 *              first; the final results occupy columns 0..3 of the rows at
 *              offsets DCTSIZE*0 .. DCTSIZE*7.
 * sample_data: array of sample row pointers; DCTSIZE rows are read.
 * start_col:   offset added to every row pointer; the four samples starting
 *              at that offset are read from each row.
 *
 * Nothing is returned; the result is delivered through data.  No extra
 * workspace is used: pass 2 operates in place on what pass 1 stored.
4002 | */ | ||
4003 | |||
4004 | GLOBAL(void) | ||
4005 | jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
4006 | { | ||
4007 | INT32 tmp0, tmp1, tmp2, tmp3; | ||
4008 | INT32 tmp10, tmp11, tmp12, tmp13; | ||
4009 | INT32 z1; | ||
4010 | DCTELEM *dataptr; | ||
4011 | JSAMPROW elemptr; | ||
4012 | int ctr; | ||
4013 | SHIFT_TEMPS | ||
4014 | |||
4015 | /* Pre-zero output coefficient block.
 * Both passes only ever store into columns 0..3 of data, so every other
 * entry keeps the zero written here. */ | ||
4016 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
4017 | |||
4018 | /* Pass 1: process rows.
 * One iteration per row, DCTSIZE rows in total, four outputs per row. */ | ||
4019 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
4020 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
4021 | /* We must also scale the output by 8/4 = 2, which we add here. */ | ||
4022 | /* 4-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). */ | ||
4023 | |||
4024 | dataptr = data; | ||
4025 | for (ctr = 0; ctr < DCTSIZE; ctr++) { | ||
4026 | elemptr = sample_data[ctr] + start_col; | ||
4027 | |||
4028 | /* Even part.
 * tmp0 and tmp1 are the sums of the mirrored sample pairs (0,3) and
 * (1,2); tmp10 and tmp11 are the matching differences for the odd part. */ | ||
4029 | |||
4030 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]); | ||
4031 | tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]); | ||
4032 | |||
4033 | tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]); | ||
4034 | tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]); | ||
4035 | |||
4036 | /* Apply unsigned->signed conversion: 4 * CENTERJSAMPLE is taken off the
 * sum of the four samples.  The shift is PASS1_BITS+1 rather than
 * PASS1_BITS, which supplies the extra factor of 2 mentioned above. */ | ||
4037 | dataptr[0] = (DCTELEM) | ||
4038 | ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1)); | ||
4039 | dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1)); | ||
4040 | |||
4041 | /* Odd part.  tmp0 is reused here for the shared product. */ | ||
4042 | |||
4043 | tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ | ||
4044 | /* Add fudge factor here for final descale.
 * The constant is half of 1 << (CONST_BITS-PASS1_BITS-1), the amount the
 * two RIGHT_SHIFT calls below shift by, presumably so that the shifted
 * results are rounded rather than truncated. */ | ||
4045 | tmp0 += ONE << (CONST_BITS-PASS1_BITS-2); | ||
4046 | |||
4047 | dataptr[1] = (DCTELEM) | ||
4048 | RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ | ||
4049 | CONST_BITS-PASS1_BITS-1); | ||
4050 | dataptr[3] = (DCTELEM) | ||
4051 | RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ | ||
4052 | CONST_BITS-PASS1_BITS-1); | ||
4053 | |||
4054 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
4055 | } | ||
4056 | |||
4057 | /* Pass 2: process columns. | ||
4058 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
4059 | * by an overall factor of 8. | ||
 *
 * Each of the 4 columns reads the eight values at DCTSIZE*0 .. DCTSIZE*7
 * and overwrites them in place with the eight results.
4060 | */ | ||
4061 | |||
4062 | dataptr = data; | ||
4063 | for (ctr = 0; ctr < 4; ctr++) { | ||
4064 | /* Even part per LL&M figure 1 --- note that published figure is faulty; | ||
4065 | * rotator "sqrt(2)*c1" should be "sqrt(2)*c6". | ||
4066 | */ | ||
4067 | |||
4068 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7]; | ||
4069 | tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6]; | ||
4070 | tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5]; | ||
4071 | tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; | ||
4072 | |||
4073 | /* Add fudge factor here for final descale.
 * Only tmp10 carries it; it reaches both rows 0 and 4 because each of
 * them is formed from tmp10 and shifted right by PASS1_BITS. */ | ||
4074 | tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1)); | ||
4075 | tmp12 = tmp0 - tmp3; | ||
4076 | tmp11 = tmp1 + tmp2; | ||
4077 | tmp13 = tmp1 - tmp2; | ||
4078 | |||
4079 | tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7]; | ||
4080 | tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6]; | ||
4081 | tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; | ||
4082 | tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; | ||
4083 | |||
4084 | dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS); | ||
4085 | dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS); | ||
4086 | |||
4087 | z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100); | ||
4088 | /* Add fudge factor here for final descale.
 * Half of 1 << (CONST_BITS+PASS1_BITS), the shift used for rows 2 and 6. */ | ||
4089 | z1 += ONE << (CONST_BITS+PASS1_BITS-1); | ||
4090 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
4091 | RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), CONST_BITS+PASS1_BITS); | ||
4092 | dataptr[DCTSIZE*6] = (DCTELEM) | ||
4093 | RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), CONST_BITS+PASS1_BITS); | ||
4094 | |||
4095 | /* Odd part per figure 8 --- note paper omits factor of sqrt(2). | ||
4096 | * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16). | ||
4097 | * i0..i3 in the paper are tmp0..tmp3 here. | ||
4098 | */ | ||
4099 | |||
4100 | tmp10 = tmp0 + tmp3; | ||
4101 | tmp11 = tmp1 + tmp2; | ||
4102 | tmp12 = tmp0 + tmp2; | ||
4103 | tmp13 = tmp1 + tmp3; | ||
4104 | z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602); /* c3 */ | ||
4105 | /* Add fudge factor here for final descale.
 * z1 is folded into tmp12 and tmp13 below, and every odd output includes
 * exactly one of those two, so each output receives the constant once. */ | ||
4106 | z1 += ONE << (CONST_BITS+PASS1_BITS-1); | ||
4107 | |||
4108 | tmp0 = MULTIPLY(tmp0, FIX_1_501321110); /* c1+c3-c5-c7 */ | ||
4109 | tmp1 = MULTIPLY(tmp1, FIX_3_072711026); /* c1+c3+c5-c7 */ | ||
4110 | tmp2 = MULTIPLY(tmp2, FIX_2_053119869); /* c1+c3-c5+c7 */ | ||
4111 | tmp3 = MULTIPLY(tmp3, FIX_0_298631336); /* -c1+c3+c5-c7 */ | ||
4112 | tmp10 = MULTIPLY(tmp10, - FIX_0_899976223); /* c7-c3 */ | ||
4113 | tmp11 = MULTIPLY(tmp11, - FIX_2_562915447); /* -c1-c3 */ | ||
4114 | tmp12 = MULTIPLY(tmp12, - FIX_0_390180644); /* c5-c3 */ | ||
4115 | tmp13 = MULTIPLY(tmp13, - FIX_1_961570560); /* -c3-c5 */ | ||
4116 | |||
4117 | tmp12 += z1; | ||
4118 | tmp13 += z1; | ||
4119 | |||
4120 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
4121 | RIGHT_SHIFT(tmp0 + tmp10 + tmp12, CONST_BITS+PASS1_BITS); | ||
4122 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
4123 | RIGHT_SHIFT(tmp1 + tmp11 + tmp13, CONST_BITS+PASS1_BITS); | ||
4124 | dataptr[DCTSIZE*5] = (DCTELEM) | ||
4125 | RIGHT_SHIFT(tmp2 + tmp11 + tmp12, CONST_BITS+PASS1_BITS); | ||
4126 | dataptr[DCTSIZE*7] = (DCTELEM) | ||
4127 | RIGHT_SHIFT(tmp3 + tmp10 + tmp13, CONST_BITS+PASS1_BITS); | ||
4128 | |||
4129 | dataptr++; /* advance pointer to next column */ | ||
4130 | } | ||
4131 | } | ||
4132 | |||
4133 | |||
4134 | /* | ||
4135 | * Perform the forward DCT on a 3x6 sample block. | ||
4136 | * | ||
4137 | * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns). | ||
 *
 * data:        output coefficient block.  All DCTSIZE2 entries are zeroed
 *              first; the final results occupy columns 0..2 of the rows at
 *              offsets DCTSIZE*0 .. DCTSIZE*5.
 * sample_data: array of sample row pointers; six rows (index 0..5) are read.
 * start_col:   offset added to every row pointer; the three samples starting
 *              at that offset are read from each row.
 *
 * Nothing is returned; the result is delivered through data.  No extra
 * workspace is used: pass 2 operates in place on what pass 1 stored.
4138 | */ | ||
4139 | |||
4140 | GLOBAL(void) | ||
4141 | jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
4142 | { | ||
4143 | INT32 tmp0, tmp1, tmp2; | ||
4144 | INT32 tmp10, tmp11, tmp12; | ||
4145 | DCTELEM *dataptr; | ||
4146 | JSAMPROW elemptr; | ||
4147 | int ctr; | ||
4148 | SHIFT_TEMPS | ||
4149 | |||
4150 | /* Pre-zero output coefficient block.
 * Both passes only ever store into columns 0..2 of rows 0..5, so every
 * other entry keeps the zero written here. */ | ||
4151 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
4152 | |||
4153 | /* Pass 1: process rows.
 * One iteration per row, six rows in total, three outputs per row. */ | ||
4154 | /* Note results are scaled up by sqrt(8) compared to a true DCT; */ | ||
4155 | /* furthermore, we scale the results by 2**PASS1_BITS. */ | ||
4156 | /* We scale the results further by 2 as part of output adaptation */ | ||
4157 | /* scaling for different DCT size. */ | ||
4158 | /* 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6). */ | ||
4159 | |||
4160 | dataptr = data; | ||
4161 | for (ctr = 0; ctr < 6; ctr++) { | ||
4162 | elemptr = sample_data[ctr] + start_col; | ||
4163 | |||
4164 | /* Even part.
 * tmp0 is the sum of the two outer samples, tmp1 the middle sample and
 * tmp2 the difference of the outer samples (used by the odd part). */ | ||
4165 | |||
4166 | tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]); | ||
4167 | tmp1 = GETJSAMPLE(elemptr[1]); | ||
4168 | |||
4169 | tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]); | ||
4170 | |||
4171 | /* Apply unsigned->signed conversion: 3 * CENTERJSAMPLE is taken off the
 * sum of the three samples.  The extra factor of 2 shows up as a shift
 * of PASS1_BITS+1 here and as one bit less of descaling below. */ | ||
4172 | dataptr[0] = (DCTELEM) | ||
4173 | ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1)); | ||
4174 | dataptr[2] = (DCTELEM) | ||
4175 | DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */ | ||
4176 | CONST_BITS-PASS1_BITS-1); | ||
4177 | |||
4178 | /* Odd part: a single output built from the outer-sample difference. */ | ||
4179 | |||
4180 | dataptr[1] = (DCTELEM) | ||
4181 | DESCALE(MULTIPLY(tmp2, FIX(1.224744871)), /* c1 */ | ||
4182 | CONST_BITS-PASS1_BITS-1); | ||
4183 | |||
4184 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
4185 | } | ||
4186 | |||
4187 | /* Pass 2: process columns. | ||
4188 | * We remove the PASS1_BITS scaling, but leave the results scaled up | ||
4189 | * by an overall factor of 8. | ||
4190 | * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially | ||
4191 | * fold into the constant multipliers (other part was done in pass 1): | ||
4192 | * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9. | ||
 *
 * Each of the 3 columns reads the six values at DCTSIZE*0 .. DCTSIZE*5
 * and overwrites them in place with the six results.
4193 | */ | ||
4194 | |||
4195 | dataptr = data; | ||
4196 | for (ctr = 0; ctr < 3; ctr++) { | ||
4197 | /* Even part.
 * tmp0, tmp11 and tmp2 take the sums of the mirrored row pairs (0,5),
 * (1,4) and (2,3).  After tmp10/tmp12 have been formed, tmp0..tmp2 are
 * reused for the matching differences, which feed the odd part. */ | ||
4198 | |||
4199 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5]; | ||
4200 | tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4]; | ||
4201 | tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; | ||
4202 | |||
4203 | tmp10 = tmp0 + tmp2; | ||
4204 | tmp12 = tmp0 - tmp2; | ||
4205 | |||
4206 | tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5]; | ||
4207 | tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4]; | ||
4208 | tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; | ||
4209 | |||
4210 | dataptr[DCTSIZE*0] = (DCTELEM) | ||
4211 | DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)), /* 16/9 */ | ||
4212 | CONST_BITS+PASS1_BITS); | ||
4213 | dataptr[DCTSIZE*2] = (DCTELEM) | ||
4214 | DESCALE(MULTIPLY(tmp12, FIX(2.177324216)), /* c2 */ | ||
4215 | CONST_BITS+PASS1_BITS); | ||
4216 | dataptr[DCTSIZE*4] = (DCTELEM) | ||
4217 | DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */ | ||
4218 | CONST_BITS+PASS1_BITS); | ||
4219 | |||
4220 | /* Odd part.
 * tmp10 is reused for the product shared by rows 1 and 5. */ | ||
4221 | |||
4222 | tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829)); /* c5 */ | ||
4223 | |||
4224 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
4225 | DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)), /* 16/9 */ | ||
4226 | CONST_BITS+PASS1_BITS); | ||
4227 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
4228 | DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)), /* 16/9 */ | ||
4229 | CONST_BITS+PASS1_BITS); | ||
4230 | dataptr[DCTSIZE*5] = (DCTELEM) | ||
4231 | DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)), /* 16/9 */ | ||
4232 | CONST_BITS+PASS1_BITS); | ||
4233 | |||
4234 | dataptr++; /* advance pointer to next column */ | ||
4235 | } | ||
4236 | } | ||
4237 | |||
4238 | |||
4239 | /* | ||
4240 | * Perform the forward DCT on a 2x4 sample block. | ||
4241 | * | ||
4242 | * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns). | ||
 *
 * data:        output coefficient block.  All DCTSIZE2 entries are zeroed
 *              first; the final results occupy columns 0..1 of the rows at
 *              offsets DCTSIZE*0 .. DCTSIZE*3.
 * sample_data: array of sample row pointers; four rows (index 0..3) are read.
 * start_col:   offset added to every row pointer; the two samples starting
 *              at that offset are read from each row.
 *
 * Nothing is returned; the result is delivered through data.  No extra
 * workspace is used: pass 2 operates in place on what pass 1 stored.
4243 | */ | ||
4244 | |||
4245 | GLOBAL(void) | ||
4246 | jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
4247 | { | ||
4248 | INT32 tmp0, tmp1; | ||
4249 | INT32 tmp10, tmp11; | ||
4250 | DCTELEM *dataptr; | ||
4251 | JSAMPROW elemptr; | ||
4252 | int ctr; | ||
4253 | SHIFT_TEMPS | ||
4254 | |||
4255 | /* Pre-zero output coefficient block.
 * Both passes only ever store into columns 0..1 of rows 0..3, so every
 * other entry keeps the zero written here. */ | ||
4256 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
4257 | |||
4258 | /* Pass 1: process rows.
 * One iteration per row, four rows in total, two outputs per row.  No
 * multiplications are needed here: each output is a sum or difference
 * shifted left by 3. */ | ||
4259 | /* Note results are scaled up by sqrt(8) compared to a true DCT. */ | ||
4260 | /* We must also scale the output by (8/2)*(8/4) = 2**3, which we add here. */ | ||
4261 | |||
4262 | dataptr = data; | ||
4263 | for (ctr = 0; ctr < 4; ctr++) { | ||
4264 | elemptr = sample_data[ctr] + start_col; | ||
4265 | |||
4266 | /* Even part: load the two samples of this row. */ | ||
4267 | |||
4268 | tmp0 = GETJSAMPLE(elemptr[0]); | ||
4269 | tmp1 = GETJSAMPLE(elemptr[1]); | ||
4270 | |||
4271 | /* Apply unsigned->signed conversion: 2 * CENTERJSAMPLE is taken off the
 * sum of the two samples. */ | ||
4272 | dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3); | ||
4273 | |||
4274 | /* Odd part: the plain difference; no offset is subtracted here. */ | ||
4275 | |||
4276 | dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3); | ||
4277 | |||
4278 | dataptr += DCTSIZE; /* advance pointer to next row */ | ||
4279 | } | ||
4280 | |||
4281 | /* Pass 2: process columns. | ||
4282 | * We leave the results scaled up by an overall factor of 8. | ||
4283 | * 4-point FDCT kernel, | ||
4284 | * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT]. | ||
 *
 * Each of the 2 columns reads the four values at DCTSIZE*0 .. DCTSIZE*3
 * and overwrites them in place with the four results.
4285 | */ | ||
4286 | |||
4287 | dataptr = data; | ||
4288 | for (ctr = 0; ctr < 2; ctr++) { | ||
4289 | /* Even part.
 * tmp0 and tmp1 are the sums of the mirrored row pairs (0,3) and (1,2);
 * tmp10 and tmp11 are the matching differences for the odd part.  The
 * even outputs are stored without any shift. */ | ||
4290 | |||
4291 | tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3]; | ||
4292 | tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2]; | ||
4293 | |||
4294 | tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3]; | ||
4295 | tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2]; | ||
4296 | |||
4297 | dataptr[DCTSIZE*0] = (DCTELEM) (tmp0 + tmp1); | ||
4298 | dataptr[DCTSIZE*2] = (DCTELEM) (tmp0 - tmp1); | ||
4299 | |||
4300 | /* Odd part.  tmp0 is reused here for the shared product. */ | ||
4301 | |||
4302 | tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100); /* c6 */ | ||
4303 | /* Add fudge factor here for final descale.
 * The constant is half of 1 << CONST_BITS, the amount the two RIGHT_SHIFT
 * calls below shift by, presumably so that the shifted results are
 * rounded rather than truncated. */ | ||
4304 | tmp0 += ONE << (CONST_BITS-1); | ||
4305 | |||
4306 | dataptr[DCTSIZE*1] = (DCTELEM) | ||
4307 | RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */ | ||
4308 | CONST_BITS); | ||
4309 | dataptr[DCTSIZE*3] = (DCTELEM) | ||
4310 | RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */ | ||
4311 | CONST_BITS); | ||
4312 | |||
4313 | dataptr++; /* advance pointer to next column */ | ||
4314 | } | ||
4315 | } | ||
4316 | |||
4317 | |||
4318 | /* | ||
4319 | * Perform the forward DCT on a 1x2 sample block. | ||
4320 | * | ||
4321 | * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns). | ||
 *
 * data:        output coefficient block.  All DCTSIZE2 entries are zeroed
 *              first; only data[DCTSIZE*0] and data[DCTSIZE*1] are then
 *              written.
 * sample_data: array of sample row pointers; rows 0 and 1 are read.
 * start_col:   index of the single sample read from each of the two rows.
 *
 * Nothing is returned; the result is delivered through data.  With one
 * sample per row there is no separate row pass in the code: the two
 * samples are combined directly.
4322 | */ | ||
4323 | |||
4324 | GLOBAL(void) | ||
4325 | jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col) | ||
4326 | { | ||
4327 | INT32 tmp0, tmp1; | ||
4328 | |||
4329 | /* Pre-zero output coefficient block.
 * Only two entries are stored below; every other entry keeps the zero
 * written here. */ | ||
4330 | MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2); | ||
4331 | |||
4332 | tmp0 = GETJSAMPLE(sample_data[0][start_col]); | ||
4333 | tmp1 = GETJSAMPLE(sample_data[1][start_col]); | ||
4334 | |||
4335 | /* We leave the results scaled up by an overall factor of 8. | ||
4336 | * We must also scale the output by (8/1)*(8/2) = 2**5. | ||
 * That factor is applied as the left shift by 5 on both outputs below.
4337 | */ | ||
4338 | |||
4339 | /* Even part */ | ||
4340 | /* Apply unsigned->signed conversion: 2 * CENTERJSAMPLE is taken off the
 * sum of the two samples. */ | ||
4341 | data[DCTSIZE*0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5); | ||
4342 | |||
4343 | /* Odd part: the plain difference; no offset is subtracted here. */ | ||
4344 | data[DCTSIZE*1] = (DCTELEM) ((tmp0 - tmp1) << 5); | ||
4345 | } | ||
4346 | |||
4347 | #endif /* DCT_SCALING_SUPPORTED */ | ||
4348 | #endif /* DCT_ISLOW_SUPPORTED */ | ||