author    David Walter Seikel  2012-01-04 18:41:13 +1000
committer David Walter Seikel  2012-01-04 18:41:13 +1000
commit    dd7595a3475407a7fa96a97393bae8c5220e8762 (patch)
tree      e341e911d7eb911a51684a7412ef7f7c7605d28e  /libraries/evas/src/lib/engines/common/evas_convert_yuv.c
parent    Add the skeleton. (diff)
Add the base Enlightenment Foundation Libraries - eina, eet, evas, ecore, embryo, and edje.
Note that embryo won't be used, but I'm not sure yet if you can build edje without it.
Diffstat (limited to 'libraries/evas/src/lib/engines/common/evas_convert_yuv.c')
-rw-r--r--   libraries/evas/src/lib/engines/common/evas_convert_yuv.c   1258
1 file changed, 1258 insertions, 0 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_convert_yuv.c b/libraries/evas/src/lib/engines/common/evas_convert_yuv.c
new file mode 100644
index 0000000..a0d155c
--- /dev/null
+++ b/libraries/evas/src/lib/engines/common/evas_convert_yuv.c
@@ -0,0 +1,1258 @@
#include "evas_common.h"
#include "evas_convert_yuv.h"

#if defined BUILD_MMX || defined BUILD_SSE
# include "evas_mmx.h"
#endif

#if defined HAVE_ALTIVEC_H
# include <altivec.h>
#ifdef CONFIG_DARWIN
#define AVV(x...) (x)
#else
#define AVV(x...) {x}
#endif

#endif

#ifdef BUILD_CONVERT_YUV

static void _evas_yuv_init         (void);
static void _evas_yv12torgb_sse    (unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_yv12torgb_mmx    (unsigned char **yuv, unsigned char *rgb, int w, int h);
#ifdef BUILD_ALTIVEC
static void _evas_yv12torgb_altivec(unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_yv12torgb_diz    (unsigned char **yuv, unsigned char *rgb, int w, int h);
#endif
static void _evas_yv12torgb_raster (unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_yuy2torgb_raster (unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_nv12torgb_raster (unsigned char **yuv, unsigned char *rgb, int w, int h);
static void _evas_nv12tiledtorgb_raster(unsigned char **yuv, unsigned char *rgb, int w, int h);

#define CRV    104595
#define CBU    132251
#define CGU    25624
#define CGV    53280
#define YMUL   76283
#define OFF    32768
#define BITRES 16

/* fixed point math resolution in bits */
/* i.e. RES = 6 is 10.6 fixed point */
/*      RES = 8 is 8.8 fixed point  */
/*      RES = 4 is 12.4 fixed point */
/* NB: going above 6 will lead to overflow... :( */
#define RES    6

#define RZ(i)  (i >> (BITRES - RES))
#define FOUR(i) {i, i, i, i}

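As a worked example of the fixed point reduction above (an illustrative note, not part of the original source):

/* CRV = round(1.596 * 2^16) = 104595; RZ() drops BITRES - RES = 10 bits,
 * so RZ(CRV) = 104595 >> 10 = 102, i.e. roughly 1.596 * 2^6 = 102.1.
 * Likewise RZ(YMUL) = 76283 >> 10 = 74 ~ 1.164 * 64. This loss of low
 * bits is the accuracy/overflow trade-off the NB above warns about. */
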
#if defined BUILD_MMX || defined BUILD_SSE
__attribute__ ((aligned (8))) const volatile unsigned short _const_crvcrv[4] = FOUR(RZ(CRV));
__attribute__ ((aligned (8))) const volatile unsigned short _const_cbucbu[4] = FOUR(RZ(CBU));
__attribute__ ((aligned (8))) const volatile unsigned short _const_cgucgu[4] = FOUR(RZ(CGU));
__attribute__ ((aligned (8))) const volatile unsigned short _const_cgvcgv[4] = FOUR(RZ(CGV));
__attribute__ ((aligned (8))) const volatile unsigned short _const_ymul  [4] = FOUR(RZ(YMUL));
__attribute__ ((aligned (8))) const volatile unsigned short _const_128   [4] = FOUR(128);
__attribute__ ((aligned (8))) const volatile unsigned short _const_32    [4] = FOUR(RZ(OFF));
__attribute__ ((aligned (8))) const volatile unsigned short _const_16    [4] = FOUR(16);
__attribute__ ((aligned (8))) const volatile unsigned short _const_ff    [4] = FOUR(-1);

#define CONST_CRVCRV *_const_crvcrv
#define CONST_CBUCBU *_const_cbucbu
#define CONST_CGUCGU *_const_cgucgu
#define CONST_CGVCGV *_const_cgvcgv
#define CONST_YMUL   *_const_ymul
#define CONST_128    *_const_128
#define CONST_32     *_const_32
#define CONST_16     *_const_16
#define CONST_FF     *_const_ff

/* for C non-aligned cleanup */
const int _crv = RZ(CRV); /* 1.596 */
const int _cbu = RZ(CBU); /* 2.018 */
const int _cgu = RZ(CGU); /* 0.391 */
const int _cgv = RZ(CGV); /* 0.813 */

#endif

#ifdef BUILD_ALTIVEC
#ifdef __VEC__
const vector unsigned short res     = AVV(RES);
const vector signed short   crv     = AVV(RZ(CRV));
const vector signed short   cbu     = AVV(RZ(CBU));
const vector signed short   cgu     = AVV(RZ(CGU));
const vector signed short   cgv     = AVV(RZ(CGV));
const vector signed short   ymul    = AVV(RZ(YMUL));
const vector signed short   c128    = AVV(128);
const vector signed short   c32     = AVV(RZ(OFF));
const vector signed short   c16     = AVV(16);
const vector unsigned char  zero    = AVV(0);
const vector signed short   maxchar = AVV(255);
const vector unsigned char  pickrg1 = AVV(0, 0x1, 0x11, 0,
                                          0, 0x3, 0x13, 0,
                                          0, 0x5, 0x15, 0,
                                          0, 0x7, 0x17, 0);
const vector unsigned char  pickrg2 = AVV(0, 0x9, 0x19, 0,
                                          0, 0xb, 0x1b, 0,
                                          0, 0xd, 0x1d, 0,
                                          0, 0xf, 0x1f, 0);
const vector unsigned char  pickrgb1 = AVV(0x3, 0x1, 0x2, 0x11,
                                           0x7, 0x5, 0x6, 0x13,
                                           0xb, 0x9, 0xa, 0x15,
                                           0xf, 0xd, 0xe, 0x17);
const vector unsigned char  pickrgb2 = AVV(0x3, 0x1, 0x2, 0x19,
                                           0x7, 0x5, 0x6, 0x1b,
                                           0xb, 0x9, 0xa, 0x1d,
                                           0xf, 0xd, 0xe, 0x1f);
#endif
#endif

#ifdef BUILD_C

/* shortcut speedup lookup-tables */
static short _v1164[256];
static short _v1596[256];
static short _v813[256];
static short _v391[256];
static short _v2018[256];

static unsigned char _clip_lut[1024];
#define LUT_CLIP(i) ((_clip_lut+384)[(i)])

#define CMP_CLIP(i) ((i&256)? (~(i>>10)) : i);

static int initted = 0;

#endif

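The +384 bias in LUT_CLIP() means any index from -384 to 639 is a safe table hit, so the converters can clamp with a single load. A branch-based equivalent, shown only for illustration (this helper is not in the file):

static inline unsigned char
_clip_to_byte_equivalent(int i)
{
   /* what _clip_lut encodes, computed with branches instead of a lookup */
   if (i < 0) return 0;
   if (i > 255) return 255;
   return (unsigned char)i;
}
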
void
evas_common_convert_yuv_420p_601_rgba(DATA8 **src, DATA8 *dst, int w, int h)
{
   int mmx, sse, sse2;

#if defined BUILD_MMX || defined BUILD_SSE
   evas_common_cpu_can_do(&mmx, &sse, &sse2);
#endif
#ifndef BUILD_SSE
   sse = 0;
   sse2 = 0;
#endif
#ifndef BUILD_MMX
   mmx = 0;
#endif
   if (evas_common_cpu_has_feature(CPU_FEATURE_MMX2))
     _evas_yv12torgb_sse(src, dst, w, h);
   else if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
     _evas_yv12torgb_mmx(src, dst, w, h);
#ifdef BUILD_ALTIVEC
   else if (evas_common_cpu_has_feature(CPU_FEATURE_ALTIVEC))
     _evas_yv12torgb_altivec(src, dst, w, h);
#endif
   else
     {
#ifdef BUILD_C
        if (!initted) _evas_yuv_init();
        initted = 1;
        /* FIXME: diz may be faster sometimes */
        _evas_yv12torgb_raster(src, dst, w, h);
#endif
     }
}
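
Note that these converters take src not as a flat buffer but as a table of row pointers: h Y rows, then h/2 U rows, then h/2 V rows (see the yuv[h + (yy / 2)] indexing below). A hedged caller-side sketch for a contiguous I420-style buffer — the helper name and the Y/U/V plane ordering are assumptions for illustration, not part of evas, and it assumes <stdlib.h> for malloc:

/* Hypothetical helper: build the row-pointer table that
 * evas_common_convert_yuv_420p_601_rgba() expects, from one contiguous
 * planar 4:2:0 buffer (Y plane, then U, then V; swap the chroma planes
 * for YV12 ordering). */
static DATA8 **
build_420p_row_table(DATA8 *data, int w, int h)
{
   DATA8 **rows = malloc(sizeof(DATA8 *) * h * 2);
   DATA8 *up = data + (w * h);            /* U plane: (w/2) x (h/2) */
   DATA8 *vp = up + ((w / 2) * (h / 2));  /* V plane: (w/2) x (h/2) */
   int i;

   if (!rows) return NULL;
   for (i = 0; i < h; i++) rows[i] = data + (i * w);                       /* Y rows */
   for (i = 0; i < h / 2; i++) rows[h + i] = up + (i * (w / 2));           /* U rows */
   for (i = 0; i < h / 2; i++) rows[h + (h / 2) + i] = vp + (i * (w / 2)); /* V rows */
   return rows;
}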

/* Thanks to Diz for this code. I've munged it a little and turned it into */
/* inline macros. I tried beating it with a different algorithm using MMX  */
/* but failed. So here we are. This is the fastest YUV->RGB I know of for  */
/* x86. It has an issue in that it doesn't convert colours accurately, so  */
/* the image looks a little "yellowy". This is a result of only 10.6 fixed */
/* point resolution, as opposed to 16.16 in the C code. This could be      */
/* fixed by processing half the number of pixels per cycle and going up to */
/* 32bits per element during compute, but it would all but negate the      */
/* speedup from mmx I think :( It might be possible to use SSE and SSE2    */
/* here, but I haven't tried yet. Let's see.                               */

/* NB: XviD has almost the same code in its assembly YV12->RGB code. Same  */
/* algorithm, same constants, same all over actually, except it actually   */
/* does a few extra memory accesses that this one doesn't, so in theory    */
/* this code should be faster. In the end it's all just an mmx version of  */
/* the reference implementation done with fixed point math                 */
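
For reference, the per-pixel arithmetic every path in this file implements is the plain BT.601 conversion in fixed point; the scalar cleanup loops below spell it out with the constants defined above:

/* Scalar reference of the conversion (what the cleanup loops compute):
 *    y' = RZ(YMUL) * (Y - 16)
 *    R  = clip((y' + RZ(CRV) * (V - 128)) >> RES)
 *    G  = clip((y' - RZ(CGU) * (U - 128) - RZ(CGV) * (V - 128) + RZ(OFF)) >> RES)
 *    B  = clip((y' + RZ(CBU) * (U - 128) + RZ(OFF)) >> RES)
 * where clip() saturates to 0..255 (LUT_CLIP in the C paths, packuswb in MMX). */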

static void
_evas_yv12torgb_sse(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
#ifdef BUILD_SSE
   int xx, yy;
   register unsigned char *yp1, *up, *vp;
   unsigned char *dp1;

   /* destination pointers */
   dp1 = rgb;

   for (yy = 0; yy < h; yy++)
     {
        /* plane pointers */
        yp1 = yuv[yy];
        up = yuv[h + (yy / 2)];
        vp = yuv[h + (h / 2) + (yy / 2)];
        for (xx = 0; xx < (w - 7); xx += 8)
          {
             movd_m2r(*up, mm3);
             movd_m2r(*vp, mm2);
             movq_m2r(*yp1, mm0);

             pxor_r2r(mm7, mm7);
             punpcklbw_r2r(mm7, mm2);
             punpcklbw_r2r(mm7, mm3);

             movq_r2r(mm0, mm1);
             psrlw_i2r(8, mm0);
             psllw_i2r(8, mm1);
             psrlw_i2r(8, mm1);

             movq_m2r(CONST_16, mm4);
             psubsw_r2r(mm4, mm0);
             psubsw_r2r(mm4, mm1);

             movq_m2r(CONST_128, mm5);
             psubsw_r2r(mm5, mm2);
             psubsw_r2r(mm5, mm3);

             movq_m2r(CONST_YMUL, mm4);
             pmullw_r2r(mm4, mm0);
             pmullw_r2r(mm4, mm1);

             movq_m2r(CONST_CRVCRV, mm7);
             pmullw_r2r(mm3, mm7);
             movq_m2r(CONST_CBUCBU, mm6);
             pmullw_r2r(mm2, mm6);
             movq_m2r(CONST_CGUCGU, mm5);
             pmullw_r2r(mm2, mm5);
             movq_m2r(CONST_CGVCGV, mm4);
             pmullw_r2r(mm3, mm4);

             movq_r2r(mm0, mm2);
             paddsw_r2r(mm7, mm2);
             paddsw_r2r(mm1, mm7);

             psraw_i2r(RES, mm2);
             psraw_i2r(RES, mm7);
             packuswb_r2r(mm7, mm2);

             pxor_r2r(mm7, mm7);
             movq_r2r(mm2, mm3);
             punpckhbw_r2r(mm7, mm2);
             punpcklbw_r2r(mm3, mm7);
             por_r2r(mm7, mm2);

             movq_r2r(mm0, mm3);
             psubsw_r2r(mm5, mm3);
             psubsw_r2r(mm4, mm3);
             paddsw_m2r(CONST_32, mm3);

             movq_r2r(mm1, mm7);
             psubsw_r2r(mm5, mm7);
             psubsw_r2r(mm4, mm7);
             paddsw_m2r(CONST_32, mm7);

             psraw_i2r(RES, mm3);
             psraw_i2r(RES, mm7);
             packuswb_r2r(mm7, mm3);

             pxor_r2r(mm7, mm7);
             movq_r2r(mm3, mm4);
             punpckhbw_r2r(mm7, mm3);
             punpcklbw_r2r(mm4, mm7);
             por_r2r(mm7, mm3);

             movq_m2r(CONST_32, mm4);
             paddsw_r2r(mm6, mm0);
             paddsw_r2r(mm6, mm1);
             paddsw_r2r(mm4, mm0);
             paddsw_r2r(mm4, mm1);
             psraw_i2r(RES, mm0);
             psraw_i2r(RES, mm1);
             packuswb_r2r(mm1, mm0);

             pxor_r2r(mm7, mm7);
             movq_r2r(mm0, mm5);
             punpckhbw_r2r(mm7, mm0);
             punpcklbw_r2r(mm5, mm7);
             por_r2r(mm7, mm0);

             movq_m2r(CONST_FF, mm1);
             movq_r2r(mm0, mm5);
             movq_r2r(mm3, mm6);
             movq_r2r(mm2, mm7);
             punpckhbw_r2r(mm3, mm2);
             punpcklbw_r2r(mm6, mm7);
             punpckhbw_r2r(mm1, mm0);
             punpcklbw_r2r(mm1, mm5);

             movq_r2r(mm7, mm1);
             punpckhwd_r2r(mm5, mm7);
             punpcklwd_r2r(mm5, mm1);

             movq_r2r(mm2, mm4);
             punpckhwd_r2r(mm0, mm2);
             punpcklwd_r2r(mm0, mm4);

             movntq_r2m(mm1, *(dp1));
             movntq_r2m(mm7, *(dp1 + 8));
             movntq_r2m(mm4, *(dp1 + 16));
             movntq_r2m(mm2, *(dp1 + 24));

             yp1 += 8;
             up += 4;
             vp += 4;
             dp1 += 8 * 4;
          }
        /* clean up when the width isn't a multiple of 8 pixels */
        if (xx < w)
          {
             int y, u, v, r, g, b;

             for (; xx < w; xx += 2)
               {
                  u = (*up++) - 128;
                  v = (*vp++) - 128;

                  y = RZ(YMUL) * ((*yp1++) - 16);
                  r = LUT_CLIP((y + (_crv * v)) >> RES);
                  g = LUT_CLIP((y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES);
                  b = LUT_CLIP((y + (_cbu * u) + RZ(OFF)) >> RES);
                  *((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

                  dp1 += 4;

                  y = RZ(YMUL) * ((*yp1++) - 16);
                  r = LUT_CLIP((y + (_crv * v)) >> RES);
                  g = LUT_CLIP((y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES);
                  b = LUT_CLIP((y + (_cbu * u) + RZ(OFF)) >> RES);
                  *((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

                  dp1 += 4;
               }
          }
     }
   emms();
#else
   _evas_yv12torgb_mmx(yuv, rgb, w, h);
#endif
}

static void
_evas_yv12torgb_mmx(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
#ifdef BUILD_MMX
   int xx, yy;
   register unsigned char *yp1, *up, *vp;
   unsigned char *dp1;

   /* destination pointers */
   dp1 = rgb;

   for (yy = 0; yy < h; yy++)
     {
        /* plane pointers */
        yp1 = yuv[yy];
        up = yuv[h + (yy / 2)];
        vp = yuv[h + (h / 2) + (yy / 2)];
        for (xx = 0; xx < (w - 7); xx += 8)
          {
             movd_m2r(*up, mm3);
             movd_m2r(*vp, mm2);
             movq_m2r(*yp1, mm0);

             pxor_r2r(mm7, mm7);
             punpcklbw_r2r(mm7, mm2);
             punpcklbw_r2r(mm7, mm3);

             movq_r2r(mm0, mm1);
             psrlw_i2r(8, mm0);
             psllw_i2r(8, mm1);
             psrlw_i2r(8, mm1);

             movq_m2r(CONST_16, mm4);
             psubsw_r2r(mm4, mm0);
             psubsw_r2r(mm4, mm1);

             movq_m2r(CONST_128, mm5);
             psubsw_r2r(mm5, mm2);
             psubsw_r2r(mm5, mm3);

             movq_m2r(CONST_YMUL, mm4);
             pmullw_r2r(mm4, mm0);
             pmullw_r2r(mm4, mm1);

             movq_m2r(CONST_CRVCRV, mm7);
             pmullw_r2r(mm3, mm7);
             movq_m2r(CONST_CBUCBU, mm6);
             pmullw_r2r(mm2, mm6);
             movq_m2r(CONST_CGUCGU, mm5);
             pmullw_r2r(mm2, mm5);
             movq_m2r(CONST_CGVCGV, mm4);
             pmullw_r2r(mm3, mm4);

             movq_r2r(mm0, mm2);
             paddsw_r2r(mm7, mm2);
             paddsw_r2r(mm1, mm7);

             psraw_i2r(RES, mm2);
             psraw_i2r(RES, mm7);
             packuswb_r2r(mm7, mm2);

             pxor_r2r(mm7, mm7);
             movq_r2r(mm2, mm3);
             punpckhbw_r2r(mm7, mm2);
             punpcklbw_r2r(mm3, mm7);
             por_r2r(mm7, mm2);

             movq_r2r(mm0, mm3);
             psubsw_r2r(mm5, mm3);
             psubsw_r2r(mm4, mm3);
             paddsw_m2r(CONST_32, mm3);

             movq_r2r(mm1, mm7);
             psubsw_r2r(mm5, mm7);
             psubsw_r2r(mm4, mm7);
             paddsw_m2r(CONST_32, mm7);

             psraw_i2r(RES, mm3);
             psraw_i2r(RES, mm7);
             packuswb_r2r(mm7, mm3);

             pxor_r2r(mm7, mm7);
             movq_r2r(mm3, mm4);
             punpckhbw_r2r(mm7, mm3);
             punpcklbw_r2r(mm4, mm7);
             por_r2r(mm7, mm3);

             movq_m2r(CONST_32, mm4);
             paddsw_r2r(mm6, mm0);
             paddsw_r2r(mm6, mm1);
             paddsw_r2r(mm4, mm0);
             paddsw_r2r(mm4, mm1);
             psraw_i2r(RES, mm0);
             psraw_i2r(RES, mm1);
             packuswb_r2r(mm1, mm0);

             pxor_r2r(mm7, mm7);
             movq_r2r(mm0, mm5);
             punpckhbw_r2r(mm7, mm0);
             punpcklbw_r2r(mm5, mm7);
             por_r2r(mm7, mm0);

             movq_m2r(CONST_FF, mm1);
             movq_r2r(mm0, mm5);
             movq_r2r(mm3, mm6);
             movq_r2r(mm2, mm7);
             punpckhbw_r2r(mm3, mm2);
             punpcklbw_r2r(mm6, mm7);
             punpckhbw_r2r(mm1, mm0);
             punpcklbw_r2r(mm1, mm5);

             movq_r2r(mm7, mm1);
             punpckhwd_r2r(mm5, mm7);
             punpcklwd_r2r(mm5, mm1);

             movq_r2r(mm2, mm4);
             punpckhwd_r2r(mm0, mm2);
             punpcklwd_r2r(mm0, mm4);

             movq_r2m(mm1, *(dp1));
             movq_r2m(mm7, *(dp1 + 8));
             movq_r2m(mm4, *(dp1 + 16));
             movq_r2m(mm2, *(dp1 + 24));

             yp1 += 8;
             up += 4;
             vp += 4;
             dp1 += 8 * 4;
          }
        /* clean up when the width isn't a multiple of 8 pixels */
        if (xx < w)
          {
             int y, u, v, r, g, b;

             for (; xx < w; xx += 2)
               {
                  u = (*up++) - 128;
                  v = (*vp++) - 128;

                  y = RZ(YMUL) * ((*yp1++) - 16);
                  r = LUT_CLIP((y + (_crv * v)) >> RES);
                  g = LUT_CLIP((y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES);
                  b = LUT_CLIP((y + (_cbu * u) + RZ(OFF)) >> RES);
                  *((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

                  dp1 += 4;

                  y = RZ(YMUL) * ((*yp1++) - 16);
                  r = LUT_CLIP((y + (_crv * v)) >> RES);
                  g = LUT_CLIP((y - (_cgu * u) - (_cgv * v) + RZ(OFF)) >> RES);
                  b = LUT_CLIP((y + (_cbu * u) + RZ(OFF)) >> RES);
                  *((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

                  dp1 += 4;
               }
          }
     }
   emms();
#else
   _evas_yv12torgb_raster(yuv, rgb, w, h);
#endif
}

#ifdef BUILD_ALTIVEC
static void
_evas_yv12torgb_altivec(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
#ifdef __VEC__
   int xx, yy;
   int w2, h2;
   unsigned char *yp1, *yp2, *up, *vp;
   unsigned char *dp1, *dp2;
   vector signed short y, u, v;
   vector signed short r, g, b;
   vector signed short tmp1, tmp2, tmp3;
   vector unsigned char yperm, uperm, vperm, rgb1, rgb2;
   vector unsigned char alpha;

   /* handy halved w & h */
   w2 = w / 2;
   h2 = h / 2;
   /* plane pointers */
   /* NB: unlike the other converters in this file, this path treats the
    * incoming 'yuv' argument as one contiguous planar buffer (Y plane,
    * then the two chroma planes) rather than as a table of row pointers. */
   yp1 = yuv;
   yp2 = yuv + w;
   up = yuv + (w * h);
   vp = up + (w2 * h2);
   /* destination pointers */
   dp1 = rgb;
   dp2 = rgb + (w * 4);

   alpha = vec_mergeh((vector unsigned char)AVV(255), zero);
   alpha = (vector unsigned char)vec_mergeh((vector unsigned short)alpha,
                                            (vector unsigned short)zero);

   for (yy = 0; yy < h2; yy++)
     {
        for (xx = 0; xx < w2; xx += 4)
          {
/* Cycles */
             /*
              * Load 4 y and 4 u & v pixels for the 8x2 pixel block.
              */
/* 3 */      tmp3 = (vector signed short)vec_lde(0, (unsigned int *)yp1);
/* 3 */      tmp1 = (vector signed short)vec_lde(0, (unsigned int *)up);
/* 3 */      tmp2 = (vector signed short)vec_lde(0, (unsigned int *)vp);

             /* Prepare for aligning the data in their vectors */
/* 3 */      yperm = vec_lvsl(0, yp1);
/* 3 */      uperm = vec_lvsl(0, up);
/* 3 */      vperm = vec_lvsl(0, vp);
             yp1 += 4;

             /* Save y and load the next 4 y pixels for a total of 8 */
/* 2 */      y = vec_perm(tmp3, tmp3, yperm);
/* 3 */      tmp3 = (vector signed short)vec_lde(0, (unsigned int *)yp1);

             /* Setup and calculate the 4 u pixels */
/* 2 */      tmp1 = vec_perm(tmp1, tmp1, uperm);
/* 2 */      tmp2 = vec_perm(tmp2, tmp2, vperm);

             /* Avoid dependency stalls on yperm and calculate the 4 u values */
/* 3 */      yperm = vec_lvsr(12, yp1);
/* 1 */      tmp1 = (vector signed short)vec_mergeh((vector unsigned char)tmp1,
                                                    (vector unsigned char)tmp1);
/* 1 */      u = (vector signed short)vec_mergeh(zero,
                                                 (vector unsigned char)tmp1);

/* 1 */      u = vec_sub(u, c128);
/* 2 */      tmp3 = vec_perm(tmp3, tmp3, yperm);

             /* Setup and calculate the 4 v values */
/* 1 */      tmp2 = (vector signed short)vec_mergeh((vector unsigned char)tmp2,
                                                    (vector unsigned char)tmp2);
/* 1 */      v = (vector signed short)vec_mergeh(zero,
                                                 (vector unsigned char)tmp2);
/* 4 */      tmp2 = vec_mladd(cgu, u, (vector signed short)zero);
/* 1 */      v = vec_sub(v, c128);

             /* Move the data into y and start loading the next 4 pixels */
/* 1 */      y = (vector signed short)vec_mergeh(zero,
                                                 (vector unsigned char)y);
/* 1 */      tmp3 = (vector signed short)vec_mergeh(zero,
                                                    (vector unsigned char)tmp3);
/* 1 */      y = vec_or(y, tmp3);

             /* Finish calculating y */
/* 1 */      y = vec_sub(y, c16);
/* 4 */      y = vec_mladd(ymul, y, (vector signed short)zero);

             /* Perform non-dependent multiplies first. */
/* 4 */      tmp1 = vec_mladd(crv, v, y);
/* 4 */      tmp2 = vec_mladd(cgv, v, tmp2);
/* 4 */      tmp3 = vec_mladd(cbu, u, y);

             /* Calculate rgb values */
/* 1 */      r = vec_sra(tmp1, res);

/* 1 */      tmp2 = vec_sub(y, tmp2);
/* 1 */      tmp2 = vec_add(tmp2, c32);
/* 1 */      g = vec_sra(tmp2, res);

/* 1 */      tmp3 = vec_add(tmp3, c32);
/* 1 */      b = vec_sra(tmp3, res);

             /* Bound to 0 <= x <= 255 */
/* 1 */      r = vec_min(r, maxchar);
/* 1 */      g = vec_min(g, maxchar);
/* 1 */      b = vec_min(b, maxchar);
/* 1 */      r = vec_max(r, (vector signed short)zero);
/* 1 */      g = vec_max(g, (vector signed short)zero);
/* 1 */      b = vec_max(b, (vector signed short)zero);

             /* Combine r, g and b. */
/* 2 */      rgb1 = vec_perm((vector unsigned char)r, (vector unsigned char)g,
                             pickrg1);
/* 2 */      rgb2 = vec_perm((vector unsigned char)r, (vector unsigned char)g,
                             pickrg2);

/* 2 */      rgb1 = vec_perm(rgb1, (vector unsigned char)b, pickrgb1);
/* 2 */      rgb2 = vec_perm(rgb2, (vector unsigned char)b, pickrgb2);

/* 1 */      rgb1 = vec_or(alpha, rgb1);
/* 1 */      rgb2 = vec_or(alpha, rgb2);

/* 3 */      vec_stl(rgb1, 0, dp1);
             dp1 += 16;
/* 3 */      vec_stl(rgb2, 0, dp1);

             /*
              * Begin the second row calculations
              */

             /*
              * Load 4 y pixels for the 8x2 pixel block.
              */
/* 3 */      yperm = vec_lvsl(0, yp2);
/* 3 */      tmp3 = (vector signed short)vec_lde(0, (unsigned int *)yp2);
             yp2 += 4;

             /* Save y and load the next 4 y pixels for a total of 8 */
/* 2 */      y = vec_perm(tmp3, tmp3, yperm);
/* 3 */      yperm = vec_lvsr(12, yp2);
/* 3 */      tmp3 = (vector signed short)vec_lde(0, (unsigned int *)yp2);
/* 1 */      y = (vector signed short)vec_mergeh(zero,
                                                 (vector unsigned char)y);

             /* Avoid dependency stalls on yperm */
/* 2 */      tmp3 = vec_perm(tmp3, tmp3, yperm);
/* 1 */      tmp3 = (vector signed short)vec_mergeh(zero,
                                                    (vector unsigned char)tmp3);
/* 1 */      y = vec_or(y, tmp3);

             /* Start the calculation for g */
/* 4 */      tmp2 = vec_mladd(cgu, u, (vector signed short)zero);

             /* Finish calculating y */
/* 1 */      y = vec_sub(y, c16);
/* 4 */      y = vec_mladd(ymul, y, (vector signed short)zero);

             /* Perform non-dependent multiplies first. */
/* 4 */      tmp2 = vec_mladd(cgv, v, tmp2);
/* 4 */      tmp1 = vec_mladd(crv, v, y);
/* 4 */      tmp3 = vec_mladd(cbu, u, y);

             /* Calculate rgb values */
/* 1 */      r = vec_sra(tmp1, res);

/* 1 */      tmp2 = vec_sub(y, tmp2);
/* 1 */      tmp2 = vec_add(tmp2, c32);
/* 1 */      g = vec_sra(tmp2, res);

/* 1 */      tmp3 = vec_add(tmp3, c32);
/* 1 */      b = vec_sra(tmp3, res);

             /* Bound to 0 <= x <= 255 */
/* 1 */      r = vec_min(r, maxchar);
/* 1 */      g = vec_min(g, maxchar);
/* 1 */      b = vec_min(b, maxchar);
/* 1 */      r = vec_max(r, (vector signed short)zero);
/* 1 */      g = vec_max(g, (vector signed short)zero);
/* 1 */      b = vec_max(b, (vector signed short)zero);

             /* Combine r, g and b. */
/* 2 */      rgb1 = vec_perm((vector unsigned char)r, (vector unsigned char)g,
                             pickrg1);
/* 2 */      rgb2 = vec_perm((vector unsigned char)r, (vector unsigned char)g,
                             pickrg2);

/* 2 */      rgb1 = vec_perm(rgb1, (vector unsigned char)b, pickrgb1);
/* 2 */      rgb2 = vec_perm(rgb2, (vector unsigned char)b, pickrgb2);

/* 1 */      rgb1 = vec_or(alpha, rgb1);
/* 1 */      rgb2 = vec_or(alpha, rgb2);

/* 3 */      vec_stl(rgb1, 0, dp2);
             dp2 += 16;
/* 3 */      vec_stl(rgb2, 0, dp2);

             /* Increment the YUV data pointers to the next set of pixels. */
             yp1 += 4;
             yp2 += 4;
             up += 4;
             vp += 4;

             /* Move the destination pointers to the next set of pixels. */
             dp1 += 16;
             dp2 += 16;
          }

        /* jump down one line since we are doing 2 at once */
        yp1 += w;
        yp2 += w;
        dp1 += (w * 4);
        dp2 += (w * 4);
     }
#else
   _evas_yv12torgb_diz(yuv, rgb, w, h);
#endif
}
#endif

static void
_evas_yuv_init(void)
{
#ifdef BUILD_C
   int i;

   for (i = 0; i < 256; i++)
     {
        _v1164[i] = (int)(((float)(i - 16 )) * 1.164);

        _v1596[i] = (int)(((float)(i - 128)) * 1.596);
        _v813[i]  = (int)(((float)(i - 128)) * 0.813);

        _v391[i]  = (int)(((float)(i - 128)) * 0.391);
        _v2018[i] = (int)(((float)(i - 128)) * 2.018);
     }

   for (i = -384; i < 640; i++)
     {
        _clip_lut[i+384] = i < 0 ? 0 : (i > 255) ? 255 : i;
     }
#endif
}
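
These tables are simply the 16.16 constants above collapsed into per-byte lookups; as a worked cross-check (illustrative only):

/* _v1164[y] == (int)((y - 16) * 1.164), while the fixed point form used
 * elsewhere is (YMUL * (y - 16)) >> 16 with YMUL = 76283 ~ 1.164 * 65536.
 * For y = 235 (nominal white): (235 - 16) * 1.164 = 254.9 -> 254, and
 * (76283 * 219) >> 16 = 254 as well. */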

#ifdef BUILD_ALTIVEC
static void
_evas_yv12torgb_diz(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
#ifdef BUILD_C
   int xx, yy;
   int y, u, v, r, g, b;
   unsigned char *yp1, *yp2, *up, *vp;
   unsigned char *dp1, *dp2;
   int crv, cbu, cgu, cgv;

   /* destination pointers */
   dp1 = rgb;
   dp2 = rgb + (w * 4);

   crv = CRV; /* 1.596 */
   cbu = CBU; /* 2.018 */
   cgu = CGU; /* 0.391 */
   cgv = CGV; /* 0.813 */

   for (yy = 0; yy < h; yy += 2)
     {
        /* plane pointers */
        yp1 = yuv[yy];
        yp2 = yuv[yy + 1];
        up = yuv[h + (yy / 2)];
        vp = yuv[h + (h / 2) + (yy / 2)];
        for (xx = 0; xx < w; xx += 2)
          {
             /* collect u & v for the 2x2 pixel block */
             u = (*up++) - 128;
             v = (*vp++) - 128;

             /* do the top 2 pixels of the 2x2 block, which share u & v */
             /* yuv to rgb */
             y = YMUL * ((*yp1++) - 16);
             r = LUT_CLIP((y + (crv * v)) >> 16);
             g = LUT_CLIP((y - (cgu * u) - (cgv * v) + OFF) >> 16);
             b = LUT_CLIP((y + (cbu * u) + OFF) >> 16);
             *((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

             dp1 += 4;

             /* yuv to rgb */
             y = YMUL * ((*yp1++) - 16);
             r = LUT_CLIP((y + (crv * v)) >> 16);
             g = LUT_CLIP((y - (cgu * u) - (cgv * v) + OFF) >> 16);
             b = LUT_CLIP((y + (cbu * u) + OFF) >> 16);
             *((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(r,g,b);

             dp1 += 4;

             /* do the bottom 2 pixels */
             /* yuv to rgb */
             y = YMUL * ((*yp2++) - 16);
             r = LUT_CLIP((y + (crv * v)) >> 16);
             g = LUT_CLIP((y - (cgu * u) - (cgv * v) + OFF) >> 16);
             b = LUT_CLIP((y + (cbu * u) + OFF) >> 16);
             *((DATA32 *) dp2) = 0xff000000 + RGB_JOIN(r,g,b);

             dp2 += 4;

             /* yuv to rgb */
             y = YMUL * ((*yp2++) - 16);
             r = LUT_CLIP((y + (crv * v)) >> 16);
             g = LUT_CLIP((y - (cgu * u) - (cgv * v) + OFF) >> 16);
             b = LUT_CLIP((y + (cbu * u) + OFF) >> 16);
             *((DATA32 *) dp2) = 0xff000000 + RGB_JOIN(r,g,b);

             dp2 += 4;
          }
        /* jump down one line since we are doing 2 at once */
        dp1 += (w * 4);
        dp2 += (w * 4);
     }
#endif
}
#endif

static void
_evas_yv12torgb_raster(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
#ifdef BUILD_C
   int xx, yy;
   int y, u, v;
   unsigned char *yp1, *yp2, *up, *vp;
   unsigned char *dp1, *dp2;

   /* destination pointers */
   dp1 = rgb;
   dp2 = rgb + (w * 4);

   for (yy = 0; yy < h; yy += 2)
     {
        /* plane pointers */
        yp1 = yuv[yy];
        yp2 = yuv[yy + 1];
        up = yuv[h + (yy / 2)];
        vp = yuv[h + (h / 2) + (yy / 2)];
        for (xx = 0; xx < w; xx += 2)
          {
             int vmu;

             /* collect u & v for the 2x2 pixel block */
             u = *up++;
             v = *vp++;

             /* save lookups */
             vmu = _v813[v] + _v391[u];
             u = _v2018[u];
             v = _v1596[v];

             /* do the top 2 pixels of the 2x2 block, which share u & v */
             /* yuv to rgb */
             y = _v1164[*yp1++];
             *((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));

             dp1 += 4;

             /* yuv to rgb */
             y = _v1164[*yp1++];
             *((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));

             dp1 += 4;

             /* do the bottom 2 pixels */
             /* yuv to rgb */
             y = _v1164[*yp2++];
             *((DATA32 *) dp2) = 0xff000000 + RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));

             dp2 += 4;

             /* yuv to rgb */
             y = _v1164[*yp2++];
             *((DATA32 *) dp2) = 0xff000000 + RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));

             dp2 += 4;
          }
        /* jump down one line since we are doing 2 at once */
        dp1 += (w * 4);
        dp2 += (w * 4);
     }
#endif
}

void
evas_common_convert_yuv_422_601_rgba(DATA8 **src, DATA8 *dst, int w, int h)
{
#ifdef BUILD_C
   if (!initted) _evas_yuv_init();
   initted = 1;
   _evas_yuy2torgb_raster(src, dst, w, h);
#endif
}

void
evas_common_convert_yuv_420_601_rgba(DATA8 **src, DATA8 *dst, int w, int h)
{
#ifdef BUILD_C
   if (!initted) _evas_yuv_init();
   initted = 1;
   _evas_nv12torgb_raster(src, dst, w, h);
#endif
}

void
evas_common_convert_yuv_420T_601_rgba(DATA8 **src, DATA8 *dst, int w, int h)
{
#ifdef BUILD_C
   if (!initted) _evas_yuv_init();
   initted = 1;
   _evas_nv12tiledtorgb_raster(src, dst, w, h);
#endif
}

static void
_evas_yuy2torgb_raster(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
#ifdef BUILD_C
   int xx, yy;
   int y, u, v;
   unsigned char *yp1, *yp2, *up, *vp;
   unsigned char *dp1;

   /* destination pointer */
   dp1 = rgb;

   for (yy = 0; yy < h; yy++)
     {
        /* plane pointers */
        unsigned char *line;

        line = yuv[yy];
        yp1 = line + 0;
        up = line + 1;
        yp2 = line + 2;
        vp = line + 3;

        for (xx = 0; xx < w; xx += 2)
          {
             int vmu;

             /* collect u & v for the 2 pixel block */
             u = *up;
             v = *vp;

             /* save lookups */
             vmu = _v813[v] + _v391[u];
             u = _v2018[u];
             v = _v1596[v];

             /* do the 2 horizontal pixels that share u & v */
             /* yuv to rgb */
             y = _v1164[*yp1];
             *((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));

             dp1 += 4;

             /* yuv to rgb */
             y = _v1164[*yp2];
             *((DATA32 *) dp1) = 0xff000000 + RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));

             dp1 += 4;

             yp1 += 4; yp2 += 4; up += 4; vp += 4;
          }
     }
#endif
}
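
The pointer offsets above follow the standard YUY2/YUYV packing, four bytes per pair of pixels:

/* YUY2 row layout (why yp1/up/yp2/vp start at offsets 0/1/2/3 and all
 * advance by 4 per 2-pixel step):
 *
 *    byte:  0   1   2   3   4   5   6   7  ...
 *    data: Y0  U0  Y1  V0  Y2  U1  Y3  V1  ...
 *
 * Pixels 0 and 1 share U0/V0, pixels 2 and 3 share U1/V1, and so on. */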

#ifdef BUILD_C
static inline void
_evas_yuv2rgb_420_raster(unsigned char *yp1, unsigned char *yp2, unsigned char *up, unsigned char *vp,
                         unsigned char *dp1, unsigned char *dp2)
{
   int y, u, v;
   int vmu;
   int rgb;

   /* collect u & v, shared by the whole 2x2 pixel block */
   u = *up;
   v = *vp;

   /* save lookups */
   /* MEM_BP selects the lookup-table variant over the multiply variant */
#ifdef MEM_BP
   vmu = _v813[v] + _v391[u];
   u = _v2018[u];
   v = _v1596[v];
#else
   u -= 128;
   v -= 128;
   vmu = v * CGV + u * CGU;
   u = u * CBU;
   v = v * CRV;
#endif

   /* do the top 2 pixels of the 2x2 block, which share u & v */
   /* yuv to rgb */
#ifdef MEM_BP
   y = _v1164[*yp1];
   rgb = RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));
#else
   y = (*yp1 - 16) * YMUL;
   rgb = RGB_JOIN(LUT_CLIP(((y + v) >> 16)),
                  LUT_CLIP(((y - vmu + OFF) >> 16)),
                  LUT_CLIP(((y + u + OFF) >> 16)));
#endif
   *((DATA32 *) dp1) = 0xff000000 + rgb;

   dp1 += 4; yp1++;

   /* yuv to rgb */
#ifdef MEM_BP
   y = _v1164[*yp1];
   rgb = RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));
#else
   y = (*yp1 - 16) * YMUL;
   rgb = RGB_JOIN(LUT_CLIP(((y + v) >> 16)),
                  LUT_CLIP(((y - vmu + OFF) >> 16)),
                  LUT_CLIP(((y + u + OFF) >> 16)));
#endif
   *((DATA32 *) dp1) = 0xff000000 + rgb;

   /* do the bottom 2 pixels of the 2x2 block, which share u & v */
   /* yuv to rgb */
#ifdef MEM_BP
   y = _v1164[*yp2];
   rgb = RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));
#else
   y = (*yp2 - 16) * YMUL;
   rgb = RGB_JOIN(LUT_CLIP(((y + v) >> 16)),
                  LUT_CLIP(((y - vmu + OFF) >> 16)),
                  LUT_CLIP(((y + u + OFF) >> 16)));
#endif
   *((DATA32 *) dp2) = 0xff000000 + rgb;

   dp2 += 4; yp2++;

   /* yuv to rgb */
#ifdef MEM_BP
   y = _v1164[*yp2];
   rgb = RGB_JOIN(LUT_CLIP(y + v), LUT_CLIP(y - vmu), LUT_CLIP(y + u));
#else
   y = (*yp2 - 16) * YMUL;
   rgb = RGB_JOIN(LUT_CLIP(((y + v) >> 16)),
                  LUT_CLIP(((y - vmu + OFF) >> 16)),
                  LUT_CLIP(((y + u + OFF) >> 16)));
#endif
   *((DATA32 *) dp2) = 0xff000000 + rgb;
}
#endif

static void
_evas_nv12tiledtorgb_raster(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
#ifdef BUILD_C

#define HANDLE_MACROBLOCK(YP1, YP2, UP, VP, DP1, DP2)                      \
  {                                                                        \
     int i;                                                                \
     int j;                                                                \
                                                                           \
     for (i = 0; i < 32; i += 2)                                           \
       {                                                                   \
          for (j = 0; j < 64; j += 2)                                      \
            {                                                              \
               _evas_yuv2rgb_420_raster(YP1, YP2, UP, VP, DP1, DP2);       \
                                                                           \
               /* the call above rendered 2 pixels on each of 2 lines */   \
               DP1 += 8; DP2 += 8;                                         \
                                                                           \
               /* and consumed 2 Y per line plus 1 U and 1 V; U & V are */ \
               /* interleaved in the same plane */                         \
               YP1 += 2; YP2 += 2; UP += 2; VP += 2;                       \
            }                                                              \
                                                                           \
          DP1 += sizeof (int) * ((w << 1) - 64);                           \
          DP2 += sizeof (int) * ((w << 1) - 64);                           \
          YP1 += 64;                                                       \
          YP2 += 64;                                                       \
       }                                                                   \
  }

   /* One macroblock is 32 lines of Y and 16 lines of UV */
   const int offset_value[2] = { 0, 64 * 16 };
   int mb_x, mb_y, mb_w, mb_h;
   int base_h;
   int uv_x, uv_y, uv_step;
   int stride;

   /* Idea: iterate over each macroblock and convert it with the 4:2:0
      helper above (_evas_yuv2rgb_420_raster via HANDLE_MACROBLOCK) */

   /* The layout of the Y macroblock order in RGB non-tiled space: */
   /* --------------------------------------------------- */
   /* |  0 |  1 |  6 |  7 |  8 |  9 | 14 | 15 | 16 | 17 | */
   /* --------------------------------------------------- */
   /* |  2 |  3 |  4 |  5 | 10 | 11 | 12 | 13 | 18 | 19 | */
   /* --------------------------------------------------- */
   /* | 20 | 21 | 26 | 27 | 28 | 29 | 34 | 35 | 36 | 37 | */
   /* --------------------------------------------------- */
   /* | 22 | 23 | 24 | 25 | 30 | 31 | 32 | 33 | 38 | 39 | */
   /* --------------------------------------------------- */
   /* | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | */
   /* --------------------------------------------------- */
   /* The layout of the UV macroblock order in the same RGB non-tiled space: */
   /* --------------------------------------------------- */
   /* |    |    |    |    |    |    |    |    |    |    | */
   /* -  0 -  1 -  6 -  7 -  8 -  9 - 14 - 15 - 16 - 17 - */
   /* |    |    |    |    |    |    |    |    |    |    | */
   /* --------------------------------------------------- */
   /* |    |    |    |    |    |    |    |    |    |    | */
   /* -  2 -  3 -  4 -  5 - 10 - 11 - 12 - 13 - 18 - 19 - */
   /* |    |    |    |    |    |    |    |    |    |    | */
   /* --------------------------------------------------- */
   /* |    |    |    |    |    |    |    |    |    |    | */
   /* - 20 - 21 - 22 - 22 - 23 - 24 - 25 - 26 - 27 - 28 - */

   /* the frame is split into 64x32 macroblocks, so w and h should be
      multiples of 64 and 32 respectively */
   mb_w = w / 64;
   mb_h = h / 32;

   base_h = (mb_h >> 1) + (mb_h & 0x1);
   stride = w * sizeof (int);

   uv_x = 0; uv_y = 0;

   /* In this format, macroblocks are linearized over pairs of rows,
      alternating a Z pattern and its mirror image */
   for (mb_y = 0; mb_y < (mb_h >> 1); mb_y++)
     {
        int step = 2;
        int offset = 0;
        int x = 0;
        int rmb_x = 0;
        int ry[2];

        ry[0] = mb_y * 2 * 32 * stride;
        ry[1] = ry[0] + 32 * stride;

        uv_step = (mb_y & 0x1) == 0 ? 4 : 0;
        uv_x = (mb_y & 0x1) == 0 ? 0 : 2 * 64 * 32;

        for (mb_x = 0; mb_x < mb_w * 2; mb_x++, rmb_x += 64 * 32)
          {
             unsigned char *yp1, *yp2, *up, *vp;
             unsigned char *dp1, *dp2;

             dp1 = rgb + x + ry[offset];
             dp2 = dp1 + stride;

             yp1 = yuv[mb_y] + rmb_x;
             yp2 = yp1 + 64;

             /* the UV plane has a quarter of the Y sample count, but each
                sample is two bytes (U and V interleaved) */
             up = yuv[(mb_y >> 1) + base_h] + uv_x + offset_value[offset];
             vp = up + 1;

             HANDLE_MACROBLOCK(yp1, yp2, up, vp, dp1, dp2);

             step++;
             if ((step & 0x3) == 0)
               {
                  offset = 1 - offset;
                  x -= 64 * sizeof (int);
                  uv_x -= 64 * 32;
               }
             else
               {
                  x += 64 * sizeof (int);
                  uv_x += 64 * 32;
               }

             uv_step++;
             if (uv_step == 8)
               {
                  uv_step = 0;
                  uv_x += 4 * 64 * 32;
               }
          }
     }

   if (mb_h & 0x1)
     {
        int x = 0;
        int ry;

        ry = mb_y << 1;

        uv_step = 0;
        uv_x = 0;

        for (mb_x = 0; mb_x < mb_w; mb_x++, x++, uv_x++)
          {
             unsigned char *yp1, *yp2, *up, *vp;
             unsigned char *dp1, *dp2;

             dp1 = rgb + (x * 64 + (ry * 32 * w)) * sizeof (int);
             dp2 = dp1 + sizeof (int) * w;

             yp1 = yuv[mb_y] + mb_x * 64 * 32;
             yp2 = yp1 + 64;

             up = yuv[mb_y / 2 + base_h] + uv_x * 64 * 32;
             vp = up + 1;

             HANDLE_MACROBLOCK(yp1, yp2, up, vp, dp1, dp2);
          }
     }
#endif
}

static void
_evas_nv12torgb_raster(unsigned char **yuv, unsigned char *rgb, int w, int h)
{
#ifdef BUILD_C
   int xx, yy;
   unsigned char *yp1, *yp2, *up, *vp;
   unsigned char *dp1;
   unsigned char *dp2;

   dp1 = rgb;
   dp2 = dp1 + sizeof (int) * w;

   for (yy = 0; yy < h; yy++)
     {
        yp1 = yuv[yy++];
        yp2 = yuv[yy];

        up = yuv[h + (yy >> 1)];
        vp = up + 1;

        for (xx = 0; xx < w; xx += 2)
          {
             _evas_yuv2rgb_420_raster(yp1, yp2, up, vp, dp1, dp2);

             /* the call above rendered 2 pixels on each of the 2 lines */
             dp1 += 8; dp2 += 8;

             /* and consumed 2 Y per line plus 1 U and 1 V; U & V are
                interleaved in the same plane */
             yp1 += 2; yp2 += 2; up += 2; vp += 2;
          }

        /* jump one line */
        dp1 += sizeof (int) * w;
        dp2 += sizeof (int) * w;
        yp1 += w;
        yp2 += w;
     }
#endif
}
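
As with the planar YV12 path, the caller supplies a row-pointer table: h Y rows followed by h/2 interleaved UV rows. A hedged setup sketch for a contiguous NV12 buffer (the helper name is hypothetical and not part of evas; assumes <stdlib.h> for malloc):

/* Hypothetical row-table setup for a contiguous NV12 frame:
 * rows[0..h-1]     -> Y rows, w bytes each
 * rows[h..h+h/2-1] -> interleaved UV rows, also w bytes each */
static unsigned char **
build_nv12_row_table(unsigned char *data, int w, int h)
{
   unsigned char **rows = malloc(sizeof(unsigned char *) * (h + (h / 2)));
   unsigned char *uv = data + (w * h); /* UV plane follows the Y plane */
   int i;

   if (!rows) return NULL;
   for (i = 0; i < h; i++) rows[i] = data + (i * w);
   for (i = 0; i < h / 2; i++) rows[h + i] = uv + (i * w);
   return rows;
}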

#endif