aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/evas/src/lib/engines/common/evas_blit_main.c
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/evas/src/lib/engines/common/evas_blit_main.c')
-rw-r--r--libraries/evas/src/lib/engines/common/evas_blit_main.c700
1 files changed, 700 insertions, 0 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_blit_main.c b/libraries/evas/src/lib/engines/common/evas_blit_main.c
new file mode 100644
index 0000000..4c077b3
--- /dev/null
+++ b/libraries/evas/src/lib/engines/common/evas_blit_main.c
@@ -0,0 +1,700 @@
1#include "evas_common.h"
2
3#if defined BUILD_MMX || defined BUILD_SSE
4#include "evas_mmx.h"
5#endif
6
7#define ALIGN_FIX
8
9static void evas_common_copy_pixels_c (DATA32 *src, DATA32 *dst, int len);
10static void evas_common_copy_pixels_mmx (DATA32 *src, DATA32 *dst, int len);
11static void evas_common_copy_pixels_mmx2 (DATA32 *src, DATA32 *dst, int len);
12static void evas_common_copy_pixels_sse/*NB*/ (DATA32 *src, DATA32 *dst, int len);
13
14#ifdef BUILD_NEON
15static void evas_common_copy_pixels_neon (DATA32 *src, DATA32 *dst, int len);
16static void evas_common_copy_pixels_rev_neon (DATA32 *src, DATA32 *dst, int len);
17#endif
18
19static void evas_common_copy_pixels_rev_c (DATA32 *src, DATA32 *dst, int len);
20static void evas_common_copy_pixels_rev_mmx (DATA32 *src, DATA32 *dst, int len);
21static void evas_common_copy_pixels_rev_sse/*NB*/ (DATA32 *src, DATA32 *dst, int len);
22
23static void evas_common_copy_rev_pixels_c (DATA32 *src, DATA32 *dst, int len);
24
25
26EAPI void
27evas_common_blit_init(void)
28{
29}
30
31EAPI void
32evas_common_blit_rectangle(const RGBA_Image *src, RGBA_Image *dst, int src_x, int src_y, int w, int h, int dst_x, int dst_y)
33{
34 int y;
35 Gfx_Func_Copy func;
36 DATA32 *src_ptr, *dst_ptr;
37
38 /* clip clip clip */
39 if (w <= 0) return;
40 if (src_x + w > (int)src->cache_entry.w) w = src->cache_entry.w - src_x;
41 if (w <= 0) return;
42 if (src_x < 0)
43 {
44 dst_x -= src_x;
45 w += src_x;
46 src_x = 0;
47 }
48 if (w <= 0) return;
49
50 if (h <= 0) return;
51 if (src_y + h > (int)src->cache_entry.h) h = src->cache_entry.h - src_y;
52 if (h <= 0) return;
53 if (src_y < 0)
54 {
55 dst_y -= src_y;
56 h += src_y;
57 src_y = 0;
58 }
59 if (h <= 0) return;
60
61 if (w <= 0) return;
62 if (dst_x + w > (int)dst->cache_entry.w) w = dst->cache_entry.w - dst_x;
63 if (w <= 0) return;
64 if (dst_x < 0)
65 {
66 src_x -= dst_x;
67 w += dst_x;
68 dst_x = 0;
69 }
70 if (w <= 0) return;
71
72 if (h <= 0) return;
73 if (dst_y + h > (int)dst->cache_entry.h) h = dst->cache_entry.h - dst_y;
74 if (h <= 0) return;
75 if (dst_y < 0)
76 {
77 src_y -= dst_y;
78 h += dst_y;
79 dst_y = 0;
80 }
81 if (h <= 0) return;
82
83 if (dst == src)
84 {
85 /* src after dst - go forward */
86 if (((src_y * src->cache_entry.w) + src_x) > ((dst_y * dst->cache_entry.w) + dst_x))
87 {
88 func = evas_common_draw_func_copy_get(w, 0);
89 for (y = 0; y < h; y++)
90 {
91 src_ptr = src->image.data + ((y + src_y) * src->cache_entry.w) + src_x;
92 dst_ptr = dst->image.data + ((y + dst_y) * dst->cache_entry.w) + dst_x;
93 func(src_ptr, dst_ptr, w);
94 }
95 }
96 /* reverse */
97 else
98 {
99 func = evas_common_draw_func_copy_get(w, 1);
100 for (y = h - 1; y >= 0; y--)
101 {
102 src_ptr = src->image.data + ((y + src_y) * src->cache_entry.w) + src_x;
103 dst_ptr = dst->image.data + ((y + dst_y) * dst->cache_entry.w) + dst_x;
104 func(src_ptr, dst_ptr, w);
105 }
106 }
107 }
108 else
109 {
110 func = evas_common_draw_func_copy_get(w, 0);
111 for (y = 0; y < h; y++)
112 {
113 src_ptr = src->image.data + ((y + src_y) * src->cache_entry.w) + src_x;
114 dst_ptr = dst->image.data + ((y + dst_y) * dst->cache_entry.w) + dst_x;
115 func(src_ptr, dst_ptr, w);
116 }
117 }
118}
119
120/****************************************************************************/
121
122static void
123evas_common_copy_rev_pixels_c(DATA32 *src, DATA32 *dst, int len)
124{
125 DATA32 *dst_end = dst + len;
126
127 src += len - 1;
128 while (dst < dst_end) *dst++ = *src--;
129}
130
131
#ifdef BUILD_NEON
/*
 * NEON copy that reads the source from its last pixel downwards while
 * writing the destination from its first pixel upwards; every multi-pixel
 * chunk is word-reversed with vrev64.32, so dst ends up holding the pixels
 * of src in mirrored order.
 *
 * src: first source pixel, dst: first destination pixel, len: pixel count.
 *
 * The assembly advances the source/destination registers and uses a scratch
 * register, so those are passed as read-write early-clobber operands on
 * local copies (input-only operands must not be modified by the asm).  The
 * flag-setting instructions (andS, cmp) are covered by the "cc" clobber.
 */
static void
evas_common_copy_pixels_rev_neon(DATA32 *src, DATA32 *dst, int len)
{
   DATA32 *s, *e;
   uint32_t *tmp = (void *)37;

   /*
    * The single-pixel alignment step in the assembly copies without looking
    * at the remaining length, so an empty request must never reach it.
    */
   if (len <= 0) return;

   s = src + len; /* one past the last source pixel, walked downwards */
   e = dst + len; /* one past the last destination pixel */
#define AP "evas_common_copy_rev_pixels_neon_"
   asm volatile (
	".fpu neon \n\t"
	// Destination already 32 byte aligned?
	"andS %[tmp], %[d], $0x1f \n\t"
	"beq "AP"quadstart \n\t"

	// Destination at least 8 byte aligned?
	"andS %[tmp], %[d], $0x4 \n\t"
	"beq "AP"dualstart \n\t"

	// Only once: one pixel to reach 8 byte alignment
	AP"singleloop: \n\t"
	"sub %[s], #4 \n\t"
	"vld1.32 d0[0], [%[s]] \n\t"
	"vst1.32 d0[0], [%[d]]! \n\t"

	// Up to 3 times: pixel pairs until 32 byte aligned
	AP"dualstart: \n\t"
	"sub %[tmp], %[e], %[d] \n\t"
	"cmp %[tmp], #31 \n\t"
	"blt "AP"loopout \n\t"

	"andS %[tmp], %[d], $0x1f \n\t"
	"beq "AP"quadstart \n\t"

	AP"dualloop: \n\t"
	"sub %[s], #8 \n\t"
	"vldm %[s], {d0} \n\t"
	"vrev64.32 d1, d0 \n\t"
	"vstm %[d]!, {d1} \n\t"

	"andS %[tmp], %[d], $0x1f \n\t"
	"bne "AP"dualloop \n\t"


	// 32 bytes (8 pixels) per iteration
	AP"quadstart: \n\t"
	"sub %[tmp], %[e], %[d] \n\t"
	"cmp %[tmp], #32 \n\t"
	"blt "AP"loopout \n\t"

	"sub %[tmp],%[e],#32 \n\t"

	AP "quadloop: \n\t"
	"sub %[s], #32 \n\t"
	"vldm %[s], {d0,d1,d2,d3} \n\t"

	"vrev64.32 d7,d0 \n\t"
	"vrev64.32 d6,d1 \n\t"
	"vrev64.32 d5,d2 \n\t"
	"vrev64.32 d4,d3 \n\t"

	"vstm %[d]!, {d4,d5,d6,d7} \n\t"

	"cmp %[tmp], %[d] \n\t"
	"bhi "AP"quadloop \n\t"


	// Tail: pixel pairs, then at most one single pixel
	AP "loopout: \n\t"
	"cmp %[d], %[e] \n\t"
	"beq "AP"done \n\t"
	"sub %[tmp],%[e], %[d] \n\t"
	"cmp %[tmp],$0x04 \n\t"
	"beq "AP"singleloop2 \n\t"

	AP "dualloop2: \n\t"
	"sub %[tmp],%[e],$0x7 \n\t"
	AP "dualloop2int: \n\t"
	"sub %[s], #8 \n\t"
	"vldm %[s], {d0} \n\t"
	"vrev64.32 d1,d0 \n\t"
	"vstm %[d]!, {d1} \n\t"

	"cmp %[tmp], %[d] \n\t"
	"bhi "AP"dualloop2int \n\t"

	// Single ??
	"cmp %[e], %[d] \n\t"
	"beq "AP"done \n\t"

	AP "singleloop2: \n\t"
	"sub %[s], #4 \n\t"
	"vld1.32 d0[0], [%[s]] \n\t"
	"vst1.32 d0[0], [%[d]] \n\t"

	AP "done:\n\t"

	// Read-write operands (modified by the code above)
	: [s] "+&r" (s), [d] "+&r" (dst), [tmp] "+&r" (tmp)
	// Input
	: [e] "r" (e)
	// Clobbered
	: "q0", "q1", "q2", "q3", "cc", "memory"
   );
#undef AP

}
#endif
234
235
#ifdef BUILD_C
/*
 * Plain forward copy of len pixels from src to dst, lowest address first.
 * Nothing is copied when len is zero or negative.
 */
static void
evas_common_copy_pixels_c(DATA32 *src, DATA32 *dst, int len)
{
   int i;

   for (i = 0; i < len; i++)
     dst[i] = src[i];
}
#endif
245
#ifdef BUILD_MMX
/*
 * Forward copy of len pixels that moves 16 pixels at a time with
 * MOVE_16DWORDS_MMX and finishes the remainder one pixel at a time.
 *
 * With ALIGN_FIX, buffers whose offsets inside a 64 byte line differ, or
 * that are not 4 byte aligned, are copied with a plain pixel loop instead.
 * That loop is written out here rather than calling the BUILD_C routine:
 * the previous "#ifdef BUILD_C ... #endif" wrapped the closing brace of the
 * branch, which left the braces unbalanced when BUILD_C was not defined.
 */
static void
evas_common_copy_pixels_mmx(DATA32 *src, DATA32 *dst, int len)
{
   DATA32 *dst_end, *dst_end_pre;
#ifdef ALIGN_FIX
   intptr_t src_align;
   intptr_t dst_align;
   int head;

   src_align = (intptr_t)src & 0x3f; /* offset inside a 64 byte line */
   dst_align = (intptr_t)dst & 0x3f; /* offset inside a 64 byte line */

   if ((src_align != dst_align) ||
       ((src_align & 0x3) != 0))
     {
        /* differently aligned buffers: plain forward copy */
        dst_end = dst + len;
        while (dst < dst_end) *dst++ = *src++;
        return;
     }

   /*
    * Copy single pixels until both pointers sit on a 64 byte boundary.
    * That takes (64 - offset) / 4 pixels; counting the offset itself down,
    * as before, stopped at an unaligned address for most offsets.
    */
   head = (int)(((0x40 - src_align) & 0x3f) / (intptr_t)sizeof(DATA32));
   while ((head > 0) && (len > 0))
     {
        *dst++ = *src++;
        len--;
        head--;
     }
#endif /* ALIGN_FIX */

   dst_end = dst + len;
   dst_end_pre = dst + ((len / 16) * 16);

   /* whole 16 pixel chunks */
   while (dst < dst_end_pre)
     {
        MOVE_16DWORDS_MMX(src, dst);
        src += 16;
        dst += 16;
     }
   /* left-over pixels */
   while (dst < dst_end) *dst++ = *src++;
}
#endif
288
#ifdef BUILD_MMX
/*
 * Forward copy of len pixels selected when the CPU reports MMX2; 16 pixels
 * are moved per iteration and the remainder is copied one pixel at a time.
 * NOTE(review): the chunk copy uses MOVE_16DWORDS_MMX, not the _MMX2
 * variant used by the SSE routine - confirm that this is intended.
 *
 * With ALIGN_FIX, buffers whose offsets inside a 64 byte line differ, or
 * that are not 4 byte aligned, are copied with a plain pixel loop.  The
 * loop is written out here so the copy also happens when BUILD_C is not
 * defined; previously that configuration returned without copying.
 */
static void
evas_common_copy_pixels_mmx2(DATA32 *src, DATA32 *dst, int len)
{
   DATA32 *dst_end, *dst_end_pre;
#ifdef ALIGN_FIX
   intptr_t src_align;
   intptr_t dst_align;
   int head;

   src_align = (intptr_t)src & 0x3f; /* offset inside a 64 byte line */
   dst_align = (intptr_t)dst & 0x3f; /* offset inside a 64 byte line */

   if ((src_align != dst_align) ||
       ((src_align & 0x3) != 0))
     {
        /* differently aligned buffers: plain forward copy */
        dst_end = dst + len;
        while (dst < dst_end) *dst++ = *src++;
        return;
     }

   /*
    * Copy single pixels until both pointers sit on a 64 byte boundary.
    * That takes (64 - offset) / 4 pixels; counting the offset itself down,
    * as before, stopped at an unaligned address for most offsets.
    */
   head = (int)(((0x40 - src_align) & 0x3f) / (intptr_t)sizeof(DATA32));
   while ((head > 0) && (len > 0))
     {
        *dst++ = *src++;
        len--;
        head--;
     }
#endif /* ALIGN_FIX */

   dst_end = dst + len;
   dst_end_pre = dst + ((len / 16) * 16);

   /* whole 16 pixel chunks */
   while (dst < dst_end_pre)
     {
        MOVE_16DWORDS_MMX(src, dst);
        src += 16;
        dst += 16;
     }
   /* left-over pixels */
   while (dst < dst_end) *dst++ = *src++;
}
#endif
330
#ifdef BUILD_NEON
/*
 * NEON forward copy of len pixels from src to dst.
 *
 * The assembly first brings the destination to 32 byte alignment (one
 * single pixel, then pixel pairs), copies 64 bytes per iteration while at
 * least 64 bytes remain, and finishes with pixel pairs and at most one
 * single pixel.
 *
 * The assembly advances the source/destination registers and uses a scratch
 * register, so those are passed as read-write early-clobber operands
 * (input-only operands must not be modified by the asm).  The flag-setting
 * instructions (andS, cmp) are covered by the "cc" clobber.
 */
static void
evas_common_copy_pixels_neon(DATA32 *src, DATA32 *dst, int len)
{
   uint32_t *e, *tmp = (void *)37;

   /*
    * The single-pixel alignment step in the assembly copies without looking
    * at the remaining length, so an empty request must never reach it.
    */
   if (len <= 0) return;

   e = dst + len; /* one past the last destination pixel */
#define AP "evas_common_copy_pixels_neon_"
   asm volatile (
	".fpu neon \n\t"
	// Destination already 32 byte aligned?
	"andS %[tmp], %[d], $0x1f \n\t"
	"beq "AP"quadstart \n\t"

	// Destination at least 8 byte aligned?
	"andS %[tmp], %[d], $0x4 \n\t"
	"beq "AP"dualstart \n\t"

	// Only once: one pixel to reach 8 byte alignment
	AP"singleloop: \n\t"
	"vld1.32 d0[0], [%[s]]! \n\t"
	"vst1.32 d0[0], [%[d]]! \n\t"

	// Up to 3 times: pixel pairs until 32 byte aligned
	AP"dualstart: \n\t"
	"sub %[tmp], %[e], %[d] \n\t"
	"cmp %[tmp], #31 \n\t"
	"blt "AP"loopout \n\t"

	"andS %[tmp], %[d], $0x1f \n\t"
	"beq "AP"quadstart \n\t"

	AP"dualloop: \n\t"
	"vldm %[s]!, {d0} \n\t"
	"vstm %[d]!, {d0} \n\t"

	"andS %[tmp], %[d], $0x1f \n\t"
	"bne "AP"dualloop \n\t"


	// 64 bytes (16 pixels) per iteration
	AP"quadstart: \n\t"
	"sub %[tmp], %[e], %[d] \n\t"
	"cmp %[tmp], #64 \n\t"
	"blt "AP"loopout \n\t"

	"sub %[tmp],%[e],#63 \n\t"

	AP "quadloop: \n\t"
	"vldm %[s]!, {d0,d1,d2,d3} \n\t"
	"vldm %[s]!, {d4,d5,d6,d7} \n\t"
	"vstm %[d]!, {d0,d1,d2,d3} \n\t"
	"vstm %[d]!, {d4,d5,d6,d7} \n\t"

	"cmp %[tmp], %[d] \n\t"
	"bhi "AP"quadloop \n\t"


	// Tail: pixel pairs, then at most one single pixel
	AP "loopout: \n\t"
	"cmp %[d], %[e] \n\t"
	"beq "AP"done \n\t"
	"sub %[tmp],%[e], %[d] \n\t"
	"cmp %[tmp],$0x04 \n\t"
	"beq "AP"singleloop2 \n\t"

	AP "dualloop2: \n\t"
	"sub %[tmp],%[e],$0x7 \n\t"
	AP "dualloop2int: \n\t"
	"vldm %[s]!, {d0} \n\t"
	"vstm %[d]!, {d0} \n\t"

	"cmp %[tmp], %[d] \n\t"
	"bhi "AP"dualloop2int \n\t"

	// Single ??
	"cmp %[e], %[d] \n\t"
	"beq "AP"done \n\t"

	AP "singleloop2: \n\t"
	"vld1.32 d0[0], [%[s]] \n\t"
	"vst1.32 d0[0], [%[d]] \n\t"

	AP "done:\n\t"

	// Read-write operands (modified by the code above)
	: [s] "+&r" (src), [d] "+&r" (dst), [tmp] "+&r" (tmp)
	// Input
	: [e] "r" (e)
	// Clobbered
	: "q0", "q1", "q2", "q3", "cc", "memory"
   );
#undef AP

}
#endif /* BUILD_NEON */
422
#ifdef BUILD_SSE
/*
 * Forward copy of len pixels: whole 16 pixel chunks go through
 * MOVE_16DWORDS_MMX2, the remainder is copied one pixel at a time.
 * Nothing is copied when len is zero or negative.
 *
 * The chunk loop is driven by a count rather than by comparing against
 * "dst + len - 15", which pointed in front of the buffer for short lengths.
 */
static void
evas_common_copy_pixels_sse(DATA32 *src, DATA32 *dst, int len)
{
   DATA32 *src_ptr, *dst_ptr, *dst_end_ptr;
   int chunks;

   if (len <= 0) return;

   src_ptr = src;
   dst_ptr = dst;
   dst_end_ptr = dst + len;

   /* whole 16 pixel chunks */
   for (chunks = len / 16; chunks > 0; chunks--)
     {
        MOVE_16DWORDS_MMX2(src_ptr, dst_ptr);
        src_ptr += 16;
        dst_ptr += 16;
     }
   /* left-over pixels */
   while (dst_ptr < dst_end_ptr)
     {
        *dst_ptr = *src_ptr;
        src_ptr++;
        dst_ptr++;
     }
}
#endif
494
495/****************************************************************************/
496
#ifdef BUILD_C
/*
 * Copy len pixels from src to dst starting with the last pixel and working
 * down to the first, keeping the pixel order (dst[i] = src[i]).
 * Nothing is copied when len is zero or negative.
 *
 * Indexing is used instead of a "dst - 1" sentinel so that no pointer in
 * front of the buffers is ever formed.
 */
static void
evas_common_copy_pixels_rev_c(DATA32 *src, DATA32 *dst, int len)
{
   while (len > 0)
     {
        len--;
        dst[len] = src[len];
     }
}
#endif
510
#ifdef BUILD_MMX
/*
 * Copy len pixels from src to dst back to front, keeping the pixel order:
 * whole 16 pixel chunks are moved with MOVE_16DWORDS_MMX starting at the end
 * of the buffers, then the pixels left at the start are copied one by one,
 * highest address first.  Nothing is copied when len is zero or negative.
 *
 * Both loops are driven by counts, so no pointer in front of the buffers is
 * formed and lengths below 16 need no separate branch.
 */
static void
evas_common_copy_pixels_rev_mmx(DATA32 *src, DATA32 *dst, int len)
{
   int chunks, rest;

   if (len <= 0) return;

   chunks = len / 16;
   rest = len - (chunks * 16);

   /* start one past the last pixel and walk downwards */
   src += len;
   dst += len;

   while (chunks > 0)
     {
        src -= 16;
        dst -= 16;
        MOVE_16DWORDS_MMX(src, dst);
        chunks--;
     }
   while (rest > 0)
     {
        src--;
        dst--;
        *dst = *src;
        rest--;
     }
}
#endif
545
#ifdef BUILD_SSE
/*
 * Copy len pixels from src to dst back to front, keeping the pixel order:
 * whole 16 pixel chunks are moved starting at the end of the buffers (with
 * a prefetch of the chunk below), then the pixels left at the start are
 * copied one by one, highest address first.  Nothing is copied when len is
 * zero or negative.
 *
 * The chunk move is MOVE_16DWORDS_MMX: the loop steps 16 pixels per
 * iteration, so the MOVE_10DWORDS_MMX used before did not match the stride.
 * NOTE(review): this assumes MOVE_10DWORDS_MMX moves fewer than 16 pixels,
 * as its name suggests - confirm against evas_mmx.h.
 *
 * Both loops are driven by counts, so "src + len - 16" is no longer computed
 * for lengths below 16.
 */
static void
evas_common_copy_pixels_rev_sse(DATA32 *src, DATA32 *dst, int len)
{
   DATA32 *src_ptr, *dst_ptr;
   int chunks, rest;

   if (len <= 0) return;

   chunks = len / 16;
   rest = len - (chunks * 16);

   /* start one past the last pixel and walk downwards */
   src_ptr = src + len;
   dst_ptr = dst + len;

   while (chunks > 0)
     {
        src_ptr -= 16;
        dst_ptr -= 16;
        /* only hint at the next chunk down when there is one */
        if (chunks > 1) prefetch(&src_ptr[-16]);
        MOVE_16DWORDS_MMX(src_ptr, dst_ptr);
        chunks--;
     }
   while (rest > 0)
     {
        src_ptr--;
        dst_ptr--;
        *dst_ptr = *src_ptr;
        rest--;
     }
}
#endif
588
589
590Gfx_Func_Copy
591evas_common_draw_func_copy_get(int pixels, int reverse)
592{
593 if (reverse == -1)
594 return evas_common_copy_rev_pixels_c;
595 if (reverse)
596 {
597#ifdef BUILD_SSE
598 if (evas_common_cpu_has_feature(CPU_FEATURE_SSE) && (pixels > 256 * 256))
599 return evas_common_copy_pixels_rev_sse;
600#endif
601#ifdef BUILD_MMX
602# ifdef BUILD_SSE
603 else
604# endif
605 if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
606 return evas_common_copy_pixels_rev_mmx;
607#endif
608#ifdef BUILD_NEON
609# if defined(BUILD_SSE) || defined(BUILD_MMX)
610 else
611# endif
612 if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
613 return evas_common_copy_pixels_rev_neon;
614#endif
615
616#ifdef BUILD_C
617# if defined(BUILD_MMX) || defined(BUILD_NEON)
618 else
619# endif
620 return evas_common_copy_pixels_rev_c;
621#endif
622 }
623 else
624 {
625#if 1
626
627# ifdef BUILD_MMX
628# ifdef BUILD_C
629 if (evas_common_cpu_has_feature(CPU_FEATURE_MMX2))
630# endif
631 return evas_common_copy_pixels_mmx2;
632# ifdef BUILD_SSE
633 else
634# endif
635#endif
636#ifdef BUILD_SSE
637# ifdef BUILD_C
638 if (evas_common_cpu_has_feature(CPU_FEATURE_SSE) && (pixels > 64 * 64))
639# endif
640 return evas_common_copy_pixels_sse;
641# ifdef BUILD_MMX
642 else
643# endif
644#endif
645# ifdef BUILD_NEON
646# ifdef BUILD_C
647 if (evas_common_cpu_has_feature(CPU_FEATURE_NEON))
648# endif
649 return evas_common_copy_pixels_neon;
650# ifdef BUILD_SSE
651 else
652# endif
653#endif
654#ifdef BUILD_MMX
655# ifdef BUILD_C
656 if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
657# endif
658 return evas_common_copy_pixels_mmx;
659# ifdef BUILD_C
660 else
661# endif
662#endif
663#ifdef BUILD_C
664 return evas_common_copy_pixels_c;
665#endif
666
667#else
668
669# ifdef BUILD_SSE
670 if (evas_common_cpu_has_feature(CPU_FEATURE_SSE) && (pixels > 256 * 256))
671 return evas_common_copy_pixels_sse;
672# ifdef BUILD_MMX
673 else
674# endif
675#endif
676#ifdef BUILD_MMX
677# ifdef BUILD_C
678 if (evas_common_cpu_has_feature(CPU_FEATURE_MMX2))
679# endif
680 return evas_common_copy_pixels_mmx2;
681# ifdef BUILD_C
682 else if (evas_common_cpu_has_feature(CPU_FEATURE_MMX))
683# endif
684 return evas_common_copy_pixels_mmx;
685# ifdef BUILD_C
686 else
687# endif
688#endif
689#ifdef BUILD_C
690 return evas_common_copy_pixels_c;
691#endif
692
693#endif
694 }
695#ifdef BUILD_C
696 return evas_common_copy_pixels_c;
697#else
698 return NULL;
699#endif
700}