about summary refs log tree commit diff stats homepage
path: root/libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_pixel_neon.c
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_pixel_neon.c')
-rw-r--r-- libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_pixel_neon.c 158
1 files changed, 158 insertions, 0 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_pixel_neon.c b/libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_pixel_neon.c
new file mode 100644
index 0000000..5b8bd60
--- /dev/null
+++ b/libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_pixel_neon.c
@@ -0,0 +1,158 @@
1/* copy pixel --> dst */
2
3#ifdef BUILD_NEON
4static void
5_op_copy_p_dp_neon(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) {
6//#define USENEON 1
7#ifndef USENEON
8 memcpy(d, s, l * sizeof(DATA32));
9 return;
10#else
11 DATA32 *e;
12 e = d + l - 23;
13 if (e > d)
14 {
15 int dl;
16
17 asm volatile
18 (".fpu neon \n\t"
19 "_op_copy_p_dp_neon_asmloop: \n\t"
20 "pld [%[s], #192] \n\t" // preload 256 bytes ahead
21 "pld [%[s], #320] \n\t" // preload 320 bytes ahead
22 "vld1.32 {d0-d3}, [%[s]]! \n\t" // load 256bits (32 bytes 8 pix), 32bit aligned
23 "vld1.32 {d4-d7} , [%[s]]! \n\t" // load 256bits (32 bytes 8 pix), 32bit aligned
24 "vld1.32 {d8-d11}, [%[s]]! \n\t" // load 256bits (32 bytes 8 pix), 32bit aligned
25 "vst1.32 {d0-d3}, [%[d]]! \n\t" // store 256bits (32 bytes 8 pix), 32bit aligned
26 "vst1.32 {d4-d7}, [%[d]]! \n\t" // store 256bits (32 bytes 8 pix), 32bit aligned
27 "vst1.32 {d8-d11}, [%[d]]! \n\t" // store 256bits (32 bytes 8 pix), 32bit aligned
28 "cmp %[e], %[d] \n\t" // compare current and end ptr
29 "bgt _op_copy_p_dp_neon_asmloop \n\t"
30 : /*out*/
31 : /*in */ [s] "r" (s), [e] "r" (e), [d] "r" (d)
32 : /*clobber*/
33 "q0", "q1", "q2","q3", "q4", "q5", "q6",
34 "d0", "d1", "d2", "d3",
35 "d4", "d5", "d6", "d7",
36 "d8", "d9", "d10", "d11",
37 "memory" // clobbered
38 );
39 dl = l % 24; // dl is how many pixels at end that is not a multiple of 24
40 l = l - dl; // jump to there at the end of the run?
41 s = s + l;
42 d = d + l;
43 }
44 e += 23;
45 for (;d < e; d++, s++) *d = *s;
46#endif
47}
48
/* Every other source/destination variant of the span copy expands to
 * _op_copy_p_dp_neon. */
#define _op_copy_pan_dp_neon _op_copy_p_dp_neon
#define _op_copy_pas_dp_neon _op_copy_p_dp_neon

/* DP_AN-slot variants chain through the DP aliases above. */
#define _op_copy_p_dpan_neon _op_copy_p_dp_neon
#define _op_copy_pan_dpan_neon _op_copy_pan_dp_neon
#define _op_copy_pas_dpan_neon _op_copy_pas_dp_neon
55
56static void
57init_copy_pixel_span_funcs_neon(void)
58{
59 op_copy_span_funcs[SP][SM_N][SC_N][DP][CPU_NEON] = _op_copy_p_dp_neon;
60 op_copy_span_funcs[SP_AN][SM_N][SC_N][DP][CPU_NEON] = _op_copy_pan_dp_neon;
61 op_copy_span_funcs[SP_AS][SM_N][SC_N][DP][CPU_NEON] = _op_copy_pas_dp_neon;
62
63 op_copy_span_funcs[SP][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_p_dpan_neon;
64 op_copy_span_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_pan_dpan_neon;
65 op_copy_span_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_pas_dpan_neon;
66}
67#endif
68
69#ifdef BUILD_NEON
70static void
71_op_copy_pt_p_dp_neon(DATA32 s, DATA8 m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d) {
72 *d = s;
73}
74
/* Every other source/destination variant of the single-pixel copy expands to
 * _op_copy_pt_p_dp_neon. */
#define _op_copy_pt_pan_dp_neon _op_copy_pt_p_dp_neon
#define _op_copy_pt_pas_dp_neon _op_copy_pt_p_dp_neon

/* DP_AN-slot variants chain through the DP aliases above. */
#define _op_copy_pt_p_dpan_neon _op_copy_pt_p_dp_neon
#define _op_copy_pt_pan_dpan_neon _op_copy_pt_pan_dp_neon
#define _op_copy_pt_pas_dpan_neon _op_copy_pt_pas_dp_neon
81
82static void
83init_copy_pixel_pt_funcs_neon(void)
84{
85 op_copy_pt_funcs[SP][SM_N][SC_N][DP][CPU_NEON] = _op_copy_pt_p_dp_neon;
86 op_copy_pt_funcs[SP_AN][SM_N][SC_N][DP][CPU_NEON] = _op_copy_pt_pan_dp_neon;
87 op_copy_pt_funcs[SP_AS][SM_N][SC_N][DP][CPU_NEON] = _op_copy_pt_pas_dp_neon;
88
89 op_copy_pt_funcs[SP][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_pt_p_dpan_neon;
90 op_copy_pt_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_pt_pan_dpan_neon;
91 op_copy_pt_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_pt_pas_dpan_neon;
92}
93#endif
94
95/*-----*/
96
97/* copy_rel pixel --> dst */
98
99#ifdef BUILD_NEON
100static void
101_op_copy_rel_p_dp_neon(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) {
102 // FIXME: neon-it
103 DATA32 *e;
104 UNROLL8_PLD_WHILE(d, l, e,
105 {
106 *d = MUL_SYM(*d >> 24, c);
107 d++;
108 });
109}
110
111
/* For the DP slot, the other source kinds share _op_copy_rel_p_dp_neon. */
#define _op_copy_rel_pas_dp_neon _op_copy_rel_p_dp_neon
#define _op_copy_rel_pan_dp_neon _op_copy_rel_p_dp_neon

/* For the DP_AN slot, copy_rel expands to the plain (non-rel) span copy
 * aliases.  NOTE(review): presumably because such a destination carries no
 * alpha to scale by -- confirm. */
#define _op_copy_rel_p_dpan_neon _op_copy_p_dpan_neon
#define _op_copy_rel_pan_dpan_neon _op_copy_pan_dpan_neon
#define _op_copy_rel_pas_dpan_neon _op_copy_pas_dpan_neon
118
119static void
120init_copy_rel_pixel_span_funcs_neon(void)
121{
122 op_copy_rel_span_funcs[SP][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_p_dp_neon;
123 op_copy_rel_span_funcs[SP_AN][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_pan_dp_neon;
124 op_copy_rel_span_funcs[SP_AS][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_pas_dp_neon;
125
126 op_copy_rel_span_funcs[SP][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_p_dpan_neon;
127 op_copy_rel_span_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_pan_dpan_neon;
128 op_copy_rel_span_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_pas_dpan_neon;
129}
130#endif
131
132#ifdef BUILD_NEON
133static void
134_op_copy_rel_pt_p_dp_neon(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
135 s = 1 + (*d >> 24);
136 *d = MUL_256(s, c);
137}
138
139
/* For the DP slot, the other source kinds share _op_copy_rel_pt_p_dp_neon. */
#define _op_copy_rel_pt_pan_dp_neon _op_copy_rel_pt_p_dp_neon
#define _op_copy_rel_pt_pas_dp_neon _op_copy_rel_pt_p_dp_neon

/* For the DP_AN slot, copy_rel expands to the plain (non-rel) single-pixel
 * copy aliases.  NOTE(review): presumably because such a destination carries
 * no alpha to scale by -- confirm. */
#define _op_copy_rel_pt_p_dpan_neon _op_copy_pt_p_dpan_neon
#define _op_copy_rel_pt_pan_dpan_neon _op_copy_pt_pan_dpan_neon
#define _op_copy_rel_pt_pas_dpan_neon _op_copy_pt_pas_dpan_neon
146
147static void
148init_copy_rel_pixel_pt_funcs_neon(void)
149{
150 op_copy_rel_pt_funcs[SP][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_pt_p_dp_neon;
151 op_copy_rel_pt_funcs[SP_AN][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_pt_pan_dp_neon;
152 op_copy_rel_pt_funcs[SP_AS][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_pt_pas_dp_neon;
153
154 op_copy_rel_pt_funcs[SP][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_pt_p_dpan_neon;
155 op_copy_rel_pt_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_pt_pan_dpan_neon;
156 op_copy_rel_pt_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_pt_pas_dpan_neon;
157}
158#endif