aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_color_neon.c
diff options
context:
space:
mode:
author: David Walter Seikel, 2012-01-04 18:41:13 +1000
committer: David Walter Seikel, 2012-01-04 18:41:13 +1000
commit: dd7595a3475407a7fa96a97393bae8c5220e8762 (patch)
tree: e341e911d7eb911a51684a7412ef7f7c7605d28e /libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_color_neon.c
parent: Add the skeleton. (diff)
downloadSledjHamr-dd7595a3475407a7fa96a97393bae8c5220e8762.zip
SledjHamr-dd7595a3475407a7fa96a97393bae8c5220e8762.tar.gz
SledjHamr-dd7595a3475407a7fa96a97393bae8c5220e8762.tar.bz2
SledjHamr-dd7595a3475407a7fa96a97393bae8c5220e8762.tar.xz
Add the base Enlightenment Foundation Libraries - eina, eet, evas, ecore, embryo, and edje.
Note that embryo won't be used, but I'm not sure yet if you can build edje without it.
Diffstat (limited to 'libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_color_neon.c')
-rw-r--r--libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_color_neon.c212
1 files changed, 212 insertions, 0 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_color_neon.c b/libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_color_neon.c
new file mode 100644
index 0000000..96310cd
--- /dev/null
+++ b/libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_color_neon.c
@@ -0,0 +1,212 @@
1/* copy color --> dst */
2
3#ifdef BUILD_NEON
4static void
5_op_copy_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
6#define AP "COPY_C_DP_"
7 uint32_t *e = d + l,*tmp;
8 asm volatile (
9 ".fpu neon \n\t"
10
11 "vdup.i32 q0, %[c] \n\t"
12
13 // Can we do 32 byte?
14 "andS %[tmp], %[d], $0x1f \n\t"
15 "beq "AP"quadstart \n\t"
16
17 // Can we do at least 16 byte?
18 "andS %[tmp], %[d], $0x4 \n\t"
19 "beq "AP"dualstart \n\t"
20
21 // Only once
22 AP"singleloop: \n\t"
23 "vst1.32 d0[0], [%[d]] \n\t"
24 "add %[d], #4 \n\t"
25
26 // Up to 3 times
27 AP"dualstart: \n\t"
28 "sub %[tmp], %[e], %[d] \n\t"
29 "cmp %[tmp], #32 \n\t"
30 "blt "AP"loopout \n\t"
31
32 AP"dualloop: \n\t"
33 "vstr.32 d0, [%[d]] \n\t"
34
35 "add %[d], #8 \n\t"
36 "andS %[tmp], %[d], $0x1f \n\t"
37 "bne "AP"dualloop \n\t"
38
39
40 AP"quadstart: \n\t"
41 "sub %[tmp], %[e], %[d] \n\t"
42 "cmp %[tmp], #32 \n\t"
43 "blt "AP"loopout \n\t"
44
45 "vmov q1, q0 \n\t"
46 "sub %[tmp],%[e],#31 \n\t"
47
48 AP "quadloop: \n\t"
49 "vstm %[d]!, {d0,d1,d2,d3} \n\t"
50
51 "cmp %[tmp], %[d] \n\t"
52 "bhi "AP"quadloop \n\t"
53
54
55 AP "loopout: \n\t"
56 "cmp %[d], %[e] \n\t"
57 "beq "AP"done \n\t"
58 "sub %[tmp],%[e], %[d] \n\t"
59 "cmp %[tmp],$0x04 \n\t"
60 "beq "AP"singleloop2 \n\t"
61
62 AP "dualloop2: \n\t"
63 "sub %[tmp],%[e],#7 \n\t"
64 AP "dualloop2int: \n\t"
65 "vstr.64 d0, [%[d]] \n\t"
66
67 "add %[d], #8 \n\t"
68 "cmp %[tmp], %[d] \n\t"
69 "bhi "AP"dualloop2int \n\t"
70
71 // Single ??
72 "cmp %[e], %[d] \n\t"
73 "beq "AP"done \n\t"
74
75 AP "singleloop2: \n\t"
76 "vst1.32 d0[0], [%[d]] \n\t"
77
78 AP "done:\n\t"
79
80 : // No output regs
81 // Input
82 : [c] "r" (c), [e] "r" (e), [d] "r" (d),[tmp] "r" (tmp)
83 // Clobbered
84 : "q0","q1","memory"
85
86
87 );
88}
89
90#define _op_copy_cn_dp_neon _op_copy_c_dp_neon
91#define _op_copy_can_dp_neon _op_copy_c_dp_neon
92#define _op_copy_caa_dp_neon _op_copy_c_dp_neon
93
94#define _op_copy_cn_dpan_neon _op_copy_c_dp_neon
95#define _op_copy_c_dpan_neon _op_copy_c_dp_neon
96#define _op_copy_can_dpan_neon _op_copy_c_dp_neon
97#define _op_copy_caa_dpan_neon _op_copy_c_dp_neon
98
99static void
100init_copy_color_span_funcs_neon(void)
101{
102 op_copy_span_funcs[SP_N][SM_N][SC_N][DP][CPU_NEON] = _op_copy_cn_dp_neon;
103 op_copy_span_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_copy_c_dp_neon;
104 op_copy_span_funcs[SP_N][SM_N][SC_AN][DP][CPU_NEON] = _op_copy_can_dp_neon;
105 op_copy_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_copy_caa_dp_neon;
106
107 op_copy_span_funcs[SP_N][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_cn_dpan_neon;
108 op_copy_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_copy_c_dpan_neon;
109 op_copy_span_funcs[SP_N][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_copy_can_dpan_neon;
110 op_copy_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_copy_caa_dpan_neon;
111}
112#endif
113
114#ifdef BUILD_NEON
115static void
116_op_copy_pt_c_dp_neon(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) {
117 *d = c;
118}
119
120#define _op_copy_pt_cn_dp_neon _op_copy_pt_c_dp_neon
121#define _op_copy_pt_can_dp_neon _op_copy_pt_c_dp_neon
122#define _op_copy_pt_caa_dp_neon _op_copy_pt_c_dp_neon
123
124#define _op_copy_pt_cn_dpan_neon _op_copy_pt_c_dp_neon
125#define _op_copy_pt_c_dpan_neon _op_copy_pt_c_dp_neon
126#define _op_copy_pt_can_dpan_neon _op_copy_pt_c_dp_neon
127#define _op_copy_pt_caa_dpan_neon _op_copy_pt_c_dp_neon
128
129static void
130init_copy_color_pt_funcs_neon(void)
131{
132 op_copy_pt_funcs[SP_N][SM_N][SC_N][DP][CPU_NEON] = _op_copy_pt_cn_dp_neon;
133 op_copy_pt_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_copy_pt_c_dp_neon;
134 op_copy_pt_funcs[SP_N][SM_N][SC_AN][DP][CPU_NEON] = _op_copy_pt_can_dp_neon;
135 op_copy_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_copy_pt_caa_dp_neon;
136
137 op_copy_pt_funcs[SP_N][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_pt_cn_dpan_neon;
138 op_copy_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_copy_pt_c_dpan_neon;
139 op_copy_pt_funcs[SP_N][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_copy_pt_can_dpan_neon;
140 op_copy_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_copy_pt_caa_dpan_neon;
141}
142#endif
143
144/*-----*/
145
146/* copy_rel color --> dst */
147
148#ifdef BUILD_NEON
149static void
150_op_copy_rel_c_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
151 // FIXME: neon-it
152 DATA32 *e = d + l;
153 for (; d < e; d++) {
154 *d = MUL_SYM(*d >> 24, c);
155 }
156}
157
158#define _op_copy_rel_cn_dp_neon _op_copy_rel_c_dp_neon
159#define _op_copy_rel_can_dp_neon _op_copy_rel_c_dp_neon
160#define _op_copy_rel_caa_dp_neon _op_copy_rel_c_dp_neon
161
162#define _op_copy_rel_cn_dpan_neon _op_copy_cn_dpan_neon
163#define _op_copy_rel_c_dpan_neon _op_copy_c_dpan_neon
164#define _op_copy_rel_can_dpan_neon _op_copy_can_dpan_neon
165#define _op_copy_rel_caa_dpan_neon _op_copy_caa_dpan_neon
166
167static void
168init_copy_rel_color_span_funcs_neon(void)
169{
170 op_copy_rel_span_funcs[SP_N][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_cn_dp_neon;
171 op_copy_rel_span_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_copy_rel_c_dp_neon;
172 op_copy_rel_span_funcs[SP_N][SM_N][SC_AN][DP][CPU_NEON] = _op_copy_rel_can_dp_neon;
173 op_copy_rel_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_copy_rel_caa_dp_neon;
174
175 op_copy_rel_span_funcs[SP_N][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_cn_dpan_neon;
176 op_copy_rel_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_copy_rel_c_dpan_neon;
177 op_copy_rel_span_funcs[SP_N][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_copy_rel_can_dpan_neon;
178 op_copy_rel_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_copy_rel_caa_dpan_neon;
179}
180#endif
181
182#ifdef BUILD_NEON
183static void
184_op_copy_rel_pt_c_dp_neon(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) {
185 s = 1 + (*d >> 24);
186 *d = MUL_256(s, c);
187}
188
189
190#define _op_copy_rel_pt_cn_dp_neon _op_copy_rel_pt_c_dp_neon
191#define _op_copy_rel_pt_can_dp_neon _op_copy_rel_pt_c_dp_neon
192#define _op_copy_rel_pt_caa_dp_neon _op_copy_rel_pt_c_dp_neon
193
194#define _op_copy_rel_pt_cn_dpan_neon _op_copy_pt_cn_dpan_neon
195#define _op_copy_rel_pt_c_dpan_neon _op_copy_pt_c_dpan_neon
196#define _op_copy_rel_pt_can_dpan_neon _op_copy_pt_can_dpan_neon
197#define _op_copy_rel_pt_caa_dpan_neon _op_copy_pt_caa_dpan_neon
198
199static void
200init_copy_rel_color_pt_funcs_neon(void)
201{
202 op_copy_rel_pt_funcs[SP_N][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_pt_cn_dp_neon;
203 op_copy_rel_pt_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_copy_rel_pt_c_dp_neon;
204 op_copy_rel_pt_funcs[SP_N][SM_N][SC_AN][DP][CPU_NEON] = _op_copy_rel_pt_can_dp_neon;
205 op_copy_rel_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_copy_rel_pt_caa_dp_neon;
206
207 op_copy_rel_pt_funcs[SP_N][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_pt_cn_dpan_neon;
208 op_copy_rel_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_copy_rel_pt_c_dpan_neon;
209 op_copy_rel_pt_funcs[SP_N][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_copy_rel_pt_can_dpan_neon;
210 op_copy_rel_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_copy_rel_pt_caa_dpan_neon;
211}
212#endif