aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c')
-rw-r--r--libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c223
1 files changed, 223 insertions, 0 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c
new file mode 100644
index 0000000..53b9991
--- /dev/null
+++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c
@@ -0,0 +1,223 @@
1/* blend color --> dst */
2
3#ifdef BUILD_NEON
4static void
5_op_blend_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
6 DATA32 *e, *tmp = 0;
7#define AP "B_C_DP"
8 asm volatile (
9 ".fpu neon \n\t"
10 "vdup.u32 q6, %[c] \n\t"
11 "vmov.i8 q5, #1 \n\t"
12 "vmvn.u8 q7,q6 \n\t"
13 "vshr.u32 q7, q7, $0x18 \n\t"
14 "vmul.u32 q7,q5, q7 \n\t"
15 "bic %[e], #3 \n\t"
16 "bic %[d], #3 \n\t"
17
18 AP "loopchoose: \n\t"
19 // If aligned already - straight to quads
20 "andS %[tmp], %[d],$0x1f \n\t"
21 "beq "AP"quadloops \n\t"
22
23 "andS %[tmp], %[d],$0x4 \n\t"
24 "beq "AP"dualloop \n\t"
25
26 // Only ever executes once, fall through to dual
27 AP "singleloop: \n\t"
28 // Use 'tmp' not 'd'
29 "vld1.32 d0[0], [%[d]] \n\t"
30 // Only touch d1
31 "vmull.u8 q0, d0, d14 \n\t"
32 "vqrshrn.u16 d0, q0, #8 \n\t"
33 "vadd.u8 d0, d12, d0 \n\t"
34 "vst1.32 d0[0], [%[d]] \n\t"
35
36 "add %[d], #4 \n\t"
37
38 // Can we go the fast path?
39 "andS %[tmp], %[d],$0x1f \n\t"
40 "beq "AP"quadloops \n\t"
41
42 AP "dualloop: \n\t"
43 "sub %[tmp], %[e], %[d] \n\t"
44 "cmp %[tmp], #32 \n\t"
45 "blt "AP"loopout \n\t"
46
47
48 AP "dualloopint: \n\t"
49 "vldr.32 d0, [%[d]] \n\t"
50 "vmull.u8 q1, d0, d14 \n\t"
51 "vqrshrn.u16 d0, q1, #8 \n\t"
52 "vqadd.u8 d0, d0, d12 \n\t"
53
54 "vstm %[d]!, {d0} \n\t"
55
56 "ands %[tmp], %[d], $0x1f \n\t"
57 "bne "AP"dualloopint \n\t"
58
59 AP "quadloops: \n\t"
60 "sub %[tmp], %[e], %[d] \n\t"
61 "cmp %[tmp], #32 \n\t"
62 "blt "AP"loopout \n\t"
63
64 "sub %[tmp],%[e],#31 \n\t"
65
66 AP "quadloopint:\n\t"
67 "vldm %[d], {d0,d1,d2,d3} \n\t"
68
69 "vmull.u8 q2, d0, d14 \n\t"
70 "vmull.u8 q3, d1, d15 \n\t"
71 "vmull.u8 q4, d2, d14 \n\t"
72 "vmull.u8 q5, d3, d15 \n\t"
73
74 "vqrshrn.u16 d0, q2, #8 \n\t"
75 "vqrshrn.u16 d1, q3, #8 \n\t"
76 "vqrshrn.u16 d2, q4, #8 \n\t"
77 "vqrshrn.u16 d3, q5, #8 \n\t"
78
79 "vqadd.u8 q0, q6, q0 \n\t"
80 "vqadd.u8 q1, q6, q1 \n\t"
81
82 "vstm %[d]!, {d0,d1,d2,d3} \n\t"
83
84 "cmp %[tmp], %[d]\n\t"
85 "bhi "AP"quadloopint\n\t"
86
87 AP "loopout: \n\t"
88 "cmp %[d], %[e]\n\t"
89 "beq "AP"done\n\t"
90 "sub %[tmp],%[e], %[d] \n\t"
91 "cmp %[tmp],#8 \n\t"
92 "blt "AP"singleloop2 \n\t"
93
94 AP "dualloop2: \n\t"
95 "sub %[tmp],%[e],$0x7 \n\t"
96 AP "dualloop2int: \n\t"
97 "vldr.64 d0, [%[d]] \n\t"
98 "vmull.u8 q1, d0, d14 \n\t"
99 "vqrshrn.u16 d0, q1, #8 \n\t"
100 "vqadd.u8 d0, d0, d12 \n\t"
101
102 "vstr.64 d0, [%[d]] \n\t"
103
104 "add %[d], #8 \n\t"
105 "cmp %[tmp], %[d] \n\t"
106 "bhi "AP"dualloop2int \n\t"
107
108 // Single ??
109 "cmp %[e], %[d] \n\t"
110 "beq "AP"done \n\t"
111
112 AP "singleloop2: \n\t"
113 "vld1.32 d0[0], [%[d]] \n\t"
114 "vmull.u8 q1, d0, d14 \n\t"
115 "vqrshrn.u16 d0, q1, #8 \n\t"
116 "vqadd.u8 d0, d0, d12 \n\t"
117
118 "vst1.32 d0[0], [%[d]] \n\t"
119
120 AP "done:\n\t"
121
122 : // output regs
123 // Input
124 : [e] "r" (e = d + l), [d] "r" (d), [c] "r" (c), [tmp] "r" (tmp)
125 : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","memory" // clobbered
126
127 );
128#undef AP
129
130}
131
132#define _op_blend_caa_dp_neon _op_blend_c_dp_neon
133
134#define _op_blend_c_dpan_neon _op_blend_c_dp_neon
135#define _op_blend_caa_dpan_neon _op_blend_c_dpan_neon
136
137static void
138init_blend_color_span_funcs_neon(void)
139{
140 op_blend_span_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_blend_c_dp_neon;
141 op_blend_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_caa_dp_neon;
142
143 op_blend_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_c_dpan_neon;
144 op_blend_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_caa_dpan_neon;
145}
146#endif
147
148#ifdef BUILD_NEON
149static void
150_op_blend_pt_c_dp_neon(DATA32 s __UNUSED__, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
151 s = 256 - (c >> 24);
152 *d = c + MUL_256(s, *d);
153}
154
155#define _op_blend_pt_caa_dp_neon _op_blend_pt_c_dp_neon
156
157#define _op_blend_pt_c_dpan_neon _op_blend_pt_c_dp_neon
158#define _op_blend_pt_caa_dpan_neon _op_blend_pt_c_dpan_neon
159
160static void
161init_blend_color_pt_funcs_neon(void)
162{
163 op_blend_pt_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_blend_pt_c_dp_neon;
164 op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pt_caa_dp_neon;
165
166 op_blend_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pt_c_dpan_neon;
167 op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pt_caa_dpan_neon;
168}
169#endif
170/*-----*/
171
172/* blend_rel color -> dst */
173
174#ifdef BUILD_NEON
175static void
176_op_blend_rel_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
177 DATA32 *e;
178 int alpha = 256 - (c >> 24);
179 UNROLL8_PLD_WHILE(d, l, e,
180 {
181 *d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d);
182 d++;
183 });
184}
185
186#define _op_blend_rel_caa_dp_neon _op_blend_rel_c_dp_neon
187
188#define _op_blend_rel_c_dpan_neon _op_blend_c_dpan_neon
189#define _op_blend_rel_caa_dpan_neon _op_blend_caa_dpan_neon
190
191static void
192init_blend_rel_color_span_funcs_neon(void)
193{
194 op_blend_rel_span_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_blend_rel_c_dp_neon;
195 op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_rel_caa_dp_neon;
196
197 op_blend_rel_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_c_dpan_neon;
198 op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_caa_dpan_neon;
199}
200#endif
201
202#ifdef BUILD_NEON
203static void
204_op_blend_rel_pt_c_dp_neon(DATA32 s __UNUSED__, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
205 s = *d >> 24;
206 *d = MUL_SYM(s, c) + MUL_256(256 - (c >> 24), *d);
207}
208
209#define _op_blend_rel_pt_caa_dp_neon _op_blend_rel_pt_c_dp_neon
210
211#define _op_blend_rel_pt_c_dpan_neon _op_blend_pt_c_dpan_neon
212#define _op_blend_rel_pt_caa_dpan_neon _op_blend_pt_caa_dpan_neon
213
214static void
215init_blend_rel_color_pt_funcs_neon(void)
216{
217 op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_blend_rel_pt_c_dp_neon;
218 op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_rel_pt_caa_dp_neon;
219
220 op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pt_c_dpan_neon;
221 op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pt_caa_dpan_neon;
222}
223#endif