diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c | 223 |
1 files changed, 0 insertions, 223 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c deleted file mode 100644 index 53b9991..0000000 --- a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c +++ /dev/null | |||
@@ -1,223 +0,0 @@ | |||
1 | /* blend color --> dst */ | ||
2 | |||
3 | #ifdef BUILD_NEON | ||
4 | static void | ||
5 | _op_blend_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
6 | DATA32 *e, *tmp = 0; | ||
7 | #define AP "B_C_DP" | ||
8 | asm volatile ( | ||
9 | ".fpu neon \n\t" | ||
10 | "vdup.u32 q6, %[c] \n\t" | ||
11 | "vmov.i8 q5, #1 \n\t" | ||
12 | "vmvn.u8 q7,q6 \n\t" | ||
13 | "vshr.u32 q7, q7, $0x18 \n\t" | ||
14 | "vmul.u32 q7,q5, q7 \n\t" | ||
15 | "bic %[e], #3 \n\t" | ||
16 | "bic %[d], #3 \n\t" | ||
17 | |||
18 | AP "loopchoose: \n\t" | ||
19 | // If aligned already - straight to quads | ||
20 | "andS %[tmp], %[d],$0x1f \n\t" | ||
21 | "beq "AP"quadloops \n\t" | ||
22 | |||
23 | "andS %[tmp], %[d],$0x4 \n\t" | ||
24 | "beq "AP"dualloop \n\t" | ||
25 | |||
26 | // Only ever executes once, fall through to dual | ||
27 | AP "singleloop: \n\t" | ||
28 | // Use 'tmp' not 'd' | ||
29 | "vld1.32 d0[0], [%[d]] \n\t" | ||
30 | // Only touch d1 | ||
31 | "vmull.u8 q0, d0, d14 \n\t" | ||
32 | "vqrshrn.u16 d0, q0, #8 \n\t" | ||
33 | "vadd.u8 d0, d12, d0 \n\t" | ||
34 | "vst1.32 d0[0], [%[d]] \n\t" | ||
35 | |||
36 | "add %[d], #4 \n\t" | ||
37 | |||
38 | // Can we go the fast path? | ||
39 | "andS %[tmp], %[d],$0x1f \n\t" | ||
40 | "beq "AP"quadloops \n\t" | ||
41 | |||
42 | AP "dualloop: \n\t" | ||
43 | "sub %[tmp], %[e], %[d] \n\t" | ||
44 | "cmp %[tmp], #32 \n\t" | ||
45 | "blt "AP"loopout \n\t" | ||
46 | |||
47 | |||
48 | AP "dualloopint: \n\t" | ||
49 | "vldr.32 d0, [%[d]] \n\t" | ||
50 | "vmull.u8 q1, d0, d14 \n\t" | ||
51 | "vqrshrn.u16 d0, q1, #8 \n\t" | ||
52 | "vqadd.u8 d0, d0, d12 \n\t" | ||
53 | |||
54 | "vstm %[d]!, {d0} \n\t" | ||
55 | |||
56 | "ands %[tmp], %[d], $0x1f \n\t" | ||
57 | "bne "AP"dualloopint \n\t" | ||
58 | |||
59 | AP "quadloops: \n\t" | ||
60 | "sub %[tmp], %[e], %[d] \n\t" | ||
61 | "cmp %[tmp], #32 \n\t" | ||
62 | "blt "AP"loopout \n\t" | ||
63 | |||
64 | "sub %[tmp],%[e],#31 \n\t" | ||
65 | |||
66 | AP "quadloopint:\n\t" | ||
67 | "vldm %[d], {d0,d1,d2,d3} \n\t" | ||
68 | |||
69 | "vmull.u8 q2, d0, d14 \n\t" | ||
70 | "vmull.u8 q3, d1, d15 \n\t" | ||
71 | "vmull.u8 q4, d2, d14 \n\t" | ||
72 | "vmull.u8 q5, d3, d15 \n\t" | ||
73 | |||
74 | "vqrshrn.u16 d0, q2, #8 \n\t" | ||
75 | "vqrshrn.u16 d1, q3, #8 \n\t" | ||
76 | "vqrshrn.u16 d2, q4, #8 \n\t" | ||
77 | "vqrshrn.u16 d3, q5, #8 \n\t" | ||
78 | |||
79 | "vqadd.u8 q0, q6, q0 \n\t" | ||
80 | "vqadd.u8 q1, q6, q1 \n\t" | ||
81 | |||
82 | "vstm %[d]!, {d0,d1,d2,d3} \n\t" | ||
83 | |||
84 | "cmp %[tmp], %[d]\n\t" | ||
85 | "bhi "AP"quadloopint\n\t" | ||
86 | |||
87 | AP "loopout: \n\t" | ||
88 | "cmp %[d], %[e]\n\t" | ||
89 | "beq "AP"done\n\t" | ||
90 | "sub %[tmp],%[e], %[d] \n\t" | ||
91 | "cmp %[tmp],#8 \n\t" | ||
92 | "blt "AP"singleloop2 \n\t" | ||
93 | |||
94 | AP "dualloop2: \n\t" | ||
95 | "sub %[tmp],%[e],$0x7 \n\t" | ||
96 | AP "dualloop2int: \n\t" | ||
97 | "vldr.64 d0, [%[d]] \n\t" | ||
98 | "vmull.u8 q1, d0, d14 \n\t" | ||
99 | "vqrshrn.u16 d0, q1, #8 \n\t" | ||
100 | "vqadd.u8 d0, d0, d12 \n\t" | ||
101 | |||
102 | "vstr.64 d0, [%[d]] \n\t" | ||
103 | |||
104 | "add %[d], #8 \n\t" | ||
105 | "cmp %[tmp], %[d] \n\t" | ||
106 | "bhi "AP"dualloop2int \n\t" | ||
107 | |||
108 | // Single ?? | ||
109 | "cmp %[e], %[d] \n\t" | ||
110 | "beq "AP"done \n\t" | ||
111 | |||
112 | AP "singleloop2: \n\t" | ||
113 | "vld1.32 d0[0], [%[d]] \n\t" | ||
114 | "vmull.u8 q1, d0, d14 \n\t" | ||
115 | "vqrshrn.u16 d0, q1, #8 \n\t" | ||
116 | "vqadd.u8 d0, d0, d12 \n\t" | ||
117 | |||
118 | "vst1.32 d0[0], [%[d]] \n\t" | ||
119 | |||
120 | AP "done:\n\t" | ||
121 | |||
122 | : // output regs | ||
123 | // Input | ||
124 | : [e] "r" (e = d + l), [d] "r" (d), [c] "r" (c), [tmp] "r" (tmp) | ||
125 | : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","memory" // clobbered | ||
126 | |||
127 | ); | ||
128 | #undef AP | ||
129 | |||
130 | } | ||
131 | |||
132 | #define _op_blend_caa_dp_neon _op_blend_c_dp_neon | ||
133 | |||
134 | #define _op_blend_c_dpan_neon _op_blend_c_dp_neon | ||
135 | #define _op_blend_caa_dpan_neon _op_blend_c_dpan_neon | ||
136 | |||
137 | static void | ||
138 | init_blend_color_span_funcs_neon(void) | ||
139 | { | ||
140 | op_blend_span_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_blend_c_dp_neon; | ||
141 | op_blend_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_caa_dp_neon; | ||
142 | |||
143 | op_blend_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_c_dpan_neon; | ||
144 | op_blend_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_caa_dpan_neon; | ||
145 | } | ||
146 | #endif | ||
147 | |||
148 | #ifdef BUILD_NEON | ||
149 | static void | ||
150 | _op_blend_pt_c_dp_neon(DATA32 s __UNUSED__, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
151 | s = 256 - (c >> 24); | ||
152 | *d = c + MUL_256(s, *d); | ||
153 | } | ||
154 | |||
155 | #define _op_blend_pt_caa_dp_neon _op_blend_pt_c_dp_neon | ||
156 | |||
157 | #define _op_blend_pt_c_dpan_neon _op_blend_pt_c_dp_neon | ||
158 | #define _op_blend_pt_caa_dpan_neon _op_blend_pt_c_dpan_neon | ||
159 | |||
160 | static void | ||
161 | init_blend_color_pt_funcs_neon(void) | ||
162 | { | ||
163 | op_blend_pt_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_blend_pt_c_dp_neon; | ||
164 | op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pt_caa_dp_neon; | ||
165 | |||
166 | op_blend_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pt_c_dpan_neon; | ||
167 | op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pt_caa_dpan_neon; | ||
168 | } | ||
169 | #endif | ||
170 | /*-----*/ | ||
171 | |||
172 | /* blend_rel color -> dst */ | ||
173 | |||
174 | #ifdef BUILD_NEON | ||
175 | static void | ||
176 | _op_blend_rel_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
177 | DATA32 *e; | ||
178 | int alpha = 256 - (c >> 24); | ||
179 | UNROLL8_PLD_WHILE(d, l, e, | ||
180 | { | ||
181 | *d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d); | ||
182 | d++; | ||
183 | }); | ||
184 | } | ||
185 | |||
186 | #define _op_blend_rel_caa_dp_neon _op_blend_rel_c_dp_neon | ||
187 | |||
188 | #define _op_blend_rel_c_dpan_neon _op_blend_c_dpan_neon | ||
189 | #define _op_blend_rel_caa_dpan_neon _op_blend_caa_dpan_neon | ||
190 | |||
191 | static void | ||
192 | init_blend_rel_color_span_funcs_neon(void) | ||
193 | { | ||
194 | op_blend_rel_span_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_blend_rel_c_dp_neon; | ||
195 | op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_rel_caa_dp_neon; | ||
196 | |||
197 | op_blend_rel_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_c_dpan_neon; | ||
198 | op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_caa_dpan_neon; | ||
199 | } | ||
200 | #endif | ||
201 | |||
202 | #ifdef BUILD_NEON | ||
203 | static void | ||
204 | _op_blend_rel_pt_c_dp_neon(DATA32 s __UNUSED__, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
205 | s = *d >> 24; | ||
206 | *d = MUL_SYM(s, c) + MUL_256(256 - (c >> 24), *d); | ||
207 | } | ||
208 | |||
209 | #define _op_blend_rel_pt_caa_dp_neon _op_blend_rel_pt_c_dp_neon | ||
210 | |||
211 | #define _op_blend_rel_pt_c_dpan_neon _op_blend_pt_c_dpan_neon | ||
212 | #define _op_blend_rel_pt_caa_dpan_neon _op_blend_pt_caa_dpan_neon | ||
213 | |||
214 | static void | ||
215 | init_blend_rel_color_pt_funcs_neon(void) | ||
216 | { | ||
217 | op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_blend_rel_pt_c_dp_neon; | ||
218 | op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_rel_pt_caa_dp_neon; | ||
219 | |||
220 | op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pt_c_dpan_neon; | ||
221 | op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pt_caa_dpan_neon; | ||
222 | } | ||
223 | #endif | ||