diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c | 321 |
1 files changed, 0 insertions, 321 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c deleted file mode 100644 index 5883d15..0000000 --- a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c +++ /dev/null | |||
@@ -1,321 +0,0 @@ | |||
1 | /* blend mask x color -> dst */ | ||
2 | |||
3 | #ifdef BUILD_SSE3 | ||
4 | |||
5 | static void | ||
6 | _op_blend_mas_c_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
7 | |||
8 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
9 | |||
10 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
11 | { /* UOP */ | ||
12 | |||
13 | DATA32 a = *m; | ||
14 | DATA32 mc = MUL_SYM(a, c); | ||
15 | a = 256 - (mc >> 24); | ||
16 | *d = mc + MUL_256(a, *d); | ||
17 | m++; d++; l--; | ||
18 | }, | ||
19 | { /* A4OP */ | ||
20 | |||
21 | if ((m[3] | m[2] | m[1] | m[0]) == 0) { | ||
22 | m += 4; d += 4; l -= 4; | ||
23 | continue; | ||
24 | } | ||
25 | |||
26 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
27 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
28 | |||
29 | __m128i mc0 = mul_sym_sse3(m0, c_packed); | ||
30 | __m128i a0 = sub4_alpha_sse3(mc0); | ||
31 | __m128i mul0 = mul_256_sse3(a0, d0); | ||
32 | |||
33 | mul0 = _mm_add_epi32(mul0, mc0); | ||
34 | |||
35 | _mm_store_si128((__m128i *)d, mul0); | ||
36 | |||
37 | m += 4; d += 4; l -= 4; | ||
38 | }, | ||
39 | { /* A8OP */ | ||
40 | |||
41 | if((m[7] | m[6] | m[5] | m[4] | m[3] | m[2] | m[1] | m[0]) == 0) { | ||
42 | m += 8; d += 8; l -= 8; | ||
43 | continue; | ||
44 | } | ||
45 | |||
46 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
47 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
48 | |||
49 | __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]); | ||
50 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
51 | |||
52 | __m128i mc0 = mul_sym_sse3(m0, c_packed); | ||
53 | __m128i a0 = sub4_alpha_sse3(mc0); | ||
54 | __m128i mul0 = mul_256_sse3(a0, d0); | ||
55 | |||
56 | mul0 = _mm_add_epi32(mc0, mul0); | ||
57 | |||
58 | __m128i mc1 = mul_sym_sse3(m1, c_packed); | ||
59 | __m128i a1 = sub4_alpha_sse3(mc1); | ||
60 | __m128i mul1 = mul_256_sse3(a1, d1); | ||
61 | |||
62 | mul1 = _mm_add_epi32(mc1, mul1); | ||
63 | |||
64 | _mm_store_si128((__m128i *)d, mul0); | ||
65 | _mm_store_si128((__m128i *)(d+4), mul1); | ||
66 | |||
67 | m += 8; d += 8; l -= 8; | ||
68 | }) | ||
69 | } | ||
70 | |||
71 | static void | ||
72 | _op_blend_mas_can_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
73 | |||
74 | DATA32 alpha; | ||
75 | |||
76 | const __m128i one = _mm_set_epi32(1, 1, 1, 1); | ||
77 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
78 | |||
79 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
80 | { /* UOP */ | ||
81 | |||
82 | alpha = *m; | ||
83 | switch(alpha) | ||
84 | { | ||
85 | case 0: | ||
86 | break; | ||
87 | case 255: | ||
88 | *d = c; | ||
89 | break; | ||
90 | default: | ||
91 | alpha++; | ||
92 | *d = INTERP_256(alpha, c, *d); | ||
93 | break; | ||
94 | } | ||
95 | m++; d++; l--; | ||
96 | }, | ||
97 | { /* A4OP */ | ||
98 | |||
99 | if ((m[3] | m[2] | m[1] | m[0]) == 0) { | ||
100 | m += 4; d += 4; l -= 4; | ||
101 | continue; | ||
102 | } | ||
103 | |||
104 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
105 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
106 | |||
107 | __m128i zm0 = _mm_cmpeq_epi32(m0, _mm_setzero_si128()); | ||
108 | |||
109 | m0 = _mm_add_epi32(one, m0); | ||
110 | |||
111 | __m128i r0 = interp4_256_sse3(m0, c_packed, d0); | ||
112 | |||
113 | r0 = _mm_and_si128(~zm0, r0); | ||
114 | d0 = _mm_and_si128(zm0, d0); | ||
115 | |||
116 | d0 = _mm_add_epi32(r0, d0); | ||
117 | |||
118 | _mm_store_si128((__m128i *)d, d0); | ||
119 | |||
120 | m += 4; d += 4; l -= 4; | ||
121 | }, | ||
122 | { /* A8OP */ | ||
123 | |||
124 | if ((m[7] | m[6] | m[5] | m[4] | m[3] | m[2] | m[1] | m[0]) == 0) { | ||
125 | m += 8; d += 8; l -= 8; | ||
126 | continue; | ||
127 | } | ||
128 | |||
129 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
130 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
131 | |||
132 | __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]); | ||
133 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
134 | |||
135 | __m128i zm0 = _mm_cmpeq_epi32(m0, _mm_setzero_si128()); | ||
136 | __m128i zm1 = _mm_cmpeq_epi32(m1, _mm_setzero_si128()); | ||
137 | |||
138 | m0 = _mm_add_epi32(one, m0); | ||
139 | m1 = _mm_add_epi32(one, m1); | ||
140 | |||
141 | __m128i r0 = interp4_256_sse3(m0, c_packed, d0); | ||
142 | __m128i r1 = interp4_256_sse3(m1, c_packed, d1); | ||
143 | |||
144 | r0 = _mm_and_si128(~zm0, r0); | ||
145 | d0 = _mm_and_si128(zm0, d0); | ||
146 | |||
147 | r1 = _mm_and_si128(~zm1, r1); | ||
148 | d1 = _mm_and_si128(zm1, d1); | ||
149 | |||
150 | d0 = _mm_add_epi32(d0, r0); | ||
151 | d1 = _mm_add_epi32(d1, r1); | ||
152 | |||
153 | _mm_store_si128((__m128i *)d, d0); | ||
154 | _mm_store_si128((__m128i *)(d+4), d1); | ||
155 | |||
156 | m += 8; d += 8; l -= 8; | ||
157 | }) | ||
158 | } | ||
159 | |||
160 | #define _op_blend_mas_cn_dp_sse3 _op_blend_mas_can_dp_sse3 | ||
161 | #define _op_blend_mas_caa_dp_sse3 _op_blend_mas_c_dp_sse3 | ||
162 | |||
163 | #define _op_blend_mas_c_dpan_sse3 _op_blend_mas_c_dp_sse3 | ||
164 | #define _op_blend_mas_cn_dpan_sse3 _op_blend_mas_cn_dp_sse3 | ||
165 | #define _op_blend_mas_can_dpan_sse3 _op_blend_mas_can_dp_sse3 | ||
166 | #define _op_blend_mas_caa_dpan_sse3 _op_blend_mas_caa_dp_sse3 | ||
167 | |||
168 | static void | ||
169 | init_blend_mask_color_span_funcs_sse3(void) | ||
170 | { | ||
171 | // FIXME: BUGGY BUGGY Core i5 750 (32bit), 4.5.2 (Ubuntu/Linaro 4.5.2-8ubuntu4), ello (text and rectangle) | ||
172 | // op_blend_span_funcs[SP_N][SM_AS][SC][DP][CPU_SSE3] = _op_blend_mas_c_dp_sse3; | ||
173 | op_blend_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_mas_cn_dp_sse3; | ||
174 | op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_SSE3] = _op_blend_mas_can_dp_sse3; | ||
175 | op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_SSE3] = _op_blend_mas_caa_dp_sse3; | ||
176 | |||
177 | // FIXME: BUGGY BUGGY Core i5 2500 (64bit), gcc version 4.5.2 (Ubuntu/Linaro 4.5.2-8ubuntu4), ello (text) | ||
178 | // op_blend_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_SSE3] = _op_blend_mas_c_dpan_sse3; | ||
179 | op_blend_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_mas_cn_dpan_sse3; | ||
180 | op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_SSE3] = _op_blend_mas_can_dpan_sse3; | ||
181 | op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_SSE3] = _op_blend_mas_caa_dpan_sse3; | ||
182 | } | ||
183 | |||
184 | #define _op_blend_pt_mas_c_dp_sse3 NULL | ||
185 | #define _op_blend_pt_mas_can_dp_sse3 NULL | ||
186 | |||
187 | #define _op_blend_pt_mas_cn_dp_sse3 _op_blend_pt_mas_can_dp_sse3 | ||
188 | #define _op_blend_pt_mas_caa_dp_sse3 _op_blend_pt_mas_c_dp_sse3 | ||
189 | |||
190 | #define _op_blend_pt_mas_c_dpan_sse3 _op_blend_pt_mas_c_dp_sse3 | ||
191 | #define _op_blend_pt_mas_cn_dpan_sse3 _op_blend_pt_mas_cn_dp_sse3 | ||
192 | #define _op_blend_pt_mas_can_dpan_sse3 _op_blend_pt_mas_can_dp_sse3 | ||
193 | #define _op_blend_pt_mas_caa_dpan_sse3 _op_blend_pt_mas_caa_dp_sse3 | ||
194 | |||
195 | static void | ||
196 | init_blend_mask_color_pt_funcs_sse3(void) | ||
197 | { | ||
198 | op_blend_pt_funcs[SP_N][SM_AS][SC][DP][CPU_SSE3] = _op_blend_pt_mas_c_dp_sse3; | ||
199 | op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pt_mas_cn_dp_sse3; | ||
200 | op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_SSE3] = _op_blend_pt_mas_can_dp_sse3; | ||
201 | op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_SSE3] = _op_blend_pt_mas_caa_dp_sse3; | ||
202 | |||
203 | op_blend_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_SSE3] = _op_blend_pt_mas_c_dpan_sse3; | ||
204 | op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_mas_cn_dpan_sse3; | ||
205 | op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pt_mas_can_dpan_sse3; | ||
206 | op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pt_mas_caa_dpan_sse3; | ||
207 | } | ||
208 | |||
209 | /*-----*/ | ||
210 | |||
211 | /* blend_rel mask x color --> dst */ | ||
212 | |||
213 | static void | ||
214 | _op_blend_rel_mas_c_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
215 | |||
216 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
217 | |||
218 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
219 | { /* UOP */ | ||
220 | |||
221 | DATA32 mc = MUL_SYM(*m, c); | ||
222 | int alpha = 256 - (mc >> 24); | ||
223 | *d = MUL_SYM(*d >> 24, mc) + MUL_256(alpha, *d); | ||
224 | d++; m++; l--; | ||
225 | }, | ||
226 | { /* A4OP */ | ||
227 | |||
228 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
229 | __m128i d0 = _mm_load_si128((__m128i *) d); | ||
230 | |||
231 | __m128i mc0 = mul_sym_sse3(m0, c_packed); | ||
232 | __m128i a0 = sub4_alpha_sse3(mc0); | ||
233 | |||
234 | __m128i d0_sym = mul_sym_sse3(_mm_srli_epi32(d0, 24), mc0); | ||
235 | d0 = mul_256_sse3(a0, d0); | ||
236 | |||
237 | d0 = _mm_add_epi32(d0, d0_sym); | ||
238 | |||
239 | _mm_store_si128((__m128i *)d, d0); | ||
240 | |||
241 | d += 4; m += 4; l -= 4; | ||
242 | }, | ||
243 | { /* A8OP */ | ||
244 | |||
245 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
246 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
247 | |||
248 | __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]); | ||
249 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
250 | |||
251 | __m128i mc0 = mul_sym_sse3(m0, c_packed); | ||
252 | __m128i mc1 = mul_sym_sse3(m1, c_packed); | ||
253 | |||
254 | __m128i a0 = sub4_alpha_sse3(mc0); | ||
255 | __m128i a1 = sub4_alpha_sse3(mc1); | ||
256 | |||
257 | __m128i d0_sym = mul_sym_sse3(_mm_srli_epi32(d0, 24), mc0); | ||
258 | __m128i d1_sym = mul_sym_sse3(_mm_srli_epi32(d1, 24), mc1); | ||
259 | |||
260 | d0 = mul_256_sse3(a0, d0); | ||
261 | d1 = mul_256_sse3(a1, d1); | ||
262 | |||
263 | d0 = _mm_add_epi32(d0, d0_sym); | ||
264 | d1 = _mm_add_epi32(d1, d1_sym); | ||
265 | |||
266 | _mm_store_si128((__m128i *)d, d0); | ||
267 | _mm_store_si128((__m128i *)(d+4), d1); | ||
268 | |||
269 | d += 8; m += 8; l -= 8; | ||
270 | }) | ||
271 | } | ||
272 | |||
273 | #define _op_blend_rel_mas_cn_dp_sse3 _op_blend_rel_mas_c_dp_sse3 | ||
274 | #define _op_blend_rel_mas_can_dp_sse3 _op_blend_rel_mas_c_dp_sse3 | ||
275 | #define _op_blend_rel_mas_caa_dp_sse3 _op_blend_rel_mas_c_dp_sse3 | ||
276 | |||
277 | #define _op_blend_rel_mas_c_dpan_sse3 _op_blend_mas_c_dpan_sse3 | ||
278 | #define _op_blend_rel_mas_cn_dpan_sse3 _op_blend_mas_cn_dpan_sse3 | ||
279 | #define _op_blend_rel_mas_can_dpan_sse3 _op_blend_mas_can_dpan_sse3 | ||
280 | #define _op_blend_rel_mas_caa_dpan_sse3 _op_blend_mas_caa_dpan_sse3 | ||
281 | |||
282 | static void | ||
283 | init_blend_rel_mask_color_span_funcs_sse3(void) | ||
284 | { | ||
285 | op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP][CPU_SSE3] = _op_blend_rel_mas_c_dp_sse3; | ||
286 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_mas_can_dp_sse3; | ||
287 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_SSE3] = _op_blend_rel_mas_can_dp_sse3; | ||
288 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_SSE3] = _op_blend_rel_mas_caa_dp_sse3; | ||
289 | |||
290 | op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_SSE3] = _op_blend_rel_mas_c_dpan_sse3; | ||
291 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_mas_cn_dpan_sse3; | ||
292 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_mas_can_dpan_sse3; | ||
293 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_mas_caa_dpan_sse3; | ||
294 | } | ||
295 | |||
296 | #define _op_blend_rel_pt_mas_c_dp_sse3 NULL | ||
297 | |||
298 | #define _op_blend_rel_pt_mas_cn_dp_sse3 _op_blend_rel_pt_mas_c_dp_sse3 | ||
299 | #define _op_blend_rel_pt_mas_can_dp_sse3 _op_blend_rel_pt_mas_c_dp_sse3 | ||
300 | #define _op_blend_rel_pt_mas_caa_dp_sse3 _op_blend_rel_pt_mas_c_dp_sse3 | ||
301 | |||
302 | #define _op_blend_rel_pt_mas_c_dpan_sse3 _op_blend_pt_mas_c_dpan_sse3 | ||
303 | #define _op_blend_rel_pt_mas_cn_dpan_sse3 _op_blend_pt_mas_cn_dpan_sse3 | ||
304 | #define _op_blend_rel_pt_mas_can_dpan_sse3 _op_blend_pt_mas_can_dpan_sse3 | ||
305 | #define _op_blend_rel_pt_mas_caa_dpan_sse3 _op_blend_pt_mas_caa_dpan_sse3 | ||
306 | |||
307 | static void | ||
308 | init_blend_rel_mask_color_pt_funcs_sse3(void) | ||
309 | { | ||
310 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP][CPU_SSE3] = _op_blend_rel_pt_mas_c_dp_sse3; | ||
311 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_mas_cn_dp_sse3; | ||
312 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pt_mas_can_dp_sse3; | ||
313 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pt_mas_caa_dp_sse3; | ||
314 | |||
315 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pt_mas_c_dpan_sse3; | ||
316 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_mas_cn_dpan_sse3; | ||
317 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pt_mas_can_dpan_sse3; | ||
318 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pt_mas_caa_dpan_sse3; | ||
319 | } | ||
320 | |||
321 | #endif | ||