aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c
diff options
context:
space:
mode:
authorDavid Walter Seikel2013-01-13 17:29:19 +1000
committerDavid Walter Seikel2013-01-13 17:29:19 +1000
commit07274513e984f0b5544586c74508ccd16e7dcafa (patch)
treeb32ff2a9136fbc1a4a6a0ed1e4d79cde0f5f16d9 /libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c
parentAdded Irrlicht 1.8, but without all the Windows binaries. (diff)
downloadSledjHamr-07274513e984f0b5544586c74508ccd16e7dcafa.zip
SledjHamr-07274513e984f0b5544586c74508ccd16e7dcafa.tar.gz
SledjHamr-07274513e984f0b5544586c74508ccd16e7dcafa.tar.bz2
SledjHamr-07274513e984f0b5544586c74508ccd16e7dcafa.tar.xz
Remove EFL, since it's been released now.
Diffstat (limited to 'libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c')
-rw-r--r--libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c321
1 files changed, 0 insertions, 321 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c
deleted file mode 100644
index 5883d15..0000000
--- a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c
+++ /dev/null
@@ -1,321 +0,0 @@
1/* blend mask x color -> dst */
2
3#ifdef BUILD_SSE3
4
5static void
6_op_blend_mas_c_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
7
8 const __m128i c_packed = _mm_set_epi32(c, c, c, c);
9
10 LOOP_ALIGNED_U1_A48_SSE3(d, l,
11 { /* UOP */
12
13 DATA32 a = *m;
14 DATA32 mc = MUL_SYM(a, c);
15 a = 256 - (mc >> 24);
16 *d = mc + MUL_256(a, *d);
17 m++; d++; l--;
18 },
19 { /* A4OP */
20
21 if ((m[3] | m[2] | m[1] | m[0]) == 0) {
22 m += 4; d += 4; l -= 4;
23 continue;
24 }
25
26 __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]);
27 __m128i d0 = _mm_load_si128((__m128i *)d);
28
29 __m128i mc0 = mul_sym_sse3(m0, c_packed);
30 __m128i a0 = sub4_alpha_sse3(mc0);
31 __m128i mul0 = mul_256_sse3(a0, d0);
32
33 mul0 = _mm_add_epi32(mul0, mc0);
34
35 _mm_store_si128((__m128i *)d, mul0);
36
37 m += 4; d += 4; l -= 4;
38 },
39 { /* A8OP */
40
41 if((m[7] | m[6] | m[5] | m[4] | m[3] | m[2] | m[1] | m[0]) == 0) {
42 m += 8; d += 8; l -= 8;
43 continue;
44 }
45
46 __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]);
47 __m128i d0 = _mm_load_si128((__m128i *)d);
48
49 __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]);
50 __m128i d1 = _mm_load_si128((__m128i *)(d+4));
51
52 __m128i mc0 = mul_sym_sse3(m0, c_packed);
53 __m128i a0 = sub4_alpha_sse3(mc0);
54 __m128i mul0 = mul_256_sse3(a0, d0);
55
56 mul0 = _mm_add_epi32(mc0, mul0);
57
58 __m128i mc1 = mul_sym_sse3(m1, c_packed);
59 __m128i a1 = sub4_alpha_sse3(mc1);
60 __m128i mul1 = mul_256_sse3(a1, d1);
61
62 mul1 = _mm_add_epi32(mc1, mul1);
63
64 _mm_store_si128((__m128i *)d, mul0);
65 _mm_store_si128((__m128i *)(d+4), mul1);
66
67 m += 8; d += 8; l -= 8;
68 })
69}
70
71static void
72_op_blend_mas_can_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
73
74 DATA32 alpha;
75
76 const __m128i one = _mm_set_epi32(1, 1, 1, 1);
77 const __m128i c_packed = _mm_set_epi32(c, c, c, c);
78
79 LOOP_ALIGNED_U1_A48_SSE3(d, l,
80 { /* UOP */
81
82 alpha = *m;
83 switch(alpha)
84 {
85 case 0:
86 break;
87 case 255:
88 *d = c;
89 break;
90 default:
91 alpha++;
92 *d = INTERP_256(alpha, c, *d);
93 break;
94 }
95 m++; d++; l--;
96 },
97 { /* A4OP */
98
99 if ((m[3] | m[2] | m[1] | m[0]) == 0) {
100 m += 4; d += 4; l -= 4;
101 continue;
102 }
103
104 __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]);
105 __m128i d0 = _mm_load_si128((__m128i *)d);
106
107 __m128i zm0 = _mm_cmpeq_epi32(m0, _mm_setzero_si128());
108
109 m0 = _mm_add_epi32(one, m0);
110
111 __m128i r0 = interp4_256_sse3(m0, c_packed, d0);
112
113 r0 = _mm_and_si128(~zm0, r0);
114 d0 = _mm_and_si128(zm0, d0);
115
116 d0 = _mm_add_epi32(r0, d0);
117
118 _mm_store_si128((__m128i *)d, d0);
119
120 m += 4; d += 4; l -= 4;
121 },
122 { /* A8OP */
123
124 if ((m[7] | m[6] | m[5] | m[4] | m[3] | m[2] | m[1] | m[0]) == 0) {
125 m += 8; d += 8; l -= 8;
126 continue;
127 }
128
129 __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]);
130 __m128i d0 = _mm_load_si128((__m128i *)d);
131
132 __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]);
133 __m128i d1 = _mm_load_si128((__m128i *)(d+4));
134
135 __m128i zm0 = _mm_cmpeq_epi32(m0, _mm_setzero_si128());
136 __m128i zm1 = _mm_cmpeq_epi32(m1, _mm_setzero_si128());
137
138 m0 = _mm_add_epi32(one, m0);
139 m1 = _mm_add_epi32(one, m1);
140
141 __m128i r0 = interp4_256_sse3(m0, c_packed, d0);
142 __m128i r1 = interp4_256_sse3(m1, c_packed, d1);
143
144 r0 = _mm_and_si128(~zm0, r0);
145 d0 = _mm_and_si128(zm0, d0);
146
147 r1 = _mm_and_si128(~zm1, r1);
148 d1 = _mm_and_si128(zm1, d1);
149
150 d0 = _mm_add_epi32(d0, r0);
151 d1 = _mm_add_epi32(d1, r1);
152
153 _mm_store_si128((__m128i *)d, d0);
154 _mm_store_si128((__m128i *)(d+4), d1);
155
156 m += 8; d += 8; l -= 8;
157 })
158}
159
/*
 * Alias names for the mask x color span blenders.  Every name below
 * resolves, directly or through another alias, to either
 * _op_blend_mas_c_dp_sse3 or _op_blend_mas_can_dp_sse3.
 */

/* Remaining color variants for the plain destination. */
#define _op_blend_mas_cn_dp_sse3      _op_blend_mas_can_dp_sse3
#define _op_blend_mas_caa_dp_sse3     _op_blend_mas_c_dp_sse3

/* The _dpan destination variants reuse the corresponding _dp entries. */
#define _op_blend_mas_c_dpan_sse3     _op_blend_mas_c_dp_sse3
#define _op_blend_mas_cn_dpan_sse3    _op_blend_mas_cn_dp_sse3
#define _op_blend_mas_can_dpan_sse3   _op_blend_mas_can_dp_sse3
#define _op_blend_mas_caa_dpan_sse3   _op_blend_mas_caa_dp_sse3
167
168static void
169init_blend_mask_color_span_funcs_sse3(void)
170{
171// FIXME: BUGGY BUGGY Core i5 750 (32bit), 4.5.2 (Ubuntu/Linaro 4.5.2-8ubuntu4), ello (text and rectangle)
172// op_blend_span_funcs[SP_N][SM_AS][SC][DP][CPU_SSE3] = _op_blend_mas_c_dp_sse3;
173 op_blend_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_mas_cn_dp_sse3;
174 op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_SSE3] = _op_blend_mas_can_dp_sse3;
175 op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_SSE3] = _op_blend_mas_caa_dp_sse3;
176
177// FIXME: BUGGY BUGGY Core i5 2500 (64bit), gcc version 4.5.2 (Ubuntu/Linaro 4.5.2-8ubuntu4), ello (text)
178// op_blend_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_SSE3] = _op_blend_mas_c_dpan_sse3;
179 op_blend_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_mas_cn_dpan_sse3;
180 op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_SSE3] = _op_blend_mas_can_dpan_sse3;
181 op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_SSE3] = _op_blend_mas_caa_dpan_sse3;
182}
183
/*
 * Point (single pixel) mask x color blenders: no SSE3 implementation is
 * provided here, so the two base names are NULL and every other name
 * below aliases one of them.
 */
#define _op_blend_pt_mas_c_dp_sse3       NULL
#define _op_blend_pt_mas_can_dp_sse3     NULL

/* Remaining color variants for the plain destination. */
#define _op_blend_pt_mas_cn_dp_sse3      _op_blend_pt_mas_can_dp_sse3
#define _op_blend_pt_mas_caa_dp_sse3     _op_blend_pt_mas_c_dp_sse3

/* The _dpan destination variants reuse the corresponding _dp entries. */
#define _op_blend_pt_mas_c_dpan_sse3     _op_blend_pt_mas_c_dp_sse3
#define _op_blend_pt_mas_cn_dpan_sse3    _op_blend_pt_mas_cn_dp_sse3
#define _op_blend_pt_mas_can_dpan_sse3   _op_blend_pt_mas_can_dp_sse3
#define _op_blend_pt_mas_caa_dpan_sse3   _op_blend_pt_mas_caa_dp_sse3
194
195static void
196init_blend_mask_color_pt_funcs_sse3(void)
197{
198 op_blend_pt_funcs[SP_N][SM_AS][SC][DP][CPU_SSE3] = _op_blend_pt_mas_c_dp_sse3;
199 op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pt_mas_cn_dp_sse3;
200 op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_SSE3] = _op_blend_pt_mas_can_dp_sse3;
201 op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_SSE3] = _op_blend_pt_mas_caa_dp_sse3;
202
203 op_blend_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_SSE3] = _op_blend_pt_mas_c_dpan_sse3;
204 op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_mas_cn_dpan_sse3;
205 op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pt_mas_can_dpan_sse3;
206 op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pt_mas_caa_dpan_sse3;
207}
208
209/*-----*/
210
211/* blend_rel mask x color --> dst */
212
213static void
214_op_blend_rel_mas_c_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
215
216 const __m128i c_packed = _mm_set_epi32(c, c, c, c);
217
218 LOOP_ALIGNED_U1_A48_SSE3(d, l,
219 { /* UOP */
220
221 DATA32 mc = MUL_SYM(*m, c);
222 int alpha = 256 - (mc >> 24);
223 *d = MUL_SYM(*d >> 24, mc) + MUL_256(alpha, *d);
224 d++; m++; l--;
225 },
226 { /* A4OP */
227
228 __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]);
229 __m128i d0 = _mm_load_si128((__m128i *) d);
230
231 __m128i mc0 = mul_sym_sse3(m0, c_packed);
232 __m128i a0 = sub4_alpha_sse3(mc0);
233
234 __m128i d0_sym = mul_sym_sse3(_mm_srli_epi32(d0, 24), mc0);
235 d0 = mul_256_sse3(a0, d0);
236
237 d0 = _mm_add_epi32(d0, d0_sym);
238
239 _mm_store_si128((__m128i *)d, d0);
240
241 d += 4; m += 4; l -= 4;
242 },
243 { /* A8OP */
244
245 __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]);
246 __m128i d0 = _mm_load_si128((__m128i *)d);
247
248 __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]);
249 __m128i d1 = _mm_load_si128((__m128i *)(d+4));
250
251 __m128i mc0 = mul_sym_sse3(m0, c_packed);
252 __m128i mc1 = mul_sym_sse3(m1, c_packed);
253
254 __m128i a0 = sub4_alpha_sse3(mc0);
255 __m128i a1 = sub4_alpha_sse3(mc1);
256
257 __m128i d0_sym = mul_sym_sse3(_mm_srli_epi32(d0, 24), mc0);
258 __m128i d1_sym = mul_sym_sse3(_mm_srli_epi32(d1, 24), mc1);
259
260 d0 = mul_256_sse3(a0, d0);
261 d1 = mul_256_sse3(a1, d1);
262
263 d0 = _mm_add_epi32(d0, d0_sym);
264 d1 = _mm_add_epi32(d1, d1_sym);
265
266 _mm_store_si128((__m128i *)d, d0);
267 _mm_store_si128((__m128i *)(d+4), d1);
268
269 d += 8; m += 8; l -= 8;
270 })
271}
272
/*
 * Alias names for the blend_rel mask x color span blenders.
 */

/* Every color variant for the plain destination shares the one
 * blend_rel implementation. */
#define _op_blend_rel_mas_cn_dp_sse3      _op_blend_rel_mas_c_dp_sse3
#define _op_blend_rel_mas_can_dp_sse3     _op_blend_rel_mas_c_dp_sse3
#define _op_blend_rel_mas_caa_dp_sse3     _op_blend_rel_mas_c_dp_sse3

/* The _dpan destination variants map onto the non-rel blend names.
 * NOTE(review): presumably because the destination-alpha factor is a no-op
 * when the destination is opaque -- confirm. */
#define _op_blend_rel_mas_c_dpan_sse3     _op_blend_mas_c_dpan_sse3
#define _op_blend_rel_mas_cn_dpan_sse3    _op_blend_mas_cn_dpan_sse3
#define _op_blend_rel_mas_can_dpan_sse3   _op_blend_mas_can_dpan_sse3
#define _op_blend_rel_mas_caa_dpan_sse3   _op_blend_mas_caa_dpan_sse3
281
282static void
283init_blend_rel_mask_color_span_funcs_sse3(void)
284{
285 op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP][CPU_SSE3] = _op_blend_rel_mas_c_dp_sse3;
286 op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_mas_can_dp_sse3;
287 op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_SSE3] = _op_blend_rel_mas_can_dp_sse3;
288 op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_SSE3] = _op_blend_rel_mas_caa_dp_sse3;
289
290 op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_SSE3] = _op_blend_rel_mas_c_dpan_sse3;
291 op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_mas_cn_dpan_sse3;
292 op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_mas_can_dpan_sse3;
293 op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_mas_caa_dpan_sse3;
294}
295
/*
 * Point (single pixel) blend_rel mask x color blenders: no SSE3
 * implementation is provided here, so the base name is NULL.
 */
#define _op_blend_rel_pt_mas_c_dp_sse3       NULL

/* Remaining color variants for the plain destination share the base name. */
#define _op_blend_rel_pt_mas_cn_dp_sse3      _op_blend_rel_pt_mas_c_dp_sse3
#define _op_blend_rel_pt_mas_can_dp_sse3     _op_blend_rel_pt_mas_c_dp_sse3
#define _op_blend_rel_pt_mas_caa_dp_sse3     _op_blend_rel_pt_mas_c_dp_sse3

/* The _dpan destination variants map onto the non-rel point blend names. */
#define _op_blend_rel_pt_mas_c_dpan_sse3     _op_blend_pt_mas_c_dpan_sse3
#define _op_blend_rel_pt_mas_cn_dpan_sse3    _op_blend_pt_mas_cn_dpan_sse3
#define _op_blend_rel_pt_mas_can_dpan_sse3   _op_blend_pt_mas_can_dpan_sse3
#define _op_blend_rel_pt_mas_caa_dpan_sse3   _op_blend_pt_mas_caa_dpan_sse3
306
307static void
308init_blend_rel_mask_color_pt_funcs_sse3(void)
309{
310 op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP][CPU_SSE3] = _op_blend_rel_pt_mas_c_dp_sse3;
311 op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_mas_cn_dp_sse3;
312 op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pt_mas_can_dp_sse3;
313 op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pt_mas_caa_dp_sse3;
314
315 op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pt_mas_c_dpan_sse3;
316 op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_mas_cn_dpan_sse3;
317 op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pt_mas_can_dpan_sse3;
318 op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pt_mas_caa_dpan_sse3;
319}
320
321#endif