diff options
author | David Walter Seikel | 2013-01-13 17:29:19 +1000 |
---|---|---|
committer | David Walter Seikel | 2013-01-13 17:29:19 +1000 |
commit | 07274513e984f0b5544586c74508ccd16e7dcafa (patch) | |
tree | b32ff2a9136fbc1a4a6a0ed1e4d79cde0f5f16d9 /libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c | |
parent | Added Irrlicht 1.8, but without all the Windows binaries. (diff) | |
download | SledjHamr-07274513e984f0b5544586c74508ccd16e7dcafa.zip SledjHamr-07274513e984f0b5544586c74508ccd16e7dcafa.tar.gz SledjHamr-07274513e984f0b5544586c74508ccd16e7dcafa.tar.bz2 SledjHamr-07274513e984f0b5544586c74508ccd16e7dcafa.tar.xz |
Remove EFL, since it's been released now.
Diffstat (limited to '')
-rw-r--r-- | libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c | 300 |
1 files changed, 0 insertions, 300 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c deleted file mode 100644 index 617b9e2..0000000 --- a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c +++ /dev/null | |||
@@ -1,300 +0,0 @@ | |||
1 | /* blend pixel x mask --> dst */ | ||
2 | |||
3 | #ifdef BUILD_SSE3 | ||
4 | |||
5 | static void | ||
6 | _op_blend_p_mas_dp_sse3(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
7 | |||
8 | int alpha; | ||
9 | |||
10 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
11 | { /* UOP */ | ||
12 | |||
13 | alpha = *m; | ||
14 | c = MUL_SYM(alpha, *s); | ||
15 | alpha = 256 - (c >> 24); | ||
16 | *d = c + MUL_256(alpha, *d); | ||
17 | m++; s++; d++; l--; | ||
18 | }, | ||
19 | { /* A4OP */ | ||
20 | |||
21 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
22 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
23 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
24 | |||
25 | __m128i c0 = mul_sym_sse3(m0, s0); | ||
26 | __m128i a0 = sub4_alpha_sse3(c0); | ||
27 | __m128i r0 = mul_256_sse3(a0, d0); | ||
28 | |||
29 | r0 = _mm_add_epi32(r0, c0); | ||
30 | |||
31 | _mm_store_si128((__m128i *)d, r0); | ||
32 | |||
33 | m += 4; s += 4; d += 4; l -= 4; | ||
34 | }, | ||
35 | { /* A8OP */ | ||
36 | |||
37 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
38 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
39 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
40 | |||
41 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
42 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
43 | __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]); | ||
44 | |||
45 | __m128i c0 = mul_sym_sse3(m0, s0); | ||
46 | __m128i c1 = mul_sym_sse3(m1, s1); | ||
47 | |||
48 | __m128i a0 = sub4_alpha_sse3(c0); | ||
49 | __m128i a1 = sub4_alpha_sse3(c1); | ||
50 | |||
51 | __m128i r0 = mul_256_sse3(a0, d0); | ||
52 | __m128i r1 = mul_256_sse3(a1, d1); | ||
53 | |||
54 | r0 = _mm_add_epi32(r0, c0); | ||
55 | r1 = _mm_add_epi32(r1, c1); | ||
56 | |||
57 | _mm_store_si128((__m128i *)d, r0); | ||
58 | _mm_store_si128((__m128i *)(d+4), r1); | ||
59 | |||
60 | m += 8; s += 8; d += 8; l -= 8; | ||
61 | }) | ||
62 | } | ||
63 | |||
64 | static void | ||
65 | _op_blend_pas_mas_dp_sse3(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
66 | |||
67 | const __m128i ones = _mm_set_epi32(1, 1, 1, 1); | ||
68 | int alpha; | ||
69 | |||
70 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
71 | { /* UOP */ | ||
72 | |||
73 | alpha = *m; | ||
74 | switch(alpha) | ||
75 | { | ||
76 | case 0: | ||
77 | break; | ||
78 | case 255: | ||
79 | *d = *s; | ||
80 | break; | ||
81 | default: | ||
82 | alpha++; | ||
83 | *d = INTERP_256(alpha, *s, *d); | ||
84 | break; | ||
85 | } | ||
86 | m++; s++; d++; l--; | ||
87 | }, | ||
88 | { /*A4OP */ | ||
89 | |||
90 | if ((m[3] | m[2] | m[1] | m[0]) == 0) { | ||
91 | m += 4; s += 4; d += 4; l -= 4; | ||
92 | continue; | ||
93 | } | ||
94 | |||
95 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
96 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
97 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
98 | |||
99 | __m128i zm0 = _mm_cmpeq_epi32(m0, _mm_setzero_si128()); | ||
100 | |||
101 | m0 = _mm_add_epi32(m0, ones); | ||
102 | |||
103 | __m128i r0 = interp4_256_sse3(m0, s0, d0); | ||
104 | |||
105 | r0 = _mm_and_si128(~zm0, r0); | ||
106 | d0 = _mm_and_si128(zm0, d0); | ||
107 | |||
108 | d0 = _mm_add_epi32(r0, d0); | ||
109 | |||
110 | _mm_store_si128((__m128i *)d, d0); | ||
111 | |||
112 | m += 4; s += 4; d += 4; l -= 4; | ||
113 | }, | ||
114 | { /* A8OP */ | ||
115 | |||
116 | if ((m[7] | m[6] | m[5] | m[4] | m[3] | m[2] | m[1] | m[0]) == 0) { | ||
117 | m += 8; s += 8; d += 8; l -= 8; | ||
118 | continue; | ||
119 | } | ||
120 | |||
121 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
122 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
123 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
124 | |||
125 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
126 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
127 | __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]); | ||
128 | |||
129 | __m128i zm0 = _mm_cmpeq_epi32(m0, _mm_setzero_si128()); | ||
130 | __m128i zm1 = _mm_cmpeq_epi32(m1, _mm_setzero_si128()); | ||
131 | |||
132 | m0 = _mm_add_epi32(m0, ones); | ||
133 | m1 = _mm_add_epi32(m1, ones); | ||
134 | |||
135 | __m128i r0 = interp4_256_sse3(m0, s0, d0); | ||
136 | __m128i r1 = interp4_256_sse3(m1, s1, d1); | ||
137 | |||
138 | r0 = _mm_and_si128(~zm0, r0); | ||
139 | d0 = _mm_and_si128(zm0, d0); | ||
140 | |||
141 | r1 = _mm_and_si128(~zm1, r1); | ||
142 | d1 = _mm_and_si128(zm1, d1); | ||
143 | |||
144 | d0 = _mm_add_epi32(d0, r0); | ||
145 | d1 = _mm_add_epi32(d1, r1); | ||
146 | |||
147 | _mm_store_si128((__m128i *)d, d0); | ||
148 | _mm_store_si128((__m128i *)(d+4), d1); | ||
149 | |||
150 | m += 8; s += 8; d += 8; l -= 8; | ||
151 | }) | ||
152 | } | ||
153 | |||
154 | #define _op_blend_pan_mas_dp_sse3 _op_blend_pas_mas_dp_sse3 | ||
155 | |||
156 | #define _op_blend_p_mas_dpan_sse3 _op_blend_p_mas_dp_sse3 | ||
157 | #define _op_blend_pas_mas_dpan_sse3 _op_blend_pas_mas_dp_sse3 | ||
158 | #define _op_blend_pan_mas_dpan_sse3 _op_blend_pan_mas_dp_sse3 | ||
159 | |||
160 | static void | ||
161 | init_blend_pixel_mask_span_funcs_sse3(void) | ||
162 | { | ||
163 | op_blend_span_funcs[SP][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_p_mas_dp_sse3; | ||
164 | op_blend_span_funcs[SP_AS][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pas_mas_dp_sse3; | ||
165 | op_blend_span_funcs[SP_AN][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pan_mas_dp_sse3; | ||
166 | |||
167 | op_blend_span_funcs[SP][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_p_mas_dpan_sse3; | ||
168 | op_blend_span_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pas_mas_dpan_sse3; | ||
169 | op_blend_span_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pan_mas_dpan_sse3; | ||
170 | } | ||
171 | |||
172 | #define _op_blend_pt_p_mas_dp_sse3 NULL | ||
173 | #define _op_blend_pt_pan_mas_dp_sse3 NULL | ||
174 | |||
175 | #define _op_blend_pt_pas_mas_dp_sse3 _op_blend_pt_p_mas_dp_sse3 | ||
176 | |||
177 | #define _op_blend_pt_p_mas_dpan_sse3 _op_blend_pt_p_mas_dp_sse3 | ||
178 | #define _op_blend_pt_pas_mas_dpan_sse3 _op_blend_pt_pas_mas_dp_sse3 | ||
179 | #define _op_blend_pt_pan_mas_dpan_sse3 _op_blend_pt_pan_mas_dp_sse3 | ||
180 | |||
181 | static void | ||
182 | init_blend_pixel_mask_pt_funcs_sse3(void) | ||
183 | { | ||
184 | op_blend_pt_funcs[SP][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pt_p_mas_dp_sse3; | ||
185 | op_blend_pt_funcs[SP_AS][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pt_pas_mas_dp_sse3; | ||
186 | op_blend_pt_funcs[SP_AN][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pt_pan_mas_dp_sse3; | ||
187 | |||
188 | op_blend_pt_funcs[SP][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_p_mas_dpan_sse3; | ||
189 | op_blend_pt_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_pas_mas_dpan_sse3; | ||
190 | op_blend_pt_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_pan_mas_dpan_sse3; | ||
191 | } | ||
192 | |||
193 | /*-----*/ | ||
194 | |||
195 | /* blend_rel pixel x mask -> dst */ | ||
196 | |||
197 | static void | ||
198 | _op_blend_rel_p_mas_dp_sse3(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
199 | |||
200 | int alpha; | ||
201 | |||
202 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
203 | { /* UOP */ | ||
204 | |||
205 | c = MUL_SYM(*m, *s); | ||
206 | alpha = 256 - (c >> 24); | ||
207 | *d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d); | ||
208 | d++; m++; s++; l--; | ||
209 | }, | ||
210 | { /* A4OP */ | ||
211 | |||
212 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
213 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
214 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
215 | |||
216 | __m128i c0 = mul_sym_sse3(m0, s0); | ||
217 | __m128i a0 = sub4_alpha_sse3(c0); | ||
218 | |||
219 | __m128i l0 = mul_sym_sse3(_mm_srli_epi32(d0, 24), c0); | ||
220 | __m128i r0 = mul_256_sse3(a0, d0); | ||
221 | |||
222 | d0 = _mm_add_epi32(l0, r0); | ||
223 | |||
224 | _mm_store_si128((__m128i *)d, d0); | ||
225 | |||
226 | d += 4; m += 4; s += 4; l -= 4; | ||
227 | }, | ||
228 | { /* A8OP */ | ||
229 | |||
230 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
231 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
232 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
233 | |||
234 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
235 | __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]); | ||
236 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
237 | |||
238 | __m128i c0 = mul_sym_sse3(m0, s0); | ||
239 | __m128i c1 = mul_sym_sse3(m1, s1); | ||
240 | |||
241 | __m128i a0 = sub4_alpha_sse3(c0); | ||
242 | __m128i a1 = sub4_alpha_sse3(c1); | ||
243 | |||
244 | __m128i l0 = mul_sym_sse3(_mm_srli_epi32(d0, 24), c0); | ||
245 | __m128i r0 = mul_256_sse3(a0, d0); | ||
246 | |||
247 | __m128i l1 = mul_sym_sse3(_mm_srli_epi32(d1, 24), c1); | ||
248 | __m128i r1 = mul_256_sse3(a1, d1); | ||
249 | |||
250 | d0 = _mm_add_epi32(l0, r0); | ||
251 | d1 = _mm_add_epi32(l1, r1); | ||
252 | |||
253 | _mm_store_si128((__m128i *)d, d0); | ||
254 | _mm_store_si128((__m128i *)(d+4), d1); | ||
255 | |||
256 | d += 8; m += 8; s += 8; l -= 8; | ||
257 | }) | ||
258 | } | ||
259 | |||
260 | #define _op_blend_rel_pas_mas_dp_sse3 _op_blend_rel_p_mas_dp_sse3 | ||
261 | #define _op_blend_rel_pan_mas_dp_sse3 _op_blend_rel_p_mas_dp_sse3 | ||
262 | |||
263 | #define _op_blend_rel_p_mas_dpan_sse3 _op_blend_p_mas_dpan_sse3 | ||
264 | #define _op_blend_rel_pas_mas_dpan_sse3 _op_blend_pas_mas_dpan_sse3 | ||
265 | #define _op_blend_rel_pan_mas_dpan_sse3 _op_blend_pan_mas_dpan_sse3 | ||
266 | |||
267 | static void | ||
268 | init_blend_rel_pixel_mask_span_funcs_sse3(void) | ||
269 | { | ||
270 | op_blend_rel_span_funcs[SP][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_p_mas_dp_sse3; | ||
271 | op_blend_rel_span_funcs[SP_AS][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pas_mas_dp_sse3; | ||
272 | op_blend_rel_span_funcs[SP_AN][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pan_mas_dp_sse3; | ||
273 | |||
274 | op_blend_rel_span_funcs[SP][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_p_mas_dpan_sse3; | ||
275 | op_blend_rel_span_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pas_mas_dpan_sse3; | ||
276 | op_blend_rel_span_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pan_mas_dpan_sse3; | ||
277 | } | ||
278 | |||
279 | #define _op_blend_rel_pt_p_mas_dp_sse3 NULL | ||
280 | |||
281 | #define _op_blend_rel_pt_pas_mas_dp_sse3 _op_blend_rel_pt_p_mas_dp_sse3 | ||
282 | #define _op_blend_rel_pt_pan_mas_dp_sse3 _op_blend_rel_pt_p_mas_dp_sse3 | ||
283 | |||
284 | #define _op_blend_rel_pt_p_mas_dpan_sse3 _op_blend_pt_p_mas_dpan_sse3 | ||
285 | #define _op_blend_rel_pt_pas_mas_dpan_sse3 _op_blend_pt_pas_mas_dpan_sse3 | ||
286 | #define _op_blend_rel_pt_pan_mas_dpan_sse3 _op_blend_pt_pan_mas_dpan_sse3 | ||
287 | |||
288 | static void | ||
289 | init_blend_rel_pixel_mask_pt_funcs_sse3(void) | ||
290 | { | ||
291 | op_blend_rel_pt_funcs[SP][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_p_mas_dp_sse3; | ||
292 | op_blend_rel_pt_funcs[SP_AS][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_pas_mas_dp_sse3; | ||
293 | op_blend_rel_pt_funcs[SP_AN][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_pan_mas_dp_sse3; | ||
294 | |||
295 | op_blend_rel_pt_funcs[SP][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_p_mas_dpan_sse3; | ||
296 | op_blend_rel_pt_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pas_mas_dpan_sse3; | ||
297 | op_blend_rel_pt_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pan_mas_dpan_sse3; | ||
298 | } | ||
299 | |||
300 | #endif | ||