aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c')
-rw-r--r--libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c300
1 files changed, 0 insertions, 300 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c
deleted file mode 100644
index 617b9e2..0000000
--- a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c
+++ /dev/null
@@ -1,300 +0,0 @@
/* blend pixel x mask --> dst */
2
3#ifdef BUILD_SSE3
4
5static void
6_op_blend_p_mas_dp_sse3(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
7
8 int alpha;
9
10 LOOP_ALIGNED_U1_A48_SSE3(d, l,
11 { /* UOP */
12
13 alpha = *m;
14 c = MUL_SYM(alpha, *s);
15 alpha = 256 - (c >> 24);
16 *d = c + MUL_256(alpha, *d);
17 m++; s++; d++; l--;
18 },
19 { /* A4OP */
20
21 __m128i s0 = _mm_lddqu_si128((__m128i *)s);
22 __m128i d0 = _mm_load_si128((__m128i *)d);
23 __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]);
24
25 __m128i c0 = mul_sym_sse3(m0, s0);
26 __m128i a0 = sub4_alpha_sse3(c0);
27 __m128i r0 = mul_256_sse3(a0, d0);
28
29 r0 = _mm_add_epi32(r0, c0);
30
31 _mm_store_si128((__m128i *)d, r0);
32
33 m += 4; s += 4; d += 4; l -= 4;
34 },
35 { /* A8OP */
36
37 __m128i s0 = _mm_lddqu_si128((__m128i *)s);
38 __m128i d0 = _mm_load_si128((__m128i *)d);
39 __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]);
40
41 __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4));
42 __m128i d1 = _mm_load_si128((__m128i *)(d+4));
43 __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]);
44
45 __m128i c0 = mul_sym_sse3(m0, s0);
46 __m128i c1 = mul_sym_sse3(m1, s1);
47
48 __m128i a0 = sub4_alpha_sse3(c0);
49 __m128i a1 = sub4_alpha_sse3(c1);
50
51 __m128i r0 = mul_256_sse3(a0, d0);
52 __m128i r1 = mul_256_sse3(a1, d1);
53
54 r0 = _mm_add_epi32(r0, c0);
55 r1 = _mm_add_epi32(r1, c1);
56
57 _mm_store_si128((__m128i *)d, r0);
58 _mm_store_si128((__m128i *)(d+4), r1);
59
60 m += 8; s += 8; d += 8; l -= 8;
61 })
62}
63
64static void
65_op_blend_pas_mas_dp_sse3(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) {
66
67 const __m128i ones = _mm_set_epi32(1, 1, 1, 1);
68 int alpha;
69
70 LOOP_ALIGNED_U1_A48_SSE3(d, l,
71 { /* UOP */
72
73 alpha = *m;
74 switch(alpha)
75 {
76 case 0:
77 break;
78 case 255:
79 *d = *s;
80 break;
81 default:
82 alpha++;
83 *d = INTERP_256(alpha, *s, *d);
84 break;
85 }
86 m++; s++; d++; l--;
87 },
88 { /*A4OP */
89
90 if ((m[3] | m[2] | m[1] | m[0]) == 0) {
91 m += 4; s += 4; d += 4; l -= 4;
92 continue;
93 }
94
95 __m128i s0 = _mm_lddqu_si128((__m128i *)s);
96 __m128i d0 = _mm_load_si128((__m128i *)d);
97 __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]);
98
99 __m128i zm0 = _mm_cmpeq_epi32(m0, _mm_setzero_si128());
100
101 m0 = _mm_add_epi32(m0, ones);
102
103 __m128i r0 = interp4_256_sse3(m0, s0, d0);
104
105 r0 = _mm_and_si128(~zm0, r0);
106 d0 = _mm_and_si128(zm0, d0);
107
108 d0 = _mm_add_epi32(r0, d0);
109
110 _mm_store_si128((__m128i *)d, d0);
111
112 m += 4; s += 4; d += 4; l -= 4;
113 },
114 { /* A8OP */
115
116 if ((m[7] | m[6] | m[5] | m[4] | m[3] | m[2] | m[1] | m[0]) == 0) {
117 m += 8; s += 8; d += 8; l -= 8;
118 continue;
119 }
120
121 __m128i s0 = _mm_lddqu_si128((__m128i *)s);
122 __m128i d0 = _mm_load_si128((__m128i *)d);
123 __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]);
124
125 __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4));
126 __m128i d1 = _mm_load_si128((__m128i *)(d+4));
127 __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]);
128
129 __m128i zm0 = _mm_cmpeq_epi32(m0, _mm_setzero_si128());
130 __m128i zm1 = _mm_cmpeq_epi32(m1, _mm_setzero_si128());
131
132 m0 = _mm_add_epi32(m0, ones);
133 m1 = _mm_add_epi32(m1, ones);
134
135 __m128i r0 = interp4_256_sse3(m0, s0, d0);
136 __m128i r1 = interp4_256_sse3(m1, s1, d1);
137
138 r0 = _mm_and_si128(~zm0, r0);
139 d0 = _mm_and_si128(zm0, d0);
140
141 r1 = _mm_and_si128(~zm1, r1);
142 d1 = _mm_and_si128(zm1, d1);
143
144 d0 = _mm_add_epi32(d0, r0);
145 d1 = _mm_add_epi32(d1, r1);
146
147 _mm_store_si128((__m128i *)d, d0);
148 _mm_store_si128((__m128i *)(d+4), d1);
149
150 m += 8; s += 8; d += 8; l -= 8;
151 })
152}
153
/* Span-function aliases: names without a dedicated SSE3 routine expand to an
 * existing one.  The "pan" name reuses the "pas" routine, and each "dpan"
 * name reuses the "dp" name with the same prefix. */
#define _op_blend_pan_mas_dp_sse3     _op_blend_pas_mas_dp_sse3

#define _op_blend_p_mas_dpan_sse3     _op_blend_p_mas_dp_sse3
#define _op_blend_pas_mas_dpan_sse3   _op_blend_pas_mas_dp_sse3
#define _op_blend_pan_mas_dpan_sse3   _op_blend_pan_mas_dp_sse3
159
160static void
161init_blend_pixel_mask_span_funcs_sse3(void)
162{
163 op_blend_span_funcs[SP][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_p_mas_dp_sse3;
164 op_blend_span_funcs[SP_AS][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pas_mas_dp_sse3;
165 op_blend_span_funcs[SP_AN][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pan_mas_dp_sse3;
166
167 op_blend_span_funcs[SP][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_p_mas_dpan_sse3;
168 op_blend_span_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pas_mas_dpan_sse3;
169 op_blend_span_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pan_mas_dpan_sse3;
170}
171
/* Point-function names for blend pixel x mask.  No SSE3 point routine is
 * defined here: two names are NULL directly and the others alias them, so
 * every name below ends up expanding to NULL. */
#define _op_blend_pt_p_mas_dp_sse3       NULL
#define _op_blend_pt_pan_mas_dp_sse3     NULL
#define _op_blend_pt_pas_mas_dp_sse3     _op_blend_pt_p_mas_dp_sse3

#define _op_blend_pt_p_mas_dpan_sse3     _op_blend_pt_p_mas_dp_sse3
#define _op_blend_pt_pas_mas_dpan_sse3   _op_blend_pt_pas_mas_dp_sse3
#define _op_blend_pt_pan_mas_dpan_sse3   _op_blend_pt_pan_mas_dp_sse3
180
181static void
182init_blend_pixel_mask_pt_funcs_sse3(void)
183{
184 op_blend_pt_funcs[SP][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pt_p_mas_dp_sse3;
185 op_blend_pt_funcs[SP_AS][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pt_pas_mas_dp_sse3;
186 op_blend_pt_funcs[SP_AN][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pt_pan_mas_dp_sse3;
187
188 op_blend_pt_funcs[SP][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_p_mas_dpan_sse3;
189 op_blend_pt_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_pas_mas_dpan_sse3;
190 op_blend_pt_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_pan_mas_dpan_sse3;
191}
192
/*-----*/

/* blend_rel pixel x mask -> dst */
196
197static void
198_op_blend_rel_p_mas_dp_sse3(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) {
199
200 int alpha;
201
202 LOOP_ALIGNED_U1_A48_SSE3(d, l,
203 { /* UOP */
204
205 c = MUL_SYM(*m, *s);
206 alpha = 256 - (c >> 24);
207 *d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d);
208 d++; m++; s++; l--;
209 },
210 { /* A4OP */
211
212 __m128i s0 = _mm_lddqu_si128((__m128i *)s);
213 __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]);
214 __m128i d0 = _mm_load_si128((__m128i *)d);
215
216 __m128i c0 = mul_sym_sse3(m0, s0);
217 __m128i a0 = sub4_alpha_sse3(c0);
218
219 __m128i l0 = mul_sym_sse3(_mm_srli_epi32(d0, 24), c0);
220 __m128i r0 = mul_256_sse3(a0, d0);
221
222 d0 = _mm_add_epi32(l0, r0);
223
224 _mm_store_si128((__m128i *)d, d0);
225
226 d += 4; m += 4; s += 4; l -= 4;
227 },
228 { /* A8OP */
229
230 __m128i s0 = _mm_lddqu_si128((__m128i *)s);
231 __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]);
232 __m128i d0 = _mm_load_si128((__m128i *)d);
233
234 __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4));
235 __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]);
236 __m128i d1 = _mm_load_si128((__m128i *)(d+4));
237
238 __m128i c0 = mul_sym_sse3(m0, s0);
239 __m128i c1 = mul_sym_sse3(m1, s1);
240
241 __m128i a0 = sub4_alpha_sse3(c0);
242 __m128i a1 = sub4_alpha_sse3(c1);
243
244 __m128i l0 = mul_sym_sse3(_mm_srli_epi32(d0, 24), c0);
245 __m128i r0 = mul_256_sse3(a0, d0);
246
247 __m128i l1 = mul_sym_sse3(_mm_srli_epi32(d1, 24), c1);
248 __m128i r1 = mul_256_sse3(a1, d1);
249
250 d0 = _mm_add_epi32(l0, r0);
251 d1 = _mm_add_epi32(l1, r1);
252
253 _mm_store_si128((__m128i *)d, d0);
254 _mm_store_si128((__m128i *)(d+4), d1);
255
256 d += 8; m += 8; s += 8; l -= 8;
257 })
258}
259
/* blend_rel span-function aliases.  The "pas" and "pan" dp names reuse the
 * rel "p" routine; each rel "dpan" name expands to the plain (non-rel) blend
 * "dpan" name with the same pixel prefix. */
#define _op_blend_rel_pas_mas_dp_sse3     _op_blend_rel_p_mas_dp_sse3
#define _op_blend_rel_pan_mas_dp_sse3     _op_blend_rel_p_mas_dp_sse3

#define _op_blend_rel_p_mas_dpan_sse3     _op_blend_p_mas_dpan_sse3
#define _op_blend_rel_pas_mas_dpan_sse3   _op_blend_pas_mas_dpan_sse3
#define _op_blend_rel_pan_mas_dpan_sse3   _op_blend_pan_mas_dpan_sse3
266
267static void
268init_blend_rel_pixel_mask_span_funcs_sse3(void)
269{
270 op_blend_rel_span_funcs[SP][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_p_mas_dp_sse3;
271 op_blend_rel_span_funcs[SP_AS][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pas_mas_dp_sse3;
272 op_blend_rel_span_funcs[SP_AN][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pan_mas_dp_sse3;
273
274 op_blend_rel_span_funcs[SP][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_p_mas_dpan_sse3;
275 op_blend_rel_span_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pas_mas_dpan_sse3;
276 op_blend_rel_span_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pan_mas_dpan_sse3;
277}
278
/* blend_rel point-function names.  The rel "p" dp name is NULL and the "pas"
 * and "pan" dp names alias it; each rel "dpan" name expands to the plain
 * (non-rel) _op_blend_pt_*_dpan_sse3 name with the same pixel prefix. */
#define _op_blend_rel_pt_p_mas_dp_sse3       NULL

#define _op_blend_rel_pt_pas_mas_dp_sse3     _op_blend_rel_pt_p_mas_dp_sse3
#define _op_blend_rel_pt_pan_mas_dp_sse3     _op_blend_rel_pt_p_mas_dp_sse3

#define _op_blend_rel_pt_p_mas_dpan_sse3     _op_blend_pt_p_mas_dpan_sse3
#define _op_blend_rel_pt_pas_mas_dpan_sse3   _op_blend_pt_pas_mas_dpan_sse3
#define _op_blend_rel_pt_pan_mas_dpan_sse3   _op_blend_pt_pan_mas_dpan_sse3
287
288static void
289init_blend_rel_pixel_mask_pt_funcs_sse3(void)
290{
291 op_blend_rel_pt_funcs[SP][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_p_mas_dp_sse3;
292 op_blend_rel_pt_funcs[SP_AS][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_pas_mas_dp_sse3;
293 op_blend_rel_pt_funcs[SP_AN][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_pan_mas_dp_sse3;
294
295 op_blend_rel_pt_funcs[SP][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_p_mas_dpan_sse3;
296 op_blend_rel_pt_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pas_mas_dpan_sse3;
297 op_blend_rel_pt_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pan_mas_dpan_sse3;
298}
299
300#endif