diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_sse3.c | 543 |
1 files changed, 0 insertions, 543 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_sse3.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_sse3.c deleted file mode 100644 index 4ee31f5..0000000 --- a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_sse3.c +++ /dev/null | |||
@@ -1,543 +0,0 @@ | |||
1 | /* blend pixel x color --> dst */ | ||
2 | |||
3 | #ifdef BUILD_SSE3 | ||
4 | |||
/*
 * Blend a span of source pixels, each multiplied by color c, onto d.
 *
 *   s  source pixels; read with unaligned loads in the vector paths
 *   m  mask pointer, unused by this variant
 *   c  color multiplied into every source pixel
 *   d  destination pixels, updated in place
 *   l  remaining pixel count; decremented by 1, 4 or 8 per block
 *
 * Scalar path, per pixel:
 *   sc = MUL4_SYM(c, *s);  *d = sc + MUL_256(256 - (sc >> 24), *d);
 * The 4- and 8-pixel paths follow the same sequence using mul4_sym_sse3,
 * sub4_alpha_sse3 and mul_256_sse3. Those helpers are defined elsewhere and
 * are presumably the vector counterparts of the scalar macros -- confirm.
 *
 * NOTE(review): LOOP_ALIGNED_U1_A48_SSE3 is not defined in this file. The
 * vector paths use aligned load/store on d, so the macro is assumed to run
 * the one-pixel block until d is 16-byte aligned -- verify against the macro.
 */
5 | static void | ||
6 | _op_blend_p_c_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
7 | |||
8 | DATA32 alpha; | ||
9 | |||
/* c replicated into all four 32-bit lanes */
10 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
11 | |||
12 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
13 | { /* UOP */ | ||
14 | |||
15 | DATA32 sc = MUL4_SYM(c, *s); | ||
16 | alpha = 256 - (sc >> 24); | ||
17 | *d = sc + MUL_256(alpha, *d); | ||
18 | d++; s++; l--; | ||
19 | }, | ||
20 | { /* A4OP */ | ||
21 | |||
/* s is loaded with the unaligned form; d uses the aligned load/store */
22 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
23 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
24 | |||
25 | __m128i sc0 = mul4_sym_sse3(c_packed, s0); | ||
26 | __m128i a0 = sub4_alpha_sse3(sc0); | ||
27 | __m128i mul0 = mul_256_sse3(a0, d0); | ||
28 | |||
29 | d0 = _mm_add_epi32(sc0, mul0); | ||
30 | |||
31 | _mm_store_si128((__m128i *)d, d0); | ||
32 | |||
33 | d += 4; s += 4; l -= 4; | ||
34 | }, | ||
35 | { /* A8OP */ | ||
36 | |||
/* same steps as A4OP, applied to two consecutive groups of four pixels */
37 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
38 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
39 | |||
40 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
41 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
42 | |||
43 | __m128i sc0 = mul4_sym_sse3(c_packed, s0); | ||
44 | __m128i sc1 = mul4_sym_sse3(c_packed, s1); | ||
45 | |||
46 | __m128i a0 = sub4_alpha_sse3(sc0); | ||
47 | __m128i a1 = sub4_alpha_sse3(sc1); | ||
48 | |||
49 | __m128i mul0 = mul_256_sse3(a0, d0); | ||
50 | __m128i mul1 = mul_256_sse3(a1, d1); | ||
51 | |||
52 | d0 = _mm_add_epi32(sc0, mul0); | ||
53 | d1 = _mm_add_epi32(sc1, mul1); | ||
54 | |||
55 | _mm_store_si128((__m128i *)d, d0); | ||
56 | _mm_store_si128((__m128i *)(d+4), d1); | ||
57 | |||
58 | d += 8; s += 8; l -= 8; | ||
59 | }) | ||
60 | } | ||
61 | |||
/*
 * Span blend where the destination weight comes from the color alone.
 *
 *   s  source pixels; read with unaligned loads in the vector paths
 *   m  mask pointer, unused by this variant
 *   c  color; its top byte supplies both the result alpha term and the
 *      destination weight (256 - (c >> 24))
 *   d  destination pixels, updated in place
 *   l  remaining pixel count; decremented by 1, 4 or 8 per block
 *
 * Scalar path, per pixel:
 *   *d = ((c & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(alpha, *d);
 * The source alpha byte is never used as a blend factor here. The "pan" in
 * the name presumably means the source has no alpha -- TODO confirm.
 *
 * NOTE(review): LOOP_ALIGNED_U1_A48_SSE3, MUL3_SYM, mul3_sym_sse3 and
 * mul_256_sse3 are defined elsewhere; d is assumed to be 16-byte aligned
 * whenever the 4/8-pixel blocks run -- verify against the macro.
 */
62 | static void | ||
63 | _op_blend_pan_c_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
64 | |||
65 | DATA32 c_a = c & 0xFF000000; | ||
66 | DATA32 alpha = 256 - (c >> 24); | ||
67 | |||
/* loop-invariant values replicated into all four 32-bit lanes */
68 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
69 | const __m128i c_alpha = _mm_set_epi32(c_a, c_a, c_a, c_a); | ||
70 | const __m128i a0 = _mm_set_epi32(alpha, alpha, alpha, alpha); | ||
71 | |||
72 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
73 | { /* UOP */ | ||
74 | |||
75 | *d = ((c & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(alpha, *d); | ||
76 | d++; s++; l--; | ||
77 | }, | ||
78 | { /* A4OP */ | ||
79 | |||
80 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
81 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
82 | |||
83 | __m128i r0 = _mm_add_epi32(mul3_sym_sse3(c_packed, s0), | ||
84 | mul_256_sse3(a0, d0)); | ||
85 | |||
/* add the color's alpha byte, matching the (c & 0xff000000) term of UOP */
86 | r0 = _mm_add_epi32(r0, c_alpha); | ||
87 | |||
88 | _mm_store_si128((__m128i *)d, r0); | ||
89 | |||
90 | d += 4; s += 4; l -= 4; | ||
91 | }, | ||
92 | { /* A8OP */ | ||
93 | |||
94 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
95 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
96 | |||
97 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
98 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
99 | |||
100 | __m128i r0 = _mm_add_epi32(mul3_sym_sse3(c_packed, s0), | ||
101 | mul_256_sse3(a0, d0)); | ||
102 | |||
103 | __m128i r1 = _mm_add_epi32(mul3_sym_sse3(c_packed, s1), | ||
104 | mul_256_sse3(a0, d1)); | ||
105 | |||
106 | r0 = _mm_add_epi32(r0, c_alpha); | ||
107 | r1 = _mm_add_epi32(r1, c_alpha); | ||
108 | |||
109 | _mm_store_si128((__m128i *)d, r0); | ||
110 | _mm_store_si128((__m128i *)(d+4), r1); | ||
111 | |||
112 | d += 8; s += 8; l -= 8; | ||
113 | }) | ||
114 | } | ||
115 | |||
/*
 * Span blend where the destination weight comes from the source alpha and
 * the color only scales the lower three bytes.
 *
 *   s  source pixels; read with unaligned loads in the vector paths
 *   m  mask pointer, unused by this variant
 *   c  color applied through MUL3_SYM / mul3_sym_sse3
 *   d  destination pixels, updated in place
 *   l  remaining pixel count; decremented by 1, 4 or 8 per block
 *
 * Scalar path, per pixel:
 *   alpha = 256 - (*s >> 24);
 *   *d = ((*s & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(alpha, *d);
 * The "can" in the name presumably means the color has no alpha -- TODO
 * confirm.
 *
 * NOTE(review): LOOP_ALIGNED_U1_A48_SSE3, sub4_alpha_sse3 and A_MASK_SSE3
 * are defined elsewhere. The vector paths mirror the scalar one only if
 * sub4_alpha_sse3(x) yields 256 - (x >> 24) per lane and A_MASK_SSE3 is
 * 0xff000000 per lane -- verify.
 */
116 | static void | ||
117 | _op_blend_p_can_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
118 | |||
119 | int alpha; | ||
120 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
121 | |||
122 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
123 | { /* UOP */ | ||
124 | |||
125 | alpha = 256 - (*s >> 24); | ||
126 | *d = ((*s & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(alpha, *d); | ||
127 | d++; s++; l--; | ||
128 | }, | ||
129 | { /* A4OP */ | ||
130 | |||
131 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
132 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
133 | |||
/* weight is derived from the raw source, not from a color-scaled value */
134 | __m128i a0 = sub4_alpha_sse3(s0); | ||
135 | |||
136 | __m128i r0 = _mm_add_epi32(mul3_sym_sse3(c_packed, s0), | ||
137 | mul_256_sse3(a0, d0)); | ||
138 | |||
139 | r0 = _mm_add_epi32(r0, _mm_and_si128(s0, A_MASK_SSE3)); | ||
140 | |||
141 | _mm_store_si128((__m128i *)d, r0); | ||
142 | |||
143 | d += 4; s += 4; l -= 4; | ||
144 | }, | ||
/* third macro argument: the 8-pixel block (unlabelled in the original) */
145 | { | ||
146 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
147 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
148 | |||
149 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
150 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
151 | |||
152 | __m128i a0 = sub4_alpha_sse3(s0); | ||
153 | __m128i a1 = sub4_alpha_sse3(s1); | ||
154 | |||
155 | __m128i r0 = _mm_add_epi32(mul3_sym_sse3(c_packed, s0), | ||
156 | mul_256_sse3(a0, d0)); | ||
157 | |||
158 | __m128i r1 = _mm_add_epi32(mul3_sym_sse3(c_packed, s1), | ||
159 | mul_256_sse3(a1, d1)); | ||
160 | |||
161 | r0 = _mm_add_epi32(r0, _mm_and_si128(s0, A_MASK_SSE3)); | ||
162 | r1 = _mm_add_epi32(r1, _mm_and_si128(s1, A_MASK_SSE3)); | ||
163 | |||
164 | _mm_store_si128((__m128i *)d, r0); | ||
165 | _mm_store_si128((__m128i *)(d+4), r1); | ||
166 | |||
167 | d += 8; s += 8; l -= 8; | ||
168 | }) | ||
169 | } | ||
170 | |||
/*
 * Span operation that overwrites d without reading it.
 *
 *   s  source pixels; read with unaligned loads in the vector paths
 *   m  mask pointer, unused by this variant
 *   c  color applied through MUL3_SYM / mul3_sym_sse3
 *   d  destination pixels; written only, never loaded
 *   l  remaining pixel count; decremented by 1, 4 or 8 per block
 *
 * Scalar path, per pixel:  *d = 0xff000000 + MUL3_SYM(c, *s);
 * The vector paths add A_MASK_SSE3 in place of the 0xff000000 literal. That
 * constant is defined elsewhere and is presumably 0xff000000 in every lane
 * -- TODO confirm.
 *
 * NOTE(review): LOOP_ALIGNED_U1_A48_SSE3 is defined elsewhere; the aligned
 * stores assume d is 16-byte aligned when the 4/8-pixel blocks run.
 */
171 | static void | ||
172 | _op_blend_pan_can_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
173 | |||
174 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
175 | |||
176 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
177 | { /* UOP */ | ||
178 | |||
/* d is advanced by the post-increment in the store itself */
179 | *d++ = 0xff000000 + MUL3_SYM(c, *s); | ||
180 | s++; l--; | ||
181 | }, | ||
182 | { /* A4OP */ | ||
183 | |||
184 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
185 | |||
186 | __m128i r0 = mul3_sym_sse3(c_packed, s0); | ||
187 | r0 = _mm_add_epi32(r0, A_MASK_SSE3); | ||
188 | |||
189 | _mm_store_si128((__m128i *)d, r0); | ||
190 | |||
191 | d += 4; s += 4; l -= 4; | ||
192 | }, | ||
193 | { /* A8OP */ | ||
194 | |||
195 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
196 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
197 | |||
198 | __m128i r0 = mul3_sym_sse3(c_packed, s0); | ||
199 | __m128i r1 = mul3_sym_sse3(c_packed, s1); | ||
200 | |||
201 | r0 = _mm_add_epi32(r0, A_MASK_SSE3); | ||
202 | r1 = _mm_add_epi32(r1, A_MASK_SSE3); | ||
203 | |||
204 | _mm_store_si128((__m128i *)d, r0); | ||
205 | _mm_store_si128((__m128i *)(d+4), r1); | ||
206 | |||
207 | d += 8; s += 8; l -= 8; | ||
208 | }) | ||
209 | } | ||
210 | |||
/*
 * Span blend where only the low byte of c is used, as a 1..256 scale factor.
 *
 *   s  source pixels; read with unaligned loads in the vector paths
 *   m  mask pointer, unused by this variant
 *   c  color; replaced on entry by 1 + (c & 0xff)
 *   d  destination pixels, updated in place
 *   l  remaining pixel count; decremented by 1, 4 or 8 per block
 *
 * Scalar path, per pixel:
 *   sc = MUL_256(c, *s);  *d = sc + MUL_256(256 - (sc >> 24), *d);
 * The "caa" in the name presumably means the color carries alpha only, so
 * any byte of c would do -- TODO confirm against the callers.
 *
 * NOTE(review): LOOP_ALIGNED_U1_A48_SSE3, mul_256_sse3 and sub4_alpha_sse3
 * are defined elsewhere; d is assumed 16-byte aligned in the vector blocks.
 */
211 | static void | ||
212 | _op_blend_p_caa_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
213 | |||
214 | int alpha; | ||
/* from here on c is the scale factor, not a packed color */
215 | c = 1 + (c & 0xff); | ||
216 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
217 | |||
218 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
219 | { /* UOP */ | ||
220 | |||
221 | DATA32 sc = MUL_256(c, *s); | ||
222 | alpha = 256 - (sc >> 24); | ||
223 | *d = sc + MUL_256(alpha, *d); | ||
224 | d++; | ||
225 | s++; | ||
226 | l--; | ||
227 | }, | ||
228 | { /* A4OP */ | ||
229 | |||
230 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
231 | __m128i d0 = _mm_load_si128 ((__m128i *)d); | ||
232 | |||
233 | __m128i sc0 = mul_256_sse3(c_packed, s0); | ||
234 | __m128i a0 = sub4_alpha_sse3(sc0); | ||
235 | |||
236 | __m128i r0 = _mm_add_epi32(mul_256_sse3(a0, d0), sc0); | ||
237 | |||
238 | _mm_store_si128((__m128i *)d, r0); | ||
239 | |||
240 | d += 4; s += 4; l -= 4; | ||
241 | }, | ||
/* third macro argument: the 8-pixel block (unlabelled in the original) */
242 | { | ||
243 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
244 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
245 | |||
246 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
247 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
248 | |||
249 | __m128i sc0 = mul_256_sse3(c_packed, s0); | ||
250 | __m128i sc1 = mul_256_sse3(c_packed, s1); | ||
251 | |||
252 | __m128i a0 = sub4_alpha_sse3(sc0); | ||
253 | __m128i a1 = sub4_alpha_sse3(sc1); | ||
254 | |||
255 | __m128i r0 = _mm_add_epi32(mul_256_sse3(a0, d0), sc0); | ||
256 | __m128i r1 = _mm_add_epi32(mul_256_sse3(a1, d1), sc1); | ||
257 | |||
258 | _mm_store_si128((__m128i *)d, r0); | ||
259 | _mm_store_si128((__m128i *)(d+4), r1); | ||
260 | |||
261 | d += 8; s += 8; l -= 8; | ||
262 | }) | ||
263 | } | ||
264 | |||
/*
 * Span interpolation between source and destination, weighted by the low
 * byte of c.
 *
 *   s  source pixels; read with unaligned loads in the vector paths
 *   m  mask pointer, unused by this variant
 *   c  color; replaced on entry by 1 + (c & 0xff)
 *   d  destination pixels, updated in place
 *   l  remaining pixel count; decremented by 1, 4 or 8 per block
 *
 * Scalar path, per pixel:  *d = INTERP_256(c, *s, *d);
 * The vector paths call interp4_256_sse3 with the same argument order.
 *
 * NOTE(review): INTERP_256, interp4_256_sse3 and LOOP_ALIGNED_U1_A48_SSE3
 * are defined elsewhere; which operand the weight favours is not visible
 * here. d is assumed 16-byte aligned in the vector blocks -- verify.
 */
265 | static void | ||
266 | _op_blend_pan_caa_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
267 | |||
/* from here on c is the interpolation weight, not a packed color */
268 | c = 1 + (c & 0xff); | ||
269 | const __m128i c_packed = _mm_set_epi32(c, c, c,c); | ||
270 | |||
271 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
272 | { /* UOP */ | ||
273 | |||
274 | *d = INTERP_256(c, *s, *d); | ||
275 | d++; s++; l--; | ||
276 | }, | ||
277 | { /* A4OP */ | ||
278 | |||
279 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
280 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
281 | |||
282 | __m128i r0 = interp4_256_sse3(c_packed, s0, d0); | ||
283 | |||
284 | _mm_store_si128((__m128i *)d, r0); | ||
285 | |||
286 | d += 4; s += 4; l -= 4; | ||
287 | }, | ||
/* third macro argument: the 8-pixel block (unlabelled in the original) */
288 | { | ||
289 | |||
290 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
291 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
292 | |||
293 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
294 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
295 | |||
296 | __m128i r0 = interp4_256_sse3(c_packed, s0, d0); | ||
297 | __m128i r1 = interp4_256_sse3(c_packed, s1, d1); | ||
298 | |||
299 | _mm_store_si128((__m128i *)d, r0); | ||
300 | _mm_store_si128((__m128i *)(d+4), r1); | ||
301 | |||
302 | d += 8; s += 8; l -= 8; | ||
303 | }) | ||
304 | } | ||
305 | |||
/*
 * Span-function aliases. Variants without a dedicated implementation reuse
 * an existing routine:
 *   - each "pas" name expands to the matching "p" routine;
 *   - each "dpan" name expands to the matching "dp" name.
 * Some aliases chain through another alias (e.g. pas_c_dpan -> pas_c_dp ->
 * p_c_dp), so the "dp" names they refer to must remain defined.
 */
306 | #define _op_blend_pas_c_dp_sse3 _op_blend_p_c_dp_sse3 | ||
307 | #define _op_blend_pas_can_dp_sse3 _op_blend_p_can_dp_sse3 | ||
308 | #define _op_blend_pas_caa_dp_sse3 _op_blend_p_caa_dp_sse3 | ||
309 | |||
310 | #define _op_blend_p_c_dpan_sse3 _op_blend_p_c_dp_sse3 | ||
311 | #define _op_blend_pas_c_dpan_sse3 _op_blend_pas_c_dp_sse3 | ||
312 | #define _op_blend_pan_c_dpan_sse3 _op_blend_pan_c_dp_sse3 | ||
313 | #define _op_blend_p_can_dpan_sse3 _op_blend_p_can_dp_sse3 | ||
314 | #define _op_blend_pas_can_dpan_sse3 _op_blend_pas_can_dp_sse3 | ||
315 | #define _op_blend_pan_can_dpan_sse3 _op_blend_pan_can_dp_sse3 | ||
316 | #define _op_blend_p_caa_dpan_sse3 _op_blend_p_caa_dp_sse3 | ||
317 | #define _op_blend_pas_caa_dpan_sse3 _op_blend_pas_caa_dp_sse3 | ||
318 | #define _op_blend_pan_caa_dpan_sse3 _op_blend_pan_caa_dp_sse3 | ||
319 | |||
/*
 * Store the SSE3 blend span routines into op_blend_span_funcs.
 *
 * Fills the CPU_SSE3 slot for every combination of first index
 * SP / SP_AS / SP_AN, third index SC / SC_AN / SC_AA and fourth index
 * DP / DP_AN, always with SM_N as the second index (18 entries in total).
 * The table and the index constants are declared elsewhere.
 */
320 | static void | ||
321 | init_blend_pixel_color_span_funcs_sse3(void) | ||
322 | { | ||
/* fourth index DP */
323 | op_blend_span_funcs[SP][SM_N][SC][DP][CPU_SSE3] = _op_blend_p_c_dp_sse3; | ||
324 | op_blend_span_funcs[SP_AS][SM_N][SC][DP][CPU_SSE3] = _op_blend_pas_c_dp_sse3; | ||
325 | op_blend_span_funcs[SP_AN][SM_N][SC][DP][CPU_SSE3] = _op_blend_pan_c_dp_sse3; | ||
326 | op_blend_span_funcs[SP][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_p_can_dp_sse3; | ||
327 | op_blend_span_funcs[SP_AS][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_pas_can_dp_sse3; | ||
328 | op_blend_span_funcs[SP_AN][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_pan_can_dp_sse3; | ||
329 | op_blend_span_funcs[SP][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_p_caa_dp_sse3; | ||
330 | op_blend_span_funcs[SP_AS][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_pas_caa_dp_sse3; | ||
331 | op_blend_span_funcs[SP_AN][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_pan_caa_dp_sse3; | ||
332 | |||
/* fourth index DP_AN; these names are aliases of the DP routines above */
333 | op_blend_span_funcs[SP][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_p_c_dpan_sse3; | ||
334 | op_blend_span_funcs[SP_AS][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_pas_c_dpan_sse3; | ||
335 | op_blend_span_funcs[SP_AN][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_pan_c_dpan_sse3; | ||
336 | op_blend_span_funcs[SP][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_p_can_dpan_sse3; | ||
337 | op_blend_span_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pas_can_dpan_sse3; | ||
338 | op_blend_span_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pan_can_dpan_sse3; | ||
339 | op_blend_span_funcs[SP][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_p_caa_dpan_sse3; | ||
340 | op_blend_span_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pas_caa_dpan_sse3; | ||
341 | op_blend_span_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pan_caa_dpan_sse3; | ||
342 | } | ||
343 | |||
/*
 * Point-function names for the blend op. No SSE3 point routine is
 * implemented in this file: _op_blend_pt_p_c_dp_sse3 is NULL and every
 * other _op_blend_pt_* name below expands, directly or through another
 * alias, to it.
 */
344 | #define _op_blend_pt_p_c_dp_sse3 NULL | ||
345 | |||
346 | #define _op_blend_pt_pas_c_dp_sse3 _op_blend_pt_p_c_dp_sse3 | ||
347 | #define _op_blend_pt_pan_c_dp_sse3 _op_blend_pt_p_c_dp_sse3 | ||
348 | #define _op_blend_pt_p_can_dp_sse3 _op_blend_pt_p_c_dp_sse3 | ||
349 | #define _op_blend_pt_pas_can_dp_sse3 _op_blend_pt_p_c_dp_sse3 | ||
350 | #define _op_blend_pt_pan_can_dp_sse3 _op_blend_pt_p_c_dp_sse3 | ||
351 | #define _op_blend_pt_p_caa_dp_sse3 _op_blend_pt_p_c_dp_sse3 | ||
352 | #define _op_blend_pt_pas_caa_dp_sse3 _op_blend_pt_p_c_dp_sse3 | ||
353 | #define _op_blend_pt_pan_caa_dp_sse3 _op_blend_pt_p_c_dp_sse3 | ||
354 | |||
355 | #define _op_blend_pt_p_c_dpan_sse3 _op_blend_pt_p_c_dp_sse3 | ||
356 | #define _op_blend_pt_pas_c_dpan_sse3 _op_blend_pt_pas_c_dp_sse3 | ||
357 | #define _op_blend_pt_pan_c_dpan_sse3 _op_blend_pt_pan_c_dp_sse3 | ||
358 | #define _op_blend_pt_p_can_dpan_sse3 _op_blend_pt_p_can_dp_sse3 | ||
359 | #define _op_blend_pt_pas_can_dpan_sse3 _op_blend_pt_pas_can_dp_sse3 | ||
360 | #define _op_blend_pt_pan_can_dpan_sse3 _op_blend_pt_pan_can_dp_sse3 | ||
361 | #define _op_blend_pt_p_caa_dpan_sse3 _op_blend_pt_p_caa_dp_sse3 | ||
362 | #define _op_blend_pt_pas_caa_dpan_sse3 _op_blend_pt_pas_caa_dp_sse3 | ||
363 | #define _op_blend_pt_pan_caa_dpan_sse3 _op_blend_pt_pan_caa_dp_sse3 | ||
364 | |||
/*
 * Fill the CPU_SSE3 slots of op_blend_pt_funcs for the pixel x color case.
 *
 * Every _op_blend_pt_*_sse3 name used here is a macro that expands to NULL,
 * so all 18 entries written by this function are NULL.
 * NOTE(review): how the dispatcher treats a NULL entry is not visible in
 * this file -- confirm it falls back to another implementation.
 */
365 | static void | ||
366 | init_blend_pixel_color_pt_funcs_sse3(void) | ||
367 | { | ||
368 | op_blend_pt_funcs[SP][SM_N][SC][DP][CPU_SSE3] = _op_blend_pt_p_c_dp_sse3; | ||
369 | op_blend_pt_funcs[SP_AS][SM_N][SC][DP][CPU_SSE3] = _op_blend_pt_pas_c_dp_sse3; | ||
370 | op_blend_pt_funcs[SP_AN][SM_N][SC][DP][CPU_SSE3] = _op_blend_pt_pan_c_dp_sse3; | ||
371 | op_blend_pt_funcs[SP][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_pt_p_can_dp_sse3; | ||
372 | op_blend_pt_funcs[SP_AS][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_pt_pas_can_dp_sse3; | ||
373 | op_blend_pt_funcs[SP_AN][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_pt_pan_can_dp_sse3; | ||
374 | op_blend_pt_funcs[SP][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_pt_p_caa_dp_sse3; | ||
375 | op_blend_pt_funcs[SP_AS][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_pt_pas_caa_dp_sse3; | ||
376 | op_blend_pt_funcs[SP_AN][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_pt_pan_caa_dp_sse3; | ||
377 | |||
378 | op_blend_pt_funcs[SP][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_pt_p_c_dpan_sse3; | ||
379 | op_blend_pt_funcs[SP_AS][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_pt_pas_c_dpan_sse3; | ||
380 | op_blend_pt_funcs[SP_AN][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_pt_pan_c_dpan_sse3; | ||
381 | op_blend_pt_funcs[SP][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pt_p_can_dpan_sse3; | ||
382 | op_blend_pt_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pt_pas_can_dpan_sse3; | ||
383 | op_blend_pt_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pt_pan_can_dpan_sse3; | ||
384 | op_blend_pt_funcs[SP][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pt_p_caa_dpan_sse3; | ||
385 | op_blend_pt_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pt_pas_caa_dpan_sse3; | ||
386 | op_blend_pt_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pt_pan_caa_dpan_sse3; | ||
387 | } | ||
388 | |||
389 | /*-----*/ | ||
390 | |||
391 | /* blend_rel pixel x color -> dst */ | ||
392 | |||
/*
 * "blend_rel" span routine: like the plain pixel x color blend, but the
 * color-scaled source is additionally multiplied by the destination's top
 * byte before being added.
 *
 *   s  source pixels; read with unaligned loads in the vector paths
 *   m  mask pointer, unused by this variant
 *   c  color multiplied into every source pixel
 *   d  destination pixels, updated in place
 *   l  remaining pixel count; decremented by 1, 4 or 8 per block
 *
 * Scalar path, per pixel:
 *   sc = MUL4_SYM(c, *s);
 *   *d = MUL_SYM(*d >> 24, sc) + MUL_256(256 - (sc >> 24), *d);
 *
 * NOTE(review): LOOP_ALIGNED_U1_A48_SSE3, MUL_SYM, mul_sym_sse3 and the
 * other *_sse3 helpers are defined elsewhere; the vector paths are assumed
 * to be lane-wise equivalents of the scalar macros, and d is assumed to be
 * 16-byte aligned when the 4/8-pixel blocks run -- verify.
 */
393 | static void | ||
394 | _op_blend_rel_p_c_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
395 | |||
396 | int alpha; | ||
397 | |||
398 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
399 | |||
400 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
401 | { /* UOP */ | ||
402 | |||
403 | DATA32 sc = MUL4_SYM(c, *s); | ||
404 | alpha = 256 - (sc >> 24); | ||
405 | *d = MUL_SYM(*d >> 24, sc) + MUL_256(alpha, *d); | ||
406 | d++; s++; l--; | ||
407 | }, | ||
408 | { /* A4OP */ | ||
409 | |||
410 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
411 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
412 | |||
413 | __m128i sc0 = mul4_sym_sse3(c_packed, s0); | ||
414 | __m128i a0 = sub4_alpha_sse3(sc0); | ||
415 | |||
/* l0: source term scaled by the destination's top byte; r0: weighted destination */
416 | __m128i l0 = mul_sym_sse3(_mm_srli_epi32(d0, 24), sc0); | ||
417 | __m128i r0 = mul_256_sse3(a0, d0); | ||
418 | |||
419 | r0 = _mm_add_epi32(l0, r0); | ||
420 | |||
421 | _mm_store_si128((__m128i *)d, r0); | ||
422 | |||
423 | d += 4; s += 4; l -= 4; | ||
424 | }, | ||
425 | { /* A8OP */ | ||
426 | |||
427 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
428 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
429 | |||
430 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
431 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
432 | |||
433 | __m128i sc0 = mul4_sym_sse3(c_packed, s0); | ||
434 | __m128i sc1 = mul4_sym_sse3(c_packed, s1); | ||
435 | |||
436 | __m128i a0 = sub4_alpha_sse3(sc0); | ||
437 | __m128i a1 = sub4_alpha_sse3(sc1); | ||
438 | |||
439 | __m128i l0 = mul_sym_sse3(_mm_srli_epi32(d0, 24), sc0); | ||
440 | __m128i r0 = mul_256_sse3(a0, d0); | ||
441 | |||
442 | __m128i l1 = mul_sym_sse3(_mm_srli_epi32(d1, 24), sc1); | ||
443 | __m128i r1 = mul_256_sse3(a1, d1); | ||
444 | |||
445 | r0 = _mm_add_epi32(l0, r0); | ||
446 | r1 = _mm_add_epi32(l1, r1); | ||
447 | |||
448 | _mm_store_si128((__m128i *)d, r0); | ||
449 | _mm_store_si128((__m128i *)(d+4), r1); | ||
450 | |||
451 | d += 8; s += 8; l -= 8; | ||
452 | }) | ||
453 | } | ||
454 | |||
/*
 * blend_rel span aliases.
 *   - Every "dp" variant expands to the single generic routine
 *     _op_blend_rel_p_c_dp_sse3.
 *   - Every "dpan" variant expands to the plain (non-rel) blend name with
 *     the same suffix, so it depends on the _op_blend_*_dpan_sse3 macros
 *     defined earlier in this file.
 */
455 | #define _op_blend_rel_pas_c_dp_sse3 _op_blend_rel_p_c_dp_sse3 | ||
456 | #define _op_blend_rel_pan_c_dp_sse3 _op_blend_rel_p_c_dp_sse3 | ||
457 | #define _op_blend_rel_p_can_dp_sse3 _op_blend_rel_p_c_dp_sse3 | ||
458 | #define _op_blend_rel_pas_can_dp_sse3 _op_blend_rel_p_c_dp_sse3 | ||
459 | #define _op_blend_rel_pan_can_dp_sse3 _op_blend_rel_p_c_dp_sse3 | ||
460 | #define _op_blend_rel_p_caa_dp_sse3 _op_blend_rel_p_c_dp_sse3 | ||
461 | #define _op_blend_rel_pas_caa_dp_sse3 _op_blend_rel_p_c_dp_sse3 | ||
462 | #define _op_blend_rel_pan_caa_dp_sse3 _op_blend_rel_p_c_dp_sse3 | ||
463 | |||
464 | #define _op_blend_rel_p_c_dpan_sse3 _op_blend_p_c_dpan_sse3 | ||
465 | #define _op_blend_rel_pas_c_dpan_sse3 _op_blend_pas_c_dpan_sse3 | ||
466 | #define _op_blend_rel_pan_c_dpan_sse3 _op_blend_pan_c_dpan_sse3 | ||
467 | #define _op_blend_rel_p_can_dpan_sse3 _op_blend_p_can_dpan_sse3 | ||
468 | #define _op_blend_rel_pas_can_dpan_sse3 _op_blend_pas_can_dpan_sse3 | ||
469 | #define _op_blend_rel_pan_can_dpan_sse3 _op_blend_pan_can_dpan_sse3 | ||
470 | #define _op_blend_rel_p_caa_dpan_sse3 _op_blend_p_caa_dpan_sse3 | ||
471 | #define _op_blend_rel_pas_caa_dpan_sse3 _op_blend_pas_caa_dpan_sse3 | ||
472 | #define _op_blend_rel_pan_caa_dpan_sse3 _op_blend_pan_caa_dpan_sse3 | ||
473 | |||
/*
 * Store the SSE3 blend_rel span routines into op_blend_rel_span_funcs.
 *
 * Fills the CPU_SSE3 slot for every combination of first index
 * SP / SP_AS / SP_AN, third index SC / SC_AN / SC_AA and fourth index
 * DP / DP_AN, always with SM_N as the second index. Through the alias
 * macros, all DP entries receive _op_blend_rel_p_c_dp_sse3 and the DP_AN
 * entries receive the plain blend routines.
 */
474 | static void | ||
475 | init_blend_rel_pixel_color_span_funcs_sse3(void) | ||
476 | { | ||
477 | op_blend_rel_span_funcs[SP][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_p_c_dp_sse3; | ||
478 | op_blend_rel_span_funcs[SP_AS][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_pas_c_dp_sse3; | ||
479 | op_blend_rel_span_funcs[SP_AN][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_pan_c_dp_sse3; | ||
480 | op_blend_rel_span_funcs[SP][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_rel_p_can_dp_sse3; | ||
481 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pas_can_dp_sse3; | ||
482 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pan_can_dp_sse3; | ||
483 | op_blend_rel_span_funcs[SP][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_p_caa_dp_sse3; | ||
484 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pas_caa_dp_sse3; | ||
485 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pan_caa_dp_sse3; | ||
486 | |||
487 | op_blend_rel_span_funcs[SP][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_p_c_dpan_sse3; | ||
488 | op_blend_rel_span_funcs[SP_AS][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pas_c_dpan_sse3; | ||
489 | op_blend_rel_span_funcs[SP_AN][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pan_c_dpan_sse3; | ||
490 | op_blend_rel_span_funcs[SP][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_p_can_dpan_sse3; | ||
491 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pas_can_dpan_sse3; | ||
492 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pan_can_dpan_sse3; | ||
493 | op_blend_rel_span_funcs[SP][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_p_caa_dpan_sse3; | ||
494 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pas_caa_dpan_sse3; | ||
495 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pan_caa_dpan_sse3; | ||
496 | } | ||
497 | |||
/*
 * blend_rel point-function names. No SSE3 point routine is implemented:
 *   - the "dp" names expand to _op_blend_rel_pt_p_c_dp_sse3, which is NULL;
 *   - the "dpan" names expand to the plain _op_blend_pt_*_dpan_sse3 macros
 *     defined earlier in this file, which also resolve to NULL.
 */
498 | #define _op_blend_rel_pt_p_c_dp_sse3 NULL | ||
499 | |||
500 | #define _op_blend_rel_pt_pas_c_dp_sse3 _op_blend_rel_pt_p_c_dp_sse3 | ||
501 | #define _op_blend_rel_pt_pan_c_dp_sse3 _op_blend_rel_pt_p_c_dp_sse3 | ||
502 | #define _op_blend_rel_pt_p_can_dp_sse3 _op_blend_rel_pt_p_c_dp_sse3 | ||
503 | #define _op_blend_rel_pt_pas_can_dp_sse3 _op_blend_rel_pt_p_c_dp_sse3 | ||
504 | #define _op_blend_rel_pt_pan_can_dp_sse3 _op_blend_rel_pt_p_c_dp_sse3 | ||
505 | #define _op_blend_rel_pt_p_caa_dp_sse3 _op_blend_rel_pt_p_c_dp_sse3 | ||
506 | #define _op_blend_rel_pt_pas_caa_dp_sse3 _op_blend_rel_pt_p_c_dp_sse3 | ||
507 | #define _op_blend_rel_pt_pan_caa_dp_sse3 _op_blend_rel_pt_p_c_dp_sse3 | ||
508 | |||
509 | #define _op_blend_rel_pt_p_c_dpan_sse3 _op_blend_pt_p_c_dpan_sse3 | ||
510 | #define _op_blend_rel_pt_pas_c_dpan_sse3 _op_blend_pt_pas_c_dpan_sse3 | ||
511 | #define _op_blend_rel_pt_pan_c_dpan_sse3 _op_blend_pt_pan_c_dpan_sse3 | ||
512 | #define _op_blend_rel_pt_p_can_dpan_sse3 _op_blend_pt_p_can_dpan_sse3 | ||
513 | #define _op_blend_rel_pt_pas_can_dpan_sse3 _op_blend_pt_pas_can_dpan_sse3 | ||
514 | #define _op_blend_rel_pt_pan_can_dpan_sse3 _op_blend_pt_pan_can_dpan_sse3 | ||
515 | #define _op_blend_rel_pt_p_caa_dpan_sse3 _op_blend_pt_p_caa_dpan_sse3 | ||
516 | #define _op_blend_rel_pt_pas_caa_dpan_sse3 _op_blend_pt_pas_caa_dpan_sse3 | ||
517 | #define _op_blend_rel_pt_pan_caa_dpan_sse3 _op_blend_pt_pan_caa_dpan_sse3 | ||
518 | |||
/*
 * Fill the CPU_SSE3 slots of op_blend_rel_pt_funcs for the pixel x color
 * case.
 *
 * Every _op_blend_rel_pt_*_sse3 name used here is a macro that resolves to
 * NULL, so all 18 entries written by this function are NULL.
 * NOTE(review): how the dispatcher treats a NULL entry is not visible in
 * this file -- confirm it falls back to another implementation.
 */
519 | static void | ||
520 | init_blend_rel_pixel_color_pt_funcs_sse3(void) | ||
521 | { | ||
522 | op_blend_rel_pt_funcs[SP][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_pt_p_c_dp_sse3; | ||
523 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_pt_pas_c_dp_sse3; | ||
524 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_pt_pan_c_dp_sse3; | ||
525 | op_blend_rel_pt_funcs[SP][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pt_p_can_dp_sse3; | ||
526 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pt_pas_can_dp_sse3; | ||
527 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pt_pan_can_dp_sse3; | ||
528 | op_blend_rel_pt_funcs[SP][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pt_p_caa_dp_sse3; | ||
529 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pt_pas_caa_dp_sse3; | ||
530 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pt_pan_caa_dp_sse3; | ||
531 | |||
532 | op_blend_rel_pt_funcs[SP][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pt_p_c_dpan_sse3; | ||
533 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pas_c_dpan_sse3; | ||
534 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pan_c_dpan_sse3; | ||
535 | op_blend_rel_pt_funcs[SP][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pt_p_can_dpan_sse3; | ||
536 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pas_can_dpan_sse3; | ||
537 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pan_can_dpan_sse3; | ||
538 | op_blend_rel_pt_funcs[SP][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pt_p_caa_dpan_sse3; | ||
539 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pas_caa_dpan_sse3; | ||
540 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pan_caa_dpan_sse3; | ||
541 | } | ||
542 | |||
543 | #endif | ||