diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/evas/src/lib/include/evas_blend_ops.h | 378 |
1 files changed, 0 insertions, 378 deletions
diff --git a/libraries/evas/src/lib/include/evas_blend_ops.h b/libraries/evas/src/lib/include/evas_blend_ops.h deleted file mode 100644 index 6bd1f16..0000000 --- a/libraries/evas/src/lib/include/evas_blend_ops.h +++ /dev/null | |||
@@ -1,378 +0,0 @@ | |||
1 | #ifndef EVAS_BLEND_OPS_H | ||
2 | #define EVAS_BLEND_OPS_H | ||
3 | |||
4 | #if defined BUILD_MMX || defined BUILD_SSE | ||
5 | #include "evas_mmx.h" | ||
6 | #endif | ||
7 | |||
8 | #include "config.h" | ||
9 | |||
10 | #ifdef NEED_SSE3 | ||
11 | # if defined BUILD_SSE3 | ||
12 | # include <immintrin.h> | ||
13 | # endif | ||
14 | #endif | ||
15 | |||
16 | /* source pixel flags: */ | ||
17 | |||
18 | /* no source pixels */ | ||
19 | #define SP_N 0 | ||
20 | /* source pixels present (argb, the default) */ | ||
21 | #define SP 1 | ||
22 | /* source pixels are rgb (i.e. every alpha == 255) */ | ||
23 | #define SP_AN 2 | ||
24 | /* source pixel alphas are sparse */ | ||
25 | #define SP_AS 3 | ||
26 | /* number of source pixel flags */ | ||
27 | #define SP_LAST 4 | ||
28 | |||
29 | /* source mask flags: */ | ||
30 | |||
31 | /* no mask */ | ||
32 | #define SM_N 0 | ||
33 | /* mask present (alpha) */ | ||
34 | #define SM 1 | ||
35 | /* mask alphas are 'trivial' (i.e. only 0 or 255) */ | ||
36 | #define SM_AT 2 | ||
37 | /* mask alphas are sparse */ | ||
38 | #define SM_AS 3 | ||
39 | /* number of source mask flags */ | ||
40 | #define SM_LAST 4 | ||
41 | |||
42 | /* source color flags: */ | ||
43 | |||
44 | /* color is 0xffffffff */ | ||
45 | #define SC_N 0 | ||
46 | /* color present (argb, the default) */ | ||
47 | #define SC 1 | ||
48 | /* color is rgb (i.e. 0xffrrggbb) */ | ||
49 | #define SC_AN 2 | ||
50 | /* color is 'alpha' (i.e. 0xaaaaaaaa) */ | ||
51 | #define SC_AA 3 | ||
52 | /* number of source color flags */ | ||
53 | #define SC_LAST 4 | ||
54 | |||
55 | /* destination pixel flags: */ | ||
56 | |||
57 | /* destination pixels are argb (the default) */ | ||
58 | #define DP 0 | ||
59 | /* destination pixels are rgb (i.e. every alpha == 255) */ | ||
60 | #define DP_AN 1 | ||
61 | /* number of destination pixel flags */ | ||
62 | #define DP_LAST 2 | ||
63 | |||
64 | /* cpu type flags: */ | ||
65 | |||
66 | /* none, bad news */ | ||
67 | #define CPU_N 0 | ||
68 | /* cpu C */ | ||
69 | #define CPU_C 1 | ||
70 | /* cpu MMX */ | ||
71 | #define CPU_MMX 2 | ||
72 | /* cpu SSE */ | ||
73 | #define CPU_SSE 3 | ||
74 | /* cpu SSE2 */ | ||
75 | #define CPU_SSE2 4 | ||
76 | /* cpu NEON */ | ||
77 | #define CPU_NEON 5 | ||
78 | /* cpu SSE3 */ | ||
79 | #define CPU_SSE3 6 | ||
80 | /* number of cpu type flags */ | ||
81 | #define CPU_LAST 7 | ||
82 | |||
83 | |||
84 | /* some useful constants */ | ||
85 | |||
/* Only declared here (extern); the definitions and their values live in
 * another translation unit.
 * NOTE(review): the names suggest the values 255 and 256 - confirm at the
 * definition. */
86 | extern const DATA32 ALPHA_255; | ||
87 | extern const DATA32 ALPHA_256; | ||
88 | |||
89 | /* some useful C macros */ | ||
90 | |||
/*
 * Scale each channel of the packed 32-bit pixel c by its own factor:
 * a -> bits 24..31, r -> bits 16..23, g -> bits 8..15, b -> bits 0..7.
 * Every product is divided by 256 (shift right by 8), so a factor of 256
 * leaves its channel unchanged and 0 clears it.  The blue term is not masked,
 * so factors above 256 would spill into the neighbouring channel.
 * Each argument is evaluated more than once; avoid side effects.
 */
#define MUL4_256(a, r, g, b, c) \
   ( (((b) * ((c) & 0x000000ff)) >> 8) + \
     ((((g) * ((c) & 0x0000ff00)) >> 8) & 0x0000ff00) + \
     ((((r) * ((c) & 0x00ff0000)) >> 8) & 0x00ff0000) + \
     (((a) * (((c) >> 8) & 0x00ff0000)) & 0xff000000) )
96 | |||
/*
 * Scale the three low channels of the packed pixel c (bits 16..23 by r,
 * bits 8..15 by g, bits 0..7 by b), each product divided by 256.
 * The top byte of the result is always 0.  A factor of 256 keeps a channel
 * unchanged.  Arguments are evaluated more than once.
 */
#define MUL3_256(r, g, b, c) \
   ( (((b) * ((c) & 0x000000ff)) >> 8) + \
     ((((g) * ((c) & 0x0000ff00)) >> 8) & 0x0000ff00) + \
     ((((r) * ((c) & 0x00ff0000)) >> 8) & 0x00ff0000) )
101 | |||
/*
 * Scale all four channels of the packed pixel c by the single factor a,
 * dividing each product by 256 (a == 256 returns c, a == 0 returns 0).
 * Two channels are processed per multiply: bytes 0 and 2 in one term,
 * bytes 1 and 3 in the other.  Arguments are evaluated more than once.
 */
#define MUL_256(a, c) \
   ( ((((a) * ((c) & 0x00ff00ff)) >> 8) & 0x00ff00ff) + \
     (((a) * (((c) >> 8) & 0x00ff00ff)) & 0xff00ff00) )
105 | |||
/*
 * Channel-wise "symmetric" multiply of two packed 32-bit pixels:
 * every result channel is (xc * yc + 255) >> 8, so multiplying by
 * 0xffffffff returns the other operand unchanged and multiplying by 0
 * gives 0.
 *
 * The green term works on operands kept in bits 8..15, so its product sits
 * in bits 16..31; the +255 rounding bias therefore has to be 0xff0000.
 * (A bias of 0xff00 is discarded by the following >> 16 and leaves green
 * truncated instead of rounded like the other three channels.)
 *
 * Arguments are evaluated more than once; avoid side effects.
 */
#define MUL4_SYM(x, y) \
 ( ((((((x) >> 16) & 0xff00) * (((y) >> 16) & 0xff00)) + 0xff0000) & 0xff000000) + \
   ((((((x) >> 8) & 0xff00) * (((y) >> 16) & 0xff)) + 0xff00) & 0xff0000) + \
   ((((((x) & 0xff00) * ((y) & 0xff00)) + 0xff0000) >> 16) & 0xff00) + \
   (((((x) & 0xff) * ((y) & 0xff)) + 0xff) >> 8) )
111 | |||
/*
 * Channel-wise "symmetric" multiply of the three low channels of two packed
 * pixels: each result channel is (xc * yc + 255) >> 8 and the top byte of
 * the result is 0.  Multiplying by 0x00ffffff (or 0xffffffff) therefore
 * returns the low 24 bits of the other operand unchanged.
 *
 * The green product sits in bits 16..31, so its rounding bias must be
 * 0xff0000; a bias of 0xff00 would be shifted away and green would be
 * truncated while red and blue are rounded.
 *
 * Arguments are evaluated more than once; avoid side effects.
 */
#define MUL3_SYM(x, y) \
 ( ((((((x) >> 8) & 0xff00) * (((y) >> 16) & 0xff)) + 0xff00) & 0xff0000) + \
   ((((((x) & 0xff00) * ((y) & 0xff00)) + 0xff0000) >> 16) & 0xff00) + \
   (((((x) & 0xff) * ((y) & 0xff)) + 0xff) >> 8) )
116 | |||
/*
 * Multiply every channel of the packed pixel x by the single factor a using
 * the symmetric form (channel * a + 255) >> 8, so a == 255 returns x and
 * a == 0 returns 0.  Bytes 0 and 2 are handled by one term, bytes 1 and 3
 * by the other.  Arguments are evaluated more than once.
 */
#define MUL_SYM(a, x) \
   ( ((((((x) & 0x00ff00ff) * (a)) + 0x00ff00ff) >> 8) & 0x00ff00ff) + \
     (((((x) >> 8) & 0x00ff00ff) * (a)) + 0x00ff00ff & 0xff00ff00) )
120 | |||
/*
 * Scale only the top byte (bits 24..31) of c by a / 256; the low 24 bits of
 * the result are 0.  a == 256 keeps the byte unchanged.
 */
#define MUL_A_256(a, c) \
   ( 0xff000000 & ((a) * (((c) >> 8) & 0x00ff0000)) )
123 | |||
/*
 * Symmetric multiply of only the top byte (bits 24..31) of c by a:
 * (byte * a + 255) >> 8, placed back in bits 24..31; the low 24 bits of the
 * result are 0.  a == 255 keeps the byte unchanged.
 */
#define MUL_A_SYM(a, c) \
   ( 0xff000000 & (0x00ff0000 + ((a) * (((c) >> 8) & 0x00ff0000))) )
126 | |||
127 | #define INTERP_256(a, c0, c1) \ | ||
128 | ( (((((((c0) >> 8) & 0xff00ff) - (((c1) >> 8) & 0xff00ff)) * (a)) \ | ||
129 | + ((c1) & 0xff00ff00)) & 0xff00ff00) + \ | ||
130 | (((((((c0) & 0xff00ff) - ((c1) & 0xff00ff)) * (a)) >> 8) \ | ||
131 | + ((c1) & 0xff00ff)) & 0xff00ff) ) | ||
132 | |||
133 | #define INTERP_RGB_256(a, c0, c1) \ | ||
134 | ( (((((((c0) >> 8) & 0xff) - (((c1) >> 8) & 0xff)) * (a)) \ | ||
135 | + ((c1) & 0xff00)) & 0xff00) + \ | ||
136 | (((((((c0) & 0xff00ff) - ((c1) & 0xff00ff)) * (a)) >> 8) \ | ||
137 | + ((c1) & 0xff00ff)) & 0xff00ff) ) | ||
138 | |||
/*
 * Linear interpolation of the top byte (bits 24..31) only:
 * result byte = c1 + (((c0 - c1) * a) >> 8); the low 24 bits of the result
 * are 0.  a == 256 yields the top byte of c0, a == 0 that of c1.
 * Relies on unsigned wrap-around; arguments are evaluated more than once.
 */
#define INTERP_A_256(a, c0, c1) \
   ( 0xff000000 & \
     (((c1) & 0xff000000) + \
      ((a) * ((((c0) >> 8) & 0x00ff0000) - (((c1) >> 8) & 0x00ff0000)))) )
142 | |||
143 | |||
144 | /* some useful MMX macros */ | ||
145 | |||
146 | #ifdef BUILD_MMX | ||
/* MOV_A2R(a, mma): load the value a into register mma, then unpack mma with
 * itself twice (low words, then low dwords).
 * NOTE(review): assuming the *_m2r/_r2r wrappers from evas_mmx.h map onto the
 * same-named MMX instructions, this replicates the low 16-bit word of a into
 * all four words of mma - confirm.
 * Expands to several statements, each with its own ';' - do not use it as the
 * unbraced body of an if/else. */
147 | #define MOV_A2R(a, mma) \ | ||
148 | movd_m2r(a, mma); \ | ||
149 | punpcklwd_r2r(mma, mma); \ | ||
150 | punpckldq_r2r(mma, mma); | ||
151 | |||
/* MOV_P2R(c, mmc, mmz): load pixel c into register mmc and byte-interleave it
 * with register mmz.
 * NOTE(review): presumably mmz holds zero, so the four pixel bytes are widened
 * to four 16-bit words in mmc - confirm against callers and evas_mmx.h.
 * Expands to several statements; not safe as an unbraced if/else body. */
152 | #define MOV_P2R(c, mmc, mmz) \ | ||
153 | movd_m2r(c, mmc); \ | ||
154 | punpcklbw_r2r(mmz, mmc); | ||
155 | |||
/* MOV_R2P(mmc, c, mmz): pack register mmc together with mmz and store the
 * result of mmc into c; mmc is modified by the pack step.
 * NOTE(review): assuming packuswb semantics (words -> unsigned saturated
 * bytes), this is the inverse of widening a pixel to words - confirm.
 * Expands to several statements; not safe as an unbraced if/else body. */
156 | #define MOV_R2P(mmc, c, mmz) \ | ||
157 | packuswb_r2r(mmz, mmc); \ | ||
158 | movd_r2m(mmc, c); | ||
159 | |||
/* MUL4_256_R2R(mmx, mmy): multiply mmy by mmx word-wise, then shift every
 * word of mmy right by 8.  The result is left in mmy; mmx is only read.
 * NOTE(review): assumes pmullw/psrlw wrappers behave like the MMX
 * instructions of the same name - confirm in evas_mmx.h.
 * Expands to several statements; not safe as an unbraced if/else body. */
160 | #define MUL4_256_R2R(mmx, mmy) \ | ||
161 | pmullw_r2r(mmx, mmy); \ | ||
162 | psrlw_i2r(8, mmy); | ||
163 | |||
/* MUL4_SYM_R2R(mmx, mmy, mm255): multiply mmy by mmx word-wise, add mm255,
 * then shift every word of mmy right by 8.  The result is left in mmy.
 * NOTE(review): mm255 presumably holds 255 in every word, giving the
 * (x * y + 255) >> 8 form used by MUL4_SYM() - confirm against callers.
 * Expands to several statements; not safe as an unbraced if/else body. */
164 | #define MUL4_SYM_R2R(mmx, mmy, mm255) \ | ||
165 | pmullw_r2r(mmx, mmy); \ | ||
166 | paddw_r2r(mm255, mmy); \ | ||
167 | psrlw_i2r(8, mmy); | ||
168 | |||
/* MOV_RA2R(mmx, mma): copy register mmx into mma, then unpack mma with itself
 * twice (high words, then high dwords).
 * NOTE(review): with MMX punpckh* semantics this replicates the highest
 * 16-bit word of mmx (the alpha word of a widened argb pixel, presumably)
 * into all four words of mma - confirm.
 * Expands to several statements; not safe as an unbraced if/else body. */
169 | #define MOV_RA2R(mmx, mma) \ | ||
170 | movq_r2r(mmx, mma); \ | ||
171 | punpckhwd_r2r(mma, mma); \ | ||
172 | punpckhdq_r2r(mma, mma); | ||
173 | |||
/* MOV_PA2R(c, mma): load pixel c into mma, byte-interleave mma with itself,
 * then unpack it with itself twice (high words, then high dwords).
 * NOTE(review): with MMX semantics every word of mma ends up holding the top
 * byte of c duplicated into both of its bytes (i.e. alpha * 0x0101) -
 * confirm against evas_mmx.h before relying on the exact scale.
 * Expands to several statements; not safe as an unbraced if/else body. */
174 | #define MOV_PA2R(c, mma) \ | ||
175 | movd_m2r(c, mma); \ | ||
176 | punpcklbw_r2r(mma, mma); \ | ||
177 | punpckhwd_r2r(mma, mma); \ | ||
178 | punpckhdq_r2r(mma, mma); | ||
179 | |||
/* INTERP_256_R2R(mma, mmx, mmy, mm255): in order, mmx -= mmy; mmx *= mma;
 * mmx >>= 8 (per word); mmy += mmx; mmy &= mm255.
 * The result is left in mmy and mmx is clobbered; mma and mm255 are only read.
 * NOTE(review): this follows the shape of INTERP_256() (mmy + ((mmx - mmy) *
 * mma >> 8)) with mm255 presumably masking each word to 8 bits - confirm.
 * Expands to several statements; not safe as an unbraced if/else body. */
180 | #define INTERP_256_R2R(mma, mmx, mmy, mm255) \ | ||
181 | psubw_r2r(mmy, mmx); \ | ||
182 | pmullw_r2r(mma, mmx); \ | ||
183 | psrlw_i2r(8, mmx); \ | ||
184 | paddw_r2r(mmx, mmy); \ | ||
185 | pand_r2r(mm255, mmy); | ||
186 | |||
187 | #endif | ||
188 | |||
189 | |||
190 | /* some useful SSE3 inline functions */ | ||
191 | |||
192 | #ifdef NEED_SSE3 | ||
193 | #ifdef BUILD_SSE3 | ||
194 | |||
/* Vector constants read by the SSE3 helpers in this header.  They are
 * 'static' and have no initializer, so every translation unit that includes
 * this header gets its own zero-initialized copies.
 * NOTE(review): they are presumably assigned by an init routine elsewhere
 * before any helper runs - confirm where, and with which values. */
195 | static __m128i GA_MASK_SSE3; | ||
196 | static __m128i RB_MASK_SSE3; | ||
197 | static __m128i SYM4_MASK_SSE3; | ||
198 | static __m128i RGB_MASK_SSE3; | ||
199 | //static __m128i A_MASK_SSE3; | ||
200 | |||
201 | static __m128i ALPHA_SSE3; | ||
202 | |||
203 | static EFL_ALWAYS_INLINE __m128i | ||
204 | mul_256_sse3(__m128i a, __m128i c) { | ||
205 | |||
206 | /* prepare alpha for word multiplication */ | ||
207 | __m128i a_l = a; | ||
208 | __m128i a_h = a; | ||
209 | a_l = _mm_unpacklo_epi16(a_l, a_l); | ||
210 | a_h = _mm_unpackhi_epi16(a_h, a_h); | ||
211 | __m128i a0 = (__m128i) _mm_shuffle_ps( (__m128)a_l, (__m128)a_h, 0x88); | ||
212 | |||
213 | /* first half of calc */ | ||
214 | __m128i c0 = c; | ||
215 | c0 = _mm_srli_epi32(c0, 8); | ||
216 | c0 = _mm_and_si128(GA_MASK_SSE3, c0); | ||
217 | c0 = _mm_mullo_epi16(a0, c0); | ||
218 | c0 = _mm_and_si128(RB_MASK_SSE3, c0); | ||
219 | |||
220 | /* second half of calc */ | ||
221 | __m128i c1 = c; | ||
222 | c1 = _mm_and_si128(GA_MASK_SSE3, c1); | ||
223 | c1 = _mm_mullo_epi16(a0, c1); | ||
224 | c1 = _mm_srli_epi32(c1, 8); | ||
225 | c1 = _mm_and_si128(GA_MASK_SSE3, c1); | ||
226 | |||
227 | /* combine */ | ||
228 | return _mm_add_epi32(c0, c1); | ||
229 | } | ||
230 | |||
231 | static EFL_ALWAYS_INLINE __m128i | ||
232 | sub4_alpha_sse3(__m128i c) { | ||
233 | |||
234 | __m128i c0 = c; | ||
235 | |||
236 | c0 = _mm_srli_epi32(c0, 24); | ||
237 | return _mm_sub_epi32(ALPHA_SSE3, c0); | ||
238 | } | ||
239 | |||
240 | static EFL_ALWAYS_INLINE __m128i | ||
241 | interp4_256_sse3(__m128i a, __m128i c0, __m128i c1) | ||
242 | { | ||
243 | const __m128i zero = _mm_setzero_si128(); | ||
244 | |||
245 | __m128i a_l = a; | ||
246 | __m128i a_h = a; | ||
247 | a_l = _mm_unpacklo_epi16(a_l, a_l); | ||
248 | a_h = _mm_unpackhi_epi16(a_h, a_h); | ||
249 | |||
250 | __m128i a_t = _mm_slli_epi64(a_l, 32); | ||
251 | __m128i a_t0 = _mm_slli_epi64(a_h, 32); | ||
252 | |||
253 | a_l = _mm_add_epi32(a_l, a_t); | ||
254 | a_h = _mm_add_epi32(a_h, a_t0); | ||
255 | |||
256 | __m128i c0_l = c0; | ||
257 | __m128i c0_h = c0; | ||
258 | |||
259 | c0_l = _mm_unpacklo_epi8(c0_l, zero); | ||
260 | c0_h = _mm_unpackhi_epi8(c0_h, zero); | ||
261 | |||
262 | __m128i c1_l = c1; | ||
263 | __m128i c1_h = c1; | ||
264 | |||
265 | c1_l = _mm_unpacklo_epi8(c1_l, zero); | ||
266 | c1_h = _mm_unpackhi_epi8(c1_h, zero); | ||
267 | |||
268 | __m128i cl_sub = _mm_sub_epi16(c0_l, c1_l); | ||
269 | __m128i ch_sub = _mm_sub_epi16(c0_h, c1_h); | ||
270 | |||
271 | cl_sub = _mm_mullo_epi16(cl_sub, a_l); | ||
272 | ch_sub = _mm_mullo_epi16(ch_sub, a_h); | ||
273 | |||
274 | __m128i c1ls = _mm_slli_epi16(c1_l, 8); | ||
275 | __m128i c1hs = _mm_slli_epi16(c1_h, 8); | ||
276 | |||
277 | cl_sub = _mm_add_epi16(cl_sub, c1ls); | ||
278 | ch_sub = _mm_add_epi16(ch_sub, c1hs); | ||
279 | |||
280 | cl_sub = _mm_and_si128(cl_sub, RB_MASK_SSE3); | ||
281 | ch_sub = _mm_and_si128(ch_sub, RB_MASK_SSE3); | ||
282 | |||
283 | cl_sub = _mm_srli_epi64(cl_sub, 8); | ||
284 | ch_sub = _mm_srli_epi64(ch_sub, 8); | ||
285 | |||
286 | cl_sub = _mm_packus_epi16(cl_sub, cl_sub); | ||
287 | ch_sub = _mm_packus_epi16(ch_sub, ch_sub); | ||
288 | |||
289 | return (__m128i) _mm_shuffle_ps( (__m128)cl_sub, (__m128)ch_sub, 0x44); | ||
290 | } | ||
291 | |||
292 | static EFL_ALWAYS_INLINE __m128i | ||
293 | mul_sym_sse3(__m128i a, __m128i c) { | ||
294 | |||
295 | /* Prepare alpha for word mult */ | ||
296 | __m128i a_l = a; | ||
297 | __m128i a_h = a; | ||
298 | a_l = _mm_unpacklo_epi16(a_l, a_l); | ||
299 | a_h = _mm_unpackhi_epi16(a_h, a_h); | ||
300 | __m128i a0 = (__m128i) _mm_shuffle_ps( (__m128)a_l, (__m128)a_h, 0x88); | ||
301 | |||
302 | /* first part */ | ||
303 | __m128i c0 = c; | ||
304 | c0 = _mm_srli_epi32(c0, 8); | ||
305 | c0 = _mm_and_si128(GA_MASK_SSE3, c0); | ||
306 | c0 = _mm_mullo_epi16(a0, c0); | ||
307 | c0 = _mm_add_epi32(c0, GA_MASK_SSE3); | ||
308 | c0 = _mm_and_si128(RB_MASK_SSE3, c0); | ||
309 | |||
310 | /* second part */ | ||
311 | __m128i c1 = c; | ||
312 | c1 = _mm_and_si128(GA_MASK_SSE3, c1); | ||
313 | c1 = _mm_mullo_epi16(a0, c1); | ||
314 | c1 = _mm_add_epi32(c1, GA_MASK_SSE3); | ||
315 | c1 = _mm_srli_epi32(c1, 8); | ||
316 | c1 = _mm_and_si128(GA_MASK_SSE3, c1); | ||
317 | |||
318 | return _mm_add_epi32(c0, c1); | ||
319 | } | ||
320 | |||
321 | static EFL_ALWAYS_INLINE __m128i | ||
322 | mul4_sym_sse3(__m128i x, __m128i y) { | ||
323 | |||
324 | const __m128i zero = _mm_setzero_si128(); | ||
325 | |||
326 | __m128i x_l = _mm_unpacklo_epi8(x, zero); | ||
327 | __m128i x_h = _mm_unpackhi_epi8(x, zero); | ||
328 | |||
329 | __m128i y_l = _mm_unpacklo_epi8(y, zero); | ||
330 | __m128i y_h = _mm_unpackhi_epi8(y, zero); | ||
331 | |||
332 | __m128i r_l = _mm_mullo_epi16(x_l, y_l); | ||
333 | __m128i r_h = _mm_mullo_epi16(x_h, y_h); | ||
334 | |||
335 | r_l = _mm_add_epi16(r_l, SYM4_MASK_SSE3); | ||
336 | r_h = _mm_add_epi16(r_h, SYM4_MASK_SSE3); | ||
337 | |||
338 | r_l = _mm_srli_epi16(r_l, 8); | ||
339 | r_h = _mm_srli_epi16(r_h, 8); | ||
340 | |||
341 | return _mm_packus_epi16(r_l, r_h); | ||
342 | } | ||
343 | |||
344 | static EFL_ALWAYS_INLINE __m128i | ||
345 | mul3_sym_sse3(__m128i x, __m128i y) { | ||
346 | |||
347 | __m128i res = mul4_sym_sse3(x, y); | ||
348 | return _mm_and_si128(res, RGB_MASK_SSE3); | ||
349 | } | ||
350 | |||
/*
 * Loop skeleton for processing LENGTH items starting at DEST.
 *
 *   DEST   - pointer expression; its address is tested for 16-byte alignment.
 *   LENGTH - remaining item count; the supplied operations must decrease it
 *            (and advance DEST), otherwise the loops never terminate.
 *   UOP    - statement block handling one item.
 *   A4OP   - statement block handling four items.
 *   A8OP   - statement block handling eight items.
 *
 * First UOP is repeated until DEST is 16-byte aligned (or LENGTH reaches 0).
 * Then, while LENGTH is non-zero: 8 or more items left -> A8OP, 4..7 -> A4OP,
 * 1..3 -> that many UOPs through deliberate switch fallthrough.
 *
 * DEST and LENGTH are parenthesized so that expression arguments such as
 * 'base + i' are cast and tested as a whole.  The expansion is a plain
 * { } block, so it can be used with or without a trailing semicolon.
 * DEST and LENGTH are evaluated many times.
 */
#define LOOP_ALIGNED_U1_A48_SSE3(DEST, LENGTH, UOP, A4OP, A8OP) \
   { \
      while (((uintptr_t)(DEST) & 0xF) && (LENGTH)) UOP \
   \
      while (LENGTH) { \
         switch (LENGTH) { \
            case 3: UOP /* fall through */ \
            case 2: UOP /* fall through */ \
            case 1: UOP \
               break; \
            case 7: \
            case 6: \
            case 5: \
            case 4: \
               A4OP \
               break; \
            default: \
               A8OP \
               break; \
         } \
      } \
   }
373 | |||
374 | |||
375 | #endif | ||
376 | #endif | ||
377 | |||
378 | #endif | ||