aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_sse3.c
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_sse3.c')
-rw-r--r--libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_sse3.c166
1 files changed, 166 insertions, 0 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_sse3.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_sse3.c
new file mode 100644
index 0000000..64d5a86
--- /dev/null
+++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_sse3.c
@@ -0,0 +1,166 @@
1/* blend color -> dst */
2
3#ifdef BUILD_SSE3
4
5static void
6_op_blend_c_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
7
8 DATA32 a = 256 - (c >> 24);
9
10 const __m128i c_packed = _mm_set_epi32(c, c, c, c);
11 const __m128i a_packed = _mm_set_epi32(a, a, a, a);
12
13 LOOP_ALIGNED_U1_A48_SSE3(d, l,
14 { /* UOP */
15
16 *d = c + MUL_256(a, *d);
17 d++; l--;
18 },
19 { /* A4OP */
20
21 __m128i d0 = _mm_load_si128((__m128i *)d);
22
23 d0 = mul_256_sse3(a_packed, d0);
24 d0 = _mm_add_epi32(d0, c_packed);
25
26 _mm_store_si128((__m128i *)d, d0);
27
28 d += 4; l -= 4;
29 },
30 { /* A8OP */
31
32 __m128i d0 = _mm_load_si128((__m128i *)d);
33 __m128i d1 = _mm_load_si128((__m128i *)(d+4));
34
35 d0 = mul_256_sse3(a_packed, d0);
36 d1 = mul_256_sse3(a_packed, d1);
37
38 d0 = _mm_add_epi32(d0, c_packed);
39 d1 = _mm_add_epi32(d1, c_packed);
40
41 _mm_store_si128((__m128i *)d, d0);
42 _mm_store_si128((__m128i *)(d+4), d1);
43
44 d += 8; l -= 8;
45 })
46}
47
/*
 * Alias names for the remaining colour-blend span variants.
 * All of them end up expanding to _op_blend_c_dp_sse3.
 */
#define _op_blend_caa_dp_sse3     _op_blend_c_dp_sse3

#define _op_blend_c_dpan_sse3     _op_blend_c_dp_sse3
#define _op_blend_caa_dpan_sse3   _op_blend_c_dpan_sse3
52
53static void
54init_blend_color_span_funcs_sse3(void)
55{
56 op_blend_span_funcs[SP_N][SM_N][SC][DP][CPU_SSE3] = _op_blend_c_dp_sse3;
57 op_blend_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_caa_dp_sse3;
58
59// FIXME: BUGGY BUGGY Core i5 750 (32bit), 4.5.2 (Ubuntu/Linaro 4.5.2-8ubuntu4), ello (text and rectangle)
60// op_blend_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_c_dpan_sse3;
61 op_blend_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_caa_dpan_sse3;
62}
63
/*
 * Single-point colour-blend variants.  No SSE3 implementation is provided:
 * the base name is NULL and every other variant aliases back to it.
 */
#define _op_blend_pt_c_dp_sse3       NULL
#define _op_blend_pt_caa_dp_sse3     _op_blend_pt_c_dp_sse3

#define _op_blend_pt_c_dpan_sse3     _op_blend_pt_c_dp_sse3
#define _op_blend_pt_caa_dpan_sse3   _op_blend_pt_c_dpan_sse3

#define _op_blend_pt_c_dpas_sse3     _op_blend_pt_c_dp_sse3
#define _op_blend_pt_caa_dpas_sse3   _op_blend_pt_c_dp_sse3
72
73static void
74init_blend_color_pt_funcs_sse3(void)
75{
76 op_blend_pt_funcs[SP_N][SM_N][SC][DP][CPU_SSE3] = _op_blend_pt_c_dp_sse3;
77 op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_pt_caa_dp_sse3;
78
79 op_blend_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_pt_c_dpan_sse3;
80 op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pt_caa_dpan_sse3;
81}
82
83
84/*-----*/
85
86/* blend_rel color -> dst */
87
88static void
89_op_blend_rel_c_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
90
91 int alpha = 256 - (c >> 24);
92
93 const __m128i c_packed = _mm_set_epi32(c, c, c, c);
94 const __m128i alpha_packed = _mm_set_epi32(alpha, alpha, alpha, alpha);
95
96 LOOP_ALIGNED_U1_A48_SSE3(d, l,
97 { /* UOP */
98
99 *d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d);
100 d++; l--;
101 },
102 { /* A4OP */
103
104 __m128i d0 = _mm_load_si128((__m128i *)d);
105
106 __m128i mul0 = mul_256_sse3(alpha_packed, d0);
107 __m128i sym0 = mul_sym_sse3(_mm_srli_epi32(d0, 24), c_packed);
108
109 d0 = _mm_add_epi32(mul0, sym0);
110
111 _mm_store_si128((__m128i *)d, d0);
112
113 d += 4; l -= 4;
114 },
115 { /* A8OP */
116
117 __m128i d0 = _mm_load_si128((__m128i *)d);
118 __m128i d1 = _mm_load_si128((__m128i *)(d+4));
119
120 __m128i mul0 = mul_256_sse3(alpha_packed, d0);
121 __m128i mul1 = mul_256_sse3(alpha_packed, d1);
122
123 __m128i sym0 = mul_sym_sse3(_mm_srli_epi32(d0, 24), c_packed);
124 __m128i sym1 = mul_sym_sse3(_mm_srli_epi32(d1, 24), c_packed);
125
126 d0 = _mm_add_epi32(mul0, sym0);
127 d1 = _mm_add_epi32(mul1, sym1);
128
129 _mm_store_si128((__m128i *)d, d0);
130 _mm_store_si128((__m128i *)(d+4), d1);
131
132 d += 8; l -= 8;
133 })
134}
135
/*
 * Alias names for the remaining blend_rel span variants.  The DP variant
 * reuses _op_blend_rel_c_dp_sse3; the DP_AN variants reuse the plain
 * (non-rel) colour-blend aliases.
 */
#define _op_blend_rel_caa_dp_sse3     _op_blend_rel_c_dp_sse3
#define _op_blend_rel_c_dpan_sse3     _op_blend_c_dpan_sse3
#define _op_blend_rel_caa_dpan_sse3   _op_blend_caa_dpan_sse3
139
140static void
141init_blend_rel_color_span_funcs_sse3(void)
142{
143 op_blend_rel_span_funcs[SP_N][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_c_dp_sse3;
144 op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_caa_dp_sse3;
145
146 op_blend_rel_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_c_dpan_sse3;
147 op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_caa_dpan_sse3;
148}
149
/*
 * Single-point blend_rel variants.  The DP names are NULL (no SSE3
 * implementation); the DP_AN names reuse the plain colour-blend point
 * aliases.
 */
#define _op_blend_rel_pt_c_dp_sse3       NULL
#define _op_blend_rel_pt_caa_dp_sse3     _op_blend_rel_pt_c_dp_sse3

#define _op_blend_rel_pt_c_dpan_sse3     _op_blend_pt_c_dpan_sse3
#define _op_blend_rel_pt_caa_dpan_sse3   _op_blend_pt_caa_dpan_sse3
155
156static void
157init_blend_rel_color_pt_funcs_sse3(void)
158{
159 op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_pt_c_dp_sse3;
160 op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pt_caa_dp_sse3;
161
162 op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pt_c_dpan_sse3;
163 op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pt_caa_dpan_sse3;
164}
165
166#endif