aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c')
-rw-r--r--libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c562
1 file changed, 0 insertions, 562 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c
deleted file mode 100644
index f5eb480..0000000
--- a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c
+++ /dev/null
@@ -1,562 +0,0 @@
/* Set to 1 to enable lightweight call-count tracing of the NEON blend
 * routines in this file. */
#define NEONDEBUG 0


#if NEONDEBUG
/* Print file/line/function once every 10000 calls so a hot path can be
 * spotted without flooding the log.  `x` is a short tag string that is
 * pasted in front of " optimised" in the output (e.g. "" or "not"). */
#define DEBUG_FNCOUNT(x) \
	do { \
		static int _foo = 0; \
		if (_foo++%10000 ==0) \
			printf("%s %+d %s: %d (%s)\n",__FILE__,__LINE__,__FUNCTION__,\
					_foo, x " optimised");\
	} while (0)
#else
/* Tracing disabled: still evaluate the argument so call sites compile
 * identically and stay warning-free. */
#define DEBUG_FNCOUNT(x) ((void)x)
#endif
15
16
17/* blend mask x color -> dst */
18
#ifdef BUILD_NEON
/* Span op: blend (8-bit mask x constant color) over a destination span,
 * NEON-accelerated.
 *
 *   s - unused; present only so the signature matches the other span funcs
 *   m - coverage mask, one byte per destination pixel
 *   c - constant ARGB color (32-bit, one byte per channel)
 *   d - destination pixels (DATA32 each), updated in place
 *   l - span length in pixels
 *
 * Register roles inside the asm: q15 holds c replicated into every lane,
 * q14 holds 0x01...01 (multiplying a 32-bit lane by it splats its low
 * byte across the lane), q6 receives mask*color, q7/q8 are scratch for
 * the inverse-alpha multiply.
 *
 * Loop structure: one pixel, then a pixel pair, until d reaches 16-byte
 * alignment; then 4 pixels per iteration (a 4-byte mask word of zero is
 * skipped via "fastloop"); finally a trailing pair and/or single pixel.
 */
static void
_op_blend_mas_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
   DATA32 *e; /* NOTE(review): never used — the [e] operand below binds
               * (d + l) directly; this local could be removed. */

   DEBUG_FNCOUNT("");

#define AP "blend_mas_c_dp_"
   asm volatile (
      ".fpu neon					\n\t"
      // q15 = color in every lane, q14 = per-byte 1 (lane splat helper)
      "	vdup.i32	q15, %[c]		\n\t"
      "	vmov.i8		q14, #1			\n\t"

      // If aligned already - straight to quads
      "	andS		%[tmp], %[d],$0xf	\n\t"
      "	beq		"AP"quadloops		\n\t"

      "	andS		%[tmp], %[d],$0x4	\n\t"
      "	beq		"AP"dualloop		\n\t"

      // One pixel: splat the mask byte, scale color, blend with dst
      AP"singleloop:				\n\t"
      "	vld1.8		d0[0], [%[m]]!		\n\t"
      "	vld1.32		d4[0], [%[d]]		\n\t"
      "	vdup.u8		d0, d0[0]		\n\t"
      "	vmull.u8	q4, d0, d30		\n\t"
      "	vqrshrn.u16	d12, q4, #8		\n\t"
      "	vmvn.u16	d14, d12		\n\t"
      "	vshr.u32	d16, d14, #24		\n\t"
      "	vmul.u32	d16, d16, d28		\n\t"
      "	vmull.u8	q7, d16, d4		\n\t"
      "	vqrshrn.u16	d0, q7, #8		\n\t"
      "	vqadd.u8	d0, d0, d12		\n\t"
      "	vst1.32		d0[0], [%[d]]!		\n\t"

      // Can we go the fast path?
      "	andS		%[tmp], %[d],$0xf	\n\t"
      "	beq		"AP"quadloops		\n\t"

      // Two pixels at a time (only if >= 16 bytes remain)
      AP"dualloop:				\n\t"
      "	sub		%[tmp], %[e], %[d]	\n\t"
      "	cmp		%[tmp], #16		\n\t"
      "	blt		"AP"loopout		\n\t"

      "	vld1.16		d0[0], [%[m]]!		\n\t"
      "	vldm		%[d], {d4}		\n\t"
      "	vmovl.u8	q0, d0			\n\t"
      "	vmovl.u8	q0, d0			\n\t"
      "	vmul.u32	q0, q14			\n\t"
      "	vmull.u8	q4, d0, d30		\n\t"
      "	vqrshrn.u16	d12, q4, #8		\n\t"
      "	vmvn.u16	d14, d12		\n\t"
      "	vshr.u32	d16, d14, #24		\n\t"
      "	vmul.u32	d16, d16, d28		\n\t"
      "	vmull.u8	q7, d16, d4		\n\t"
      "	vqrshrn.u16	d0, q7, #8		\n\t"
      "	vqadd.u8	q0, q0, q6		\n\t"
      "	vstm		%[d]!, {d0}		\n\t"

      // Main path: 4 pixels (16 bytes) per iteration
      AP"quadloops:				\n\t"
      "	sub		%[tmp], %[e], %[d]	\n\t"
      "	cmp		%[tmp], #16		\n\t"
      "	blt		"AP"loopout		\n\t"


      "	sub		%[tmp], %[e], #15	\n\t"

      "	sub		%[d], #16		\n\t"
      // Zero-mask groups loop here without touching the destination
      AP"fastloop:"
      "	add		%[d], #16		\n\t"
      "	cmp		%[tmp], %[d]		\n\t"
      "	ble		"AP"loopout		\n\t"
      AP"quadloopint:				\n\t"
      "	ldr		%[x], [%[m]]		\n\t"
      "	add		%[m], #4		\n\t"
      "	cmp		%[x], #0		\n\t"
      "	beq		"AP"fastloop		\n\t"
      "	vmov.32		d0[0], %[x]		\n\t"
      "	vldm		%[d], {d4,d5}		\n\t"

      // Expand M: Fixme: Can we do this quicker?
      "	vmovl.u8	q0, d0			\n\t"
      "	vmovl.u8	q0, d0			\n\t"
      "	vmul.u32	q0, q14			\n\t"

      // Multiply a * c
      "	vmull.u8	q4, d0, d30		\n\t"
      "	vmull.u8	q5, d1, d31		\n\t"

      // Shorten
      "	vqrshrn.u16	d12, q4, #8		\n\t"
      "	vqrshrn.u16	d13, q5, #8		\n\t"

      // extract negated alpha
      "	vmvn.u16	q7, q6			\n\t"
      "	vshr.u32	q8, q7, #24		\n\t"
      "	vmul.u32	q8, q8, q14		\n\t"

      // Multiply
      "	vmull.u8	q7, d16, d4		\n\t"
      "	vmull.u8	q8, d17, d5		\n\t"

      "	vqrshrn.u16	d0, q7, #8		\n\t"
      "	vqrshrn.u16	d1, q8, #8		\n\t"

      // Add
      "	vqadd.u8	q0, q0, q6		\n\t"

      "	vstm		%[d]!, {d0,d1}		\n\t"

      "	cmp		%[tmp], %[d]		\n\t"
      "	bhi		"AP"quadloopint		\n\t"

      // Tail: 0-3 pixels left
      AP"loopout:				\n\t"
#if NEONDEBUG
      "	cmp		%[d], %[e]		\n\t"
      "	ble		"AP"foo			\n\t"
      "	cmp		%[tmp], %[m]		\n\t"
      "	sub		%[x], %[x]		\n\t"
      "	vst1.32		d0[0], [%[x]]		\n\t"
      AP"foo:					\n\t"
#endif

      "	cmp		%[d], %[e]		\n\t"
      "	beq		"AP"done		\n\t"
      "	sub		%[tmp],%[e], %[d]	\n\t"
      "	cmp		%[tmp],#4		\n\t"
      "	beq		"AP"singleout		\n\t"

      AP "dualloop2:				\n\t"
      "	sub		%[tmp],%[e],$0x8	\n\t"
      "	vld1.16		d0[0], [%[m]]!		\n\t"
      "	vldm		%[d], {d4}		\n\t"
      "	vmovl.u8	q0, d0			\n\t"
      "	vmovl.u8	q0, d0			\n\t"
      "	vmul.u32	q0, q14			\n\t"
      "	vmull.u8	q4, d0, d30		\n\t"
      "	vqrshrn.u16	d12, q4, #8		\n\t"
      "	vmvn.u16	d14, d12		\n\t"
      "	vshr.u32	d16, d14, #24		\n\t"
      "	vmul.u32	d16, d16, d28		\n\t"
      "	vmull.u8	q7, d16, d4		\n\t"
      "	vqrshrn.u16	d0, q7, #8		\n\t"
      "	vqadd.u8	q0, q0, q6		\n\t"
      "	vstm		%[d]!, {d0}		\n\t"

      "	cmp		%[e], %[d]		\n\t"
      "	beq		"AP"done		\n\t"

      AP"singleout:				\n\t"
      "	vld1.8		d0[0], [%[m]]!		\n\t"
      "	vld1.32		d4[0], [%[d]]		\n\t"
      "	vdup.u8		d0, d0[0]		\n\t"
      "	vmull.u8	q4, d0, d30		\n\t"
      "	vqrshrn.u16	d12, q4, #8		\n\t"
      "	vmvn.u16	d14, d12		\n\t"
      "	vshr.u32	d16, d14, #24		\n\t"
      "	vmul.u32	d16, d16, d28		\n\t"
      "	vmull.u8	q7, d16, d4		\n\t"
      "	vqrshrn.u16	d0, q7, #8		\n\t"
      "	vqadd.u8	q0, q0, q6		\n\t"
      "	vst1.32		d0[0], [%[d]]!		\n\t"

      AP"done:					\n\t"
#if NEONDEBUG
      "	cmp		%[d], %[e]		\n\t"
      "	beq		"AP"reallydone		\n\t"
      "	sub		%[tmp], %[tmp]		\n\t"
      "	vst1.32		d0[0], [%[tmp]]		\n\t"
      AP"reallydone:"
#endif
      : // Out
      /* NOTE(review): the asm above post-increments %[d] and %[m] and
       * scribbles on %[tmp]/%[x], yet all four are declared as
       * input-only operands here.  GCC's extended-asm rules forbid
       * modifying input-only operands — they should be "+r"/"=&r"
       * outputs.  TODO confirm and fix. */
      : [e] "r" (d + l), [d] "r" (d), [c] "r" (c),
        [tmp] "r" (7), [m] "r" (m), [x] "r" (0)
      : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q8","q14","q15",
        "memory" // clobbered
   );
#undef AP
}
#endif
198
#ifdef BUILD_NEON
/* Span op: blend (8-bit mask x constant color) over a destination span
 * for the SC_AN color case (presumably "color alpha == 0xff", judging
 * by the _can_ infix — TODO confirm against the table constants),
 * NEON-accelerated.
 *
 *   s - unused; signature matches the other span funcs
 *   m - coverage mask, one byte per destination pixel
 *   c - constant ARGB color
 *   d - destination pixels (DATA32 each), updated in place
 *   l - span length in pixels
 *
 * Register roles: q9 = c replicated, q2/q3 = c widened to 16 bits,
 * q15 = per-byte 1 (lane-splat helper), q14 = 0.  Each pixel computes
 * d + (((c - d) * (m >> 1)) >> 7) in 16-bit lanes — the mask is halved
 * first to keep the signed 16-bit multiply from overflowing — then
 * saturates back to unsigned bytes with vqmovun.
 *
 * Loop structure mirrors _op_blend_mas_c_dp_neon: align to 16 bytes one
 * or two pixels at a time, run 4 pixels per iteration (all-zero mask
 * words are skipped; all-0xff words store the raw color from q9), then
 * handle a trailing pair and/or single pixel.
 */
static void
_op_blend_mas_can_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
   DATA32 *e,*tmp; /* NOTE(review): tmp is never used in C — the [tmp]
                    * operand below is bound to the literal 7. */
   int alpha;      /* NOTE(review): unused — candidate for removal. */

   DEBUG_FNCOUNT("");

#define AP "_blend_mas_can_dp_neon_"
   asm volatile (
      ".fpu neon				\n\t"
      "	vdup.u32	q9, %[c]		\n\t"
      "	vmov.i8		q15, #1			\n\t"
      "	vmov.i8		q14, #0			\n\t"

      // Make C 16 bit (C in q3/q2)
      "	vmovl.u8	q3, d19			\n\t"
      "	vmovl.u8	q2, d18			\n\t"

      // Which loop to start
      "	andS		%[tmp], %[d],$0xf	\n\t"
      "	beq		"AP"quadloop		\n\t"

      "	andS		%[tmp], %[d], #4	\n\t"
      "	beq		"AP"dualstart		\n\t"


      // One pixel: d += ((c - d) * (m >> 1)) >> 7, saturated
      AP"singleloop:				\n\t"
      "	vld1.8		d0[0], [%[m]]!		\n\t"
      "	vld1.32		d8[0], [%[d]]		\n\t"
      "	vdup.u8		d0, d0[0]		\n\t"
      "	vshr.u8		d0, d0, #1		\n\t"
      "	vmovl.u8	q0, d0			\n\t"
      "	vmovl.u8	q4, d8			\n\t"
      "	vsub.s16	q6, q2, q4		\n\t"
      "	vmul.s16	q6, q0			\n\t"
      "	vshr.s16	q6, #7			\n\t"
      "	vadd.s16	q6, q4			\n\t"
      "	vqmovun.s16	d2, q6			\n\t"
      "	vst1.32		d2[0], [%[d]]!		\n\t"

      "	andS		%[tmp], %[d], $0xf	\n\t"
      "	beq		"AP"quadloop		\n\t"

      AP"dualstart:				\n\t"
      "	sub		%[tmp], %[e], %[d]	\n\t"
      "	cmp		%[tmp], #16		\n\t"
      "	blt		"AP"loopout		\n\t"

      // Two pixels, same math in the low half of the q registers
      AP"dualloop:				\n\t"
      "	vld1.16		d0[0], [%[m]]!		\n\t"
      "	vldm		%[d], {d8}		\n\t"
      "	vmovl.u8	q0, d0			\n\t"
      "	vmovl.u8	q0, d0			\n\t"
      "	vmul.u32	d0, d0, d30		\n\t"
      "	vshr.u8		d0, d0, #1		\n\t"
      "	vmovl.u8	q0, d0			\n\t"
      "	vmovl.u8	q4, d8			\n\t"
      "	vsub.s16	q6, q2, q4		\n\t"
      "	vmul.s16	q6, q0			\n\t"
      "	vshr.s16	q6, #7			\n\t"
      "	vadd.s16	q6, q4			\n\t"
      "	vqmovun.s16	d2, q6			\n\t"
      "	vstm		%[d]!, {d2}		\n\t"

      // Main path: 4 pixels per iteration
      AP"quadloop:				\n\t"
      "	sub		%[tmp], %[e], %[d]	\n\t"
      "	cmp		%[tmp], #16		\n\t"
      "	blt		"AP"loopout		\n\t"
      "	sub		%[tmp], %[e], #15	\n\t"

      "	sub		%[d], #16		\n\t"
      AP"fastloop:				\n\t"
      "	add		%[d], #16		\n\t"
      "	cmp		%[tmp], %[d]		\n\t"
      "	blt		"AP"loopout		\n\t"

      AP"quadloopint:				\n\t"
      // Load the mask: 4 bytes: It has d0/d1
      "	ldr		%[x], [%[m]]		\n\t"
      "	add		%[m], #4		\n\t"

      // Check for shortcuts
      "	cmp		%[x], #0		\n\t"
      "	beq		"AP"fastloop		\n\t"

      "	cmp		%[x], $0xffffffff	\n\t"
      "	beq		"AP"quadstore		\n\t"

      "	vmov.32		d0[0], %[x]		\n\t"
      // Load d into d8/d9 q4
      "	vldm		%[d], {d8,d9}		\n\t"

      // Get the alpha channel ready (m)
      "	vmovl.u8	q0, d0			\n\t"
      "	vmovl.u8	q0, d0			\n\t"
      "	vmul.u32	q0, q0,q15		\n\t"
      // Lop a bit off to prevent overflow
      "	vshr.u8		q0, q0, #1		\n\t"

      // Now make it 16 bit
      "	vmovl.u8	q1, d1			\n\t"
      "	vmovl.u8	q0, d0			\n\t"

      // 16 bit 'd'
      "	vmovl.u8	q5, d9			\n\t"
      "	vmovl.u8	q4, d8			\n\t"

      // Diff 'd' & 'c'
      "	vsub.s16	q7, q3, q5		\n\t"
      "	vsub.s16	q6, q2, q4		\n\t"

      "	vmul.s16	q7, q1			\n\t"
      "	vmul.s16	q6, q0			\n\t"

      // Shift results a bit
      "	vshr.s16	q7, #7			\n\t"
      "	vshr.s16	q6, #7			\n\t"

      // Add 'd'
      "	vadd.s16	q7, q5			\n\t"
      "	vadd.s16	q6, q4			\n\t"

      // Make sure none are negative
      "	vqmovun.s16	d9, q7			\n\t"
      "	vqmovun.s16	d8, q6			\n\t"

      "	vstm		%[d]!, {d8,d9}		\n\t"

      "	cmp		%[tmp], %[d]		\n\t"
      "	bhi		"AP"quadloopint		\n\t"
      "	b		"AP"loopout		\n\t"

      // Fully-opaque mask word: store the color unchanged
      AP"quadstore:				\n\t"
      "	vstm		%[d]!, {d18,d19}	\n\t"
      "	cmp		%[tmp], %[d]		\n\t"
      "	bhi		"AP"quadloopint		\n\t"

      // Tail: 0-3 pixels left
      AP"loopout:				\n\t"
#if NEONDEBUG
      "	cmp		%[d], %[e]		\n\t"
      "	ble		"AP"foo			\n\t"
      "	sub		%[tmp], %[tmp]		\n\t"
      "	vst1.32		d0[0], [%[tmp]]		\n\t"
      AP"foo:					\n\t"
#endif

      "	cmp		%[e], %[d]		\n\t"
      "	beq		"AP"done		\n\t"

      "	sub		%[tmp],%[e], %[d]	\n\t"
      "	cmp		%[tmp],#8		\n\t"

      "	blt		"AP"onebyte		\n\t"

      // Load the mask: 2 bytes: It has d0
      "	vld1.16		d0[0], [%[m]]!		\n\t"

      // Load d into d8/d9 q4
      "	vldm		%[d], {d8}		\n\t"

      // Get the alpha channel ready (m)
      "	vmovl.u8	q0, d0			\n\t"
      "	vmovl.u8	q0, d0			\n\t"
      "	vmul.u32	d0, d0, d30		\n\t"
      // Lop a bit off to prevent overflow
      "	vshr.u8		d0, d0, #1		\n\t"

      // Now make it 16 bit
      "	vmovl.u8	q0, d0			\n\t"

      // 16 bit 'd'
      "	vmovl.u8	q4, d8			\n\t"

      // Diff 'd' & 'c'
      "	vsub.s16	q6, q2, q4		\n\t"

      "	vmul.s16	q6, q0			\n\t"

      // Shift results a bit
      "	vshr.s16	q6, #7			\n\t"

      // Add 'd'
      "	vadd.s16	q6, q4			\n\t"

      // Make sure none are negative
      "	vqmovun.s16	d2, q6			\n\t"

      "	vstm		%[d]!, {d2}		\n\t"

      "	cmp		%[e], %[d]		\n\t"
      "	beq		"AP"done		\n\t"

      AP"onebyte:				\n\t"
      "	vld1.8		d0[0], [%[m]]!		\n\t"
      "	vld1.32		d8[0], [%[d]]		\n\t"
      "	vdup.u8		d0, d0[0]		\n\t"
      "	vshr.u8		d0, d0, #1		\n\t"
      "	vmovl.u8	q0, d0			\n\t"
      "	vmovl.u8	q4, d8			\n\t"
      "	vsub.s16	q6, q2, q4		\n\t"
      "	vmul.s16	q6, q0			\n\t"
      "	vshr.s16	q6, #7			\n\t"
      "	vadd.s16	q6, q4			\n\t"
      "	vqmovun.s16	d2, q6			\n\t"
      "	vst1.32		d2[0], [%[d]]!		\n\t"


      AP"done:					\n\t"
#if NEONDEBUG
      "	cmp		%[d], %[e]		\n\t"
      "	beq		"AP"reallydone		\n\t"
      "	sub		%[m], %[m]		\n\t"
      "	vst1.32		d0[0], [%[m]]		\n\t"
      AP"reallydone:"
#endif


      : // output regs
      // Input
      /* NOTE(review): as in _op_blend_mas_c_dp_neon, %[d]/%[m]/%[tmp]/
       * %[x] are modified by the asm but declared input-only — GCC
       * forbids that; they should be "+r"/"=&r" outputs.  TODO fix. */
      : [e] "r" (e = d + l), [d] "r" (d), [c] "r" (c),
        [m] "r" (m), [tmp] "r" (7), [x] "r" (33)
      : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q9","q14","q15",
        "memory" // clobbered

   );
#undef AP
}
#endif
428
#ifdef BUILD_NEON
#define _op_blend_mas_cn_dp_neon _op_blend_mas_can_dp_neon
#define _op_blend_mas_caa_dp_neon _op_blend_mas_c_dp_neon

#define _op_blend_mas_c_dpan_neon _op_blend_mas_c_dp_neon
#define _op_blend_mas_cn_dpan_neon _op_blend_mas_cn_dp_neon
#define _op_blend_mas_can_dpan_neon _op_blend_mas_can_dp_neon
#define _op_blend_mas_caa_dpan_neon _op_blend_mas_caa_dp_neon

/* Hook the NEON mask-x-color span blenders into the global dispatch
 * table.  Each color class (SC, SC_N, SC_AN, SC_AA) is registered for
 * both destination kinds (DP and DP_AN) together. */
static void
init_blend_mask_color_span_funcs_neon(void)
{
   op_blend_span_funcs[SP_N][SM_AS][SC][DP][CPU_NEON] = _op_blend_mas_c_dp_neon;
   op_blend_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_NEON] = _op_blend_mas_c_dpan_neon;

   op_blend_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_mas_cn_dp_neon;
   op_blend_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_mas_cn_dpan_neon;

   op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_NEON] = _op_blend_mas_can_dp_neon;
   op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_NEON] = _op_blend_mas_can_dpan_neon;

   op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_NEON] = _op_blend_mas_caa_dp_neon;
   op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_NEON] = _op_blend_mas_caa_dpan_neon;
}
#endif
452
#ifdef BUILD_NEON
/* Point op: blend a single masked pixel of constant color into *d.
 * Plain C — a one-pixel op does not benefit from a NEON path.
 *
 *   s - incoming source value; reused as scratch for the masked color
 *   m - 8-bit coverage for this pixel
 *   c - constant ARGB color
 *   d - destination pixel, updated in place
 */
static void
_op_blend_pt_mas_c_dp_neon(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) {
   s = MUL_SYM(m, c);
   c = 256 - (s >> 24);
   /* FIX: plain (non-"rel") blend is dst = src + (256 - sa) * dst.
    * The previous body additionally scaled the source by the
    * destination alpha (MUL_SYM(*d >> 24, s)) — that is the "rel"
    * formula, duplicated verbatim from _op_blend_rel_pt_mas_c_dp_neon
    * below, and it disagreed with the DP span ops above, which add the
    * masked color without any destination-alpha scaling. */
   *d = s + MUL_256(c, *d);
}


#define _op_blend_pt_mas_cn_dp_neon _op_blend_pt_mas_c_dp_neon
#define _op_blend_pt_mas_can_dp_neon _op_blend_pt_mas_c_dp_neon
#define _op_blend_pt_mas_caa_dp_neon _op_blend_pt_mas_c_dp_neon

#define _op_blend_pt_mas_c_dpan_neon _op_blend_pt_mas_c_dp_neon
#define _op_blend_pt_mas_cn_dpan_neon _op_blend_pt_mas_cn_dp_neon
#define _op_blend_pt_mas_can_dpan_neon _op_blend_pt_mas_can_dp_neon
#define _op_blend_pt_mas_caa_dpan_neon _op_blend_pt_mas_caa_dp_neon

/* Hook the point (single pixel) mask-x-color blenders into the global
 * dispatch table; every variant resolves to the same C routine. */
static void
init_blend_mask_color_pt_funcs_neon(void)
{
   op_blend_pt_funcs[SP_N][SM_AS][SC][DP][CPU_NEON] = _op_blend_pt_mas_c_dp_neon;
   op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_pt_mas_cn_dp_neon;
   op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_NEON] = _op_blend_pt_mas_can_dp_neon;
   op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_NEON] = _op_blend_pt_mas_caa_dp_neon;

   op_blend_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_NEON] = _op_blend_pt_mas_c_dpan_neon;
   op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_pt_mas_cn_dpan_neon;
   op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_NEON] = _op_blend_pt_mas_can_dpan_neon;
   op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_NEON] = _op_blend_pt_mas_caa_dpan_neon;
}
#endif
485
486/*-----*/
487
488/* blend_rel mask x color -> dst */
489
#ifdef BUILD_NEON
/* Span op: "rel" blend of (mask x color) into dst — like the plain
 * blend, but the source term is additionally scaled by the destination
 * alpha (MUL_SYM(*d >> 24, ...)).  Plain C, unrolled by the project
 * macro; no NEON version exists for this op yet. */
static void
_op_blend_rel_mas_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) {
   DATA32 *e;
   int alpha;

   DEBUG_FNCOUNT("not");

   UNROLL8_PLD_WHILE(d, l, e,
                     {
                        DATA32 sc = MUL_SYM(*m, c); /* mask applied to color */

                        alpha = 256 - (sc >> 24);
                        *d = MUL_SYM(*d >> 24, sc) + MUL_256(alpha, *d);
                        d++;
                        m++;
                     });
}

#define _op_blend_rel_mas_cn_dp_neon _op_blend_rel_mas_c_dp_neon
#define _op_blend_rel_mas_can_dp_neon _op_blend_rel_mas_c_dp_neon
#define _op_blend_rel_mas_caa_dp_neon _op_blend_rel_mas_c_dp_neon

#define _op_blend_rel_mas_c_dpan_neon _op_blend_mas_c_dpan_neon
#define _op_blend_rel_mas_cn_dpan_neon _op_blend_mas_cn_dpan_neon
#define _op_blend_rel_mas_can_dpan_neon _op_blend_mas_can_dpan_neon
#define _op_blend_rel_mas_caa_dpan_neon _op_blend_mas_caa_dpan_neon

/* Hook the "rel" mask-x-color span blenders into the dispatch table.
 * DP_AN entries reuse the plain NEON blenders (aliased above). */
static void
init_blend_rel_mask_color_span_funcs_neon(void)
{
   op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP][CPU_NEON] = _op_blend_rel_mas_c_dp_neon;
   op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_NEON] = _op_blend_rel_mas_c_dpan_neon;

   op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_rel_mas_cn_dp_neon;
   op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_mas_cn_dpan_neon;

   op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_NEON] = _op_blend_rel_mas_can_dp_neon;
   op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_mas_can_dpan_neon;

   op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_NEON] = _op_blend_rel_mas_caa_dp_neon;
   op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_mas_caa_dpan_neon;
}
#endif
531
#ifdef BUILD_NEON
/* Point op: "rel" blend of a single masked pixel of constant color
 * into *d — the source term is scaled by the destination alpha. */
static void
_op_blend_rel_pt_mas_c_dp_neon(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) {
   /* mask the color, then do a dest-alpha-clipped src-over */
   s = MUL_SYM(m, c);
   *d = MUL_SYM(*d >> 24, s) + MUL_256(256 - (s >> 24), *d);
}

#define _op_blend_rel_pt_mas_cn_dp_neon _op_blend_rel_pt_mas_c_dp_neon
#define _op_blend_rel_pt_mas_can_dp_neon _op_blend_rel_pt_mas_c_dp_neon
#define _op_blend_rel_pt_mas_caa_dp_neon _op_blend_rel_pt_mas_c_dp_neon

#define _op_blend_rel_pt_mas_c_dpan_neon _op_blend_pt_mas_c_dpan_neon
#define _op_blend_rel_pt_mas_cn_dpan_neon _op_blend_pt_mas_cn_dpan_neon
#define _op_blend_rel_pt_mas_can_dpan_neon _op_blend_pt_mas_can_dpan_neon
#define _op_blend_rel_pt_mas_caa_dpan_neon _op_blend_pt_mas_caa_dpan_neon

/* Hook the "rel" point blenders into the dispatch table.  DP_AN
 * entries reuse the plain point blenders (aliased above). */
static void
init_blend_rel_mask_color_pt_funcs_neon(void)
{
   op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP][CPU_NEON] = _op_blend_rel_pt_mas_c_dp_neon;
   op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_NEON] = _op_blend_rel_pt_mas_c_dpan_neon;

   op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_rel_pt_mas_cn_dp_neon;
   op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_pt_mas_cn_dpan_neon;

   op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_NEON] = _op_blend_rel_pt_mas_can_dp_neon;
   op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_pt_mas_can_dpan_neon;

   op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_NEON] = _op_blend_rel_pt_mas_caa_dp_neon;
   op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pt_mas_caa_dpan_neon;
}
#endif