aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_neon.c
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_neon.c')
-rw-r--r--libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_neon.c570
1 files changed, 570 insertions, 0 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_neon.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_neon.c
new file mode 100644
index 0000000..6e35970
--- /dev/null
+++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_neon.c
@@ -0,0 +1,570 @@
1/* blend pixel x color --> dst */
2#ifdef BUILD_NEON
3/* Note: Optimisation is based on keeping _dest_ aligned: else it's a pair of
4 * reads, then two writes, a miss on read is 'just' two reads */
5static void
6_op_blend_p_c_dp_neon(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
7#define AP "blend_p_c_dp_"
8 asm volatile (
9 ".fpu neon \n\t"
10 // Load 'c'
11 "vdup.u32 q7, %[c] \n\t"
12 "vmov.i8 q6, #1 \n\t"
13
14 // Choose a loop
15 "andS %[tmp], %[d], $0xf \n\t"
16 "beq "AP"quadstart \n\t"
17
18 "andS %[tmp],%[d], $0x4 \n\t"
19 "beq "AP"dualloop \n\t"
20
21 AP"singleloop:"
22 "vld1.32 d0[0], [%[s]]! \n\t"
23 "vld1.32 d2[0], [%[d]] \n\t"
24 // Mulitply s * c (= sc)
25 "vmull.u8 q4, d0,d14 \n\t"
26 // sc in d8
27 "vqrshrn.u16 d4, q4, #8 \n\t"
28
29 // sca in d9
30 "vmvn.u32 d6, d4 \n\t"
31 "vshr.u32 d6, d6, #24 \n\t"
32
33 "vmul.u32 d6, d12, d6 \n\t"
34
35 /* d * alpha */
36 "vmull.u8 q4, d6, d2 \n\t"
37 "vqrshrn.u16 d0, q4, #8 \n\t"
38
39 "vqadd.u8 d2, d0, d4 \n\t"
40
41 // Save dsc + sc
42 "vst1.32 d2[0], [%[d]]! \n\t"
43
44 // Now where?
45 // Can we go the fast path?
46 "andS %[tmp], %[d],$0xf \n\t"
47 "beq "AP"quadstart \n\t"
48
49 AP"dualloop: \n\t"
50 // Check we have enough to bother with!
51 "sub %[tmp], %[e], %[d] \n\t"
52 "cmp %[tmp], #16 \n\t"
53 "blt "AP"loopout \n\t"
54
55 // load 's' -> q0, 'd' -> q1
56 "vldm %[s]!, {d0} \n\t"
57 "vldm %[d], {d2} \n\t"
58 // Mulitply s * c (= sc)
59 "vmull.u8 q4, d0,d14 \n\t"
60 // sc in d8
61 "vqrshrn.u16 d4, q4, #8 \n\t"
62
63 // sca in d9
64 "vmvn.u32 d6, d4 \n\t"
65 "vshr.u32 d6, d6, #24 \n\t"
66
67 "vmul.u32 d6, d12, d6 \n\t"
68
69 /* d * alpha */
70 "vmull.u8 q4, d6, d2 \n\t"
71 "vqrshrn.u16 d0, q4, #8 \n\t"
72
73 "vqadd.u8 d2, d0, d4 \n\t"
74
75 // Save dsc + sc
76 "vst1.32 d2, [%[d]]! \n\t"
77
78 AP"quadstart: \n\t"
79 "sub %[tmp], %[e], %[d] \n\t"
80 "cmp %[tmp], #16 \n\t"
81 "blt "AP"loopout \n\t"
82
83 "sub %[tmp], %[e], #15 \n\t"
84
85 AP"quadloop:\n\t"
86 // load 's' -> q0, 'd' -> q1
87 "vldm %[s]!, {d0,d1} \n\t"
88 "vldm %[d], {d2,d3} \n\t"
89 // Mulitply s * c (= sc)
90 "vmull.u8 q4, d0,d14 \n\t"
91 "vmull.u8 q5, d1,d14 \n\t"
92
93 // Get sc & sc alpha
94 "vqrshrn.u16 d4, q4, #8 \n\t"
95 "vqrshrn.u16 d5, q5, #8 \n\t"
96 // sc is now in q2, 8bpp
97 // Shift out, then spread alpha for q2
98 "vmvn.u32 q3, q2 \n\t"
99 "vshr.u32 q3, q3, $0x18 \n\t"
100 "vmul.u32 q3, q6,q3 \n\t"
101
102 // Multiply 'd' by sc.alpha (dsca)
103 "vmull.u8 q4, d6,d2 \n\t"
104 "vmull.u8 q5, d7,d3 \n\t"
105
106 "vqrshrn.u16 d0, q4, #8 \n\t"
107 "vqrshrn.u16 d1, q5, #8 \n\t"
108
109 "vqadd.u8 q1, q0, q2 \n\t"
110
111 // Save dsc + sc
112 "vstm %[d]!, {d2,d3} \n\t"
113
114 "cmp %[tmp], %[d] \n\t"
115
116 "bhi "AP"quadloop \n\t"
117
118 /* Trailing stuff */
119 AP"loopout: \n\t"
120
121 "cmp %[d], %[e] \n\t"
122 "beq "AP"done\n\t"
123 "sub %[tmp],%[e], %[d] \n\t"
124 "cmp %[tmp],$0x04 \n\t"
125 "beq "AP"singleloop2 \n\t"
126
127 "sub %[tmp], %[e], #7 \n\t"
128 /* Dual loop */
129 AP"dualloop2: \n\t"
130 "vldm %[s]!, {d0} \n\t"
131 "vldm %[d], {d2} \n\t"
132 // Mulitply s * c (= sc)
133 "vmull.u8 q4, d0,d14 \n\t"
134 // sc in d8
135 "vqrshrn.u16 d4, q4, #8 \n\t"
136
137 // sca in d9
138 // XXX: I can probably squash one of these 3
139 "vmvn.u32 d6, d4 \n\t"
140 "vshr.u32 d6, d6, #24 \n\t"
141 "vmul.u32 d6, d6, d12 \n\t"
142
143 /* d * alpha */
144 "vmull.u8 q4, d6, d2 \n\t"
145 "vqrshrn.u16 d0, q4, #8 \n\t"
146
147 "vqadd.u8 d2, d0, d4 \n\t"
148
149 // Save dsc + sc
150 "vstm %[d]!, {d2} \n\t"
151
152 "cmp %[tmp], %[d] \n\t"
153 "bhi "AP"dualloop2 \n\t"
154
155 "cmp %[d], %[e] \n\t"
156 "beq "AP"done \n\t"
157
158 AP"singleloop2: \n\t"
159 "vld1.32 d0[0], [%[s]]! \n\t"
160 "vld1.32 d2[0], [%[d]] \n\t"
161 // Mulitply s * c (= sc)
162 "vmull.u8 q4, d0,d14 \n\t"
163 // sc in d8
164 "vqrshrn.u16 d4, q4, #8 \n\t"
165
166 // sca in d6
167 "vmvn.u32 d6, d4 \n\t"
168 "vshr.u32 d6, d6, #24 \n\t"
169 "vmul.u32 d6, d12,d6 \n\t"
170
171 /* d * alpha */
172 "vmull.u8 q4, d6, d2 \n\t"
173 "vqrshrn.u16 d0, q4, #8 \n\t"
174
175 "vqadd.u8 d2, d0, d4 \n\t"
176
177 // Save dsc + sc
178 "vst1.32 d2[0], [%[d]]! \n\t"
179
180
181 AP"done:"
182 : // No output
183 //
184 : [s] "r" (s), [e] "r" (d + l), [d] "r" (d), [c] "r" (c),
185 [tmp] "r" (12)
186 : "q0","q1","q2","q3","q4","q5","q6","q7","memory"
187 );
188#undef AP
189}
190
191static void
192_op_blend_pan_can_dp_neon(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
193 DATA32 *e;
194 UNROLL8_PLD_WHILE(d, l, e,
195 {
196 *d++ = 0xff000000 + MUL3_SYM(c, *s);
197 s++;
198 });
199}
200
201static void
202_op_blend_pan_caa_dp_neon(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) {
203#if 1
204 DATA32 *e;
205 int alpha;
206 c = 1 + (c & 0xff);
207 UNROLL8_PLD_WHILE(d, l, e,
208 {
209 DATA32 sc = MUL_256(c, *s);
210 alpha = 256 - (sc >> 24);
211 *d = sc + MUL_256(alpha, *d);
212 d++;
213 s++;
214 });
215#else // the below neon is buggy!! misses rendering of spans, i think with alignment. quick - just disable this.
216#define AP "_op_blend_pan_caa_dp_"
217 DATA32 *e = d + l, *tmp = (void*)73;
218 asm volatile (
219 ".fpu neon \n\t"
220 /* Set up 'c' */
221 "vdup.u8 d14, %[c] \n\t"
222 "vmov.i8 d15, #1 \n\t"
223 "vaddl.u8 q15, d14, d15 \n\t"
224 "vshr.u8 q15,#1 \n\t"
225
226 // Pick a loop
227 "andS %[tmp], %[d], $0xf \n\t"
228 "beq "AP"quadstart \n\t"
229
230 "andS %[tmp], %[d], $0x4 \n\t"
231 "beq "AP"dualstart \n\t"
232
233 AP"singleloop: \n\t"
234 "vld1.32 d4[0], [%[d]] \n\t"
235 "vld1.32 d0[0], [%[s]]! \n\t"
236
237 // Long version of 'd'
238 "vmovl.u8 q8, d4 \n\t"
239
240 // Long version of 's'
241 "vmovl.u8 q6, d0 \n\t"
242
243 // d8 = s -d
244 "vsub.s16 d8, d12, d16 \n\t"
245
246 // Multiply
247 "vmul.s16 d8, d8, d30 \n\t"
248
249 // Shift down
250 "vshr.s16 d8, #7 \n\t"
251
252 // Add 'd'
253 "vqadd.s16 d8, d8, d16 \n\t"
254
255 // Shrink to save
256 "vqmovun.s16 d0, q4 \n\t"
257 "vst1.32 d0[0], [%[d]]! \n\t"
258
259 // Now where?
260 "andS %[tmp], %[d], $0xf \n\t"
261 "beq "AP"quadstart \n\t"
262
263 AP"dualstart: \n\t"
264 // Check we have enough
265 "sub %[tmp], %[e], %[d] \n\t"
266 "cmp %[tmp], #16 \n\t"
267 "blt "AP"loopout \n\t"
268
269 AP"dualloop:"
270 "vldm %[d], {d4} \n\t"
271 "vldm %[s]!, {d0} \n\t"
272
273 // Long version of d
274 "vmovl.u8 q8, d4 \n\t"
275
276 // Long version of s
277 "vmovl.u8 q6, d0 \n\t"
278
279 // q4/q5 = s-d
280 "vsub.s16 q4, q6, q8 \n\t"
281
282 // Multiply
283 "vmul.s16 q4, q4,q15 \n\t"
284
285 // Shift down
286 "vshr.s16 q4, #7 \n\t"
287
288 // Add d
289 "vqadd.s16 q4, q4, q8 \n\t"
290
291 // Shrink to save
292 "vqmovun.s16 d0, q4 \n\t"
293
294 "vstm %[d]!, {d0} \n\t"
295 AP"quadstart: \n\t"
296 "sub %[tmp], %[e], %[d] \n\t"
297 "cmp %[tmp], #16 \n\t"
298 "blt "AP"loopout \n\t"
299
300 "sub %[tmp], %[e], #15 \n\t"
301
302 AP"quadloop: \n\t"
303 // load 's' -> q0, 'd' -> q2
304 "vldm %[d], {d4,d5} \n\t"
305 "vldm %[s]!, {d0,d1} \n\t"
306
307 // Long version of d
308 "vmovl.u8 q8, d4 \n\t"
309 "vmovl.u8 q9, d5 \n\t"
310
311 // Long version of s
312 "vmovl.u8 q6, d0 \n\t"
313 "vmovl.u8 q7, d1 \n\t"
314
315 // q4/q5 = s-d
316 "vsub.s16 q4, q6, q8 \n\t"
317 "vsub.s16 q5, q7, q9 \n\t"
318
319 // Multiply
320 "vmul.s16 q4, q4,q15 \n\t"
321 "vmul.s16 q5, q5,q15 \n\t"
322
323 // Shift down
324 "vshr.s16 q4, #7 \n\t"
325 "vshr.s16 q5, #7 \n\t"
326
327 // Add d
328 "vqadd.s16 q4, q4, q8 \n\t"
329 "vqadd.s16 q5, q5, q9 \n\t"
330
331 // Shrink to save
332 "vqmovun.s16 d0, q4 \n\t"
333 "vqmovun.s16 d1, q5 \n\t"
334 "vstm %[d]!, {d0,d1} \n\t"
335 "cmp %[tmp], %[d] \n\t"
336
337 "bhi "AP"quadloop\n\t"
338
339
340 "b "AP"done\n\t"
341 AP"loopout: \n\t"
342 "cmp %[d], %[e] \n\t"
343 "beq "AP"done\n\t"
344 "sub %[tmp],%[e], %[d] \n\t"
345 "cmp %[tmp],$0x04 \n\t"
346 "beq "AP"singleloop2 \n\t"
347
348 AP"dualloop2: \n\t"
349 "vldm %[d], {d4} \n\t"
350 "vldm %[s]!, {d0} \n\t"
351
352 // Long version of d
353 "vmovl.u8 q8, d4 \n\t"
354
355 // Long version of s
356 "vmovl.u8 q6, d0 \n\t"
357
358 // q4/q5 = s-d
359 "vsub.s16 q4, q6, q8 \n\t"
360
361 // Multiply
362 "vmul.s16 q4, q4,q15 \n\t"
363
364 // Shift down
365 "vshr.s16 q4, #7 \n\t"
366
367 // Add d
368 "vqadd.s16 q4, q4, q8 \n\t"
369
370 // Shrink to save
371 "vqmovun.s16 d0, q4 \n\t"
372
373 "vstm %[d]!, {d0} \n\t"
374
375 "cmp %[d], %[e] \n\t"
376 "beq "AP"done \n\t"
377
378 AP"singleloop2: \n\t"
379 "vld1.32 d4[0], [%[d]] \n\t"
380 "vld1.32 d0[0], [%[s]]! \n\t"
381
382 // Long version of 'd'
383 "vmovl.u8 q8, d4 \n\t"
384
385 // Long version of 's'
386 "vmovl.u8 q6, d0 \n\t"
387
388 // d8 = s -d
389 "vsub.s16 d8, d12, d16 \n\t"
390
391 // Multiply
392 "vmul.s16 d8, d8, d30 \n\t"
393
394 // Shift down
395 "vshr.s16 d8, #7 \n\t"
396
397 // Add 'd'
398 "vqadd.s16 d8, d8, d16 \n\t"
399
400 // Shrink to save
401 "vqmovun.s16 d0, q4 \n\t"
402
403 "vst1.32 d0[0], [%[d]] \n\t"
404
405
406 AP"done: \n\t"
407
408 // No output
409 :
410 // Input
411 : [s] "r" (s), [d] "r" (d), [e] "r" (e), [c] "r" (c), [tmp] "r" (tmp)
412 // Clobbered
413 : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "memory"
414 );
415#undef AP
416#endif
417}
418
/* Span variants without a dedicated routine use the generic
 * _op_blend_p_c_dp_neon. */
#define _op_blend_pas_c_dp_neon       _op_blend_p_c_dp_neon
#define _op_blend_pan_c_dp_neon       _op_blend_p_c_dp_neon
#define _op_blend_p_can_dp_neon       _op_blend_p_c_dp_neon
#define _op_blend_pas_can_dp_neon     _op_blend_p_c_dp_neon
#define _op_blend_p_caa_dp_neon       _op_blend_p_c_dp_neon
#define _op_blend_pas_caa_dp_neon     _op_blend_p_c_dp_neon

/* Each *_dpan_* name resolves to the matching *_dp_* routine. */
#define _op_blend_p_c_dpan_neon       _op_blend_p_c_dp_neon
#define _op_blend_pas_c_dpan_neon     _op_blend_pas_c_dp_neon
#define _op_blend_pan_c_dpan_neon     _op_blend_pan_c_dp_neon
#define _op_blend_p_can_dpan_neon     _op_blend_p_can_dp_neon
#define _op_blend_pas_can_dpan_neon   _op_blend_pas_can_dp_neon
#define _op_blend_pan_can_dpan_neon   _op_blend_pan_can_dp_neon
#define _op_blend_p_caa_dpan_neon     _op_blend_p_caa_dp_neon
#define _op_blend_pas_caa_dpan_neon   _op_blend_pas_caa_dp_neon
#define _op_blend_pan_caa_dpan_neon   _op_blend_pan_caa_dp_neon
435
436
437static void
438init_blend_pixel_color_span_funcs_neon(void)
439{
440 op_blend_span_funcs[SP][SM_N][SC][DP][CPU_NEON] = _op_blend_p_c_dp_neon;
441 op_blend_span_funcs[SP_AS][SM_N][SC][DP][CPU_NEON] = _op_blend_pas_c_dp_neon;
442 op_blend_span_funcs[SP_AN][SM_N][SC][DP][CPU_NEON] = _op_blend_pan_c_dp_neon;
443 op_blend_span_funcs[SP][SM_N][SC_AN][DP][CPU_NEON] = _op_blend_p_can_dp_neon;
444 op_blend_span_funcs[SP_AS][SM_N][SC_AN][DP][CPU_NEON] = _op_blend_pas_can_dp_neon;
445 op_blend_span_funcs[SP_AN][SM_N][SC_AN][DP][CPU_NEON] = _op_blend_pan_can_dp_neon;
446 op_blend_span_funcs[SP][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_p_caa_dp_neon;
447 op_blend_span_funcs[SP_AS][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pas_caa_dp_neon;
448 op_blend_span_funcs[SP_AN][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pan_caa_dp_neon;
449
450 op_blend_span_funcs[SP][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_p_c_dpan_neon;
451 op_blend_span_funcs[SP_AS][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pas_c_dpan_neon;
452 op_blend_span_funcs[SP_AN][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pan_c_dpan_neon;
453 op_blend_span_funcs[SP][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_p_can_dpan_neon;
454 op_blend_span_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_pas_can_dpan_neon;
455 op_blend_span_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_pan_can_dpan_neon;
456 op_blend_span_funcs[SP][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_p_caa_dpan_neon;
457 op_blend_span_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pas_caa_dpan_neon;
458 op_blend_span_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pan_caa_dpan_neon;
459}
460#endif
461
462#ifdef BUILD_NEON
463static void
464_op_blend_pt_p_c_dp_neon(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) {
465 s = MUL4_SYM(c, s);
466 c = 256 - (s >> 24);
467 *d = s + MUL_256(c, *d);
468}
469
/* Every single-pixel variant resolves to _op_blend_pt_p_c_dp_neon. */
#define _op_blend_pt_pas_c_dp_neon       _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pan_c_dp_neon       _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_p_can_dp_neon       _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pas_can_dp_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pan_can_dp_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_p_caa_dp_neon       _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pas_caa_dp_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pan_caa_dp_neon     _op_blend_pt_p_c_dp_neon

/* Same routine for the *_dpan_* names. */
#define _op_blend_pt_p_c_dpan_neon       _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pas_c_dpan_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pan_c_dpan_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_p_can_dpan_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pas_can_dpan_neon   _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pan_can_dpan_neon   _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_p_caa_dpan_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pas_caa_dpan_neon   _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pan_caa_dpan_neon   _op_blend_pt_p_c_dp_neon
488
489static void
490init_blend_pixel_color_pt_funcs_neon(void)
491{
492 op_blend_pt_funcs[SP][SM_N][SC][DP][CPU_NEON] = _op_blend_pt_p_c_dp_neon;
493 op_blend_pt_funcs[SP_AS][SM_N][SC][DP][CPU_NEON] = _op_blend_pt_pas_c_dp_neon;
494 op_blend_pt_funcs[SP_AN][SM_N][SC][DP][CPU_NEON] = _op_blend_pt_pan_c_dp_neon;
495 op_blend_pt_funcs[SP][SM_N][SC_AN][DP][CPU_NEON] = _op_blend_pt_p_can_dp_neon;
496 op_blend_pt_funcs[SP_AS][SM_N][SC_AN][DP][CPU_NEON] = _op_blend_pt_pas_can_dp_neon;
497 op_blend_pt_funcs[SP_AN][SM_N][SC_AN][DP][CPU_NEON] = _op_blend_pt_pan_can_dp_neon;
498 op_blend_pt_funcs[SP][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pt_p_caa_dp_neon;
499 op_blend_pt_funcs[SP_AS][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pt_pas_caa_dp_neon;
500 op_blend_pt_funcs[SP_AN][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pt_pan_caa_dp_neon;
501
502 op_blend_pt_funcs[SP][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pt_p_c_dpan_neon;
503 op_blend_pt_funcs[SP_AS][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pt_pas_c_dpan_neon;
504 op_blend_pt_funcs[SP_AN][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pt_pan_c_dpan_neon;
505 op_blend_pt_funcs[SP][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_pt_p_can_dpan_neon;
506 op_blend_pt_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_pt_pas_can_dpan_neon;
507 op_blend_pt_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_pt_pan_can_dpan_neon;
508 op_blend_pt_funcs[SP][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pt_p_caa_dpan_neon;
509 op_blend_pt_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pt_pas_caa_dpan_neon;
510 op_blend_pt_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pt_pan_caa_dpan_neon;
511}
512#endif
513
514/*-----*/
515
516/* blend_rel pixel x color -> dst */
517
518#ifdef BUILD_NEON
519
/* blend_rel span names for the DP_AN slots resolve to the plain blend
 * *_dpan_* routines. */
#define _op_blend_rel_p_c_dpan_neon       _op_blend_p_c_dpan_neon
#define _op_blend_rel_pas_c_dpan_neon     _op_blend_pas_c_dpan_neon
#define _op_blend_rel_pan_c_dpan_neon     _op_blend_pan_c_dpan_neon
#define _op_blend_rel_p_can_dpan_neon     _op_blend_p_can_dpan_neon
#define _op_blend_rel_pas_can_dpan_neon   _op_blend_pas_can_dpan_neon
#define _op_blend_rel_pan_can_dpan_neon   _op_blend_pan_can_dpan_neon
#define _op_blend_rel_p_caa_dpan_neon     _op_blend_p_caa_dpan_neon
#define _op_blend_rel_pas_caa_dpan_neon   _op_blend_pas_caa_dpan_neon
#define _op_blend_rel_pan_caa_dpan_neon   _op_blend_pan_caa_dpan_neon
529
530static void
531init_blend_rel_pixel_color_span_funcs_neon(void)
532{
533 op_blend_rel_span_funcs[SP][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_p_c_dpan_neon;
534 op_blend_rel_span_funcs[SP_AS][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pas_c_dpan_neon;
535 op_blend_rel_span_funcs[SP_AN][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pan_c_dpan_neon;
536 op_blend_rel_span_funcs[SP][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_p_can_dpan_neon;
537 op_blend_rel_span_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_pas_can_dpan_neon;
538 op_blend_rel_span_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_pan_can_dpan_neon;
539 op_blend_rel_span_funcs[SP][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_p_caa_dpan_neon;
540 op_blend_rel_span_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pas_caa_dpan_neon;
541 op_blend_rel_span_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pan_caa_dpan_neon;
542}
543#endif
544
545#ifdef BUILD_NEON
546
/* blend_rel single-pixel names for the DP_AN slots resolve to the plain
 * blend *_pt_*_dpan_* routines. */
#define _op_blend_rel_pt_p_c_dpan_neon       _op_blend_pt_p_c_dpan_neon
#define _op_blend_rel_pt_pas_c_dpan_neon     _op_blend_pt_pas_c_dpan_neon
#define _op_blend_rel_pt_pan_c_dpan_neon     _op_blend_pt_pan_c_dpan_neon
#define _op_blend_rel_pt_p_can_dpan_neon     _op_blend_pt_p_can_dpan_neon
#define _op_blend_rel_pt_pas_can_dpan_neon   _op_blend_pt_pas_can_dpan_neon
#define _op_blend_rel_pt_pan_can_dpan_neon   _op_blend_pt_pan_can_dpan_neon
#define _op_blend_rel_pt_p_caa_dpan_neon     _op_blend_pt_p_caa_dpan_neon
#define _op_blend_rel_pt_pas_caa_dpan_neon   _op_blend_pt_pas_caa_dpan_neon
#define _op_blend_rel_pt_pan_caa_dpan_neon   _op_blend_pt_pan_caa_dpan_neon
556
557static void
558init_blend_rel_pixel_color_pt_funcs_neon(void)
559{
560 op_blend_rel_pt_funcs[SP][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pt_p_c_dpan_neon;
561 op_blend_rel_pt_funcs[SP_AS][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pt_pas_c_dpan_neon;
562 op_blend_rel_pt_funcs[SP_AN][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pt_pan_c_dpan_neon;
563 op_blend_rel_pt_funcs[SP][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_pt_p_can_dpan_neon;
564 op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_pt_pas_can_dpan_neon;
565 op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_pt_pan_can_dpan_neon;
566 op_blend_rel_pt_funcs[SP][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pt_p_caa_dpan_neon;
567 op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pt_pas_caa_dpan_neon;
568 op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pt_pan_caa_dpan_neon;
569}
570#endif