diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_pixel_neon.c | 158 |
1 files changed, 0 insertions, 158 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_pixel_neon.c b/libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_pixel_neon.c deleted file mode 100644 index 5b8bd60..0000000 --- a/libraries/evas/src/lib/engines/common/evas_op_copy/op_copy_pixel_neon.c +++ /dev/null | |||
@@ -1,158 +0,0 @@ | |||
1 | /* copy pixel --> dst */ | ||
2 | |||
3 | #ifdef BUILD_NEON | ||
4 | static void | ||
5 | _op_copy_p_dp_neon(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
6 | //#define USENEON 1 | ||
7 | #ifndef USENEON | ||
8 | memcpy(d, s, l * sizeof(DATA32)); | ||
9 | return; | ||
10 | #else | ||
11 | DATA32 *e; | ||
12 | e = d + l - 23; | ||
13 | if (e > d) | ||
14 | { | ||
15 | int dl; | ||
16 | |||
17 | asm volatile | ||
18 | (".fpu neon \n\t" | ||
19 | "_op_copy_p_dp_neon_asmloop: \n\t" | ||
20 | "pld [%[s], #192] \n\t" // preload 256 bytes ahead | ||
21 | "pld [%[s], #320] \n\t" // preload 320 bytes ahead | ||
22 | "vld1.32 {d0-d3}, [%[s]]! \n\t" // load 256bits (32 bytes 8 pix), 32bit aligned | ||
23 | "vld1.32 {d4-d7} , [%[s]]! \n\t" // load 256bits (32 bytes 8 pix), 32bit aligned | ||
24 | "vld1.32 {d8-d11}, [%[s]]! \n\t" // load 256bits (32 bytes 8 pix), 32bit aligned | ||
25 | "vst1.32 {d0-d3}, [%[d]]! \n\t" // store 256bits (32 bytes 8 pix), 32bit aligned | ||
26 | "vst1.32 {d4-d7}, [%[d]]! \n\t" // store 256bits (32 bytes 8 pix), 32bit aligned | ||
27 | "vst1.32 {d8-d11}, [%[d]]! \n\t" // store 256bits (32 bytes 8 pix), 32bit aligned | ||
28 | "cmp %[e], %[d] \n\t" // compare current and end ptr | ||
29 | "bgt _op_copy_p_dp_neon_asmloop \n\t" | ||
30 | : /*out*/ | ||
31 | : /*in */ [s] "r" (s), [e] "r" (e), [d] "r" (d) | ||
32 | : /*clobber*/ | ||
33 | "q0", "q1", "q2","q3", "q4", "q5", "q6", | ||
34 | "d0", "d1", "d2", "d3", | ||
35 | "d4", "d5", "d6", "d7", | ||
36 | "d8", "d9", "d10", "d11", | ||
37 | "memory" // clobbered | ||
38 | ); | ||
39 | dl = l % 24; // dl is how many pixels at end that is not a multiple of 24 | ||
40 | l = l - dl; // jump to there at the end of the run? | ||
41 | s = s + l; | ||
42 | d = d + l; | ||
43 | } | ||
44 | e += 23; | ||
45 | for (;d < e; d++, s++) *d = *s; | ||
46 | #endif | ||
47 | } | ||
48 | |||
49 | #define _op_copy_pan_dp_neon _op_copy_p_dp_neon | ||
50 | #define _op_copy_pas_dp_neon _op_copy_p_dp_neon | ||
51 | |||
52 | #define _op_copy_p_dpan_neon _op_copy_p_dp_neon | ||
53 | #define _op_copy_pan_dpan_neon _op_copy_pan_dp_neon | ||
54 | #define _op_copy_pas_dpan_neon _op_copy_pas_dp_neon | ||
55 | |||
56 | static void | ||
57 | init_copy_pixel_span_funcs_neon(void) | ||
58 | { | ||
59 | op_copy_span_funcs[SP][SM_N][SC_N][DP][CPU_NEON] = _op_copy_p_dp_neon; | ||
60 | op_copy_span_funcs[SP_AN][SM_N][SC_N][DP][CPU_NEON] = _op_copy_pan_dp_neon; | ||
61 | op_copy_span_funcs[SP_AS][SM_N][SC_N][DP][CPU_NEON] = _op_copy_pas_dp_neon; | ||
62 | |||
63 | op_copy_span_funcs[SP][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_p_dpan_neon; | ||
64 | op_copy_span_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_pan_dpan_neon; | ||
65 | op_copy_span_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_pas_dpan_neon; | ||
66 | } | ||
67 | #endif | ||
68 | |||
69 | #ifdef BUILD_NEON | ||
70 | static void | ||
71 | _op_copy_pt_p_dp_neon(DATA32 s, DATA8 m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d) { | ||
72 | *d = s; | ||
73 | } | ||
74 | |||
75 | #define _op_copy_pt_pan_dp_neon _op_copy_pt_p_dp_neon | ||
76 | #define _op_copy_pt_pas_dp_neon _op_copy_pt_p_dp_neon | ||
77 | |||
78 | #define _op_copy_pt_p_dpan_neon _op_copy_pt_p_dp_neon | ||
79 | #define _op_copy_pt_pan_dpan_neon _op_copy_pt_pan_dp_neon | ||
80 | #define _op_copy_pt_pas_dpan_neon _op_copy_pt_pas_dp_neon | ||
81 | |||
82 | static void | ||
83 | init_copy_pixel_pt_funcs_neon(void) | ||
84 | { | ||
85 | op_copy_pt_funcs[SP][SM_N][SC_N][DP][CPU_NEON] = _op_copy_pt_p_dp_neon; | ||
86 | op_copy_pt_funcs[SP_AN][SM_N][SC_N][DP][CPU_NEON] = _op_copy_pt_pan_dp_neon; | ||
87 | op_copy_pt_funcs[SP_AS][SM_N][SC_N][DP][CPU_NEON] = _op_copy_pt_pas_dp_neon; | ||
88 | |||
89 | op_copy_pt_funcs[SP][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_pt_p_dpan_neon; | ||
90 | op_copy_pt_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_pt_pan_dpan_neon; | ||
91 | op_copy_pt_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_pt_pas_dpan_neon; | ||
92 | } | ||
93 | #endif | ||
94 | |||
95 | /*-----*/ | ||
96 | |||
97 | /* copy_rel pixel --> dst */ | ||
98 | |||
99 | #ifdef BUILD_NEON | ||
100 | static void | ||
101 | _op_copy_rel_p_dp_neon(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
102 | // FIXME: neon-it | ||
103 | DATA32 *e; | ||
104 | UNROLL8_PLD_WHILE(d, l, e, | ||
105 | { | ||
106 | *d = MUL_SYM(*d >> 24, c); | ||
107 | d++; | ||
108 | }); | ||
109 | } | ||
110 | |||
111 | |||
112 | #define _op_copy_rel_pas_dp_neon _op_copy_rel_p_dp_neon | ||
113 | #define _op_copy_rel_pan_dp_neon _op_copy_rel_p_dp_neon | ||
114 | |||
115 | #define _op_copy_rel_p_dpan_neon _op_copy_p_dpan_neon | ||
116 | #define _op_copy_rel_pan_dpan_neon _op_copy_pan_dpan_neon | ||
117 | #define _op_copy_rel_pas_dpan_neon _op_copy_pas_dpan_neon | ||
118 | |||
119 | static void | ||
120 | init_copy_rel_pixel_span_funcs_neon(void) | ||
121 | { | ||
122 | op_copy_rel_span_funcs[SP][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_p_dp_neon; | ||
123 | op_copy_rel_span_funcs[SP_AN][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_pan_dp_neon; | ||
124 | op_copy_rel_span_funcs[SP_AS][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_pas_dp_neon; | ||
125 | |||
126 | op_copy_rel_span_funcs[SP][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_p_dpan_neon; | ||
127 | op_copy_rel_span_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_pan_dpan_neon; | ||
128 | op_copy_rel_span_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_pas_dpan_neon; | ||
129 | } | ||
130 | #endif | ||
131 | |||
132 | #ifdef BUILD_NEON | ||
133 | static void | ||
134 | _op_copy_rel_pt_p_dp_neon(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
135 | s = 1 + (*d >> 24); | ||
136 | *d = MUL_256(s, c); | ||
137 | } | ||
138 | |||
139 | |||
140 | #define _op_copy_rel_pt_pan_dp_neon _op_copy_rel_pt_p_dp_neon | ||
141 | #define _op_copy_rel_pt_pas_dp_neon _op_copy_rel_pt_p_dp_neon | ||
142 | |||
143 | #define _op_copy_rel_pt_p_dpan_neon _op_copy_pt_p_dpan_neon | ||
144 | #define _op_copy_rel_pt_pan_dpan_neon _op_copy_pt_pan_dpan_neon | ||
145 | #define _op_copy_rel_pt_pas_dpan_neon _op_copy_pt_pas_dpan_neon | ||
146 | |||
147 | static void | ||
148 | init_copy_rel_pixel_pt_funcs_neon(void) | ||
149 | { | ||
150 | op_copy_rel_pt_funcs[SP][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_pt_p_dp_neon; | ||
151 | op_copy_rel_pt_funcs[SP_AN][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_pt_pan_dp_neon; | ||
152 | op_copy_rel_pt_funcs[SP_AS][SM_N][SC_N][DP][CPU_NEON] = _op_copy_rel_pt_pas_dp_neon; | ||
153 | |||
154 | op_copy_rel_pt_funcs[SP][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_pt_p_dpan_neon; | ||
155 | op_copy_rel_pt_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_pt_pan_dpan_neon; | ||
156 | op_copy_rel_pt_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_NEON] = _op_copy_rel_pt_pas_dpan_neon; | ||
157 | } | ||
158 | #endif | ||