diff options
Diffstat (limited to '')
24 files changed, 7090 insertions, 0 deletions
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/Makefile.am b/libraries/evas/src/lib/engines/common/evas_op_blend/Makefile.am new file mode 100644 index 0000000..84add38 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/Makefile.am | |||
@@ -0,0 +1,50 @@ | |||
1 | MAINTAINERCLEANFILES = Makefile.in | ||
2 | # Shipped in the distribution tarball; none of these are compiled directly by this Makefile. | ||
3 | EXTRA_DIST = \ | ||
4 | op_blend_color_.c \ | ||
5 | op_blend_color_i386.c \ | ||
6 | op_blend_color_neon.c \ | ||
7 | op_blend_color_sse3.c \ | ||
8 | op_blend_mask_color_.c \ | ||
9 | op_blend_mask_color_i386.c \ | ||
10 | op_blend_mask_color_neon.c \ | ||
11 | op_blend_mask_color_sse3.c \ | ||
12 | op_blend_pixel_.c \ | ||
13 | op_blend_pixel_color_.c \ | ||
14 | op_blend_pixel_color_i386.c \ | ||
15 | op_blend_pixel_color_neon.c \ | ||
16 | op_blend_pixel_color_sse3.c \ | ||
17 | op_blend_pixel_i386.c \ | ||
18 | op_blend_pixel_mask_.c \ | ||
19 | op_blend_pixel_mask_i386.c \ | ||
20 | op_blend_pixel_mask_neon.c \ | ||
21 | op_blend_pixel_mask_sse3.c \ | ||
22 | op_blend_pixel_neon.c \ | ||
23 | op_blend_pixel_sse3.c | ||
24 | # Non-installed libtool library; its only compiled source is op_blend_master_sse3.c. | ||
25 | noinst_LTLIBRARIES = libevas_engine_common_op_blend_master_sse3.la | ||
26 | |||
27 | libevas_engine_common_op_blend_master_sse3_la_SOURCES = \ | ||
28 | op_blend_master_sse3.c | ||
29 | # Per-target compile flags: include paths, install-dir defines, then configure-substituted flags. | ||
30 | libevas_engine_common_op_blend_master_sse3_la_CFLAGS = \ | ||
31 | -I. \ | ||
32 | -I$(top_srcdir)/src/lib \ | ||
33 | -I$(top_srcdir)/src/lib/engines/common \ | ||
34 | -I$(top_srcdir)/src/lib/engines/common/evas_op_blend \ | ||
35 | -I$(top_srcdir)/src/lib/cserve \ | ||
36 | -I$(top_srcdir)/src/lib/include \ | ||
37 | -DPACKAGE_BIN_DIR=\"$(bindir)\" \ | ||
38 | -DPACKAGE_LIB_DIR=\"$(libdir)\" \ | ||
39 | -DPACKAGE_DATA_DIR=\"$(datadir)/$(PACKAGE)\" \ | ||
40 | @FREETYPE_CFLAGS@ @VALGRIND_CFLAGS@ \ | ||
41 | @PIXMAN_CFLAGS@ \ | ||
42 | @EET_CFLAGS@ @pthread_cflags@ \ | ||
43 | @WIN32_CFLAGS@ @EINA_CFLAGS@ \ | ||
44 | @FRIBIDI_CFLAGS@ @HARFBUZZ_CFLAGS@ \ | ||
45 | @EVAS_SSE3_CFLAGS@ | ||
46 | |||
47 | # Extra prerequisite of the library target; automake only honours the exact suffix _DEPENDENCIES. | ||
48 | libevas_engine_common_op_blend_master_sse3_la_DEPENDENCIES = \ | ||
49 | $(top_builddir)/config.h | ||
50 | |||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/Makefile.in b/libraries/evas/src/lib/engines/common/evas_op_blend/Makefile.in new file mode 100644 index 0000000..e796ef2 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/Makefile.in | |||
@@ -0,0 +1,720 @@ | |||
1 | # Makefile.in generated by automake 1.11.1 from Makefile.am. | ||
2 | # @configure_input@ | ||
3 | |||
4 | # Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, | ||
5 | # 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, | ||
6 | # Inc. | ||
7 | # This Makefile.in is free software; the Free Software Foundation | ||
8 | # gives unlimited permission to copy and/or distribute it, | ||
9 | # with or without modifications, as long as this notice is preserved. | ||
10 | |||
11 | # This program is distributed in the hope that it will be useful, | ||
12 | # but WITHOUT ANY WARRANTY, to the extent permitted by law; without | ||
13 | # even the implied warranty of MERCHANTABILITY or FITNESS FOR A | ||
14 | # PARTICULAR PURPOSE. | ||
15 | |||
16 | @SET_MAKE@ | ||
17 | |||
18 | VPATH = @srcdir@ | ||
19 | pkgdatadir = $(datadir)/@PACKAGE@ | ||
20 | pkgincludedir = $(includedir)/@PACKAGE@ | ||
21 | pkglibdir = $(libdir)/@PACKAGE@ | ||
22 | pkglibexecdir = $(libexecdir)/@PACKAGE@ | ||
23 | am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd | ||
24 | install_sh_DATA = $(install_sh) -c -m 644 | ||
25 | install_sh_PROGRAM = $(install_sh) -c | ||
26 | install_sh_SCRIPT = $(install_sh) -c | ||
27 | INSTALL_HEADER = $(INSTALL_DATA) | ||
28 | transform = $(program_transform_name) | ||
29 | NORMAL_INSTALL = : | ||
30 | PRE_INSTALL = : | ||
31 | POST_INSTALL = : | ||
32 | NORMAL_UNINSTALL = : | ||
33 | PRE_UNINSTALL = : | ||
34 | POST_UNINSTALL = : | ||
35 | build_triplet = @build@ | ||
36 | host_triplet = @host@ | ||
37 | subdir = src/lib/engines/common/evas_op_blend | ||
38 | DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in | ||
39 | ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 | ||
40 | am__aclocal_m4_deps = $(top_srcdir)/m4/efl_attribute.m4 \ | ||
41 | $(top_srcdir)/m4/efl_coverage.m4 \ | ||
42 | $(top_srcdir)/m4/efl_doxygen.m4 \ | ||
43 | $(top_srcdir)/m4/efl_fnmatch.m4 \ | ||
44 | $(top_srcdir)/m4/efl_path_max.m4 $(top_srcdir)/m4/efl_tests.m4 \ | ||
45 | $(top_srcdir)/m4/evas_check_engine.m4 \ | ||
46 | $(top_srcdir)/m4/evas_check_loader.m4 \ | ||
47 | $(top_srcdir)/m4/evas_converter.m4 \ | ||
48 | $(top_srcdir)/m4/evas_dither.m4 \ | ||
49 | $(top_srcdir)/m4/evas_scaler.m4 $(top_srcdir)/m4/libtool.m4 \ | ||
50 | $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ | ||
51 | $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ | ||
52 | $(top_srcdir)/configure.ac | ||
53 | am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ | ||
54 | $(ACLOCAL_M4) | ||
55 | mkinstalldirs = $(install_sh) -d | ||
56 | CONFIG_HEADER = $(top_builddir)/config.h | ||
57 | CONFIG_CLEAN_FILES = | ||
58 | CONFIG_CLEAN_VPATH_FILES = | ||
59 | LTLIBRARIES = $(noinst_LTLIBRARIES) | ||
60 | libevas_engine_common_op_blend_master_sse3_la_LIBADD = | ||
61 | am_libevas_engine_common_op_blend_master_sse3_la_OBJECTS = libevas_engine_common_op_blend_master_sse3_la-op_blend_master_sse3.lo | ||
62 | libevas_engine_common_op_blend_master_sse3_la_OBJECTS = \ | ||
63 | $(am_libevas_engine_common_op_blend_master_sse3_la_OBJECTS) | ||
64 | AM_V_lt = $(am__v_lt_$(V)) | ||
65 | am__v_lt_ = $(am__v_lt_$(AM_DEFAULT_VERBOSITY)) | ||
66 | am__v_lt_0 = --silent | ||
67 | libevas_engine_common_op_blend_master_sse3_la_LINK = $(LIBTOOL) \ | ||
68 | $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \ | ||
69 | --mode=link $(CCLD) \ | ||
70 | $(libevas_engine_common_op_blend_master_sse3_la_CFLAGS) \ | ||
71 | $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ | ||
72 | DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) | ||
73 | depcomp = $(SHELL) $(top_srcdir)/depcomp | ||
74 | am__depfiles_maybe = depfiles | ||
75 | am__mv = mv -f | ||
76 | COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ | ||
77 | $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) | ||
78 | LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ | ||
79 | $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ | ||
80 | $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ | ||
81 | $(AM_CFLAGS) $(CFLAGS) | ||
82 | AM_V_CC = $(am__v_CC_$(V)) | ||
83 | am__v_CC_ = $(am__v_CC_$(AM_DEFAULT_VERBOSITY)) | ||
84 | am__v_CC_0 = @echo " CC " $@; | ||
85 | AM_V_at = $(am__v_at_$(V)) | ||
86 | am__v_at_ = $(am__v_at_$(AM_DEFAULT_VERBOSITY)) | ||
87 | am__v_at_0 = @ | ||
88 | CCLD = $(CC) | ||
89 | LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ | ||
90 | $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ | ||
91 | $(AM_LDFLAGS) $(LDFLAGS) -o $@ | ||
92 | AM_V_CCLD = $(am__v_CCLD_$(V)) | ||
93 | am__v_CCLD_ = $(am__v_CCLD_$(AM_DEFAULT_VERBOSITY)) | ||
94 | am__v_CCLD_0 = @echo " CCLD " $@; | ||
95 | AM_V_GEN = $(am__v_GEN_$(V)) | ||
96 | am__v_GEN_ = $(am__v_GEN_$(AM_DEFAULT_VERBOSITY)) | ||
97 | am__v_GEN_0 = @echo " GEN " $@; | ||
98 | SOURCES = $(libevas_engine_common_op_blend_master_sse3_la_SOURCES) | ||
99 | DIST_SOURCES = \ | ||
100 | $(libevas_engine_common_op_blend_master_sse3_la_SOURCES) | ||
101 | ETAGS = etags | ||
102 | CTAGS = ctags | ||
103 | DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) | ||
104 | ACLOCAL = @ACLOCAL@ | ||
105 | ALLOCA = @ALLOCA@ | ||
106 | AMTAR = @AMTAR@ | ||
107 | AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ | ||
108 | AR = @AR@ | ||
109 | AS = @AS@ | ||
110 | AUTOCONF = @AUTOCONF@ | ||
111 | AUTOHEADER = @AUTOHEADER@ | ||
112 | AUTOMAKE = @AUTOMAKE@ | ||
113 | AWK = @AWK@ | ||
114 | CC = @CC@ | ||
115 | CCDEPMODE = @CCDEPMODE@ | ||
116 | CFLAGS = @CFLAGS@ | ||
117 | CHECK_CFLAGS = @CHECK_CFLAGS@ | ||
118 | CHECK_LIBS = @CHECK_LIBS@ | ||
119 | CPP = @CPP@ | ||
120 | CPPFLAGS = @CPPFLAGS@ | ||
121 | CXX = @CXX@ | ||
122 | CXXCPP = @CXXCPP@ | ||
123 | CXXDEPMODE = @CXXDEPMODE@ | ||
124 | CXXFLAGS = @CXXFLAGS@ | ||
125 | CYGPATH_W = @CYGPATH_W@ | ||
126 | DEFS = @DEFS@ | ||
127 | DEPDIR = @DEPDIR@ | ||
128 | DIRECTFB_CFLAGS = @DIRECTFB_CFLAGS@ | ||
129 | DIRECTFB_LIBS = @DIRECTFB_LIBS@ | ||
130 | DLLTOOL = @DLLTOOL@ | ||
131 | DSYMUTIL = @DSYMUTIL@ | ||
132 | DUMPBIN = @DUMPBIN@ | ||
133 | ECHO_C = @ECHO_C@ | ||
134 | ECHO_N = @ECHO_N@ | ||
135 | ECHO_T = @ECHO_T@ | ||
136 | ECORE_EVAS_CFLAGS = @ECORE_EVAS_CFLAGS@ | ||
137 | ECORE_EVAS_LIBS = @ECORE_EVAS_LIBS@ | ||
138 | EDB_CFLAGS = @EDB_CFLAGS@ | ||
139 | EDB_LIBS = @EDB_LIBS@ | ||
140 | EDJE_CFLAGS = @EDJE_CFLAGS@ | ||
141 | EDJE_LIBS = @EDJE_LIBS@ | ||
142 | EET_CFLAGS = @EET_CFLAGS@ | ||
143 | EET_LIBS = @EET_LIBS@ | ||
144 | EFL_COVERAGE_CFLAGS = @EFL_COVERAGE_CFLAGS@ | ||
145 | EFL_COVERAGE_LIBS = @EFL_COVERAGE_LIBS@ | ||
146 | EFL_FNMATCH_LIBS = @EFL_FNMATCH_LIBS@ | ||
147 | EGREP = @EGREP@ | ||
148 | EINA_CFLAGS = @EINA_CFLAGS@ | ||
149 | EINA_LIBS = @EINA_LIBS@ | ||
150 | EVAS_CFLAGS = @EVAS_CFLAGS@ | ||
151 | EVAS_LIBS = @EVAS_LIBS@ | ||
152 | EVAS_SSE3_CFLAGS = @EVAS_SSE3_CFLAGS@ | ||
153 | EVIL_CFLAGS = @EVIL_CFLAGS@ | ||
154 | EVIL_LIBS = @EVIL_LIBS@ | ||
155 | EXEEXT = @EXEEXT@ | ||
156 | FGREP = @FGREP@ | ||
157 | FONTCONFIG_CFLAGS = @FONTCONFIG_CFLAGS@ | ||
158 | FONTCONFIG_LIBS = @FONTCONFIG_LIBS@ | ||
159 | FREETYPE_CFLAGS = @FREETYPE_CFLAGS@ | ||
160 | FREETYPE_LIBS = @FREETYPE_LIBS@ | ||
161 | FRIBIDI_CFLAGS = @FRIBIDI_CFLAGS@ | ||
162 | FRIBIDI_LIBS = @FRIBIDI_LIBS@ | ||
163 | GL_EET_CFLAGS = @GL_EET_CFLAGS@ | ||
164 | GL_EET_LIBS = @GL_EET_LIBS@ | ||
165 | GREP = @GREP@ | ||
166 | HARFBUZZ_CFLAGS = @HARFBUZZ_CFLAGS@ | ||
167 | HARFBUZZ_LIBS = @HARFBUZZ_LIBS@ | ||
168 | INSTALL = @INSTALL@ | ||
169 | INSTALL_DATA = @INSTALL_DATA@ | ||
170 | INSTALL_PROGRAM = @INSTALL_PROGRAM@ | ||
171 | INSTALL_SCRIPT = @INSTALL_SCRIPT@ | ||
172 | INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ | ||
173 | LD = @LD@ | ||
174 | LDFLAGS = @LDFLAGS@ | ||
175 | LIBOBJS = @LIBOBJS@ | ||
176 | LIBS = @LIBS@ | ||
177 | LIBTOOL = @LIBTOOL@ | ||
178 | LINEBREAK_CFLAGS = @LINEBREAK_CFLAGS@ | ||
179 | LINEBREAK_LIBS = @LINEBREAK_LIBS@ | ||
180 | LIPO = @LIPO@ | ||
181 | LN_S = @LN_S@ | ||
182 | LTLIBOBJS = @LTLIBOBJS@ | ||
183 | MAKEINFO = @MAKEINFO@ | ||
184 | MKDIR_P = @MKDIR_P@ | ||
185 | MODULE_ARCH = @MODULE_ARCH@ | ||
186 | NM = @NM@ | ||
187 | NMEDIT = @NMEDIT@ | ||
188 | OBJC = @OBJC@ | ||
189 | OBJCDEPMODE = @OBJCDEPMODE@ | ||
190 | OBJCFLAGS = @OBJCFLAGS@ | ||
191 | OBJDUMP = @OBJDUMP@ | ||
192 | OBJEXT = @OBJEXT@ | ||
193 | OTOOL = @OTOOL@ | ||
194 | OTOOL64 = @OTOOL64@ | ||
195 | PACKAGE = @PACKAGE@ | ||
196 | PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ | ||
197 | PACKAGE_NAME = @PACKAGE_NAME@ | ||
198 | PACKAGE_STRING = @PACKAGE_STRING@ | ||
199 | PACKAGE_TARNAME = @PACKAGE_TARNAME@ | ||
200 | PACKAGE_URL = @PACKAGE_URL@ | ||
201 | PACKAGE_VERSION = @PACKAGE_VERSION@ | ||
202 | PATH_SEPARATOR = @PATH_SEPARATOR@ | ||
203 | PIXMAN_CFLAGS = @PIXMAN_CFLAGS@ | ||
204 | PIXMAN_LIBS = @PIXMAN_LIBS@ | ||
205 | PKG_CONFIG = @PKG_CONFIG@ | ||
206 | PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ | ||
207 | PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ | ||
208 | PNG_CFLAGS = @PNG_CFLAGS@ | ||
209 | PNG_LIBS = @PNG_LIBS@ | ||
210 | RANLIB = @RANLIB@ | ||
211 | SDL_CFLAGS = @SDL_CFLAGS@ | ||
212 | SDL_LIBS = @SDL_LIBS@ | ||
213 | SED = @SED@ | ||
214 | SET_MAKE = @SET_MAKE@ | ||
215 | SHELL = @SHELL@ | ||
216 | SHM_OPEN_LINK = @SHM_OPEN_LINK@ | ||
217 | STRIP = @STRIP@ | ||
218 | SVG_CFLAGS = @SVG_CFLAGS@ | ||
219 | SVG_LIBS = @SVG_LIBS@ | ||
220 | VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ | ||
221 | VALGRIND_LIBS = @VALGRIND_LIBS@ | ||
222 | VERSION = @VERSION@ | ||
223 | VMAJ = @VMAJ@ | ||
224 | WIN32_CFLAGS = @WIN32_CFLAGS@ | ||
225 | WIN32_CPPFLAGS = @WIN32_CPPFLAGS@ | ||
226 | XCB_CFLAGS = @XCB_CFLAGS@ | ||
227 | XCB_GL_CFLAGS = @XCB_GL_CFLAGS@ | ||
228 | XCB_GL_LIBS = @XCB_GL_LIBS@ | ||
229 | XCB_LIBS = @XCB_LIBS@ | ||
230 | XEXT_CFLAGS = @XEXT_CFLAGS@ | ||
231 | XEXT_LIBS = @XEXT_LIBS@ | ||
232 | XMKMF = @XMKMF@ | ||
233 | X_CFLAGS = @X_CFLAGS@ | ||
234 | X_EXTRA_LIBS = @X_EXTRA_LIBS@ | ||
235 | X_LIBS = @X_LIBS@ | ||
236 | X_PRE_LIBS = @X_PRE_LIBS@ | ||
237 | abs_builddir = @abs_builddir@ | ||
238 | abs_srcdir = @abs_srcdir@ | ||
239 | abs_top_builddir = @abs_top_builddir@ | ||
240 | abs_top_srcdir = @abs_top_srcdir@ | ||
241 | ac_ct_CC = @ac_ct_CC@ | ||
242 | ac_ct_CXX = @ac_ct_CXX@ | ||
243 | ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ | ||
244 | ac_ct_OBJC = @ac_ct_OBJC@ | ||
245 | altivec_cflags = @altivec_cflags@ | ||
246 | am__include = @am__include@ | ||
247 | am__leading_dot = @am__leading_dot@ | ||
248 | am__quote = @am__quote@ | ||
249 | am__tar = @am__tar@ | ||
250 | am__untar = @am__untar@ | ||
251 | bindir = @bindir@ | ||
252 | build = @build@ | ||
253 | build_alias = @build_alias@ | ||
254 | build_cpu = @build_cpu@ | ||
255 | build_os = @build_os@ | ||
256 | build_vendor = @build_vendor@ | ||
257 | builddir = @builddir@ | ||
258 | datadir = @datadir@ | ||
259 | datarootdir = @datarootdir@ | ||
260 | dlopen_libs = @dlopen_libs@ | ||
261 | docdir = @docdir@ | ||
262 | dvidir = @dvidir@ | ||
263 | edje_cc = @edje_cc@ | ||
264 | efl_doxygen = @efl_doxygen@ | ||
265 | efl_have_doxygen = @efl_have_doxygen@ | ||
266 | evas_engine_buffer_cflags = @evas_engine_buffer_cflags@ | ||
267 | evas_engine_buffer_libs = @evas_engine_buffer_libs@ | ||
268 | evas_engine_direct3d_cflags = @evas_engine_direct3d_cflags@ | ||
269 | evas_engine_direct3d_libs = @evas_engine_direct3d_libs@ | ||
270 | evas_engine_directfb_cflags = @evas_engine_directfb_cflags@ | ||
271 | evas_engine_directfb_libs = @evas_engine_directfb_libs@ | ||
272 | evas_engine_fb_cflags = @evas_engine_fb_cflags@ | ||
273 | evas_engine_fb_libs = @evas_engine_fb_libs@ | ||
274 | evas_engine_gl_cocoa_cflags = @evas_engine_gl_cocoa_cflags@ | ||
275 | evas_engine_gl_cocoa_libs = @evas_engine_gl_cocoa_libs@ | ||
276 | evas_engine_gl_common_libs = @evas_engine_gl_common_libs@ | ||
277 | evas_engine_gl_sdl_cflags = @evas_engine_gl_sdl_cflags@ | ||
278 | evas_engine_gl_sdl_libs = @evas_engine_gl_sdl_libs@ | ||
279 | evas_engine_gl_xcb_cflags = @evas_engine_gl_xcb_cflags@ | ||
280 | evas_engine_gl_xcb_libs = @evas_engine_gl_xcb_libs@ | ||
281 | evas_engine_gl_xlib_cflags = @evas_engine_gl_xlib_cflags@ | ||
282 | evas_engine_gl_xlib_libs = @evas_engine_gl_xlib_libs@ | ||
283 | evas_engine_psl1ght_cflags = @evas_engine_psl1ght_cflags@ | ||
284 | evas_engine_psl1ght_libs = @evas_engine_psl1ght_libs@ | ||
285 | evas_engine_software_16_ddraw_cflags = @evas_engine_software_16_ddraw_cflags@ | ||
286 | evas_engine_software_16_ddraw_libs = @evas_engine_software_16_ddraw_libs@ | ||
287 | evas_engine_software_16_sdl_cflags = @evas_engine_software_16_sdl_cflags@ | ||
288 | evas_engine_software_16_sdl_libs = @evas_engine_software_16_sdl_libs@ | ||
289 | evas_engine_software_16_wince_cflags = @evas_engine_software_16_wince_cflags@ | ||
290 | evas_engine_software_16_wince_libs = @evas_engine_software_16_wince_libs@ | ||
291 | evas_engine_software_16_x11_cflags = @evas_engine_software_16_x11_cflags@ | ||
292 | evas_engine_software_16_x11_libs = @evas_engine_software_16_x11_libs@ | ||
293 | evas_engine_software_8_x11_cflags = @evas_engine_software_8_x11_cflags@ | ||
294 | evas_engine_software_8_x11_libs = @evas_engine_software_8_x11_libs@ | ||
295 | evas_engine_software_ddraw_cflags = @evas_engine_software_ddraw_cflags@ | ||
296 | evas_engine_software_ddraw_libs = @evas_engine_software_ddraw_libs@ | ||
297 | evas_engine_software_gdi_cflags = @evas_engine_software_gdi_cflags@ | ||
298 | evas_engine_software_gdi_libs = @evas_engine_software_gdi_libs@ | ||
299 | evas_engine_software_sdl_cflags = @evas_engine_software_sdl_cflags@ | ||
300 | evas_engine_software_sdl_libs = @evas_engine_software_sdl_libs@ | ||
301 | evas_engine_software_xcb_cflags = @evas_engine_software_xcb_cflags@ | ||
302 | evas_engine_software_xcb_libs = @evas_engine_software_xcb_libs@ | ||
303 | evas_engine_software_xlib_cflags = @evas_engine_software_xlib_cflags@ | ||
304 | evas_engine_software_xlib_libs = @evas_engine_software_xlib_libs@ | ||
305 | evas_image_loader_bmp_cflags = @evas_image_loader_bmp_cflags@ | ||
306 | evas_image_loader_bmp_libs = @evas_image_loader_bmp_libs@ | ||
307 | evas_image_loader_edb_cflags = @evas_image_loader_edb_cflags@ | ||
308 | evas_image_loader_edb_libs = @evas_image_loader_edb_libs@ | ||
309 | evas_image_loader_eet_cflags = @evas_image_loader_eet_cflags@ | ||
310 | evas_image_loader_eet_libs = @evas_image_loader_eet_libs@ | ||
311 | evas_image_loader_generic_cflags = @evas_image_loader_generic_cflags@ | ||
312 | evas_image_loader_generic_libs = @evas_image_loader_generic_libs@ | ||
313 | evas_image_loader_gif_cflags = @evas_image_loader_gif_cflags@ | ||
314 | evas_image_loader_gif_libs = @evas_image_loader_gif_libs@ | ||
315 | evas_image_loader_ico_cflags = @evas_image_loader_ico_cflags@ | ||
316 | evas_image_loader_ico_libs = @evas_image_loader_ico_libs@ | ||
317 | evas_image_loader_jpeg_cflags = @evas_image_loader_jpeg_cflags@ | ||
318 | evas_image_loader_jpeg_libs = @evas_image_loader_jpeg_libs@ | ||
319 | evas_image_loader_pmaps_cflags = @evas_image_loader_pmaps_cflags@ | ||
320 | evas_image_loader_pmaps_libs = @evas_image_loader_pmaps_libs@ | ||
321 | evas_image_loader_png_cflags = @evas_image_loader_png_cflags@ | ||
322 | evas_image_loader_png_libs = @evas_image_loader_png_libs@ | ||
323 | evas_image_loader_psd_cflags = @evas_image_loader_psd_cflags@ | ||
324 | evas_image_loader_psd_libs = @evas_image_loader_psd_libs@ | ||
325 | evas_image_loader_svg_cflags = @evas_image_loader_svg_cflags@ | ||
326 | evas_image_loader_svg_libs = @evas_image_loader_svg_libs@ | ||
327 | evas_image_loader_tga_cflags = @evas_image_loader_tga_cflags@ | ||
328 | evas_image_loader_tga_libs = @evas_image_loader_tga_libs@ | ||
329 | evas_image_loader_tiff_cflags = @evas_image_loader_tiff_cflags@ | ||
330 | evas_image_loader_tiff_libs = @evas_image_loader_tiff_libs@ | ||
331 | evas_image_loader_wbmp_cflags = @evas_image_loader_wbmp_cflags@ | ||
332 | evas_image_loader_wbmp_libs = @evas_image_loader_wbmp_libs@ | ||
333 | evas_image_loader_xpm_cflags = @evas_image_loader_xpm_cflags@ | ||
334 | evas_image_loader_xpm_libs = @evas_image_loader_xpm_libs@ | ||
335 | exec_prefix = @exec_prefix@ | ||
336 | have_evas_engine_gl_x11 = @have_evas_engine_gl_x11@ | ||
337 | have_evas_engine_gl_xcb = @have_evas_engine_gl_xcb@ | ||
338 | have_evas_engine_gl_xlib = @have_evas_engine_gl_xlib@ | ||
339 | have_evas_engine_software_x11 = @have_evas_engine_software_x11@ | ||
340 | have_evas_engine_software_xcb = @have_evas_engine_software_xcb@ | ||
341 | have_evas_engine_software_xlib = @have_evas_engine_software_xlib@ | ||
342 | have_lcov = @have_lcov@ | ||
343 | host = @host@ | ||
344 | host_alias = @host_alias@ | ||
345 | host_cpu = @host_cpu@ | ||
346 | host_os = @host_os@ | ||
347 | host_vendor = @host_vendor@ | ||
348 | htmldir = @htmldir@ | ||
349 | includedir = @includedir@ | ||
350 | infodir = @infodir@ | ||
351 | install_sh = @install_sh@ | ||
352 | libdir = @libdir@ | ||
353 | libexecdir = @libexecdir@ | ||
354 | localedir = @localedir@ | ||
355 | localstatedir = @localstatedir@ | ||
356 | lt_ECHO = @lt_ECHO@ | ||
357 | lt_enable_auto_import = @lt_enable_auto_import@ | ||
358 | mandir = @mandir@ | ||
359 | mkdir_p = @mkdir_p@ | ||
360 | oldincludedir = @oldincludedir@ | ||
361 | pdfdir = @pdfdir@ | ||
362 | pkgconfig_requires_private = @pkgconfig_requires_private@ | ||
363 | prefix = @prefix@ | ||
364 | program_transform_name = @program_transform_name@ | ||
365 | psdir = @psdir@ | ||
366 | pthread_cflags = @pthread_cflags@ | ||
367 | pthread_libs = @pthread_libs@ | ||
368 | release_info = @release_info@ | ||
369 | requirement_evas = @requirement_evas@ | ||
370 | sbindir = @sbindir@ | ||
371 | sharedstatedir = @sharedstatedir@ | ||
372 | srcdir = @srcdir@ | ||
373 | sysconfdir = @sysconfdir@ | ||
374 | target_alias = @target_alias@ | ||
375 | top_build_prefix = @top_build_prefix@ | ||
376 | top_builddir = @top_builddir@ | ||
377 | top_srcdir = @top_srcdir@ | ||
378 | version_info = @version_info@ | ||
379 | MAINTAINERCLEANFILES = Makefile.in | ||
380 | EXTRA_DIST = \ | ||
381 | op_blend_color_.c \ | ||
382 | op_blend_color_i386.c \ | ||
383 | op_blend_color_neon.c \ | ||
384 | op_blend_color_sse3.c \ | ||
385 | op_blend_mask_color_.c \ | ||
386 | op_blend_mask_color_i386.c \ | ||
387 | op_blend_mask_color_neon.c \ | ||
388 | op_blend_mask_color_sse3.c \ | ||
389 | op_blend_pixel_.c \ | ||
390 | op_blend_pixel_color_.c \ | ||
391 | op_blend_pixel_color_i386.c \ | ||
392 | op_blend_pixel_color_neon.c \ | ||
393 | op_blend_pixel_color_sse3.c \ | ||
394 | op_blend_pixel_i386.c \ | ||
395 | op_blend_pixel_mask_.c \ | ||
396 | op_blend_pixel_mask_i386.c \ | ||
397 | op_blend_pixel_mask_neon.c \ | ||
398 | op_blend_pixel_mask_sse3.c \ | ||
399 | op_blend_pixel_neon.c \ | ||
400 | op_blend_pixel_sse3.c | ||
401 | |||
402 | noinst_LTLIBRARIES = libevas_engine_common_op_blend_master_sse3.la | ||
403 | libevas_engine_common_op_blend_master_sse3_la_SOURCES = \ | ||
404 | op_blend_master_sse3.c | ||
405 | |||
406 | libevas_engine_common_op_blend_master_sse3_la_CFLAGS = \ | ||
407 | -I. \ | ||
408 | -I$(top_srcdir)/src/lib \ | ||
409 | -I$(top_srcdir)/src/lib/engines/common \ | ||
410 | -I$(top_srcdir)/src/lib/engines/common/evas_op_blend \ | ||
411 | -I$(top_srcdir)/src/lib/cserve \ | ||
412 | -I$(top_srcdir)/src/lib/include \ | ||
413 | -DPACKAGE_BIN_DIR=\"$(bindir)\" \ | ||
414 | -DPACKAGE_LIB_DIR=\"$(libdir)\" \ | ||
415 | -DPACKAGE_DATA_DIR=\"$(datadir)/$(PACKAGE)\" \ | ||
416 | @FREETYPE_CFLAGS@ @VALGRIND_CFLAGS@ \ | ||
417 | @PIXMAN_CFLAGS@ \ | ||
418 | @EET_CFLAGS@ @pthread_cflags@ \ | ||
419 | @WIN32_CFLAGS@ @EINA_CFLAGS@ \ | ||
420 | @FRIBIDI_CFLAGS@ @HARFBUZZ_CFLAGS@ \ | ||
421 | @PIXMAN_CFLAGS@ \ | ||
422 | @EVAS_SSE3_CFLAGS@ | ||
423 | # Prerequisites consumed by the libevas_engine_common_op_blend_master_sse3.la rule below (must be spelled _DEPENDENCIES). | ||
424 | libevas_engine_common_op_blend_master_sse3_la_DEPENDENCIES = \ | ||
425 | $(top_builddir)/config.h | ||
426 | |||
427 | all: all-am | ||
428 | |||
429 | .SUFFIXES: | ||
430 | .SUFFIXES: .c .lo .o .obj | ||
431 | $(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) | ||
432 | @for dep in $?; do \ | ||
433 | case '$(am__configure_deps)' in \ | ||
434 | *$$dep*) \ | ||
435 | ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ | ||
436 | && { if test -f $@; then exit 0; else break; fi; }; \ | ||
437 | exit 1;; \ | ||
438 | esac; \ | ||
439 | done; \ | ||
440 | echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/lib/engines/common/evas_op_blend/Makefile'; \ | ||
441 | $(am__cd) $(top_srcdir) && \ | ||
442 | $(AUTOMAKE) --gnu src/lib/engines/common/evas_op_blend/Makefile | ||
443 | .PRECIOUS: Makefile | ||
444 | Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status | ||
445 | @case '$?' in \ | ||
446 | *config.status*) \ | ||
447 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ | ||
448 | *) \ | ||
449 | echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ | ||
450 | cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ | ||
451 | esac; | ||
452 | |||
453 | $(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) | ||
454 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh | ||
455 | |||
456 | $(top_srcdir)/configure: $(am__configure_deps) | ||
457 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh | ||
458 | $(ACLOCAL_M4): $(am__aclocal_m4_deps) | ||
459 | cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh | ||
460 | $(am__aclocal_m4_deps): | ||
461 | |||
462 | clean-noinstLTLIBRARIES: | ||
463 | -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) | ||
464 | @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \ | ||
465 | dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \ | ||
466 | test "$$dir" != "$$p" || dir=.; \ | ||
467 | echo "rm -f \"$${dir}/so_locations\""; \ | ||
468 | rm -f "$${dir}/so_locations"; \ | ||
469 | done | ||
470 | libevas_engine_common_op_blend_master_sse3.la: $(libevas_engine_common_op_blend_master_sse3_la_OBJECTS) $(libevas_engine_common_op_blend_master_sse3_la_DEPENDENCIES) | ||
471 | $(AM_V_CCLD)$(libevas_engine_common_op_blend_master_sse3_la_LINK) $(libevas_engine_common_op_blend_master_sse3_la_OBJECTS) $(libevas_engine_common_op_blend_master_sse3_la_LIBADD) $(LIBS) | ||
472 | |||
473 | mostlyclean-compile: | ||
474 | -rm -f *.$(OBJEXT) | ||
475 | |||
476 | distclean-compile: | ||
477 | -rm -f *.tab.c | ||
478 | |||
479 | @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libevas_engine_common_op_blend_master_sse3_la-op_blend_master_sse3.Plo@am__quote@ | ||
480 | |||
481 | .c.o: | ||
482 | @am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< | ||
483 | @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po | ||
484 | @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ | ||
485 | @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ | ||
486 | @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ | ||
487 | @am__fastdepCC_FALSE@ $(COMPILE) -c $< | ||
488 | |||
489 | .c.obj: | ||
490 | @am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` | ||
491 | @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po | ||
492 | @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ | ||
493 | @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ | ||
494 | @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ | ||
495 | @am__fastdepCC_FALSE@ $(COMPILE) -c `$(CYGPATH_W) '$<'` | ||
496 | |||
497 | .c.lo: | ||
498 | @am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< | ||
499 | @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo | ||
500 | @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ | ||
501 | @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ | ||
502 | @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ | ||
503 | @am__fastdepCC_FALSE@ $(LTCOMPILE) -c -o $@ $< | ||
504 | |||
505 | libevas_engine_common_op_blend_master_sse3_la-op_blend_master_sse3.lo: op_blend_master_sse3.c | ||
506 | @am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libevas_engine_common_op_blend_master_sse3_la_CFLAGS) $(CFLAGS) -MT libevas_engine_common_op_blend_master_sse3_la-op_blend_master_sse3.lo -MD -MP -MF $(DEPDIR)/libevas_engine_common_op_blend_master_sse3_la-op_blend_master_sse3.Tpo -c -o libevas_engine_common_op_blend_master_sse3_la-op_blend_master_sse3.lo `test -f 'op_blend_master_sse3.c' || echo '$(srcdir)/'`op_blend_master_sse3.c | ||
507 | @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libevas_engine_common_op_blend_master_sse3_la-op_blend_master_sse3.Tpo $(DEPDIR)/libevas_engine_common_op_blend_master_sse3_la-op_blend_master_sse3.Plo | ||
508 | @am__fastdepCC_FALSE@ $(AM_V_CC) @AM_BACKSLASH@ | ||
509 | @AMDEP_TRUE@@am__fastdepCC_FALSE@ source='op_blend_master_sse3.c' object='libevas_engine_common_op_blend_master_sse3_la-op_blend_master_sse3.lo' libtool=yes @AMDEPBACKSLASH@ | ||
510 | @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ | ||
511 | @am__fastdepCC_FALSE@ $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libevas_engine_common_op_blend_master_sse3_la_CFLAGS) $(CFLAGS) -c -o libevas_engine_common_op_blend_master_sse3_la-op_blend_master_sse3.lo `test -f 'op_blend_master_sse3.c' || echo '$(srcdir)/'`op_blend_master_sse3.c | ||
512 | |||
513 | mostlyclean-libtool: | ||
514 | -rm -f *.lo | ||
515 | |||
516 | clean-libtool: | ||
517 | -rm -rf .libs _libs | ||
518 | |||
519 | ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) | ||
520 | list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ | ||
521 | unique=`for i in $$list; do \ | ||
522 | if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ | ||
523 | done | \ | ||
524 | $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ | ||
525 | END { if (nonempty) { for (i in files) print i; }; }'`; \ | ||
526 | mkid -fID $$unique | ||
527 | tags: TAGS | ||
528 | |||
529 | TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ | ||
530 | $(TAGS_FILES) $(LISP) | ||
531 | set x; \ | ||
532 | here=`pwd`; \ | ||
533 | list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ | ||
534 | unique=`for i in $$list; do \ | ||
535 | if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ | ||
536 | done | \ | ||
537 | $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ | ||
538 | END { if (nonempty) { for (i in files) print i; }; }'`; \ | ||
539 | shift; \ | ||
540 | if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ | ||
541 | test -n "$$unique" || unique=$$empty_fix; \ | ||
542 | if test $$# -gt 0; then \ | ||
543 | $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ | ||
544 | "$$@" $$unique; \ | ||
545 | else \ | ||
546 | $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ | ||
547 | $$unique; \ | ||
548 | fi; \ | ||
549 | fi | ||
550 | ctags: CTAGS | ||
551 | CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ | ||
552 | $(TAGS_FILES) $(LISP) | ||
553 | list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \ | ||
554 | unique=`for i in $$list; do \ | ||
555 | if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ | ||
556 | done | \ | ||
557 | $(AWK) '{ files[$$0] = 1; nonempty = 1; } \ | ||
558 | END { if (nonempty) { for (i in files) print i; }; }'`; \ | ||
559 | test -z "$(CTAGS_ARGS)$$unique" \ | ||
560 | || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ | ||
561 | $$unique | ||
562 | |||
563 | GTAGS: | ||
564 | here=`$(am__cd) $(top_builddir) && pwd` \ | ||
565 | && $(am__cd) $(top_srcdir) \ | ||
566 | && gtags -i $(GTAGS_ARGS) "$$here" | ||
567 | |||
568 | distclean-tags: | ||
569 | -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags | ||
570 | |||
# distdir: copy every entry of $(DISTFILES) into $(distdir).
#  - the $(srcdir)/ and $(top_srcdir)/ prefixes are stripped from the names
#    (the two sed expressions first escape regex metacharacters in the paths);
#  - any sub-directories the names contain are created with $(MKDIR_P);
#  - each entry is taken from the current directory when it exists there,
#    otherwise from $(srcdir); directories are copied with `cp -fpR`, plain
#    files with `cp -p` unless the target already exists;
#  - a failing copy aborts the recipe with `exit 1`.
571 | distdir: $(DISTFILES) | ||
572 | @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ | ||
573 | topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ | ||
574 | list='$(DISTFILES)'; \ | ||
575 | dist_files=`for file in $$list; do echo $$file; done | \ | ||
576 | sed -e "s|^$$srcdirstrip/||;t" \ | ||
577 | -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ | ||
578 | case $$dist_files in \ | ||
579 | */*) $(MKDIR_P) `echo "$$dist_files" | \ | ||
580 | sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ | ||
581 | sort -u` ;; \ | ||
582 | esac; \ | ||
583 | for file in $$dist_files; do \ | ||
584 | if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ | ||
585 | if test -d $$d/$$file; then \ | ||
586 | dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ | ||
587 | if test -d "$(distdir)/$$file"; then \ | ||
588 | find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ | ||
589 | fi; \ | ||
590 | if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ | ||
591 | cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ | ||
592 | find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ | ||
593 | fi; \ | ||
594 | cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ | ||
595 | else \ | ||
596 | test -f "$(distdir)/$$file" \ | ||
597 | || cp -p $$d/$$file "$(distdir)/$$file" \ | ||
598 | || exit 1; \ | ||
599 | fi; \ | ||
600 | done | ||
# Public entry targets; each one simply forwards to its "-am" counterpart.
# all-am builds the Makefile and the libtool libraries; installdirs has
# nothing to do in this directory.
601 | check-am: all-am | ||
602 | check: check-am | ||
603 | all-am: Makefile $(LTLIBRARIES) | ||
604 | installdirs: | ||
605 | install: install-am | ||
606 | install-exec: install-exec-am | ||
607 | install-data: install-data-am | ||
608 | uninstall: uninstall-am | ||
609 | |||
# install-am: after all-am, re-invoke make for the exec and data install steps.
610 | install-am: all-am | ||
611 | @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am | ||
612 | |||
613 | installcheck: installcheck-am | ||
# install-strip: run the `install` target with the install programs replaced
# by $(INSTALL_STRIP_PROGRAM) and INSTALL_STRIP_FLAG=-s; STRIPPROG is passed
# through INSTALL_PROGRAM_ENV only when $(STRIP) is non-empty.
614 | install-strip: | ||
615 | $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ | ||
616 | install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ | ||
617 | `test -z '$(STRIP)' || \ | ||
618 | echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install | ||
# Generic clean rules. mostlyclean-generic and clean-generic have no recipe;
# the other two only call rm when the corresponding file list is non-empty
# (and, for the VPATH list, only when building outside $(srcdir)).
619 | mostlyclean-generic: | ||
620 | |||
621 | clean-generic: | ||
622 | |||
623 | distclean-generic: | ||
624 | -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) | ||
625 | -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) | ||
626 | |||
627 | maintainer-clean-generic: | ||
628 | @echo "This command is intended for maintainers to use" | ||
629 | @echo "it deletes files that may require special tools to rebuild." | ||
630 | -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) | ||
631 | clean: clean-am | ||
632 | |||
633 | clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ | ||
634 | mostlyclean-am | ||
635 | |||
# distclean additionally removes the dependency directory and the Makefile itself.
636 | distclean: distclean-am | ||
637 | -rm -rf ./$(DEPDIR) | ||
638 | -rm -f Makefile | ||
639 | distclean-am: clean-am distclean-compile distclean-generic \ | ||
640 | distclean-tags | ||
641 | |||
# Documentation and install hooks. Every public target forwards to its "-am"
# twin, and all of the "-am" twins in this block are empty (no prerequisites,
# no recipe) except maintainer-clean-am and mostlyclean-am further down.
642 | dvi: dvi-am | ||
643 | |||
644 | dvi-am: | ||
645 | |||
646 | html: html-am | ||
647 | |||
648 | html-am: | ||
649 | |||
650 | info: info-am | ||
651 | |||
652 | info-am: | ||
653 | |||
654 | install-data-am: | ||
655 | |||
656 | install-dvi: install-dvi-am | ||
657 | |||
658 | install-dvi-am: | ||
659 | |||
660 | install-exec-am: | ||
661 | |||
662 | install-html: install-html-am | ||
663 | |||
664 | install-html-am: | ||
665 | |||
666 | install-info: install-info-am | ||
667 | |||
668 | install-info-am: | ||
669 | |||
670 | install-man: | ||
671 | |||
672 | install-pdf: install-pdf-am | ||
673 | |||
674 | install-pdf-am: | ||
675 | |||
676 | install-ps: install-ps-am | ||
677 | |||
678 | install-ps-am: | ||
679 | |||
680 | installcheck-am: | ||
681 | |||
# maintainer-clean: like distclean, it also removes ./$(DEPDIR) and the Makefile.
682 | maintainer-clean: maintainer-clean-am | ||
683 | -rm -rf ./$(DEPDIR) | ||
684 | -rm -f Makefile | ||
685 | maintainer-clean-am: distclean-am maintainer-clean-generic | ||
686 | |||
687 | mostlyclean: mostlyclean-am | ||
688 | |||
689 | mostlyclean-am: mostlyclean-compile mostlyclean-generic \ | ||
690 | mostlyclean-libtool | ||
691 | |||
692 | pdf: pdf-am | ||
693 | |||
694 | pdf-am: | ||
695 | |||
696 | ps: ps-am | ||
697 | |||
698 | ps-am: | ||
699 | |||
700 | uninstall-am: | ||
701 | |||
# Special targets: .MAKE lists the targets whose recipes re-invoke $(MAKE);
# .PHONY lists the targets that do not correspond to files.
702 | .MAKE: install-am install-strip | ||
703 | |||
704 | .PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \ | ||
705 | clean-libtool clean-noinstLTLIBRARIES ctags distclean \ | ||
706 | distclean-compile distclean-generic distclean-libtool \ | ||
707 | distclean-tags distdir dvi dvi-am html html-am info info-am \ | ||
708 | install install-am install-data install-data-am install-dvi \ | ||
709 | install-dvi-am install-exec install-exec-am install-html \ | ||
710 | install-html-am install-info install-info-am install-man \ | ||
711 | install-pdf install-pdf-am install-ps install-ps-am \ | ||
712 | install-strip installcheck installcheck-am installdirs \ | ||
713 | maintainer-clean maintainer-clean-generic mostlyclean \ | ||
714 | mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ | ||
715 | pdf pdf-am ps ps-am tags uninstall uninstall-am | ||
716 | |||
717 | |||
718 | # Tell versions [3.59,3.63) of GNU make to not export all variables. | ||
719 | # Otherwise a system limit (for SysV at least) may be exceeded. | ||
720 | .NOEXPORT: | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_.c new file mode 100644 index 0000000..be70065 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_.c | |||
@@ -0,0 +1,109 @@ | |||
1 | /* blend color -> dst */ | ||
2 | |||
3 | #ifdef BUILD_C | ||
/* Blend the solid color c over a span of l destination pixels starting at d.
 * Each pixel becomes c + MUL_256(a, *d), where a = 256 - (c >> 24) is computed
 * once from the top byte of c.  s and m are not used by this variant; e is
 * only handed to UNROLL8_PLD_WHILE, which drives the loop. */
4 | static void | ||
5 | _op_blend_c_dp(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
6 | DATA32 *e, a = 256 - (c >> 24); | ||
7 | UNROLL8_PLD_WHILE(d, l, e, | ||
8 | { | ||
9 | *d = c + MUL_256(a, *d); | ||
10 | d++; | ||
11 | }); | ||
12 | } | ||
13 | |||
/* The caa and dpan variants have no code of their own: every name below
 * resolves to _op_blend_c_dp. */
14 | #define _op_blend_caa_dp _op_blend_c_dp | ||
15 | |||
16 | #define _op_blend_c_dpan _op_blend_c_dp | ||
17 | #define _op_blend_caa_dpan _op_blend_c_dpan | ||
18 | |||
/* Store the C color -> dst span functions in op_blend_span_funcs for the
 * [SP_N][SM_N] slots: colors SC and SC_AA, destinations DP and DP_AN. */
19 | static void | ||
20 | init_blend_color_span_funcs_c(void) | ||
21 | { | ||
22 | op_blend_span_funcs[SP_N][SM_N][SC][DP][CPU_C] = _op_blend_c_dp; | ||
23 | op_blend_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_C] = _op_blend_caa_dp; | ||
24 | |||
25 | op_blend_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_C] = _op_blend_c_dpan; | ||
26 | op_blend_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_caa_dpan; | ||
27 | } | ||
28 | #endif | ||
29 | |||
30 | #ifdef BUILD_C | ||
/* Single-pixel version of the color -> dst blend: *d = c + MUL_256(256 - (c >> 24), *d).
 * The incoming value of s is ignored; the parameter is reused as scratch for
 * the scale factor.  m is unused. */
31 | static void | ||
32 | _op_blend_pt_c_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
33 | s = 256 - (c >> 24); | ||
34 | *d = c + MUL_256(s, *d); | ||
35 | } | ||
36 | |||
/* All point variants below (caa, dpan, dpas) are aliases that resolve to
 * _op_blend_pt_c_dp. */
37 | #define _op_blend_pt_caa_dp _op_blend_pt_c_dp | ||
38 | |||
39 | #define _op_blend_pt_c_dpan _op_blend_pt_c_dp | ||
40 | #define _op_blend_pt_caa_dpan _op_blend_pt_c_dpan | ||
41 | |||
42 | #define _op_blend_pt_c_dpas _op_blend_pt_c_dp | ||
43 | #define _op_blend_pt_caa_dpas _op_blend_pt_c_dp | ||
44 | |||
/* Store the C color -> dst point functions in op_blend_pt_funcs for the
 * [SP_N][SM_N] slots: colors SC and SC_AA, destinations DP and DP_AN.
 * The dpas aliases above are not registered here. */
45 | static void | ||
46 | init_blend_color_pt_funcs_c(void) | ||
47 | { | ||
48 | op_blend_pt_funcs[SP_N][SM_N][SC][DP][CPU_C] = _op_blend_pt_c_dp; | ||
49 | op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_C] = _op_blend_pt_caa_dp; | ||
50 | |||
51 | op_blend_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_C] = _op_blend_pt_c_dpan; | ||
52 | op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_pt_caa_dpan; | ||
53 | } | ||
54 | #endif | ||
55 | |||
56 | /*-----*/ | ||
57 | |||
58 | /* blend_rel color -> dst */ | ||
59 | |||
60 | #ifdef BUILD_C | ||
/* "blend_rel" of the solid color c over l destination pixels.
 * Unlike the plain blend, c is first scaled by the destination's own top byte
 * (MUL_SYM(*d >> 24, c)) before it is added to the destination scaled by
 * alpha = 256 - (c >> 24).  s and m are unused. */
61 | static void | ||
62 | _op_blend_rel_c_dp(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
63 | DATA32 *e; | ||
64 | int alpha = 256 - (c >> 24); | ||
65 | UNROLL8_PLD_WHILE(d, l, e, | ||
66 | { | ||
67 | *d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d); | ||
68 | d++; | ||
69 | }); | ||
70 | } | ||
71 | |||
/* caa shares the rel span function; for the dpan destinations the rel
 * variants fall back to the plain (non-rel) blend aliases. */
72 | #define _op_blend_rel_caa_dp _op_blend_rel_c_dp | ||
73 | |||
74 | #define _op_blend_rel_c_dpan _op_blend_c_dpan | ||
75 | #define _op_blend_rel_caa_dpan _op_blend_caa_dpan | ||
76 | |||
/* Store the C blend_rel color span functions in op_blend_rel_span_funcs for
 * the [SP_N][SM_N] slots: colors SC and SC_AA, destinations DP and DP_AN. */
77 | static void | ||
78 | init_blend_rel_color_span_funcs_c(void) | ||
79 | { | ||
80 | op_blend_rel_span_funcs[SP_N][SM_N][SC][DP][CPU_C] = _op_blend_rel_c_dp; | ||
81 | op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_C] = _op_blend_rel_caa_dp; | ||
82 | |||
83 | op_blend_rel_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_C] = _op_blend_rel_c_dpan; | ||
84 | op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_rel_caa_dpan; | ||
85 | } | ||
86 | #endif | ||
87 | |||
88 | #ifdef BUILD_C | ||
/* Single-pixel blend_rel: c is scaled by the destination's top byte and added
 * to the destination scaled by 256 - (c >> 24).  The incoming value of s is
 * ignored; the parameter is reused to hold *d >> 24.  m is unused. */
89 | static void | ||
90 | _op_blend_rel_pt_c_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
91 | s = *d >> 24; | ||
92 | *d = MUL_SYM(s, c) + MUL_256(256 - (c >> 24), *d); | ||
93 | } | ||
94 | |||
/* caa shares the rel point function; for the dpan destinations the rel
 * variants fall back to the plain (non-rel) point aliases. */
95 | #define _op_blend_rel_pt_caa_dp _op_blend_rel_pt_c_dp | ||
96 | |||
97 | #define _op_blend_rel_pt_c_dpan _op_blend_pt_c_dpan | ||
98 | #define _op_blend_rel_pt_caa_dpan _op_blend_pt_caa_dpan | ||
99 | |||
/* Store the C blend_rel color point functions in op_blend_rel_pt_funcs for
 * the [SP_N][SM_N] slots: colors SC and SC_AA, destinations DP and DP_AN. */
100 | static void | ||
101 | init_blend_rel_color_pt_funcs_c(void) | ||
102 | { | ||
103 | op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP][CPU_C] = _op_blend_rel_pt_c_dp; | ||
104 | op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_C] = _op_blend_rel_pt_caa_dp; | ||
105 | |||
106 | op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_C] = _op_blend_rel_pt_c_dpan; | ||
107 | op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_rel_pt_caa_dpan; | ||
108 | } | ||
109 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_i386.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_i386.c new file mode 100644 index 0000000..a05af7d --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_i386.c | |||
@@ -0,0 +1,138 @@ | |||
1 | /* blend color --> dst */ | ||
2 | |||
3 | #ifdef BUILD_MMX | ||
/* MMX version of the color -> dst span blend; walks d up to e = d + l one
 * pixel per iteration.  Before the loop mm0 is XORed with itself, c is moved
 * into mm2 and 256 - (c >> 24) into mm3 (c is overwritten with that factor).
 * Per pixel: load *d into mm1, MUL4_256_R2R(mm3, mm1), add mm2, store back.
 * NOTE(review): the unpack/pack details are inside the MOV_* / MUL4_* macros,
 * which are defined elsewhere.  s and m are unused. */
4 | static void | ||
5 | _op_blend_c_dp_mmx(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
6 | DATA32 *e = d + l; | ||
7 | pxor_r2r(mm0, mm0); | ||
8 | MOV_P2R(c, mm2, mm0) | ||
9 | c = 256 - (c >> 24); | ||
10 | MOV_A2R(c, mm3) | ||
11 | while (d < e) { | ||
12 | MOV_P2R(*d, mm1, mm0) | ||
13 | MUL4_256_R2R(mm3, mm1) | ||
14 | paddw_r2r(mm2, mm1); | ||
15 | MOV_R2P(mm1, *d, mm0) | ||
16 | d++; | ||
17 | } | ||
18 | } | ||
19 | |||
/* The caa and dpan MMX variants are aliases of _op_blend_c_dp_mmx. */
20 | #define _op_blend_caa_dp_mmx _op_blend_c_dp_mmx | ||
21 | |||
22 | #define _op_blend_c_dpan_mmx _op_blend_c_dp_mmx | ||
23 | #define _op_blend_caa_dpan_mmx _op_blend_c_dpan_mmx | ||
24 | |||
/* Store the MMX color -> dst span functions in the CPU_MMX slots of
 * op_blend_span_funcs (colors SC and SC_AA, destinations DP and DP_AN). */
25 | static void | ||
26 | init_blend_color_span_funcs_mmx(void) | ||
27 | { | ||
28 | op_blend_span_funcs[SP_N][SM_N][SC][DP][CPU_MMX] = _op_blend_c_dp_mmx; | ||
29 | op_blend_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_MMX] = _op_blend_caa_dp_mmx; | ||
30 | |||
31 | op_blend_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_c_dpan_mmx; | ||
32 | op_blend_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_caa_dpan_mmx; | ||
33 | } | ||
34 | #endif | ||
35 | |||
36 | #ifdef BUILD_MMX | ||
/* Single-pixel MMX color -> dst blend: the same register sequence as the span
 * version, applied once to *d.  c is overwritten with 256 - (c >> 24) after
 * it has been moved into mm2.  s and m are unused. */
37 | static void | ||
38 | _op_blend_pt_c_dp_mmx(DATA32 s __UNUSED__, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
39 | pxor_r2r(mm0, mm0); | ||
40 | MOV_P2R(c, mm2, mm0) | ||
41 | c = 256 - (c >> 24); | ||
42 | MOV_A2R(c, mm3) | ||
43 | MOV_P2R(*d, mm1, mm0) | ||
44 | MUL4_256_R2R(mm3, mm1) | ||
45 | paddw_r2r(mm2, mm1); | ||
46 | MOV_R2P(mm1, *d, mm0) | ||
47 | } | ||
48 | |||
/* The caa and dpan MMX point variants are aliases of _op_blend_pt_c_dp_mmx. */
49 | #define _op_blend_pt_caa_dp_mmx _op_blend_pt_c_dp_mmx | ||
50 | |||
51 | #define _op_blend_pt_c_dpan_mmx _op_blend_pt_c_dp_mmx | ||
52 | #define _op_blend_pt_caa_dpan_mmx _op_blend_pt_c_dpan_mmx | ||
53 | |||
/* Store the MMX color -> dst point functions in the CPU_MMX slots of
 * op_blend_pt_funcs (colors SC and SC_AA, destinations DP and DP_AN). */
54 | static void | ||
55 | init_blend_color_pt_funcs_mmx(void) | ||
56 | { | ||
57 | op_blend_pt_funcs[SP_N][SM_N][SC][DP][CPU_MMX] = _op_blend_pt_c_dp_mmx; | ||
58 | op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_MMX] = _op_blend_pt_caa_dp_mmx; | ||
59 | |||
60 | op_blend_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_pt_c_dpan_mmx; | ||
61 | op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_pt_caa_dpan_mmx; | ||
62 | } | ||
63 | #endif | ||
64 | /*-----*/ | ||
65 | |||
66 | /* blend_rel color -> dst */ | ||
67 | |||
68 | #ifdef BUILD_MMX | ||
/* MMX version of the blend_rel color span.  Setup matches the plain blend
 * (c in mm2, 256 - (c >> 24) in mm3) plus ALPHA_255 in mm5.  Per pixel the
 * destination is loaded into mm1, MOV_RA2R(mm1, mm4) derives mm4 from it,
 * mm1 is scaled by mm3, mm4 is combined with the color via
 * MUL4_SYM_R2R(mm2, mm4, mm5), and the two results are added and stored.
 * NOTE(review): presumably MOV_RA2R replicates the destination alpha, which
 * would mirror MUL_SYM(*d >> 24, c) in the C version — confirm against the
 * macro definitions.  s and m are unused. */
69 | static void | ||
70 | _op_blend_rel_c_dp_mmx(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
71 | DATA32 *e = d + l; | ||
72 | pxor_r2r(mm0, mm0); | ||
73 | MOV_P2R(c, mm2, mm0) | ||
74 | c = 256 - (c >> 24); | ||
75 | MOV_A2R(c, mm3) | ||
76 | MOV_A2R(ALPHA_255, mm5) | ||
77 | while (d < e) { | ||
78 | MOV_P2R(*d, mm1, mm0) | ||
79 | MOV_RA2R(mm1, mm4) | ||
80 | MUL4_256_R2R(mm3, mm1) | ||
81 | MUL4_SYM_R2R(mm2, mm4, mm5) | ||
82 | paddw_r2r(mm4, mm1); | ||
83 | MOV_R2P(mm1, *d, mm0) | ||
84 | d++; | ||
85 | } | ||
86 | } | ||
87 | |||
/* caa shares the rel MMX span function; the dpan destinations reuse the
 * plain (non-rel) MMX blend aliases. */
88 | #define _op_blend_rel_caa_dp_mmx _op_blend_rel_c_dp_mmx | ||
89 | |||
90 | #define _op_blend_rel_c_dpan_mmx _op_blend_c_dpan_mmx | ||
91 | #define _op_blend_rel_caa_dpan_mmx _op_blend_caa_dpan_mmx | ||
92 | |||
/* Store the MMX blend_rel color span functions in the CPU_MMX slots of
 * op_blend_rel_span_funcs. */
93 | static void | ||
94 | init_blend_rel_color_span_funcs_mmx(void) | ||
95 | { | ||
96 | op_blend_rel_span_funcs[SP_N][SM_N][SC][DP][CPU_MMX] = _op_blend_rel_c_dp_mmx; | ||
97 | op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_MMX] = _op_blend_rel_caa_dp_mmx; | ||
98 | |||
99 | op_blend_rel_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_rel_c_dpan_mmx; | ||
100 | op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_rel_caa_dpan_mmx; | ||
101 | } | ||
102 | #endif | ||
103 | |||
104 | #ifdef BUILD_MMX | ||
/* Single-pixel MMX blend_rel.  Here the scale factor is built in registers:
 * mm6 starts as ALPHA_256 and has the value derived from the color by
 * MOV_RA2R(mm2, mm1) subtracted from it.  The destination is then loaded into
 * mm1 and scaled by mm6, the color in mm2 is combined with mm4 (derived from
 * the destination) through MUL4_SYM_R2R, and the sum is stored to *d.
 * Note that mm1 is used twice: first as a temporary for the color-derived
 * value, then for the destination pixel.  s and m are unused. */
105 | static void | ||
106 | _op_blend_rel_pt_c_dp_mmx(DATA32 s __UNUSED__, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
107 | pxor_r2r(mm0, mm0); | ||
108 | MOV_A2R(ALPHA_256, mm6) | ||
109 | MOV_A2R(ALPHA_255, mm5) | ||
110 | |||
111 | MOV_P2R(c, mm2, mm0) | ||
112 | MOV_RA2R(mm2, mm1) | ||
113 | psubw_r2r(mm1, mm6); | ||
114 | |||
115 | MOV_P2R(*d, mm1, mm0) | ||
116 | MOV_RA2R(mm1, mm4) | ||
117 | MUL4_256_R2R(mm6, mm1) | ||
118 | |||
119 | MUL4_SYM_R2R(mm4, mm2, mm5) | ||
120 | paddw_r2r(mm2, mm1); | ||
121 | MOV_R2P(mm1, *d, mm0) | ||
122 | } | ||
123 | |||
/* caa shares the rel MMX point function; the dpan destinations reuse the
 * plain (non-rel) MMX point aliases. */
124 | #define _op_blend_rel_pt_caa_dp_mmx _op_blend_rel_pt_c_dp_mmx | ||
125 | |||
126 | #define _op_blend_rel_pt_c_dpan_mmx _op_blend_pt_c_dpan_mmx | ||
127 | #define _op_blend_rel_pt_caa_dpan_mmx _op_blend_pt_caa_dpan_mmx | ||
128 | |||
/* Store the MMX blend_rel color point functions in the CPU_MMX slots of
 * op_blend_rel_pt_funcs. */
129 | static void | ||
130 | init_blend_rel_color_pt_funcs_mmx(void) | ||
131 | { | ||
132 | op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP][CPU_MMX] = _op_blend_rel_pt_c_dp_mmx; | ||
133 | op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_MMX] = _op_blend_rel_pt_caa_dp_mmx; | ||
134 | |||
135 | op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_rel_pt_c_dpan_mmx; | ||
136 | op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_rel_pt_caa_dpan_mmx; | ||
137 | } | ||
138 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c new file mode 100644 index 0000000..53b9991 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_neon.c | |||
@@ -0,0 +1,223 @@ | |||
1 | /* blend color --> dst */ | ||
2 | |||
3 | #ifdef BUILD_NEON | ||
/* NEON version of the color -> dst span blend.
 *
 * q6 holds c replicated into every 32-bit lane.  q7 holds the scale factor:
 * (~c) >> 24, i.e. 255 minus the top byte of c, copied into all four bytes of
 * each lane by the multiply with 0x01010101 (q5).  Every destination byte is
 * multiplied by that factor, narrowed back with a rounding shift by 8, and c
 * is added.
 *
 * Flow: d and e are rounded down to a 4-byte boundary; one pixel is blended
 * if d is not 8-byte aligned; pairs are blended until d is 32-byte aligned
 * (only while at least 32 bytes remain); the main loop handles 8 pixels per
 * iteration; the tail is finished in pairs and a final single pixel.
 *
 * The asm changes d, e and tmp, so they are read-write ("+r") operands rather
 * than input-only ones, and "cc" is clobbered because ands/cmp set the flags.
 * With input-only operands the compiler was free to assume the registers were
 * unchanged, or to share one register between operands of equal value.
 *
 * NOTE(review): for l <= 0 with d not 8-byte aligned the leading single-pixel
 * step still executes; confirm callers never pass an empty span.
 * s and m are unused. */
4 | static void | ||
5 | _op_blend_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
6 | DATA32 *e = d + l, *tmp = 0; | ||
7 | #define AP "B_C_DP" | ||
8 | asm volatile ( | ||
9 | ".fpu neon \n\t" | ||
10 | "vdup.u32 q6, %[c] \n\t" | ||
11 | "vmov.i8 q5, #1 \n\t" | ||
12 | "vmvn.u8 q7,q6 \n\t" | ||
13 | "vshr.u32 q7, q7, $0x18 \n\t" | ||
14 | "vmul.u32 q7,q5, q7 \n\t" | ||
15 | "bic %[e], #3 \n\t" | ||
16 | "bic %[d], #3 \n\t" | ||
17 | |||
18 | AP "loopchoose: \n\t" | ||
19 | // If aligned already - straight to quads | ||
20 | "andS %[tmp], %[d],$0x1f \n\t" | ||
21 | "beq "AP"quadloops \n\t" | ||
22 | |||
23 | "andS %[tmp], %[d],$0x4 \n\t" | ||
24 | "beq "AP"dualloop \n\t" | ||
25 | |||
26 | // Only ever executes once, fall through to dual | ||
27 | AP "singleloop: \n\t" | ||
28 | // Blend one pixel so that d becomes 8-byte aligned | ||
29 | "vld1.32 d0[0], [%[d]] \n\t" | ||
30 | // Widening multiply into q0, then narrow back into d0 | ||
31 | "vmull.u8 q0, d0, d14 \n\t" | ||
32 | "vqrshrn.u16 d0, q0, #8 \n\t" | ||
33 | "vadd.u8 d0, d12, d0 \n\t" | ||
34 | "vst1.32 d0[0], [%[d]] \n\t" | ||
35 | |||
36 | "add %[d], #4 \n\t" | ||
37 | |||
38 | // Can we go the fast path? | ||
39 | "andS %[tmp], %[d],$0x1f \n\t" | ||
40 | "beq "AP"quadloops \n\t" | ||
41 | |||
42 | AP "dualloop: \n\t" | ||
43 | "sub %[tmp], %[e], %[d] \n\t" | ||
44 | "cmp %[tmp], #32 \n\t" | ||
45 | "blt "AP"loopout \n\t" | ||
46 | |||
47 | |||
48 | AP "dualloopint: \n\t" | ||
49 | "vldr.32 d0, [%[d]] \n\t" | ||
50 | "vmull.u8 q1, d0, d14 \n\t" | ||
51 | "vqrshrn.u16 d0, q1, #8 \n\t" | ||
52 | "vqadd.u8 d0, d0, d12 \n\t" | ||
53 | |||
54 | "vstm %[d]!, {d0} \n\t" | ||
55 | |||
56 | "ands %[tmp], %[d], $0x1f \n\t" | ||
57 | "bne "AP"dualloopint \n\t" | ||
58 | |||
59 | AP "quadloops: \n\t" | ||
60 | "sub %[tmp], %[e], %[d] \n\t" | ||
61 | "cmp %[tmp], #32 \n\t" | ||
62 | "blt "AP"loopout \n\t" | ||
63 | |||
64 | "sub %[tmp],%[e],#31 \n\t" | ||
65 | |||
66 | AP "quadloopint:\n\t" | ||
67 | "vldm %[d], {d0,d1,d2,d3} \n\t" | ||
68 | |||
69 | "vmull.u8 q2, d0, d14 \n\t" | ||
70 | "vmull.u8 q3, d1, d15 \n\t" | ||
71 | "vmull.u8 q4, d2, d14 \n\t" | ||
72 | "vmull.u8 q5, d3, d15 \n\t" | ||
73 | |||
74 | "vqrshrn.u16 d0, q2, #8 \n\t" | ||
75 | "vqrshrn.u16 d1, q3, #8 \n\t" | ||
76 | "vqrshrn.u16 d2, q4, #8 \n\t" | ||
77 | "vqrshrn.u16 d3, q5, #8 \n\t" | ||
78 | |||
79 | "vqadd.u8 q0, q6, q0 \n\t" | ||
80 | "vqadd.u8 q1, q6, q1 \n\t" | ||
81 | |||
82 | "vstm %[d]!, {d0,d1,d2,d3} \n\t" | ||
83 | |||
84 | "cmp %[tmp], %[d]\n\t" | ||
85 | "bhi "AP"quadloopint\n\t" | ||
86 | |||
87 | AP "loopout: \n\t" | ||
88 | "cmp %[d], %[e]\n\t" | ||
89 | "beq "AP"done\n\t" | ||
90 | "sub %[tmp],%[e], %[d] \n\t" | ||
91 | "cmp %[tmp],#8 \n\t" | ||
92 | "blt "AP"singleloop2 \n\t" | ||
93 | |||
94 | AP "dualloop2: \n\t" | ||
95 | "sub %[tmp],%[e],$0x7 \n\t" | ||
96 | AP "dualloop2int: \n\t" | ||
97 | "vldr.64 d0, [%[d]] \n\t" | ||
98 | "vmull.u8 q1, d0, d14 \n\t" | ||
99 | "vqrshrn.u16 d0, q1, #8 \n\t" | ||
100 | "vqadd.u8 d0, d0, d12 \n\t" | ||
101 | |||
102 | "vstr.64 d0, [%[d]] \n\t" | ||
103 | |||
104 | "add %[d], #8 \n\t" | ||
105 | "cmp %[tmp], %[d] \n\t" | ||
106 | "bhi "AP"dualloop2int \n\t" | ||
107 | |||
108 | // Single ?? | ||
109 | "cmp %[e], %[d] \n\t" | ||
110 | "beq "AP"done \n\t" | ||
111 | |||
112 | AP "singleloop2: \n\t" | ||
113 | "vld1.32 d0[0], [%[d]] \n\t" | ||
114 | "vmull.u8 q1, d0, d14 \n\t" | ||
115 | "vqrshrn.u16 d0, q1, #8 \n\t" | ||
116 | "vqadd.u8 d0, d0, d12 \n\t" | ||
117 | |||
118 | "vst1.32 d0[0], [%[d]] \n\t" | ||
119 | |||
120 | AP "done:\n\t" | ||
121 | |||
122 | : [e] "+r" (e), [d] "+r" (d), [tmp] "+r" (tmp) // read-write: all three are modified by the asm | ||
123 | // Input | ||
124 | : [c] "r" (c) | ||
125 | : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","cc","memory" // clobbered | ||
126 | |||
127 | ); | ||
128 | #undef AP | ||
129 | |||
130 | } | ||
131 | |||
/* The caa and dpan NEON variants are aliases of _op_blend_c_dp_neon. */
132 | #define _op_blend_caa_dp_neon _op_blend_c_dp_neon | ||
133 | |||
134 | #define _op_blend_c_dpan_neon _op_blend_c_dp_neon | ||
135 | #define _op_blend_caa_dpan_neon _op_blend_c_dpan_neon | ||
136 | |||
/* Store the NEON color -> dst span functions in the CPU_NEON slots of
 * op_blend_span_funcs (colors SC and SC_AA, destinations DP and DP_AN). */
137 | static void | ||
138 | init_blend_color_span_funcs_neon(void) | ||
139 | { | ||
140 | op_blend_span_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_blend_c_dp_neon; | ||
141 | op_blend_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_caa_dp_neon; | ||
142 | |||
143 | op_blend_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_c_dpan_neon; | ||
144 | op_blend_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_caa_dpan_neon; | ||
145 | } | ||
146 | #endif | ||
147 | |||
148 | #ifdef BUILD_NEON | ||
/* Single-pixel color -> dst blend for the NEON table.  The body is plain C
 * and identical to the generic version: *d = c + MUL_256(256 - (c >> 24), *d).
 * s is declared __UNUSED__ but is in fact reused as scratch for the scale
 * factor; its incoming value is ignored.  m is unused. */
149 | static void | ||
150 | _op_blend_pt_c_dp_neon(DATA32 s __UNUSED__, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
151 | s = 256 - (c >> 24); | ||
152 | *d = c + MUL_256(s, *d); | ||
153 | } | ||
154 | |||
/* The caa and dpan NEON point variants are aliases of _op_blend_pt_c_dp_neon. */
155 | #define _op_blend_pt_caa_dp_neon _op_blend_pt_c_dp_neon | ||
156 | |||
157 | #define _op_blend_pt_c_dpan_neon _op_blend_pt_c_dp_neon | ||
158 | #define _op_blend_pt_caa_dpan_neon _op_blend_pt_c_dpan_neon | ||
159 | |||
/* Store the NEON color -> dst point functions in the CPU_NEON slots of
 * op_blend_pt_funcs (colors SC and SC_AA, destinations DP and DP_AN). */
160 | static void | ||
161 | init_blend_color_pt_funcs_neon(void) | ||
162 | { | ||
163 | op_blend_pt_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_blend_pt_c_dp_neon; | ||
164 | op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pt_caa_dp_neon; | ||
165 | |||
166 | op_blend_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pt_c_dpan_neon; | ||
167 | op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pt_caa_dpan_neon; | ||
168 | } | ||
169 | #endif | ||
170 | /*-----*/ | ||
171 | |||
172 | /* blend_rel color -> dst */ | ||
173 | |||
174 | #ifdef BUILD_NEON | ||
/* blend_rel color span for the NEON table.  The body is plain C, identical to
 * the generic version: each pixel becomes
 * MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d) with alpha = 256 - (c >> 24).
 * s and m are unused; e is only handed to UNROLL8_PLD_WHILE. */
175 | static void | ||
176 | _op_blend_rel_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
177 | DATA32 *e; | ||
178 | int alpha = 256 - (c >> 24); | ||
179 | UNROLL8_PLD_WHILE(d, l, e, | ||
180 | { | ||
181 | *d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d); | ||
182 | d++; | ||
183 | }); | ||
184 | } | ||
185 | |||
/* caa shares the rel span function; the dpan destinations reuse the plain
 * (non-rel) NEON blend aliases. */
186 | #define _op_blend_rel_caa_dp_neon _op_blend_rel_c_dp_neon | ||
187 | |||
188 | #define _op_blend_rel_c_dpan_neon _op_blend_c_dpan_neon | ||
189 | #define _op_blend_rel_caa_dpan_neon _op_blend_caa_dpan_neon | ||
190 | |||
/* Store the NEON blend_rel color span functions in the CPU_NEON slots of
 * op_blend_rel_span_funcs. */
191 | static void | ||
192 | init_blend_rel_color_span_funcs_neon(void) | ||
193 | { | ||
194 | op_blend_rel_span_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_blend_rel_c_dp_neon; | ||
195 | op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_rel_caa_dp_neon; | ||
196 | |||
197 | op_blend_rel_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_c_dpan_neon; | ||
198 | op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_caa_dpan_neon; | ||
199 | } | ||
200 | #endif | ||
201 | |||
202 | #ifdef BUILD_NEON | ||
/* Single-pixel blend_rel for the NEON table; plain C, identical to the
 * generic version.  s is declared __UNUSED__ but is reused to hold *d >> 24;
 * its incoming value is ignored.  m is unused. */
203 | static void | ||
204 | _op_blend_rel_pt_c_dp_neon(DATA32 s __UNUSED__, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
205 | s = *d >> 24; | ||
206 | *d = MUL_SYM(s, c) + MUL_256(256 - (c >> 24), *d); | ||
207 | } | ||
208 | |||
/* caa shares the rel point function; the dpan destinations reuse the plain
 * (non-rel) NEON point aliases. */
209 | #define _op_blend_rel_pt_caa_dp_neon _op_blend_rel_pt_c_dp_neon | ||
210 | |||
211 | #define _op_blend_rel_pt_c_dpan_neon _op_blend_pt_c_dpan_neon | ||
212 | #define _op_blend_rel_pt_caa_dpan_neon _op_blend_pt_caa_dpan_neon | ||
213 | |||
/* Store the NEON blend_rel color point functions in the CPU_NEON slots of
 * op_blend_rel_pt_funcs. */
214 | static void | ||
215 | init_blend_rel_color_pt_funcs_neon(void) | ||
216 | { | ||
217 | op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP][CPU_NEON] = _op_blend_rel_pt_c_dp_neon; | ||
218 | op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_rel_pt_caa_dp_neon; | ||
219 | |||
220 | op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pt_c_dpan_neon; | ||
221 | op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pt_caa_dpan_neon; | ||
222 | } | ||
223 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_sse3.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_sse3.c new file mode 100644 index 0000000..64d5a86 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_color_sse3.c | |||
@@ -0,0 +1,166 @@ | |||
1 | /* blend color -> dst */ | ||
2 | |||
3 | #ifdef BUILD_SSE3 | ||
4 | |||
/* SSE3 version of the color -> dst span blend.
 * c and a = 256 - (c >> 24) are broadcast into all four 32-bit lanes once.
 * LOOP_ALIGNED_U1_A48_SSE3 is handed three bodies: UOP blends one pixel with
 * the scalar formula, A4OP blends 4 pixels and A8OP blends 8 pixels using
 * aligned 128-bit loads/stores.  Each body advances d and decrements l itself.
 * NOTE(review): which body runs when is decided inside the loop macro, which
 * is defined elsewhere.  s and m are unused. */
5 | static void | ||
6 | _op_blend_c_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
7 | |||
8 | DATA32 a = 256 - (c >> 24); | ||
9 | |||
10 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
11 | const __m128i a_packed = _mm_set_epi32(a, a, a, a); | ||
12 | |||
13 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
14 | { /* UOP */ | ||
15 | |||
16 | *d = c + MUL_256(a, *d); | ||
17 | d++; l--; | ||
18 | }, | ||
19 | { /* A4OP */ | ||
20 | |||
21 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
22 | |||
23 | d0 = mul_256_sse3(a_packed, d0); | ||
24 | d0 = _mm_add_epi32(d0, c_packed); | ||
25 | |||
26 | _mm_store_si128((__m128i *)d, d0); | ||
27 | |||
28 | d += 4; l -= 4; | ||
29 | }, | ||
30 | { /* A8OP */ | ||
31 | |||
32 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
33 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
34 | |||
35 | d0 = mul_256_sse3(a_packed, d0); | ||
36 | d1 = mul_256_sse3(a_packed, d1); | ||
37 | |||
38 | d0 = _mm_add_epi32(d0, c_packed); | ||
39 | d1 = _mm_add_epi32(d1, c_packed); | ||
40 | |||
41 | _mm_store_si128((__m128i *)d, d0); | ||
42 | _mm_store_si128((__m128i *)(d+4), d1); | ||
43 | |||
44 | d += 8; l -= 8; | ||
45 | }) | ||
46 | } | ||
47 | |||
/* The caa and dpan SSE3 variants are aliases of _op_blend_c_dp_sse3. */
48 | #define _op_blend_caa_dp_sse3 _op_blend_c_dp_sse3 | ||
49 | |||
50 | #define _op_blend_c_dpan_sse3 _op_blend_c_dp_sse3 | ||
51 | #define _op_blend_caa_dpan_sse3 _op_blend_c_dpan_sse3 | ||
52 | |||
/* Store the SSE3 color -> dst span functions in the CPU_SSE3 slots of
 * op_blend_span_funcs.  Only three of the four slots are written: the
 * [SC][DP_AN] registration is deliberately commented out (see the FIXME). */
53 | static void | ||
54 | init_blend_color_span_funcs_sse3(void) | ||
55 | { | ||
56 | op_blend_span_funcs[SP_N][SM_N][SC][DP][CPU_SSE3] = _op_blend_c_dp_sse3; | ||
57 | op_blend_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_caa_dp_sse3; | ||
58 | |||
59 | // FIXME: BUGGY BUGGY Core i5 750 (32bit), 4.5.2 (Ubuntu/Linaro 4.5.2-8ubuntu4), ello (text and rectangle) | ||
60 | // op_blend_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_c_dpan_sse3; | ||
61 | op_blend_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_caa_dpan_sse3; | ||
62 | } | ||
63 | |||
/* There is no SSE3 point implementation: the base name is defined as NULL and
 * every other point alias resolves to it. */
64 | #define _op_blend_pt_c_dp_sse3 NULL | ||
65 | #define _op_blend_pt_caa_dp_sse3 _op_blend_pt_c_dp_sse3 | ||
66 | |||
67 | #define _op_blend_pt_c_dpan_sse3 _op_blend_pt_c_dp_sse3 | ||
68 | #define _op_blend_pt_caa_dpan_sse3 _op_blend_pt_c_dpan_sse3 | ||
69 | |||
70 | #define _op_blend_pt_c_dpas_sse3 _op_blend_pt_c_dp_sse3 | ||
71 | #define _op_blend_pt_caa_dpas_sse3 _op_blend_pt_c_dp_sse3 | ||
72 | |||
/* Write the CPU_SSE3 slots of op_blend_pt_funcs; since the aliases above all
 * expand to NULL, the four slots are set to NULL.
 * NOTE(review): presumably the lookup code treats NULL as "no implementation
 * for this CPU" — confirm against the table's consumer. */
73 | static void | ||
74 | init_blend_color_pt_funcs_sse3(void) | ||
75 | { | ||
76 | op_blend_pt_funcs[SP_N][SM_N][SC][DP][CPU_SSE3] = _op_blend_pt_c_dp_sse3; | ||
77 | op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_pt_caa_dp_sse3; | ||
78 | |||
79 | op_blend_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_pt_c_dpan_sse3; | ||
80 | op_blend_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pt_caa_dpan_sse3; | ||
81 | } | ||
82 | |||
83 | |||
84 | /*-----*/ | ||
85 | |||
86 | /* blend_rel color -> dst */ | ||
87 | |||
/* SSE3 version of the blend_rel color span.
 * c and alpha = 256 - (c >> 24) are broadcast into all four lanes once.
 * UOP applies the scalar formula MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d).
 * A4OP / A8OP do the same for 4 / 8 pixels: the destination scaled by alpha
 * (mul_256_sse3) plus c scaled by each pixel's top byte, which is obtained
 * with a 24-bit logical right shift of every lane (mul_sym_sse3).
 * Each body advances d and decrements l itself.  s and m are unused. */
88 | static void | ||
89 | _op_blend_rel_c_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
90 | |||
91 | int alpha = 256 - (c >> 24); | ||
92 | |||
93 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
94 | const __m128i alpha_packed = _mm_set_epi32(alpha, alpha, alpha, alpha); | ||
95 | |||
96 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
97 | { /* UOP */ | ||
98 | |||
99 | *d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d); | ||
100 | d++; l--; | ||
101 | }, | ||
102 | { /* A4OP */ | ||
103 | |||
104 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
105 | |||
106 | __m128i mul0 = mul_256_sse3(alpha_packed, d0); | ||
107 | __m128i sym0 = mul_sym_sse3(_mm_srli_epi32(d0, 24), c_packed); | ||
108 | |||
109 | d0 = _mm_add_epi32(mul0, sym0); | ||
110 | |||
111 | _mm_store_si128((__m128i *)d, d0); | ||
112 | |||
113 | d += 4; l -= 4; | ||
114 | }, | ||
115 | { /* A8OP */ | ||
116 | |||
117 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
118 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
119 | |||
120 | __m128i mul0 = mul_256_sse3(alpha_packed, d0); | ||
121 | __m128i mul1 = mul_256_sse3(alpha_packed, d1); | ||
122 | |||
123 | __m128i sym0 = mul_sym_sse3(_mm_srli_epi32(d0, 24), c_packed); | ||
124 | __m128i sym1 = mul_sym_sse3(_mm_srli_epi32(d1, 24), c_packed); | ||
125 | |||
126 | d0 = _mm_add_epi32(mul0, sym0); | ||
127 | d1 = _mm_add_epi32(mul1, sym1); | ||
128 | |||
129 | _mm_store_si128((__m128i *)d, d0); | ||
130 | _mm_store_si128((__m128i *)(d+4), d1); | ||
131 | |||
132 | d += 8; l -= 8; | ||
133 | }) | ||
134 | } | ||
135 | |||
/* caa shares the rel SSE3 span function; the dpan destinations reuse the
 * plain (non-rel) SSE3 blend aliases. */
136 | #define _op_blend_rel_caa_dp_sse3 _op_blend_rel_c_dp_sse3 | ||
137 | #define _op_blend_rel_c_dpan_sse3 _op_blend_c_dpan_sse3 | ||
138 | #define _op_blend_rel_caa_dpan_sse3 _op_blend_caa_dpan_sse3 | ||
139 | |||
/* Store the SSE3 blend_rel color span functions in the CPU_SSE3 slots of
 * op_blend_rel_span_funcs (all four slots are registered here). */
140 | static void | ||
141 | init_blend_rel_color_span_funcs_sse3(void) | ||
142 | { | ||
143 | op_blend_rel_span_funcs[SP_N][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_c_dp_sse3; | ||
144 | op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_caa_dp_sse3; | ||
145 | |||
146 | op_blend_rel_span_funcs[SP_N][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_c_dpan_sse3; | ||
147 | op_blend_rel_span_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_caa_dpan_sse3; | ||
148 | } | ||
149 | |||
/* There is no SSE3 blend_rel point implementation: the base name is NULL and
 * the dpan aliases resolve to the (also NULL) plain point aliases. */
150 | #define _op_blend_rel_pt_c_dp_sse3 NULL | ||
151 | #define _op_blend_rel_pt_caa_dp_sse3 _op_blend_rel_pt_c_dp_sse3 | ||
152 | |||
153 | #define _op_blend_rel_pt_c_dpan_sse3 _op_blend_pt_c_dpan_sse3 | ||
154 | #define _op_blend_rel_pt_caa_dpan_sse3 _op_blend_pt_caa_dpan_sse3 | ||
155 | |||
/* Write the CPU_SSE3 slots of op_blend_rel_pt_funcs; with the aliases above
 * every slot is set to NULL. */
156 | static void | ||
157 | init_blend_rel_color_pt_funcs_sse3(void) | ||
158 | { | ||
159 | op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_pt_c_dp_sse3; | ||
160 | op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pt_caa_dp_sse3; | ||
161 | |||
162 | op_blend_rel_pt_funcs[SP_N][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pt_c_dpan_sse3; | ||
163 | op_blend_rel_pt_funcs[SP_N][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pt_caa_dpan_sse3; | ||
164 | } | ||
165 | |||
166 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_.c new file mode 100644 index 0000000..12a0a75 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_.c | |||
@@ -0,0 +1,185 @@ | |||
1 | /* blend mask x color -> dst */ | ||
2 | |||
3 | #ifdef BUILD_C | ||
/* Blend the color c through an 8-bit mask over l destination pixels.
 * For each pixel the mask byte *m selects one of three paths:
 *   0    - the destination is left untouched;
 *   255  - c is blended directly, using alpha = 256 - (c >> 24) computed once;
 *   else - c is first scaled by the mask (mc = MUL_SYM(a, c)) and mc is
 *          blended with a factor recomputed from mc's own top byte.
 * m and d advance together.  s is unused. */
4 | static void | ||
5 | _op_blend_mas_c_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
6 | DATA32 *e; | ||
7 | int alpha = 256 - (c >> 24); | ||
8 | UNROLL8_PLD_WHILE(d, l, e, | ||
9 | { | ||
10 | DATA32 a = *m; | ||
11 | switch(a) | ||
12 | { | ||
13 | case 0: | ||
14 | break; | ||
15 | case 255: | ||
16 | *d = c + MUL_256(alpha, *d); | ||
17 | break; | ||
18 | default: | ||
19 | { | ||
20 | DATA32 mc = MUL_SYM(a, c); | ||
21 | a = 256 - (mc >> 24); | ||
22 | *d = mc + MUL_256(a, *d); | ||
23 | } | ||
24 | break; | ||
25 | } | ||
26 | m++; d++; | ||
27 | }); | ||
28 | } | ||
29 | |||
/* Mask x color span blend used for the "can"/"cn" color cases (see the
 * aliases and table registration in this file).
 * Per pixel the mask byte selects: 0 - skip; 255 - overwrite the destination
 * with c; otherwise interpolate between c and the destination with
 * INTERP_256(mask + 1, c, *d).  Unlike _op_blend_mas_c_dp, the top byte of c
 * is never consulted here.  s is unused. */
30 | static void | ||
31 | _op_blend_mas_can_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
32 | DATA32 *e; | ||
33 | int alpha; | ||
34 | UNROLL8_PLD_WHILE(d, l, e, | ||
35 | { | ||
36 | alpha = *m; | ||
37 | switch(alpha) | ||
38 | { | ||
39 | case 0: | ||
40 | break; | ||
41 | case 255: | ||
42 | *d = c; | ||
43 | break; | ||
44 | default: | ||
45 | alpha++; | ||
46 | *d = INTERP_256(alpha, c, *d); | ||
47 | break; | ||
48 | } | ||
49 | m++; d++; | ||
50 | }); | ||
51 | } | ||
52 | |||
/* Aliases: cn shares the can implementation, caa shares the general c one,
 * and every dpan name maps to its dp counterpart. */
53 | #define _op_blend_mas_cn_dp _op_blend_mas_can_dp | ||
54 | #define _op_blend_mas_caa_dp _op_blend_mas_c_dp | ||
55 | |||
56 | #define _op_blend_mas_c_dpan _op_blend_mas_c_dp | ||
57 | #define _op_blend_mas_cn_dpan _op_blend_mas_cn_dp | ||
58 | #define _op_blend_mas_can_dpan _op_blend_mas_can_dp | ||
59 | #define _op_blend_mas_caa_dpan _op_blend_mas_caa_dp | ||
60 | |||
/* Store the C mask x color span functions in op_blend_span_funcs for the
 * [SP_N][SM_AS] slots: colors SC, SC_N, SC_AN, SC_AA; destinations DP, DP_AN. */
61 | static void | ||
62 | init_blend_mask_color_span_funcs_c(void) | ||
63 | { | ||
64 | op_blend_span_funcs[SP_N][SM_AS][SC][DP][CPU_C] = _op_blend_mas_c_dp; | ||
65 | op_blend_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_C] = _op_blend_mas_cn_dp; | ||
66 | op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_C] = _op_blend_mas_can_dp; | ||
67 | op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_C] = _op_blend_mas_caa_dp; | ||
68 | |||
69 | op_blend_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_C] = _op_blend_mas_c_dpan; | ||
70 | op_blend_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_mas_cn_dpan; | ||
71 | op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_C] = _op_blend_mas_can_dpan; | ||
72 | op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_C] = _op_blend_mas_caa_dpan; | ||
73 | } | ||
74 | #endif | ||
75 | |||
76 | #ifdef BUILD_C | ||
/* Single-pixel mask x color -> dst blend.
 * c is scaled by the mask value m (s = MUL_SYM(m, c); the incoming value of s
 * is ignored) and the scaled color is blended over *d with the factor
 * 256 - (s >> 24).
 * The factor ranges from 1 to 256, so it must not be stored in the 8-bit m:
 * 256 would wrap to 0 and a fully transparent result would wipe the
 * destination instead of leaving it unchanged.  The 32-bit c, which is no
 * longer needed once s is computed, holds it instead. */
77 | static void | ||
78 | _op_blend_pt_mas_c_dp(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) { | ||
79 | s = MUL_SYM(m, c); | ||
80 | c = 256 - (s >> 24); | ||
81 | *d = s + MUL_256(c, *d); | ||
82 | } | ||
83 | |||
/* Single-pixel mask x color blend for the "can"/"cn" cases: interpolates
 * between c and *d with INTERP_256(m + 1, c, *d).  Unlike the span version
 * there is no special-casing of mask values 0 and 255.  s is unused. */
84 | static void | ||
85 | _op_blend_pt_mas_can_dp(DATA32 s __UNUSED__, DATA8 m, DATA32 c, DATA32 *d) { | ||
86 | *d = INTERP_256(m + 1, c, *d); | ||
87 | } | ||
88 | |||
/* Aliases: cn shares the can point function, caa shares the general c one,
 * and every dpan name maps to its dp counterpart. */
89 | #define _op_blend_pt_mas_cn_dp _op_blend_pt_mas_can_dp | ||
90 | #define _op_blend_pt_mas_caa_dp _op_blend_pt_mas_c_dp | ||
91 | |||
92 | #define _op_blend_pt_mas_c_dpan _op_blend_pt_mas_c_dp | ||
93 | #define _op_blend_pt_mas_cn_dpan _op_blend_pt_mas_cn_dp | ||
94 | #define _op_blend_pt_mas_can_dpan _op_blend_pt_mas_can_dp | ||
95 | #define _op_blend_pt_mas_caa_dpan _op_blend_pt_mas_caa_dp | ||
96 | |||
/* Store the C mask x color point functions in op_blend_pt_funcs for the
 * [SP_N][SM_AS] slots: colors SC, SC_N, SC_AN, SC_AA; destinations DP, DP_AN. */
97 | static void | ||
98 | init_blend_mask_color_pt_funcs_c(void) | ||
99 | { | ||
100 | op_blend_pt_funcs[SP_N][SM_AS][SC][DP][CPU_C] = _op_blend_pt_mas_c_dp; | ||
101 | op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_C] = _op_blend_pt_mas_cn_dp; | ||
102 | op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_C] = _op_blend_pt_mas_can_dp; | ||
103 | op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_C] = _op_blend_pt_mas_caa_dp; | ||
104 | |||
105 | op_blend_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_C] = _op_blend_pt_mas_c_dpan; | ||
106 | op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_pt_mas_cn_dpan; | ||
107 | op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_C] = _op_blend_pt_mas_can_dpan; | ||
108 | op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_C] = _op_blend_pt_mas_caa_dpan; | ||
109 | } | ||
110 | #endif | ||
111 | |||
112 | /*-----*/ | ||
113 | |||
114 | /* blend_rel mask x color --> dst */ | ||
115 | |||
116 | #ifdef BUILD_C | ||
/* blend_rel span of mask x color onto l destination pixels, plain C.
 *   s - unused
 *   m - mask bytes, one per destination pixel
 *   c - color
 *   d - destination pixels, updated in place
 *   l - pixel count handed to UNROLL8_PLD_WHILE
 * Per pixel: mc = MUL_SYM(*m, c); the mc term is additionally scaled by
 * the destination's own top byte (*d >> 24), and the old destination is
 * scaled by 256 minus the top byte of mc.
 * NOTE(review): UNROLL8_PLD_WHILE is defined elsewhere; presumably it
 * runs the body once per pixel until d reaches d + l - confirm. */
117 | static void | ||
118 | _op_blend_rel_mas_c_dp(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
119 | DATA32 *e; | ||
120 | int alpha; | ||
121 | UNROLL8_PLD_WHILE(d, l, e, | ||
122 | { | ||
123 | DATA32 mc = MUL_SYM(*m, c); | ||
124 | alpha = 256 - (mc >> 24); | ||
125 | *d = MUL_SYM(*d >> 24, mc) + MUL_256(alpha, *d); | ||
126 | d++; | ||
127 | m++; | ||
128 | }); | ||
129 | } | ||
130 | |||
/* All blend_rel span colour variants share _op_blend_rel_mas_c_dp. */
131 | #define _op_blend_rel_mas_cn_dp _op_blend_rel_mas_c_dp | ||
132 | #define _op_blend_rel_mas_can_dp _op_blend_rel_mas_c_dp | ||
133 | #define _op_blend_rel_mas_caa_dp _op_blend_rel_mas_c_dp | ||
134 | |||
/* For the DP_AN destination the blend_rel names forward to the plain
 * (non-rel) blend span functions. */
135 | #define _op_blend_rel_mas_c_dpan _op_blend_mas_c_dpan | ||
136 | #define _op_blend_rel_mas_cn_dpan _op_blend_mas_cn_dpan | ||
137 | #define _op_blend_rel_mas_can_dpan _op_blend_mas_can_dpan | ||
138 | #define _op_blend_rel_mas_caa_dpan _op_blend_mas_caa_dpan | ||
139 | |||
/* Fill the CPU_C slots of op_blend_rel_span_funcs[SP_N][SM_AS] with the
 * plain-C blend_rel mask x color span blenders, for every SC* index and
 * for both the DP and DP_AN destination indices.  Each slot uses the
 * alias whose name matches its own indices (the SC_N slot uses the cn
 * alias), like the other init tables in this file. */
140 | static void | ||
141 | init_blend_rel_mask_color_span_funcs_c(void) | ||
142 | { | ||
143 | op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP][CPU_C] = _op_blend_rel_mas_c_dp; | ||
144 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_C] = _op_blend_rel_mas_cn_dp; | ||
145 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_C] = _op_blend_rel_mas_can_dp; | ||
146 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_C] = _op_blend_rel_mas_caa_dp; | ||
147 | |||
148 | op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_C] = _op_blend_rel_mas_c_dpan; | ||
149 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_rel_mas_cn_dpan; | ||
150 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_C] = _op_blend_rel_mas_can_dpan; | ||
151 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_C] = _op_blend_rel_mas_caa_dpan; | ||
152 | } | ||
153 | #endif | ||
154 | |||
155 | #ifdef BUILD_C | ||
/* Point blend_rel of mask x color onto *d, plain C.
 *   s - incoming value is ignored; reused to hold MUL_SYM(m, c)
 *   m - mask value
 *   c - color; afterwards reused to hold 256 minus the top byte of s
 *   d - destination pixel, updated in place
 * The s term is scaled by the destination's top byte before it is added
 * to the attenuated destination. */
156 | static void | ||
157 | _op_blend_rel_pt_mas_c_dp(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) { | ||
158 | s = MUL_SYM(m, c); | ||
159 | c = 256 - (s >> 24); | ||
160 | *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d); | ||
161 | } | ||
162 | |||
/* All blend_rel point colour variants share _op_blend_rel_pt_mas_c_dp. */
163 | #define _op_blend_rel_pt_mas_cn_dp _op_blend_rel_pt_mas_c_dp | ||
164 | #define _op_blend_rel_pt_mas_can_dp _op_blend_rel_pt_mas_c_dp | ||
165 | #define _op_blend_rel_pt_mas_caa_dp _op_blend_rel_pt_mas_c_dp | ||
166 | |||
/* For the DP_AN destination the blend_rel names forward to the plain
 * (non-rel) blend point functions. */
167 | #define _op_blend_rel_pt_mas_c_dpan _op_blend_pt_mas_c_dpan | ||
168 | #define _op_blend_rel_pt_mas_cn_dpan _op_blend_pt_mas_cn_dpan | ||
169 | #define _op_blend_rel_pt_mas_can_dpan _op_blend_pt_mas_can_dpan | ||
170 | #define _op_blend_rel_pt_mas_caa_dpan _op_blend_pt_mas_caa_dpan | ||
171 | |||
/* Fill the CPU_C slots of op_blend_rel_pt_funcs[SP_N][SM_AS] with the
 * plain-C blend_rel mask x color point blenders, for every SC* index
 * and for both the DP and DP_AN destination indices. */
172 | static void | ||
173 | init_blend_rel_mask_color_pt_funcs_c(void) | ||
174 | { | ||
175 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP][CPU_C] = _op_blend_rel_pt_mas_c_dp; | ||
176 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_C] = _op_blend_rel_pt_mas_cn_dp; | ||
177 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_C] = _op_blend_rel_pt_mas_can_dp; | ||
178 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_C] = _op_blend_rel_pt_mas_caa_dp; | ||
179 | |||
180 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_C] = _op_blend_rel_pt_mas_c_dpan; | ||
181 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_rel_pt_mas_cn_dpan; | ||
182 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_C] = _op_blend_rel_pt_mas_can_dpan; | ||
183 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_C] = _op_blend_rel_pt_mas_caa_dpan; | ||
184 | } | ||
185 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_i386.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_i386.c new file mode 100644 index 0000000..f8fe02e --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_i386.c | |||
@@ -0,0 +1,251 @@ | |||
1 | /* blend mask x color -> dst */ | ||
2 | |||
3 | #ifdef BUILD_MMX | ||
/* MMX span blend of mask x color onto l destination pixels.
 *   s - unused
 *   m - mask bytes, one per destination pixel
 *   c - color; after being loaded into mm2 it is reused to hold
 *       256 minus its own top byte, which is loaded into mm4
 *   d - destination pixels, updated in place
 *   l - pixel count; once e = d + l is computed, l is reused as the
 *       current mask value
 * Each mask byte selects a path: 0 leaves the pixel untouched, 255 uses
 * the values precomputed in mm2/mm4, anything else first scales the
 * color by (mask + 1).
 * NOTE(review): the MOV_* / MUL4_* macros are defined elsewhere; the
 * register roles described here are inferred from their names and
 * argument order - confirm against their definitions. */
4 | static void | ||
5 | _op_blend_mas_c_dp_mmx(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
6 | DATA32 *e = d + l; | ||
7 | pxor_r2r(mm0, mm0); | ||
8 | MOV_A2R(ALPHA_256, mm6) | ||
9 | MOV_P2R(c, mm2, mm0) | ||
10 | c = 256 - (c >> 24); | ||
11 | MOV_A2R(c, mm4) | ||
12 | while (d < e) { | ||
13 | l = *m; | ||
14 | switch(l) | ||
15 | { | ||
16 | case 0: | ||
17 | break; | ||
18 | case 255: | ||
19 | MOV_P2R(*d, mm1, mm0) | ||
20 | MUL4_256_R2R(mm4, mm1) | ||
21 | paddw_r2r(mm2, mm1); | ||
22 | MOV_R2P(mm1, *d, mm0) | ||
23 | break; | ||
24 | default: | ||
25 | l++; | ||
26 | MOV_A2R(l, mm3) | ||
27 | MUL4_256_R2R(mm2, mm3) | ||
28 | |||
/* mm7 = mm6 (loaded from ALPHA_256) minus the value MOV_RA2R takes from mm3 */
29 | MOV_RA2R(mm3, mm1) | ||
30 | movq_r2r(mm6, mm7); | ||
31 | psubw_r2r(mm1, mm7); | ||
32 | |||
33 | MOV_P2R(*d, mm1, mm0) | ||
34 | MUL4_256_R2R(mm7, mm1) | ||
35 | |||
36 | paddw_r2r(mm3, mm1); | ||
37 | MOV_R2P(mm1, *d, mm0) | ||
38 | break; | ||
39 | } | ||
40 | m++; d++; | ||
41 | } | ||
42 | } | ||
43 | |||
/* MMX span blend used for the SC_AN (and, via alias, SC_N) slots.
 *   s - unused
 *   m - mask bytes, one per destination pixel
 *   c - color
 *   d - destination pixels, updated in place
 *   l - pixel count; reused as the current mask value inside the loop
 * Mask 0 leaves the pixel untouched, mask 255 stores c directly, and
 * any other value goes through INTERP_256_R2R with (mask + 1).
 * NOTE(review): INTERP_256_R2R is defined elsewhere; its exact operand
 * semantics are not visible here - confirm. */
44 | static void | ||
45 | _op_blend_mas_can_dp_mmx(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
46 | DATA32 *e = d + l; | ||
47 | pxor_r2r(mm0, mm0); | ||
48 | MOV_P2R(c, mm2, mm0) | ||
49 | MOV_A2R(ALPHA_255, mm5) | ||
50 | while (d < e) { | ||
51 | l = *m; | ||
52 | switch(l) | ||
53 | { | ||
54 | case 0: | ||
55 | break; | ||
56 | case 255: | ||
57 | *d = c; | ||
58 | break; | ||
59 | default: | ||
60 | l++; | ||
61 | MOV_A2R(l, mm3) | ||
62 | MOV_P2R(*d, mm1, mm0) | ||
/* work on a copy of the color register so mm2 survives the iteration */
63 | movq_r2r(mm2, mm4); | ||
64 | INTERP_256_R2R(mm3, mm4, mm1, mm5) | ||
65 | MOV_R2P(mm1, *d, mm0) | ||
66 | break; | ||
67 | } | ||
68 | m++; d++; | ||
69 | } | ||
70 | } | ||
71 | |||
/* MMX span aliases: cn reuses the can routine, caa reuses the c routine. */
72 | #define _op_blend_mas_cn_dp_mmx _op_blend_mas_can_dp_mmx | ||
73 | #define _op_blend_mas_caa_dp_mmx _op_blend_mas_c_dp_mmx | ||
74 | |||
/* Each *_dpan name forwards to the matching *_dp name. */
75 | #define _op_blend_mas_c_dpan_mmx _op_blend_mas_c_dp_mmx | ||
76 | #define _op_blend_mas_cn_dpan_mmx _op_blend_mas_cn_dp_mmx | ||
77 | #define _op_blend_mas_can_dpan_mmx _op_blend_mas_can_dp_mmx | ||
78 | #define _op_blend_mas_caa_dpan_mmx _op_blend_mas_caa_dp_mmx | ||
79 | |||
/* Fill the CPU_MMX slots of op_blend_span_funcs[SP_N][SM_AS] with the
 * MMX mask x color span blenders, for every SC* index and for both the
 * DP and DP_AN destination indices. */
80 | static void | ||
81 | init_blend_mask_color_span_funcs_mmx(void) | ||
82 | { | ||
83 | op_blend_span_funcs[SP_N][SM_AS][SC][DP][CPU_MMX] = _op_blend_mas_c_dp_mmx; | ||
84 | op_blend_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_MMX] = _op_blend_mas_cn_dp_mmx; | ||
85 | op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_MMX] = _op_blend_mas_can_dp_mmx; | ||
86 | op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_MMX] = _op_blend_mas_caa_dp_mmx; | ||
87 | |||
88 | op_blend_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_MMX] = _op_blend_mas_c_dpan_mmx; | ||
89 | op_blend_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_mas_cn_dpan_mmx; | ||
90 | op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_MMX] = _op_blend_mas_can_dpan_mmx; | ||
91 | op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_MMX] = _op_blend_mas_caa_dpan_mmx; | ||
92 | } | ||
93 | #endif | ||
94 | |||
95 | #ifdef BUILD_MMX | ||
/* MMX point blend of mask x color onto *d.
 *   s - incoming value is ignored; reused to hold m + 1
 *   m - mask value
 *   c - color
 *   d - destination pixel, updated in place
 * Follows the same instruction sequence as the default (non-0, non-255)
 * case of _op_blend_mas_c_dp_mmx, except that mm6 is consumed directly
 * by psubw since only one pixel is processed. */
96 | static void | ||
97 | _op_blend_pt_mas_c_dp_mmx(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) { | ||
98 | s = m + 1; | ||
99 | MOV_A2R(s, mm3) | ||
100 | MOV_A2R(ALPHA_256, mm6) | ||
101 | pxor_r2r(mm0, mm0); | ||
102 | MOV_P2R(c, mm2, mm0) | ||
103 | MUL4_256_R2R(mm2, mm3) | ||
104 | |||
105 | MOV_RA2R(mm3, mm1) | ||
106 | psubw_r2r(mm1, mm6); | ||
107 | |||
108 | MOV_P2R(*d, mm1, mm0) | ||
109 | MUL4_256_R2R(mm6, mm1) | ||
110 | |||
111 | paddw_r2r(mm3, mm1); | ||
112 | MOV_R2P(mm1, *d, mm0) | ||
113 | } | ||
114 | |||
115 | |||
/* MMX point aliases: unlike the plain-C file there is no dedicated can
 * routine here, so cn, can and caa all reuse _op_blend_pt_mas_c_dp_mmx. */
116 | #define _op_blend_pt_mas_cn_dp_mmx _op_blend_pt_mas_c_dp_mmx | ||
117 | #define _op_blend_pt_mas_can_dp_mmx _op_blend_pt_mas_c_dp_mmx | ||
118 | #define _op_blend_pt_mas_caa_dp_mmx _op_blend_pt_mas_c_dp_mmx | ||
119 | |||
/* Each *_dpan name forwards to the matching *_dp name. */
120 | #define _op_blend_pt_mas_c_dpan_mmx _op_blend_pt_mas_c_dp_mmx | ||
121 | #define _op_blend_pt_mas_cn_dpan_mmx _op_blend_pt_mas_cn_dp_mmx | ||
122 | #define _op_blend_pt_mas_can_dpan_mmx _op_blend_pt_mas_can_dp_mmx | ||
123 | #define _op_blend_pt_mas_caa_dpan_mmx _op_blend_pt_mas_caa_dp_mmx | ||
124 | |||
/* Fill the CPU_MMX slots of op_blend_pt_funcs[SP_N][SM_AS] with the MMX
 * mask x color point blenders, for every SC* index and for both the DP
 * and DP_AN destination indices. */
125 | static void | ||
126 | init_blend_mask_color_pt_funcs_mmx(void) | ||
127 | { | ||
128 | op_blend_pt_funcs[SP_N][SM_AS][SC][DP][CPU_MMX] = _op_blend_pt_mas_c_dp_mmx; | ||
129 | op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_MMX] = _op_blend_pt_mas_cn_dp_mmx; | ||
130 | op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_MMX] = _op_blend_pt_mas_can_dp_mmx; | ||
131 | op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_MMX] = _op_blend_pt_mas_caa_dp_mmx; | ||
132 | |||
133 | op_blend_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_MMX] = _op_blend_pt_mas_c_dpan_mmx; | ||
134 | op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_pt_mas_cn_dpan_mmx; | ||
135 | op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_MMX] = _op_blend_pt_mas_can_dpan_mmx; | ||
136 | op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_MMX] = _op_blend_pt_mas_caa_dpan_mmx; | ||
137 | } | ||
138 | #endif | ||
139 | |||
140 | /*-----*/ | ||
141 | |||
142 | /* blend_rel mask x color -> dst */ | ||
143 | |||
144 | #ifdef BUILD_MMX | ||
/* MMX blend_rel span of mask x color onto l destination pixels.
 *   s - unused
 *   m - mask bytes, one per destination pixel
 *   c - color
 *   d - destination pixels, updated in place
 *   l - pixel count; reused as the current mask value inside the loop
 * Mask 0 leaves the pixel untouched; there is no 255 shortcut here.
 * Compared with the plain blend, the scaled color in mm3 is additionally
 * passed through MUL4_SYM_R2R together with mm4, which MOV_RA2R fills
 * from the destination pixel.
 * NOTE(review): macro semantics are inferred from names - confirm. */
145 | static void | ||
146 | _op_blend_rel_mas_c_dp_mmx(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
147 | DATA32 *e = d + l; | ||
148 | pxor_r2r(mm0, mm0); | ||
149 | MOV_A2R(ALPHA_256, mm6) | ||
150 | MOV_A2R(ALPHA_255, mm5) | ||
151 | MOV_P2R(c, mm2, mm0) | ||
152 | while (d < e) { | ||
153 | l = *m; | ||
154 | switch(l) | ||
155 | { | ||
156 | case 0: | ||
157 | break; | ||
158 | default: | ||
159 | l++; | ||
160 | MOV_A2R(l, mm3) | ||
161 | MUL4_256_R2R(mm2, mm3) | ||
162 | |||
163 | MOV_RA2R(mm3, mm1) | ||
164 | movq_r2r(mm6, mm7); | ||
165 | psubw_r2r(mm1, mm7); | ||
166 | |||
167 | MOV_P2R(*d, mm1, mm0) | ||
168 | MOV_RA2R(mm1, mm4) | ||
169 | MUL4_256_R2R(mm7, mm1) | ||
170 | |||
171 | MUL4_SYM_R2R(mm4, mm3, mm5) | ||
172 | |||
173 | paddw_r2r(mm3, mm1); | ||
174 | MOV_R2P(mm1, *d, mm0) | ||
175 | break; | ||
176 | } | ||
177 | m++; d++; | ||
178 | } | ||
179 | } | ||
180 | |||
/* All MMX blend_rel span colour variants share _op_blend_rel_mas_c_dp_mmx. */
181 | #define _op_blend_rel_mas_cn_dp_mmx _op_blend_rel_mas_c_dp_mmx | ||
182 | #define _op_blend_rel_mas_can_dp_mmx _op_blend_rel_mas_c_dp_mmx | ||
183 | #define _op_blend_rel_mas_caa_dp_mmx _op_blend_rel_mas_c_dp_mmx | ||
184 | |||
/* For the DP_AN destination the blend_rel names forward to the plain
 * (non-rel) MMX blend span functions. */
185 | #define _op_blend_rel_mas_c_dpan_mmx _op_blend_mas_c_dpan_mmx | ||
186 | #define _op_blend_rel_mas_cn_dpan_mmx _op_blend_mas_cn_dpan_mmx | ||
187 | #define _op_blend_rel_mas_can_dpan_mmx _op_blend_mas_can_dpan_mmx | ||
188 | #define _op_blend_rel_mas_caa_dpan_mmx _op_blend_mas_caa_dpan_mmx | ||
189 | |||
/* Fill the CPU_MMX slots of op_blend_rel_span_funcs[SP_N][SM_AS] with
 * the MMX blend_rel mask x color span blenders, for every SC* index and
 * for both the DP and DP_AN destination indices. */
190 | static void | ||
191 | init_blend_rel_mask_color_span_funcs_mmx(void) | ||
192 | { | ||
193 | op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP][CPU_MMX] = _op_blend_rel_mas_c_dp_mmx; | ||
194 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_MMX] = _op_blend_rel_mas_cn_dp_mmx; | ||
195 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_MMX] = _op_blend_rel_mas_can_dp_mmx; | ||
196 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_MMX] = _op_blend_rel_mas_caa_dp_mmx; | ||
197 | |||
198 | op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_MMX] = _op_blend_rel_mas_c_dpan_mmx; | ||
199 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_mas_cn_dpan_mmx; | ||
200 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_MMX] = _op_blend_rel_mas_can_dpan_mmx; | ||
201 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_MMX] = _op_blend_rel_mas_caa_dpan_mmx; | ||
202 | } | ||
203 | #endif | ||
204 | |||
205 | #ifdef BUILD_MMX | ||
/* MMX point blend_rel of mask x color onto *d.
 *   s - incoming value is ignored; reused to hold m + 1
 *   m - mask value
 *   c - color
 *   d - destination pixel, updated in place
 * Same instruction sequence as the non-zero case of
 * _op_blend_rel_mas_c_dp_mmx, applied once and without the mask == 0
 * early-out. */
206 | static void | ||
207 | _op_blend_rel_pt_mas_c_dp_mmx(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) { | ||
208 | pxor_r2r(mm0, mm0); | ||
209 | MOV_A2R(ALPHA_256, mm6) | ||
210 | MOV_A2R(ALPHA_255, mm5) | ||
211 | s = m + 1; | ||
212 | MOV_A2R(s, mm3) | ||
213 | MOV_P2R(c, mm2, mm0) | ||
214 | MUL4_256_R2R(mm2, mm3) | ||
215 | |||
216 | MOV_RA2R(mm3, mm1) | ||
217 | psubw_r2r(mm1, mm6); | ||
218 | |||
219 | MOV_P2R(*d, mm1, mm0) | ||
220 | MOV_RA2R(mm1, mm4) | ||
221 | MUL4_256_R2R(mm6, mm1) | ||
222 | |||
223 | MUL4_SYM_R2R(mm4, mm3, mm5) | ||
224 | |||
225 | paddw_r2r(mm3, mm1); | ||
226 | MOV_R2P(mm1, *d, mm0) | ||
227 | } | ||
228 | |||
/* All MMX blend_rel point colour variants share
 * _op_blend_rel_pt_mas_c_dp_mmx. */
229 | #define _op_blend_rel_pt_mas_cn_dp_mmx _op_blend_rel_pt_mas_c_dp_mmx | ||
230 | #define _op_blend_rel_pt_mas_can_dp_mmx _op_blend_rel_pt_mas_c_dp_mmx | ||
231 | #define _op_blend_rel_pt_mas_caa_dp_mmx _op_blend_rel_pt_mas_c_dp_mmx | ||
232 | |||
/* For the DP_AN destination the blend_rel names forward to the plain
 * (non-rel) MMX blend point functions. */
233 | #define _op_blend_rel_pt_mas_c_dpan_mmx _op_blend_pt_mas_c_dpan_mmx | ||
234 | #define _op_blend_rel_pt_mas_cn_dpan_mmx _op_blend_pt_mas_cn_dpan_mmx | ||
235 | #define _op_blend_rel_pt_mas_can_dpan_mmx _op_blend_pt_mas_can_dpan_mmx | ||
236 | #define _op_blend_rel_pt_mas_caa_dpan_mmx _op_blend_pt_mas_caa_dpan_mmx | ||
237 | |||
/* Fill the CPU_MMX slots of op_blend_rel_pt_funcs[SP_N][SM_AS] with the
 * MMX blend_rel mask x color point blenders, for every SC* index and
 * for both the DP and DP_AN destination indices. */
238 | static void | ||
239 | init_blend_rel_mask_color_pt_funcs_mmx(void) | ||
240 | { | ||
241 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP][CPU_MMX] = _op_blend_rel_pt_mas_c_dp_mmx; | ||
242 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_MMX] = _op_blend_rel_pt_mas_cn_dp_mmx; | ||
243 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_MMX] = _op_blend_rel_pt_mas_can_dp_mmx; | ||
244 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_MMX] = _op_blend_rel_pt_mas_caa_dp_mmx; | ||
245 | |||
246 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_MMX] = _op_blend_rel_pt_mas_c_dpan_mmx; | ||
247 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_pt_mas_cn_dpan_mmx; | ||
248 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_MMX] = _op_blend_rel_pt_mas_can_dpan_mmx; | ||
249 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_MMX] = _op_blend_rel_pt_mas_caa_dpan_mmx; | ||
250 | } | ||
251 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c new file mode 100644 index 0000000..f5eb480 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_neon.c | |||
@@ -0,0 +1,562 @@ | |||
/* Set NEONDEBUG to non-zero to enable call counting and the extra
 * consistency checks compiled into the asm blocks of this file. */
1 | #define NEONDEBUG 0 | ||
2 | |||
3 | |||
/* DEBUG_FNCOUNT(x): with NEONDEBUG on, keeps a per-call-site static
 * counter and prints file, line, function, the counter and the string
 * x followed by " optimised" whenever the pre-increment count is a
 * multiple of 10000.  x must be a string literal, since it is pasted
 * next to another literal.  With NEONDEBUG off it only evaluates x as
 * a void expression. */
4 | #if NEONDEBUG | ||
5 | #define DEBUG_FNCOUNT(x) \ | ||
6 | do { \ | ||
7 | static int _foo = 0; \ | ||
8 | if (_foo++%10000 ==0) \ | ||
9 | printf("%s %+d %s: %d (%s)\n",__FILE__,__LINE__,__FUNCTION__,\ | ||
10 | _foo, x " optimised");\ | ||
11 | } while (0) | ||
12 | #else | ||
13 | #define DEBUG_FNCOUNT(x) ((void)x) | ||
14 | #endif | ||
15 | |||
16 | |||
17 | /* blend mask x color -> dst */ | ||
18 | |||
19 | #ifdef BUILD_NEON | ||
/* NEON span blend of mask x color onto l destination pixels.
 *   s - unused
 *   m - mask bytes, one per destination pixel
 *   c - color, duplicated across q15
 *   d - destination pixels, updated in place
 *   l - pixel count
 * Structure of the asm, by label:
 *   singleloop / dualloop - handle one and then two leading pixels
 *       depending on the low address bits of d, so that the main loop
 *       starts on a 16-byte boundary;
 *   quadloops / fastloop / quadloopint - four pixels per iteration; a
 *       mask word of 0 skips the four pixels without touching d;
 *   loopout / dualloop2 / singleout - the remaining two and/or one
 *       trailing pixels.
 * The asm advances d and m and uses tmp and x as scratch, so these are
 * declared as read-write / early-clobber output operands instead of
 * inputs (input-only operands must not be modified), and "cc" is
 * listed because the code uses cmp/andS. */
20 | static void | ||
21 | _op_blend_mas_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
22 | DATA32 *e = d + l, *tmp; | ||
23 | DATA32 x; | ||
24 | DEBUG_FNCOUNT(""); | ||
25 | |||
26 | #define AP "blend_mas_c_dp_" | ||
27 | asm volatile ( | ||
28 | ".fpu neon \n\t" | ||
29 | " vdup.i32 q15, %[c] \n\t" | ||
30 | " vmov.i8 q14, #1 \n\t" | ||
31 | |||
32 | // If aligned already - straight to quads | ||
33 | " andS %[tmp], %[d],$0xf \n\t" | ||
34 | " beq "AP"quadloops \n\t" | ||
35 | |||
36 | " andS %[tmp], %[d],$0x4 \n\t" | ||
37 | " beq "AP"dualloop \n\t" | ||
38 | |||
39 | AP"singleloop: \n\t" | ||
40 | " vld1.8 d0[0], [%[m]]! \n\t" | ||
41 | " vld1.32 d4[0], [%[d]] \n\t" | ||
42 | " vdup.u8 d0, d0[0] \n\t" | ||
43 | " vmull.u8 q4, d0, d30 \n\t" | ||
44 | " vqrshrn.u16 d12, q4, #8 \n\t" | ||
45 | " vmvn.u16 d14, d12 \n\t" | ||
46 | " vshr.u32 d16, d14, #24 \n\t" | ||
47 | " vmul.u32 d16, d16, d28 \n\t" | ||
48 | " vmull.u8 q7, d16, d4 \n\t" | ||
49 | " vqrshrn.u16 d0, q7, #8 \n\t" | ||
50 | " vqadd.u8 d0, d0, d12 \n\t" | ||
51 | " vst1.32 d0[0], [%[d]]! \n\t" | ||
52 | |||
53 | // Can we go the fast path? | ||
54 | " andS %[tmp], %[d],$0xf \n\t" | ||
55 | " beq "AP"quadloops \n\t" | ||
56 | |||
57 | AP"dualloop: \n\t" | ||
58 | " sub %[tmp], %[e], %[d] \n\t" | ||
59 | " cmp %[tmp], #16 \n\t" | ||
60 | " blt "AP"loopout \n\t" | ||
61 | |||
62 | " vld1.16 d0[0], [%[m]]! \n\t" | ||
63 | " vldm %[d], {d4} \n\t" | ||
64 | " vmovl.u8 q0, d0 \n\t" | ||
65 | " vmovl.u8 q0, d0 \n\t" | ||
66 | " vmul.u32 q0, q14 \n\t" | ||
67 | " vmull.u8 q4, d0, d30 \n\t" | ||
68 | " vqrshrn.u16 d12, q4, #8 \n\t" | ||
69 | " vmvn.u16 d14, d12 \n\t" | ||
70 | " vshr.u32 d16, d14, #24 \n\t" | ||
71 | " vmul.u32 d16, d16, d28 \n\t" | ||
72 | " vmull.u8 q7, d16, d4 \n\t" | ||
73 | " vqrshrn.u16 d0, q7, #8 \n\t" | ||
74 | " vqadd.u8 q0, q0, q6 \n\t" | ||
75 | " vstm %[d]!, {d0} \n\t" | ||
76 | |||
77 | AP"quadloops: \n\t" | ||
78 | " sub %[tmp], %[e], %[d] \n\t" | ||
79 | " cmp %[tmp], #16 \n\t" | ||
80 | " blt "AP"loopout \n\t" | ||
81 | |||
82 | |||
83 | " sub %[tmp], %[e], #15 \n\t" | ||
84 | |||
85 | " sub %[d], #16 \n\t" | ||
86 | AP"fastloop:" | ||
87 | " add %[d], #16 \n\t" | ||
88 | " cmp %[tmp], %[d] \n\t" | ||
89 | " ble "AP"loopout \n\t" | ||
90 | AP"quadloopint: \n\t" | ||
91 | " ldr %[x], [%[m]] \n\t" | ||
92 | " add %[m], #4 \n\t" | ||
93 | " cmp %[x], #0 \n\t" | ||
94 | " beq "AP"fastloop \n\t" | ||
95 | " vmov.32 d0[0], %[x] \n\t" | ||
96 | " vldm %[d], {d4,d5} \n\t" | ||
97 | |||
98 | // Expand M: Fixme: Can we do this quicker? | ||
99 | " vmovl.u8 q0, d0 \n\t" | ||
100 | " vmovl.u8 q0, d0 \n\t" | ||
101 | " vmul.u32 q0, q14 \n\t" | ||
102 | |||
103 | // Multiply a * c | ||
104 | " vmull.u8 q4, d0, d30 \n\t" | ||
105 | " vmull.u8 q5, d1, d31 \n\t" | ||
106 | |||
107 | // Shorten | ||
108 | " vqrshrn.u16 d12, q4, #8 \n\t" | ||
109 | " vqrshrn.u16 d13, q5, #8 \n\t" | ||
110 | |||
111 | // extract negated alpha | ||
112 | " vmvn.u16 q7, q6 \n\t" | ||
113 | " vshr.u32 q8, q7, #24 \n\t" | ||
114 | " vmul.u32 q8, q8, q14 \n\t" | ||
115 | |||
116 | // Multiply | ||
117 | " vmull.u8 q7, d16, d4 \n\t" | ||
118 | " vmull.u8 q8, d17, d5 \n\t" | ||
119 | |||
120 | " vqrshrn.u16 d0, q7, #8 \n\t" | ||
121 | " vqrshrn.u16 d1, q8, #8 \n\t" | ||
122 | |||
123 | // Add | ||
124 | " vqadd.u8 q0, q0, q6 \n\t" | ||
125 | |||
126 | " vstm %[d]!, {d0,d1} \n\t" | ||
127 | |||
128 | " cmp %[tmp], %[d] \n\t" | ||
129 | " bhi "AP"quadloopint \n\t" | ||
130 | |||
131 | AP"loopout: \n\t" | ||
132 | #if NEONDEBUG | ||
133 | "cmp %[d], %[e] \n\t" | ||
134 | "ble "AP"foo \n\t" | ||
135 | "cmp %[tmp], %[m] \n\t" | ||
136 | "sub %[x], %[x] \n\t" | ||
137 | "vst1.32 d0[0], [%[x]] \n\t" | ||
138 | AP"foo: \n\t" | ||
139 | #endif | ||
140 | |||
141 | " cmp %[d], %[e] \n\t" | ||
142 | " beq "AP"done \n\t" | ||
143 | " sub %[tmp],%[e], %[d] \n\t" | ||
144 | " cmp %[tmp],#4 \n\t" | ||
145 | " beq "AP"singleout \n\t" | ||
146 | |||
147 | AP "dualloop2: \n\t" | ||
148 | "sub %[tmp],%[e],$0x8 \n\t" | ||
149 | " vld1.16 d0[0], [%[m]]! \n\t" | ||
150 | " vldm %[d], {d4} \n\t" | ||
151 | " vmovl.u8 q0, d0 \n\t" | ||
152 | " vmovl.u8 q0, d0 \n\t" | ||
153 | " vmul.u32 q0, q14 \n\t" | ||
154 | " vmull.u8 q4, d0, d30 \n\t" | ||
155 | " vqrshrn.u16 d12, q4, #8 \n\t" | ||
156 | " vmvn.u16 d14, d12 \n\t" | ||
157 | " vshr.u32 d16, d14, #24 \n\t" | ||
158 | " vmul.u32 d16, d16, d28 \n\t" | ||
159 | " vmull.u8 q7, d16, d4 \n\t" | ||
160 | " vqrshrn.u16 d0, q7, #8 \n\t" | ||
161 | " vqadd.u8 q0, q0, q6 \n\t" | ||
162 | " vstm %[d]!, {d0} \n\t" | ||
163 | |||
164 | " cmp %[e], %[d] \n\t" | ||
165 | " beq "AP"done \n\t" | ||
166 | |||
167 | AP"singleout: \n\t" | ||
168 | " vld1.8 d0[0], [%[m]]! \n\t" | ||
169 | " vld1.32 d4[0], [%[d]] \n\t" | ||
170 | " vdup.u8 d0, d0[0] \n\t" | ||
171 | " vmull.u8 q4, d0, d30 \n\t" | ||
172 | " vqrshrn.u16 d12, q4, #8 \n\t" | ||
173 | " vmvn.u16 d14, d12 \n\t" | ||
174 | " vshr.u32 d16, d14, #24 \n\t" | ||
175 | " vmul.u32 d16, d16, d28 \n\t" | ||
176 | " vmull.u8 q7, d16, d4 \n\t" | ||
177 | " vqrshrn.u16 d0, q7, #8 \n\t" | ||
178 | " vqadd.u8 q0, q0, q6 \n\t" | ||
179 | " vst1.32 d0[0], [%[d]]! \n\t" | ||
180 | |||
181 | AP"done: \n\t" | ||
182 | #if NEONDEBUG | ||
183 | "cmp %[d], %[e] \n\t" | ||
184 | "beq "AP"reallydone \n\t" | ||
185 | "sub %[tmp], %[tmp] \n\t" | ||
186 | "vst1.32 d0[0], [%[tmp]] \n\t" | ||
187 | AP"reallydone:" | ||
188 | #endif | ||
189 | : // Out: pointers the asm advances, plus scratch registers | ||
190 | [d] "+r" (d), [m] "+r" (m), [tmp] "=&r" (tmp), [x] "=&r" (x) | ||
191 | : [e] "r" (e), [c] "r" (c) | ||
192 | : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q8","q14","q15", | ||
193 | "cc", "memory" // clobbered | ||
194 | ); | ||
195 | #undef AP | ||
196 | } | ||
197 | #endif | ||
198 | |||
199 | #ifdef BUILD_NEON | ||
/* NEON span blend used for the SC_AN (and, via alias, SC_N) slots.
 *   s - unused
 *   m - mask bytes, one per destination pixel
 *   c - color, duplicated across q9 and widened to 16 bit in q2/q3
 *   d - destination pixels, updated in place
 *   l - pixel count
 * Per pixel the asm computes d + ((c - d) * (m >> 1) >> 7) in 16-bit
 * lanes and narrows the result with unsigned saturation.
 * Structure, by label: singleloop / dualloop align d to 16 bytes;
 * quadloopint handles four pixels per iteration, skipping them when the
 * mask word is 0 (fastloop) and storing c directly when it is
 * 0xffffffff (quadstore); loopout / onebyte handle the tail.
 * The asm advances d and m and uses tmp and x as scratch, so these are
 * declared as read-write / early-clobber output operands instead of
 * inputs (input-only operands must not be modified), and "cc" is
 * listed because the code uses cmp/andS. */
200 | static void | ||
201 | _op_blend_mas_can_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
202 | DATA32 *e = d + l, *tmp; | ||
203 | DATA32 x; | ||
204 | |||
205 | DEBUG_FNCOUNT(""); | ||
206 | |||
207 | #define AP "_blend_mas_can_dp_neon_" | ||
208 | asm volatile ( | ||
209 | ".fpu neon \n\t" | ||
210 | "vdup.u32 q9, %[c] \n\t" | ||
211 | "vmov.i8 q15, #1 \n\t" | ||
212 | "vmov.i8 q14, #0 \n\t" | ||
213 | |||
214 | // Make C 16 bit (C in q3/q2) | ||
215 | "vmovl.u8 q3, d19 \n\t" | ||
216 | "vmovl.u8 q2, d18 \n\t" | ||
217 | |||
218 | // Which loop to start | ||
219 | " andS %[tmp], %[d],$0xf \n\t" | ||
220 | " beq "AP"quadloop \n\t" | ||
221 | |||
222 | " andS %[tmp], %[d], #4 \n\t" | ||
223 | " beq "AP"dualstart \n\t" | ||
224 | |||
225 | |||
226 | AP"singleloop: \n\t" | ||
227 | " vld1.8 d0[0], [%[m]]! \n\t" | ||
228 | " vld1.32 d8[0], [%[d]] \n\t" | ||
229 | " vdup.u8 d0, d0[0] \n\t" | ||
230 | " vshr.u8 d0, d0, #1 \n\t" | ||
231 | " vmovl.u8 q0, d0 \n\t" | ||
232 | " vmovl.u8 q4, d8 \n\t" | ||
233 | " vsub.s16 q6, q2, q4 \n\t" | ||
234 | " vmul.s16 q6, q0 \n\t" | ||
235 | " vshr.s16 q6, #7 \n\t" | ||
236 | " vadd.s16 q6, q4 \n\t" | ||
237 | " vqmovun.s16 d2, q6 \n\t" | ||
238 | " vst1.32 d2[0], [%[d]]! \n\t" | ||
239 | |||
240 | " andS %[tmp], %[d], $0xf \n\t" | ||
241 | " beq "AP"quadloop \n\t" | ||
242 | |||
243 | AP"dualstart: \n\t" | ||
244 | " sub %[tmp], %[e], %[d] \n\t" | ||
245 | " cmp %[tmp], #16 \n\t" | ||
246 | " blt "AP"loopout \n\t" | ||
247 | |||
248 | AP"dualloop: \n\t" | ||
249 | " vld1.16 d0[0], [%[m]]! \n\t" | ||
250 | " vldm %[d], {d8} \n\t" | ||
251 | " vmovl.u8 q0, d0 \n\t" | ||
252 | " vmovl.u8 q0, d0 \n\t" | ||
253 | " vmul.u32 d0, d0, d30 \n\t" | ||
254 | " vshr.u8 d0, d0, #1 \n\t" | ||
255 | " vmovl.u8 q0, d0 \n\t" | ||
256 | " vmovl.u8 q4, d8 \n\t" | ||
257 | " vsub.s16 q6, q2, q4 \n\t" | ||
258 | " vmul.s16 q6, q0 \n\t" | ||
259 | " vshr.s16 q6, #7 \n\t" | ||
260 | " vadd.s16 q6, q4 \n\t" | ||
261 | " vqmovun.s16 d2, q6 \n\t" | ||
262 | " vstm %[d]!, {d2} \n\t" | ||
263 | |||
264 | AP"quadloop: \n\t" | ||
265 | " sub %[tmp], %[e], %[d] \n\t" | ||
266 | " cmp %[tmp], #16 \n\t" | ||
267 | " blt "AP"loopout \n\t" | ||
268 | " sub %[tmp], %[e], #15 \n\t" | ||
269 | |||
270 | " sub %[d], #16 \n\t" | ||
271 | AP"fastloop: \n\t" | ||
272 | " add %[d], #16 \n\t" | ||
273 | " cmp %[tmp], %[d] \n\t" | ||
274 | " blt "AP"loopout \n\t" | ||
275 | |||
276 | AP"quadloopint: \n\t" | ||
277 | // Load the mask: 4 bytes: It has d0/d1 | ||
278 | " ldr %[x], [%[m]] \n\t" | ||
279 | " add %[m], #4 \n\t" | ||
280 | |||
281 | // Check for shortcuts | ||
282 | " cmp %[x], #0 \n\t" | ||
283 | " beq "AP"fastloop \n\t" | ||
284 | |||
285 | " cmp %[x], $0xffffffff \n\t" | ||
286 | " beq "AP"quadstore \n\t" | ||
287 | |||
288 | " vmov.32 d0[0], %[x] \n\t" | ||
289 | // Load d into d8/d9 q4 | ||
290 | " vldm %[d], {d8,d9} \n\t" | ||
291 | |||
292 | // Get the alpha channel ready (m) | ||
293 | " vmovl.u8 q0, d0 \n\t" | ||
294 | " vmovl.u8 q0, d0 \n\t" | ||
295 | " vmul.u32 q0, q0,q15 \n\t" | ||
296 | // Lop a bit off to prevent overflow | ||
297 | " vshr.u8 q0, q0, #1 \n\t" | ||
298 | |||
299 | // Now make it 16 bit | ||
300 | " vmovl.u8 q1, d1 \n\t" | ||
301 | " vmovl.u8 q0, d0 \n\t" | ||
302 | |||
303 | // 16 bit 'd' | ||
304 | " vmovl.u8 q5, d9 \n\t" | ||
305 | " vmovl.u8 q4, d8 \n\t" | ||
306 | |||
307 | // Diff 'd' & 'c' | ||
308 | " vsub.s16 q7, q3, q5 \n\t" | ||
309 | " vsub.s16 q6, q2, q4 \n\t" | ||
310 | |||
311 | " vmul.s16 q7, q1 \n\t" | ||
312 | " vmul.s16 q6, q0 \n\t" | ||
313 | |||
314 | // Shift results a bit | ||
315 | " vshr.s16 q7, #7 \n\t" | ||
316 | " vshr.s16 q6, #7 \n\t" | ||
317 | |||
318 | // Add 'd' | ||
319 | " vadd.s16 q7, q5 \n\t" | ||
320 | " vadd.s16 q6, q4 \n\t" | ||
321 | |||
322 | // Make sure none are negative | ||
323 | " vqmovun.s16 d9, q7 \n\t" | ||
324 | " vqmovun.s16 d8, q6 \n\t" | ||
325 | |||
326 | " vstm %[d]!, {d8,d9} \n\t" | ||
327 | |||
328 | " cmp %[tmp], %[d] \n\t" | ||
329 | " bhi "AP"quadloopint \n\t" | ||
330 | " b "AP"loopout \n\t" | ||
331 | |||
332 | AP"quadstore: \n\t" | ||
333 | " vstm %[d]!, {d18,d19} \n\t" | ||
334 | " cmp %[tmp], %[d] \n\t" | ||
335 | " bhi "AP"quadloopint \n\t" | ||
336 | |||
337 | AP"loopout: \n\t" | ||
338 | #if NEONDEBUG | ||
339 | "cmp %[d], %[e] \n\t" | ||
340 | "ble "AP"foo \n\t" | ||
341 | "sub %[tmp], %[tmp] \n\t" | ||
342 | "vst1.32 d0[0], [%[tmp]] \n\t" | ||
343 | AP"foo: \n\t" | ||
344 | #endif | ||
345 | |||
346 | " cmp %[e], %[d] \n\t" | ||
347 | " beq "AP"done \n\t" | ||
348 | |||
349 | " sub %[tmp],%[e], %[d] \n\t" | ||
350 | " cmp %[tmp],#8 \n\t" | ||
351 | |||
352 | " blt "AP"onebyte \n\t" | ||
353 | |||
354 | // Load the mask: 2 bytes: It has d0 | ||
355 | " vld1.16 d0[0], [%[m]]! \n\t" | ||
356 | |||
357 | // Load d into d8/d9 q4 | ||
358 | " vldm %[d], {d8} \n\t" | ||
359 | |||
360 | // Get the alpha channel ready (m) | ||
361 | " vmovl.u8 q0, d0 \n\t" | ||
362 | " vmovl.u8 q0, d0 \n\t" | ||
363 | " vmul.u32 d0, d0, d30 \n\t" | ||
364 | // Lop a bit off to prevent overflow | ||
365 | " vshr.u8 d0, d0, #1 \n\t" | ||
366 | |||
367 | // Now make it 16 bit | ||
368 | " vmovl.u8 q0, d0 \n\t" | ||
369 | |||
370 | // 16 bit 'd' | ||
371 | " vmovl.u8 q4, d8 \n\t" | ||
372 | |||
373 | // Diff 'd' & 'c' | ||
374 | " vsub.s16 q6, q2, q4 \n\t" | ||
375 | |||
376 | " vmul.s16 q6, q0 \n\t" | ||
377 | |||
378 | // Shift results a bit | ||
379 | " vshr.s16 q6, #7 \n\t" | ||
380 | |||
381 | // Add 'd' | ||
382 | "vadd.s16 q6, q4 \n\t" | ||
383 | |||
384 | // Make sure none are negative | ||
385 | "vqmovun.s16 d2, q6 \n\t" | ||
386 | |||
387 | "vstm %[d]!, {d2} \n\t" | ||
388 | |||
389 | "cmp %[e], %[d] \n\t" | ||
390 | "beq "AP"done \n\t" | ||
391 | |||
392 | AP"onebyte: \n\t" | ||
393 | "vld1.8 d0[0], [%[m]]! \n\t" | ||
394 | "vld1.32 d8[0], [%[d]] \n\t" | ||
395 | "vdup.u8 d0, d0[0] \n\t" | ||
396 | "vshr.u8 d0, d0, #1 \n\t" | ||
397 | "vmovl.u8 q0, d0 \n\t" | ||
398 | "vmovl.u8 q4, d8 \n\t" | ||
399 | "vsub.s16 q6, q2, q4 \n\t" | ||
400 | "vmul.s16 q6, q0 \n\t" | ||
401 | "vshr.s16 q6, #7 \n\t" | ||
402 | "vadd.s16 q6, q4 \n\t" | ||
403 | "vqmovun.s16 d2, q6 \n\t" | ||
404 | "vst1.32 d2[0], [%[d]]! \n\t" | ||
405 | |||
406 | |||
407 | AP"done: \n\t" | ||
408 | #if NEONDEBUG | ||
409 | "cmp %[d], %[e] \n\t" | ||
410 | "beq "AP"reallydone \n\t" | ||
411 | "sub %[m], %[m] \n\t" | ||
412 | "vst1.32 d0[0], [%[m]] \n\t" | ||
413 | AP"reallydone:" | ||
414 | #endif | ||
415 | |||
416 | |||
417 | : // output regs: pointers the asm advances, plus scratch | ||
418 | [d] "+r" (d), [m] "+r" (m), [tmp] "=&r" (tmp), [x] "=&r" (x) | ||
419 | : // Input | ||
420 | [e] "r" (e), [c] "r" (c) | ||
421 | : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q9","q14","q15", | ||
422 | "cc", "memory" // clobbered | ||
423 | |||
424 | ); | ||
425 | #undef AP | ||
426 | } | ||
427 | #endif | ||
428 | |||
429 | #ifdef BUILD_NEON | ||
/* NEON span aliases: cn reuses the can routine, caa reuses the c routine. */
430 | #define _op_blend_mas_cn_dp_neon _op_blend_mas_can_dp_neon | ||
431 | #define _op_blend_mas_caa_dp_neon _op_blend_mas_c_dp_neon | ||
432 | |||
/* Each *_dpan name forwards to the matching *_dp name. */
433 | #define _op_blend_mas_c_dpan_neon _op_blend_mas_c_dp_neon | ||
434 | #define _op_blend_mas_cn_dpan_neon _op_blend_mas_cn_dp_neon | ||
435 | #define _op_blend_mas_can_dpan_neon _op_blend_mas_can_dp_neon | ||
436 | #define _op_blend_mas_caa_dpan_neon _op_blend_mas_caa_dp_neon | ||
437 | |||
/* Fill the CPU_NEON slots of op_blend_span_funcs[SP_N][SM_AS] with the
 * NEON mask x color span blenders, for every SC* index and for both the
 * DP and DP_AN destination indices. */
438 | static void | ||
439 | init_blend_mask_color_span_funcs_neon(void) | ||
440 | { | ||
441 | op_blend_span_funcs[SP_N][SM_AS][SC][DP][CPU_NEON] = _op_blend_mas_c_dp_neon; | ||
442 | op_blend_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_mas_cn_dp_neon; | ||
443 | op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_NEON] = _op_blend_mas_can_dp_neon; | ||
444 | op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_NEON] = _op_blend_mas_caa_dp_neon; | ||
445 | |||
446 | op_blend_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_NEON] = _op_blend_mas_c_dpan_neon; | ||
447 | op_blend_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_mas_cn_dpan_neon; | ||
448 | op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_NEON] = _op_blend_mas_can_dpan_neon; | ||
449 | op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_NEON] = _op_blend_mas_caa_dpan_neon; | ||
450 | } | ||
451 | #endif | ||
452 | |||
453 | #ifdef BUILD_NEON | ||
/* Point blend of mask x color onto *d for the CPU_NEON table (written
 * in plain C).
 *   s - incoming value is ignored; reused to hold MUL_SYM(m, c)
 *   m - mask value
 *   c - color; afterwards reused to hold 256 minus the top byte of s
 *   d - destination pixel, updated in place
 * This is the plain blend, so s is added unscaled, as in the plain-C
 * _op_blend_pt_mas_c_dp.  Scaling s by the destination's top byte is
 * the blend_rel formula and belongs only to
 * _op_blend_rel_pt_mas_c_dp_neon. */
454 | static void | ||
455 | _op_blend_pt_mas_c_dp_neon(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) { | ||
456 | s = MUL_SYM(m, c); | ||
457 | c = 256 - (s >> 24); | ||
458 | *d = s + MUL_256(c, *d); | ||
459 | } | ||
460 | |||
461 | |||
/* NEON point aliases: cn, can and caa all reuse
 * _op_blend_pt_mas_c_dp_neon. */
462 | #define _op_blend_pt_mas_cn_dp_neon _op_blend_pt_mas_c_dp_neon | ||
463 | #define _op_blend_pt_mas_can_dp_neon _op_blend_pt_mas_c_dp_neon | ||
464 | #define _op_blend_pt_mas_caa_dp_neon _op_blend_pt_mas_c_dp_neon | ||
465 | |||
/* Each *_dpan name forwards to the matching *_dp name. */
466 | #define _op_blend_pt_mas_c_dpan_neon _op_blend_pt_mas_c_dp_neon | ||
467 | #define _op_blend_pt_mas_cn_dpan_neon _op_blend_pt_mas_cn_dp_neon | ||
468 | #define _op_blend_pt_mas_can_dpan_neon _op_blend_pt_mas_can_dp_neon | ||
469 | #define _op_blend_pt_mas_caa_dpan_neon _op_blend_pt_mas_caa_dp_neon | ||
470 | |||
/* Fill the CPU_NEON slots of op_blend_pt_funcs[SP_N][SM_AS] with the
 * NEON-table mask x color point blenders, for every SC* index and for
 * both the DP and DP_AN destination indices. */
471 | static void | ||
472 | init_blend_mask_color_pt_funcs_neon(void) | ||
473 | { | ||
474 | op_blend_pt_funcs[SP_N][SM_AS][SC][DP][CPU_NEON] = _op_blend_pt_mas_c_dp_neon; | ||
475 | op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_pt_mas_cn_dp_neon; | ||
476 | op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_NEON] = _op_blend_pt_mas_can_dp_neon; | ||
477 | op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_NEON] = _op_blend_pt_mas_caa_dp_neon; | ||
478 | |||
479 | op_blend_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_NEON] = _op_blend_pt_mas_c_dpan_neon; | ||
480 | op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_pt_mas_cn_dpan_neon; | ||
481 | op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_NEON] = _op_blend_pt_mas_can_dpan_neon; | ||
482 | op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_NEON] = _op_blend_pt_mas_caa_dpan_neon; | ||
483 | } | ||
484 | #endif | ||
485 | |||
486 | /*-----*/ | ||
487 | |||
488 | /* blend_rel mask x color -> dst */ | ||
489 | |||
490 | #ifdef BUILD_NEON | ||
/* blend_rel span of mask x color for the CPU_NEON table.  The body is
 * plain C with the same per-pixel formula as _op_blend_rel_mas_c_dp;
 * DEBUG_FNCOUNT("not") tags it as "not optimised" in debug output.
 *   s - unused
 *   m - mask bytes, one per destination pixel
 *   c - color
 *   d - destination pixels, updated in place
 *   l - pixel count handed to UNROLL8_PLD_WHILE */
491 | static void | ||
492 | _op_blend_rel_mas_c_dp_neon(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
493 | DATA32 *e; | ||
494 | int alpha; | ||
495 | |||
496 | DEBUG_FNCOUNT("not"); | ||
497 | |||
498 | UNROLL8_PLD_WHILE(d, l, e, | ||
499 | { | ||
500 | DATA32 mc = MUL_SYM(*m, c); | ||
501 | alpha = 256 - (mc >> 24); | ||
502 | *d = MUL_SYM(*d >> 24, mc) + MUL_256(alpha, *d); | ||
503 | d++; | ||
504 | m++; | ||
505 | }); | ||
506 | } | ||
507 | |||
/* All NEON blend_rel span colour variants share
 * _op_blend_rel_mas_c_dp_neon. */
508 | #define _op_blend_rel_mas_cn_dp_neon _op_blend_rel_mas_c_dp_neon | ||
509 | #define _op_blend_rel_mas_can_dp_neon _op_blend_rel_mas_c_dp_neon | ||
510 | #define _op_blend_rel_mas_caa_dp_neon _op_blend_rel_mas_c_dp_neon | ||
511 | |||
/* For the DP_AN destination the blend_rel names forward to the plain
 * (non-rel) NEON blend span functions. */
512 | #define _op_blend_rel_mas_c_dpan_neon _op_blend_mas_c_dpan_neon | ||
513 | #define _op_blend_rel_mas_cn_dpan_neon _op_blend_mas_cn_dpan_neon | ||
514 | #define _op_blend_rel_mas_can_dpan_neon _op_blend_mas_can_dpan_neon | ||
515 | #define _op_blend_rel_mas_caa_dpan_neon _op_blend_mas_caa_dpan_neon | ||
516 | |||
/* Fill the CPU_NEON slots of op_blend_rel_span_funcs[SP_N][SM_AS] with
 * the NEON-table blend_rel mask x color span blenders, for every SC*
 * index and for both the DP and DP_AN destination indices. */
517 | static void | ||
518 | init_blend_rel_mask_color_span_funcs_neon(void) | ||
519 | { | ||
520 | op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP][CPU_NEON] = _op_blend_rel_mas_c_dp_neon; | ||
521 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_rel_mas_cn_dp_neon; | ||
522 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_NEON] = _op_blend_rel_mas_can_dp_neon; | ||
523 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_NEON] = _op_blend_rel_mas_caa_dp_neon; | ||
524 | |||
525 | op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_NEON] = _op_blend_rel_mas_c_dpan_neon; | ||
526 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_mas_cn_dpan_neon; | ||
527 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_mas_can_dpan_neon; | ||
528 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_mas_caa_dpan_neon; | ||
529 | } | ||
530 | #endif | ||
531 | |||
532 | #ifdef BUILD_NEON | ||
/* Point blend_rel of mask x color onto *d for the CPU_NEON table
 * (plain C, same statements as _op_blend_rel_pt_mas_c_dp).
 *   s - incoming value is ignored; reused to hold MUL_SYM(m, c)
 *   m - mask value
 *   c - color; afterwards reused to hold 256 minus the top byte of s
 *   d - destination pixel, updated in place */
533 | static void | ||
534 | _op_blend_rel_pt_mas_c_dp_neon(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) { | ||
535 | s = MUL_SYM(m, c); | ||
536 | c = 256 - (s >> 24); | ||
537 | *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d); | ||
538 | } | ||
539 | |||
/* All NEON blend_rel point colour variants share
 * _op_blend_rel_pt_mas_c_dp_neon. */
540 | #define _op_blend_rel_pt_mas_cn_dp_neon _op_blend_rel_pt_mas_c_dp_neon | ||
541 | #define _op_blend_rel_pt_mas_can_dp_neon _op_blend_rel_pt_mas_c_dp_neon | ||
542 | #define _op_blend_rel_pt_mas_caa_dp_neon _op_blend_rel_pt_mas_c_dp_neon | ||
543 | |||
/* For the DP_AN destination the blend_rel names forward to the plain
 * (non-rel) NEON blend point functions. */
544 | #define _op_blend_rel_pt_mas_c_dpan_neon _op_blend_pt_mas_c_dpan_neon | ||
545 | #define _op_blend_rel_pt_mas_cn_dpan_neon _op_blend_pt_mas_cn_dpan_neon | ||
546 | #define _op_blend_rel_pt_mas_can_dpan_neon _op_blend_pt_mas_can_dpan_neon | ||
547 | #define _op_blend_rel_pt_mas_caa_dpan_neon _op_blend_pt_mas_caa_dpan_neon | ||
548 | |||
/* Fill the CPU_NEON slots of op_blend_rel_pt_funcs[SP_N][SM_AS] with
 * the NEON-table blend_rel mask x color point blenders, for every SC*
 * index and for both the DP and DP_AN destination indices. */
549 | static void | ||
550 | init_blend_rel_mask_color_pt_funcs_neon(void) | ||
551 | { | ||
552 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP][CPU_NEON] = _op_blend_rel_pt_mas_c_dp_neon; | ||
553 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_rel_pt_mas_cn_dp_neon; | ||
554 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_NEON] = _op_blend_rel_pt_mas_can_dp_neon; | ||
555 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_NEON] = _op_blend_rel_pt_mas_caa_dp_neon; | ||
556 | |||
557 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_NEON] = _op_blend_rel_pt_mas_c_dpan_neon; | ||
558 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_pt_mas_cn_dpan_neon; | ||
559 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_pt_mas_can_dpan_neon; | ||
560 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pt_mas_caa_dpan_neon; | ||
561 | } | ||
562 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c new file mode 100644 index 0000000..83230e5 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_mask_color_sse3.c | |||
@@ -0,0 +1,320 @@ | |||
1 | /* blend mask x color -> dst */ | ||
2 | |||
3 | #ifdef BUILD_SSE3 | ||
4 | |||
5 | static void | ||
6 | _op_blend_mas_c_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
7 | |||
8 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
9 | |||
10 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
11 | { /* UOP */ | ||
12 | |||
13 | DATA32 a = *m; | ||
14 | DATA32 mc = MUL_SYM(a, c); | ||
15 | a = 256 - (mc >> 24); | ||
16 | *d = mc + MUL_256(a, *d); | ||
17 | m++; d++; l--; | ||
18 | }, | ||
19 | { /* A4OP */ | ||
20 | |||
21 | if ((m[3] | m[2] | m[1] | m[0]) == 0) { | ||
22 | m += 4; d += 4; l -= 4; | ||
23 | continue; | ||
24 | } | ||
25 | |||
26 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
27 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
28 | |||
29 | __m128i mc0 = mul_sym_sse3(m0, c_packed); | ||
30 | __m128i a0 = sub4_alpha_sse3(mc0); | ||
31 | __m128i mul0 = mul_256_sse3(a0, d0); | ||
32 | |||
33 | mul0 = _mm_add_epi32(mul0, mc0); | ||
34 | |||
35 | _mm_store_si128((__m128i *)d, mul0); | ||
36 | |||
37 | m += 4; d += 4; l -= 4; | ||
38 | }, | ||
39 | { /* A8OP */ | ||
40 | |||
41 | if((m[7] | m[6] | m[5] | m[4] | m[3] | m[2] | m[1] | m[0]) == 0) { | ||
42 | m += 8; d += 8; l -= 8; | ||
43 | continue; | ||
44 | } | ||
45 | |||
46 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
47 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
48 | |||
49 | __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]); | ||
50 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
51 | |||
52 | __m128i mc0 = mul_sym_sse3(m0, c_packed); | ||
53 | __m128i a0 = sub4_alpha_sse3(mc0); | ||
54 | __m128i mul0 = mul_256_sse3(a0, d0); | ||
55 | |||
56 | mul0 = _mm_add_epi32(mc0, mul0); | ||
57 | |||
58 | __m128i mc1 = mul_sym_sse3(m1, c_packed); | ||
59 | __m128i a1 = sub4_alpha_sse3(mc1); | ||
60 | __m128i mul1 = mul_256_sse3(a1, d1); | ||
61 | |||
62 | mul1 = _mm_add_epi32(mc1, mul1); | ||
63 | |||
64 | _mm_store_si128((__m128i *)d, mul0); | ||
65 | _mm_store_si128((__m128i *)(d+4), mul1); | ||
66 | |||
67 | m += 8; d += 8; l -= 8; | ||
68 | }) | ||
69 | } | ||
70 | |||
71 | static void | ||
72 | _op_blend_mas_can_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
73 | |||
74 | DATA32 alpha; | ||
75 | |||
76 | const __m128i one = _mm_set_epi32(1, 1, 1, 1); | ||
77 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
78 | |||
79 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
80 | { /* UOP */ | ||
81 | |||
82 | alpha = *m; | ||
83 | switch(alpha) | ||
84 | { | ||
85 | case 0: | ||
86 | break; | ||
87 | case 255: | ||
88 | *d = c; | ||
89 | break; | ||
90 | default: | ||
91 | alpha++; | ||
92 | *d = INTERP_256(alpha, c, *d); | ||
93 | break; | ||
94 | } | ||
95 | m++; d++; l--; | ||
96 | }, | ||
97 | { /* A4OP */ | ||
98 | |||
99 | if ((m[3] | m[2] | m[1] | m[0]) == 0) { | ||
100 | m += 4; d += 4; l -= 4; | ||
101 | continue; | ||
102 | } | ||
103 | |||
104 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
105 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
106 | |||
107 | __m128i zm0 = _mm_cmpeq_epi32(m0, _mm_setzero_si128()); | ||
108 | |||
109 | m0 = _mm_add_epi32(one, m0); | ||
110 | |||
111 | __m128i r0 = interp4_256_sse3(m0, c_packed, d0); | ||
112 | |||
113 | r0 = _mm_and_si128(~zm0, r0); | ||
114 | d0 = _mm_and_si128(zm0, d0); | ||
115 | |||
116 | d0 = _mm_add_epi32(r0, d0); | ||
117 | |||
118 | _mm_store_si128((__m128i *)d, d0); | ||
119 | |||
120 | m += 4; d += 4; l -= 4; | ||
121 | }, | ||
122 | { /* A8OP */ | ||
123 | |||
124 | if ((m[7] | m[6] | m[5] | m[4] | m[3] | m[2] | m[1] | m[0]) == 0) { | ||
125 | m += 8; d += 8; l -= 8; | ||
126 | continue; | ||
127 | } | ||
128 | |||
129 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
130 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
131 | |||
132 | __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]); | ||
133 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
134 | |||
135 | __m128i zm0 = _mm_cmpeq_epi32(m0, _mm_setzero_si128()); | ||
136 | __m128i zm1 = _mm_cmpeq_epi32(m1, _mm_setzero_si128()); | ||
137 | |||
138 | m0 = _mm_add_epi32(one, m0); | ||
139 | m1 = _mm_add_epi32(one, m1); | ||
140 | |||
141 | __m128i r0 = interp4_256_sse3(m0, c_packed, d0); | ||
142 | __m128i r1 = interp4_256_sse3(m1, c_packed, d1); | ||
143 | |||
144 | r0 = _mm_and_si128(~zm0, r0); | ||
145 | d0 = _mm_and_si128(zm0, d0); | ||
146 | |||
147 | r1 = _mm_and_si128(~zm1, r1); | ||
148 | d1 = _mm_and_si128(zm1, d1); | ||
149 | |||
150 | d0 = _mm_add_epi32(d0, r0); | ||
151 | d1 = _mm_add_epi32(d1, r1); | ||
152 | |||
153 | _mm_store_si128((__m128i *)d, d0); | ||
154 | _mm_store_si128((__m128i *)(d+4), d1); | ||
155 | |||
156 | m += 8; d += 8; l -= 8; | ||
157 | }) | ||
158 | } | ||
159 | |||
160 | #define _op_blend_mas_cn_dp_sse3 _op_blend_mas_can_dp_sse3 | ||
161 | #define _op_blend_mas_caa_dp_sse3 _op_blend_mas_c_dp_sse3 | ||
162 | |||
163 | #define _op_blend_mas_c_dpan_sse3 _op_blend_mas_c_dp_sse3 | ||
164 | #define _op_blend_mas_cn_dpan_sse3 _op_blend_mas_cn_dp_sse3 | ||
165 | #define _op_blend_mas_can_dpan_sse3 _op_blend_mas_can_dp_sse3 | ||
166 | #define _op_blend_mas_caa_dpan_sse3 _op_blend_mas_caa_dp_sse3 | ||
167 | |||
168 | static void | ||
169 | init_blend_mask_color_span_funcs_sse3(void) | ||
170 | { | ||
171 | op_blend_span_funcs[SP_N][SM_AS][SC][DP][CPU_SSE3] = _op_blend_mas_c_dp_sse3; | ||
172 | op_blend_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_mas_cn_dp_sse3; | ||
173 | op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_SSE3] = _op_blend_mas_can_dp_sse3; | ||
174 | op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_SSE3] = _op_blend_mas_caa_dp_sse3; | ||
175 | |||
176 | // FIXME: BUGGY BUGGY Core i5 2500 (64bit), gcc version 4.5.2 (Ubuntu/Linaro 4.5.2-8ubuntu4), ello (text) | ||
177 | // op_blend_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_SSE3] = _op_blend_mas_c_dpan_sse3; | ||
178 | op_blend_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_mas_cn_dpan_sse3; | ||
179 | op_blend_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_SSE3] = _op_blend_mas_can_dpan_sse3; | ||
180 | op_blend_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_SSE3] = _op_blend_mas_caa_dpan_sse3; | ||
181 | } | ||
182 | |||
183 | #define _op_blend_pt_mas_c_dp_sse3 NULL | ||
184 | #define _op_blend_pt_mas_can_dp_sse3 NULL | ||
185 | |||
186 | #define _op_blend_pt_mas_cn_dp_sse3 _op_blend_pt_mas_can_dp_sse3 | ||
187 | #define _op_blend_pt_mas_caa_dp_sse3 _op_blend_pt_mas_c_dp_sse3 | ||
188 | |||
189 | #define _op_blend_pt_mas_c_dpan_sse3 _op_blend_pt_mas_c_dp_sse3 | ||
190 | #define _op_blend_pt_mas_cn_dpan_sse3 _op_blend_pt_mas_cn_dp_sse3 | ||
191 | #define _op_blend_pt_mas_can_dpan_sse3 _op_blend_pt_mas_can_dp_sse3 | ||
192 | #define _op_blend_pt_mas_caa_dpan_sse3 _op_blend_pt_mas_caa_dp_sse3 | ||
193 | |||
194 | static void | ||
195 | init_blend_mask_color_pt_funcs_sse3(void) | ||
196 | { | ||
197 | op_blend_pt_funcs[SP_N][SM_AS][SC][DP][CPU_SSE3] = _op_blend_pt_mas_c_dp_sse3; | ||
198 | op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pt_mas_cn_dp_sse3; | ||
199 | op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_SSE3] = _op_blend_pt_mas_can_dp_sse3; | ||
200 | op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_SSE3] = _op_blend_pt_mas_caa_dp_sse3; | ||
201 | |||
202 | op_blend_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_SSE3] = _op_blend_pt_mas_c_dpan_sse3; | ||
203 | op_blend_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_mas_cn_dpan_sse3; | ||
204 | op_blend_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pt_mas_can_dpan_sse3; | ||
205 | op_blend_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pt_mas_caa_dpan_sse3; | ||
206 | } | ||
207 | |||
208 | /*-----*/ | ||
209 | |||
210 | /* blend_rel mask x color --> dst */ | ||
211 | |||
212 | static void | ||
213 | _op_blend_rel_mas_c_dp_sse3(DATA32 *s __UNUSED__, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
214 | |||
215 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
216 | |||
217 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
218 | { /* UOP */ | ||
219 | |||
220 | DATA32 mc = MUL_SYM(*m, c); | ||
221 | int alpha = 256 - (mc >> 24); | ||
222 | *d = MUL_SYM(*d >> 24, mc) + MUL_256(alpha, *d); | ||
223 | d++; m++; l--; | ||
224 | }, | ||
225 | { /* A4OP */ | ||
226 | |||
227 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
228 | __m128i d0 = _mm_load_si128((__m128i *) d); | ||
229 | |||
230 | __m128i mc0 = mul_sym_sse3(m0, c_packed); | ||
231 | __m128i a0 = sub4_alpha_sse3(mc0); | ||
232 | |||
233 | __m128i d0_sym = mul_sym_sse3(_mm_srli_epi32(d0, 24), mc0); | ||
234 | d0 = mul_256_sse3(a0, d0); | ||
235 | |||
236 | d0 = _mm_add_epi32(d0, d0_sym); | ||
237 | |||
238 | _mm_store_si128((__m128i *)d, d0); | ||
239 | |||
240 | d += 4; m += 4; l -= 4; | ||
241 | }, | ||
242 | { /* A8OP */ | ||
243 | |||
244 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
245 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
246 | |||
247 | __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]); | ||
248 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
249 | |||
250 | __m128i mc0 = mul_sym_sse3(m0, c_packed); | ||
251 | __m128i mc1 = mul_sym_sse3(m1, c_packed); | ||
252 | |||
253 | __m128i a0 = sub4_alpha_sse3(mc0); | ||
254 | __m128i a1 = sub4_alpha_sse3(mc1); | ||
255 | |||
256 | __m128i d0_sym = mul_sym_sse3(_mm_srli_epi32(d0, 24), mc0); | ||
257 | __m128i d1_sym = mul_sym_sse3(_mm_srli_epi32(d1, 24), mc1); | ||
258 | |||
259 | d0 = mul_256_sse3(a0, d0); | ||
260 | d1 = mul_256_sse3(a1, d1); | ||
261 | |||
262 | d0 = _mm_add_epi32(d0, d0_sym); | ||
263 | d1 = _mm_add_epi32(d1, d1_sym); | ||
264 | |||
265 | _mm_store_si128((__m128i *)d, d0); | ||
266 | _mm_store_si128((__m128i *)(d+4), d1); | ||
267 | |||
268 | d += 8; m += 8; l -= 8; | ||
269 | }) | ||
270 | } | ||
271 | |||
272 | #define _op_blend_rel_mas_cn_dp_sse3 _op_blend_rel_mas_c_dp_sse3 | ||
273 | #define _op_blend_rel_mas_can_dp_sse3 _op_blend_rel_mas_c_dp_sse3 | ||
274 | #define _op_blend_rel_mas_caa_dp_sse3 _op_blend_rel_mas_c_dp_sse3 | ||
275 | |||
276 | #define _op_blend_rel_mas_c_dpan_sse3 _op_blend_mas_c_dpan_sse3 | ||
277 | #define _op_blend_rel_mas_cn_dpan_sse3 _op_blend_mas_cn_dpan_sse3 | ||
278 | #define _op_blend_rel_mas_can_dpan_sse3 _op_blend_mas_can_dpan_sse3 | ||
279 | #define _op_blend_rel_mas_caa_dpan_sse3 _op_blend_mas_caa_dpan_sse3 | ||
280 | |||
281 | static void | ||
282 | init_blend_rel_mask_color_span_funcs_sse3(void) | ||
283 | { | ||
284 | op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP][CPU_SSE3] = _op_blend_rel_mas_c_dp_sse3; | ||
285 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_mas_can_dp_sse3; | ||
286 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP][CPU_SSE3] = _op_blend_rel_mas_can_dp_sse3; | ||
287 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP][CPU_SSE3] = _op_blend_rel_mas_caa_dp_sse3; | ||
288 | |||
289 | op_blend_rel_span_funcs[SP_N][SM_AS][SC][DP_AN][CPU_SSE3] = _op_blend_rel_mas_c_dpan_sse3; | ||
290 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_mas_cn_dpan_sse3; | ||
291 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_mas_can_dpan_sse3; | ||
292 | op_blend_rel_span_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_mas_caa_dpan_sse3; | ||
293 | } | ||
294 | |||
295 | #define _op_blend_rel_pt_mas_c_dp_sse3 NULL | ||
296 | |||
297 | #define _op_blend_rel_pt_mas_cn_dp_sse3 _op_blend_rel_pt_mas_c_dp_sse3 | ||
298 | #define _op_blend_rel_pt_mas_can_dp_sse3 _op_blend_rel_pt_mas_c_dp_sse3 | ||
299 | #define _op_blend_rel_pt_mas_caa_dp_sse3 _op_blend_rel_pt_mas_c_dp_sse3 | ||
300 | |||
301 | #define _op_blend_rel_pt_mas_c_dpan_sse3 _op_blend_pt_mas_c_dpan_sse3 | ||
302 | #define _op_blend_rel_pt_mas_cn_dpan_sse3 _op_blend_pt_mas_cn_dpan_sse3 | ||
303 | #define _op_blend_rel_pt_mas_can_dpan_sse3 _op_blend_pt_mas_can_dpan_sse3 | ||
304 | #define _op_blend_rel_pt_mas_caa_dpan_sse3 _op_blend_pt_mas_caa_dpan_sse3 | ||
305 | |||
306 | static void | ||
307 | init_blend_rel_mask_color_pt_funcs_sse3(void) | ||
308 | { | ||
309 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP][CPU_SSE3] = _op_blend_rel_pt_mas_c_dp_sse3; | ||
310 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_mas_cn_dp_sse3; | ||
311 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pt_mas_can_dp_sse3; | ||
312 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pt_mas_caa_dp_sse3; | ||
313 | |||
314 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pt_mas_c_dpan_sse3; | ||
315 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_mas_cn_dpan_sse3; | ||
316 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pt_mas_can_dpan_sse3; | ||
317 | op_blend_rel_pt_funcs[SP_N][SM_AS][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pt_mas_caa_dpan_sse3; | ||
318 | } | ||
319 | |||
320 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_master_sse3.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_master_sse3.c new file mode 100644 index 0000000..eac6755 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_master_sse3.c | |||
@@ -0,0 +1,77 @@ | |||
1 | #define NEED_SSE3 1 | ||
2 | |||
3 | #include "evas_common.h" | ||
4 | |||
5 | #ifdef BUILD_SSE3 | ||
6 | static __m128i A_MASK_SSE3; | ||
7 | #endif | ||
8 | |||
9 | extern RGBA_Gfx_Func op_blend_span_funcs[SP_LAST][SM_LAST][SC_LAST][DP_LAST][CPU_LAST]; | ||
10 | extern RGBA_Gfx_Pt_Func op_blend_pt_funcs[SP_LAST][SM_LAST][SC_LAST][DP_LAST][CPU_LAST]; | ||
11 | |||
12 | extern RGBA_Gfx_Func op_blend_rel_span_funcs[SP_LAST][SM_LAST][SC_LAST][DP_LAST][CPU_LAST]; | ||
13 | extern RGBA_Gfx_Pt_Func op_blend_rel_pt_funcs[SP_LAST][SM_LAST][SC_LAST][DP_LAST][CPU_LAST]; | ||
14 | |||
15 | # include "op_blend_pixel_sse3.c" | ||
16 | # include "op_blend_color_sse3.c" | ||
17 | # include "op_blend_pixel_color_sse3.c" | ||
18 | # include "op_blend_pixel_mask_sse3.c" | ||
19 | # include "op_blend_mask_color_sse3.c" | ||
20 | |||
/*
 * Set up the SSE3 blend back-end: load the shared 128-bit constants, then
 * register the span and point blenders of every operand combination.
 * Compiles to an empty function when BUILD_SSE3 is not defined.
 */
void
evas_common_op_blend_init_sse3(void)
{
#ifdef BUILD_SSE3
   /* constants used by the SSE3 helpers; each value fills all four lanes
    * except SYM4_MASK_SSE3, which alternates two values */
   GA_MASK_SSE3   = _mm_set_epi32(0x00FF00FF, 0x00FF00FF, 0x00FF00FF, 0x00FF00FF);
   RB_MASK_SSE3   = _mm_set_epi32(0xFF00FF00, 0xFF00FF00, 0xFF00FF00, 0xFF00FF00);
   SYM4_MASK_SSE3 = _mm_set_epi32(0x00FF00FF, 0x000000FF, 0x00FF00FF, 0x000000FF);
   RGB_MASK_SSE3  = _mm_set_epi32(0xFFFFFF, 0xFFFFFF, 0xFFFFFF, 0xFFFFFF);
   A_MASK_SSE3    = _mm_set_epi32(0xFF000000, 0xFF000000, 0xFF000000, 0xFF000000);
   ALPHA_SSE3     = _mm_set_epi32(256, 256, 256, 256);

   /* span (whole scanline run) blenders */
   init_blend_pixel_span_funcs_sse3();
   init_blend_pixel_color_span_funcs_sse3();
   init_blend_pixel_mask_span_funcs_sse3();
   init_blend_color_span_funcs_sse3();
   init_blend_mask_color_span_funcs_sse3();

   /* point (single pixel) blenders */
   init_blend_pixel_pt_funcs_sse3();
   init_blend_pixel_color_pt_funcs_sse3();
   init_blend_pixel_mask_pt_funcs_sse3();
   init_blend_color_pt_funcs_sse3();
   init_blend_mask_color_pt_funcs_sse3();
#endif
}
45 | |||
/*
 * Register the SSE3 implementations of the relative blend op: first the
 * span blenders, then the point blenders, one call per operand
 * combination. Compiles to an empty function when BUILD_SSE3 is not
 * defined.
 */
void
evas_common_op_blend_rel_init_sse3(void)
{
#ifdef BUILD_SSE3
   /* span blenders */
   init_blend_rel_pixel_span_funcs_sse3();
   init_blend_rel_pixel_color_span_funcs_sse3();
   init_blend_rel_pixel_mask_span_funcs_sse3();
   init_blend_rel_color_span_funcs_sse3();
   init_blend_rel_mask_color_span_funcs_sse3();

   /* point blenders */
   init_blend_rel_pixel_pt_funcs_sse3();
   init_blend_rel_pixel_color_pt_funcs_sse3();
   init_blend_rel_pixel_mask_pt_funcs_sse3();
   init_blend_rel_color_pt_funcs_sse3();
   init_blend_rel_mask_color_pt_funcs_sse3();
#endif
}
63 | |||
64 | //#pragma GCC push_options | ||
65 | //#pragma GCC optimize ("O0") | ||
/*
 * Run the SSE3 pixel blender once over two 64-pixel stack buffers and
 * then call evas_common_cpu_end_opt(). The blended output is discarded.
 * NOTE(review): presumably used as a probe that SSE3 code executes on
 * this CPU - confirm against the caller.
 */
void
evas_common_op_sse3_test(void)
{
#ifdef BUILD_SSE3
   DATA32 src[64] = {0x11883399};
   DATA32 dst[64] = {0xff88cc33};

   /* one random value per buffer, drawn in this order */
   src[0] = rand();
   dst[1] = rand();

   _op_blend_pas_dp_sse3(src, NULL, 0, dst, 64);
   evas_common_cpu_end_opt();
#endif
}
77 | //#pragma GCC pop_options | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_.c new file mode 100644 index 0000000..0ce78d8 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_.c | |||
@@ -0,0 +1,162 @@ | |||
1 | /* blend pixel --> dst */ | ||
2 | |||
3 | #ifdef BUILD_C | ||
4 | static void | ||
5 | _op_blend_p_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
6 | DATA32 *e; | ||
7 | int alpha; | ||
8 | UNROLL8_PLD_WHILE(d, l, e, | ||
9 | { | ||
10 | alpha = 256 - (*s >> 24); | ||
11 | *d = *s++ + MUL_256(alpha, *d); | ||
12 | d++; | ||
13 | }); | ||
14 | } | ||
15 | |||
16 | static void | ||
17 | _op_blend_pas_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
18 | DATA32 *e; | ||
19 | int alpha; | ||
20 | UNROLL8_PLD_WHILE(d, l, e, | ||
21 | { | ||
22 | switch (*s & 0xff000000) | ||
23 | { | ||
24 | case 0: | ||
25 | break; | ||
26 | case 0xff000000: | ||
27 | *d = *s; | ||
28 | break; | ||
29 | default: | ||
30 | alpha = 256 - (*s >> 24); | ||
31 | *d = *s + MUL_256(alpha, *d); | ||
32 | break; | ||
33 | } | ||
34 | s++; d++; | ||
35 | }); | ||
36 | } | ||
37 | |||
38 | #define _op_blend_pan_dp NULL | ||
39 | |||
40 | #define _op_blend_p_dpan _op_blend_p_dp | ||
41 | #define _op_blend_pas_dpan _op_blend_pas_dp | ||
42 | #define _op_blend_pan_dpan _op_blend_pan_dp | ||
43 | |||
44 | static void | ||
45 | init_blend_pixel_span_funcs_c(void) | ||
46 | { | ||
47 | op_blend_span_funcs[SP][SM_N][SC_N][DP][CPU_C] = _op_blend_p_dp; | ||
48 | op_blend_span_funcs[SP_AS][SM_N][SC_N][DP][CPU_C] = _op_blend_pas_dp; | ||
49 | op_blend_span_funcs[SP_AN][SM_N][SC_N][DP][CPU_C] = _op_blend_pan_dp; | ||
50 | |||
51 | op_blend_span_funcs[SP][SM_N][SC_N][DP_AN][CPU_C] = _op_blend_p_dpan; | ||
52 | op_blend_span_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_C] = _op_blend_pas_dpan; | ||
53 | op_blend_span_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_C] = _op_blend_pan_dpan; | ||
54 | } | ||
55 | #endif | ||
56 | |||
57 | #ifdef BUILD_C | ||
58 | static void | ||
59 | _op_blend_pt_p_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
60 | c = 256 - (s >> 24); | ||
61 | *d = s + MUL_256(c, *d); | ||
62 | } | ||
63 | |||
64 | #define _op_blend_pt_pas_dp _op_blend_pt_p_dp | ||
65 | #define _op_blend_pt_pan_dp NULL | ||
66 | |||
67 | #define _op_blend_pt_p_dpan _op_blend_pt_p_dp | ||
68 | #define _op_blend_pt_pan_dpan _op_blend_pt_pan_dp | ||
69 | #define _op_blend_pt_pas_dpan _op_blend_pt_pas_dp | ||
70 | |||
71 | static void | ||
72 | init_blend_pixel_pt_funcs_c(void) | ||
73 | { | ||
74 | op_blend_pt_funcs[SP][SM_N][SC_N][DP][CPU_C] = _op_blend_pt_p_dp; | ||
75 | op_blend_pt_funcs[SP_AS][SM_N][SC_N][DP][CPU_C] = _op_blend_pt_pas_dp; | ||
76 | op_blend_pt_funcs[SP_AN][SM_N][SC_N][DP][CPU_C] = _op_blend_pt_pan_dp; | ||
77 | |||
78 | op_blend_pt_funcs[SP][SM_N][SC_N][DP_AN][CPU_C] = _op_blend_pt_p_dpan; | ||
79 | op_blend_pt_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_C] = _op_blend_pt_pas_dpan; | ||
80 | op_blend_pt_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_C] = _op_blend_pt_pan_dpan; | ||
81 | } | ||
82 | #endif | ||
83 | |||
84 | /*-----*/ | ||
85 | |||
86 | /* blend_rel pixel -> dst */ | ||
87 | |||
88 | #ifdef BUILD_C | ||
89 | static void | ||
90 | _op_blend_rel_p_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
91 | DATA32 *e; | ||
92 | int alpha; | ||
93 | UNROLL8_PLD_WHILE(d, l, e, | ||
94 | { | ||
95 | alpha = 256 - (*s >> 24); | ||
96 | c = 1 + (*d >> 24); | ||
97 | *d = MUL_256(c, *s) + MUL_256(alpha, *d); | ||
98 | d++; | ||
99 | s++; | ||
100 | }); | ||
101 | } | ||
102 | |||
103 | static void | ||
104 | _op_blend_rel_pan_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
105 | DATA32 *e; | ||
106 | UNROLL8_PLD_WHILE(d, l, e, | ||
107 | { | ||
108 | c = 1 + (*d >> 24); | ||
109 | *d++ = MUL_256(c, *s); | ||
110 | s++; | ||
111 | }); | ||
112 | } | ||
113 | |||
114 | #define _op_blend_rel_pas_dp _op_blend_rel_p_dp | ||
115 | |||
116 | #define _op_blend_rel_p_dpan _op_blend_p_dpan | ||
117 | #define _op_blend_rel_pan_dpan _op_blend_pan_dpan | ||
118 | #define _op_blend_rel_pas_dpan _op_blend_pas_dpan | ||
119 | |||
120 | static void | ||
121 | init_blend_rel_pixel_span_funcs_c(void) | ||
122 | { | ||
123 | op_blend_rel_span_funcs[SP][SM_N][SC_N][DP][CPU_C] = _op_blend_rel_p_dp; | ||
124 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_N][DP][CPU_C] = _op_blend_rel_pas_dp; | ||
125 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_N][DP][CPU_C] = _op_blend_rel_pan_dp; | ||
126 | |||
127 | op_blend_rel_span_funcs[SP][SM_N][SC_N][DP_AN][CPU_C] = _op_blend_rel_p_dpan; | ||
128 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_C] = _op_blend_rel_pas_dpan; | ||
129 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_C] = _op_blend_rel_pan_dpan; | ||
130 | } | ||
131 | #endif | ||
132 | |||
133 | #ifdef BUILD_C | ||
134 | static void | ||
135 | _op_blend_rel_pt_p_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
136 | c = 256 - (s >> 24); | ||
137 | *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d); | ||
138 | } | ||
139 | |||
140 | static void | ||
141 | _op_blend_rel_pt_pan_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d) { | ||
142 | *d = MUL_SYM(*d >> 24, s); | ||
143 | } | ||
144 | |||
145 | #define _op_blend_rel_pt_pas_dp _op_blend_rel_pt_p_dp | ||
146 | |||
147 | #define _op_blend_rel_pt_p_dpan _op_blend_pt_p_dpan | ||
148 | #define _op_blend_rel_pt_pan_dpan _op_blend_pt_pan_dpan | ||
149 | #define _op_blend_rel_pt_pas_dpan _op_blend_pt_pas_dpan | ||
150 | |||
151 | static void | ||
152 | init_blend_rel_pixel_pt_funcs_c(void) | ||
153 | { | ||
154 | op_blend_rel_pt_funcs[SP][SM_N][SC_N][DP][CPU_C] = _op_blend_rel_pt_p_dp; | ||
155 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_N][DP][CPU_C] = _op_blend_rel_pt_pas_dp; | ||
156 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_N][DP][CPU_C] = _op_blend_rel_pt_pan_dp; | ||
157 | |||
158 | op_blend_rel_pt_funcs[SP][SM_N][SC_N][DP_AN][CPU_C] = _op_blend_rel_pt_p_dpan; | ||
159 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_C] = _op_blend_rel_pt_pas_dpan; | ||
160 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_C] = _op_blend_rel_pt_pan_dpan; | ||
161 | } | ||
162 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_.c new file mode 100644 index 0000000..c5e40a0 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_.c | |||
@@ -0,0 +1,284 @@ | |||
1 | /* blend pixel x color --> dst */ | ||
2 | |||
3 | #ifdef BUILD_C | ||
4 | static void | ||
5 | _op_blend_p_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
6 | DATA32 *e; | ||
7 | int alpha; | ||
8 | UNROLL8_PLD_WHILE(d, l, e, | ||
9 | { | ||
10 | DATA32 sc = MUL4_SYM(c, *s); | ||
11 | alpha = 256 - (sc >> 24); | ||
12 | *d = sc + MUL_256(alpha, *d); | ||
13 | d++; | ||
14 | s++; | ||
15 | }); | ||
16 | } | ||
17 | |||
18 | static void | ||
19 | _op_blend_pan_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
20 | DATA32 *e; | ||
21 | int alpha = 256 - (c >> 24); | ||
22 | UNROLL8_PLD_WHILE(d, l, e, | ||
23 | { | ||
24 | *d = ((c & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(alpha, *d); | ||
25 | d++; | ||
26 | s++; | ||
27 | }); | ||
28 | } | ||
29 | |||
30 | static void | ||
31 | _op_blend_p_can_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
32 | DATA32 *e; | ||
33 | int alpha; | ||
34 | UNROLL8_PLD_WHILE(d, l, e, | ||
35 | { | ||
36 | alpha = 256 - (*s >> 24); | ||
37 | *d = ((*s & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(alpha, *d); | ||
38 | d++; | ||
39 | s++; | ||
40 | }); | ||
41 | } | ||
42 | |||
43 | static void | ||
44 | _op_blend_pan_can_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
45 | DATA32 *e; | ||
46 | UNROLL8_PLD_WHILE(d, l, e, | ||
47 | { | ||
48 | *d++ = 0xff000000 + MUL3_SYM(c, *s); | ||
49 | s++; | ||
50 | }); | ||
51 | } | ||
52 | |||
53 | static void | ||
54 | _op_blend_p_caa_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
55 | DATA32 *e; | ||
56 | int alpha; | ||
57 | c = 1 + (c & 0xff); | ||
58 | UNROLL8_PLD_WHILE(d, l, e, | ||
59 | { | ||
60 | DATA32 sc = MUL_256(c, *s); | ||
61 | alpha = 256 - (sc >> 24); | ||
62 | *d = sc + MUL_256(alpha, *d); | ||
63 | d++; | ||
64 | s++; | ||
65 | }); | ||
66 | } | ||
67 | |||
68 | static void | ||
69 | _op_blend_pan_caa_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
70 | DATA32 *e; | ||
71 | c = 1 + (c & 0xff); | ||
72 | UNROLL8_PLD_WHILE(d, l, e, | ||
73 | { | ||
74 | *d = INTERP_256(c, *s, *d); | ||
75 | d++; | ||
76 | s++; | ||
77 | }); | ||
78 | } | ||
79 | |||
80 | #define _op_blend_pas_c_dp _op_blend_p_c_dp | ||
81 | #define _op_blend_pas_can_dp _op_blend_p_can_dp | ||
82 | #define _op_blend_pas_caa_dp _op_blend_p_caa_dp | ||
83 | |||
84 | #define _op_blend_p_c_dpan _op_blend_p_c_dp | ||
85 | #define _op_blend_pas_c_dpan _op_blend_pas_c_dp | ||
86 | #define _op_blend_pan_c_dpan _op_blend_pan_c_dp | ||
87 | #define _op_blend_p_can_dpan _op_blend_p_can_dp | ||
88 | #define _op_blend_pas_can_dpan _op_blend_pas_can_dp | ||
89 | #define _op_blend_pan_can_dpan _op_blend_pan_can_dp | ||
90 | #define _op_blend_p_caa_dpan _op_blend_p_caa_dp | ||
91 | #define _op_blend_pas_caa_dpan _op_blend_pas_caa_dp | ||
92 | #define _op_blend_pan_caa_dpan _op_blend_pan_caa_dp | ||
93 | |||
94 | static void | ||
95 | init_blend_pixel_color_span_funcs_c(void) | ||
96 | { | ||
97 | op_blend_span_funcs[SP][SM_N][SC][DP][CPU_C] = _op_blend_p_c_dp; | ||
98 | op_blend_span_funcs[SP_AS][SM_N][SC][DP][CPU_C] = _op_blend_pas_c_dp; | ||
99 | op_blend_span_funcs[SP_AN][SM_N][SC][DP][CPU_C] = _op_blend_pan_c_dp; | ||
100 | op_blend_span_funcs[SP][SM_N][SC_AN][DP][CPU_C] = _op_blend_p_can_dp; | ||
101 | op_blend_span_funcs[SP_AS][SM_N][SC_AN][DP][CPU_C] = _op_blend_pas_can_dp; | ||
102 | op_blend_span_funcs[SP_AN][SM_N][SC_AN][DP][CPU_C] = _op_blend_pan_can_dp; | ||
103 | op_blend_span_funcs[SP][SM_N][SC_AA][DP][CPU_C] = _op_blend_p_caa_dp; | ||
104 | op_blend_span_funcs[SP_AS][SM_N][SC_AA][DP][CPU_C] = _op_blend_pas_caa_dp; | ||
105 | op_blend_span_funcs[SP_AN][SM_N][SC_AA][DP][CPU_C] = _op_blend_pan_caa_dp; | ||
106 | |||
107 | op_blend_span_funcs[SP][SM_N][SC][DP_AN][CPU_C] = _op_blend_p_c_dpan; | ||
108 | op_blend_span_funcs[SP_AS][SM_N][SC][DP_AN][CPU_C] = _op_blend_pas_c_dpan; | ||
109 | op_blend_span_funcs[SP_AN][SM_N][SC][DP_AN][CPU_C] = _op_blend_pan_c_dpan; | ||
110 | op_blend_span_funcs[SP][SM_N][SC_AN][DP_AN][CPU_C] = _op_blend_p_can_dpan; | ||
111 | op_blend_span_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_C] = _op_blend_pas_can_dpan; | ||
112 | op_blend_span_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_C] = _op_blend_pan_can_dpan; | ||
113 | op_blend_span_funcs[SP][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_p_caa_dpan; | ||
114 | op_blend_span_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_pas_caa_dpan; | ||
115 | op_blend_span_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_pan_caa_dpan; | ||
116 | } | ||
117 | #endif | ||
118 | |||
119 | #ifdef BUILD_C | ||
120 | static void | ||
121 | _op_blend_pt_p_c_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
122 | s = MUL4_SYM(c, s); | ||
123 | c = 256 - (s >> 24); | ||
124 | *d = s + MUL_256(c, *d); | ||
125 | } | ||
126 | |||
127 | #define _op_blend_pt_pas_c_dp _op_blend_pt_p_c_dp | ||
128 | #define _op_blend_pt_pan_c_dp _op_blend_pt_p_c_dp | ||
129 | #define _op_blend_pt_p_can_dp _op_blend_pt_p_c_dp | ||
130 | #define _op_blend_pt_pas_can_dp _op_blend_pt_p_c_dp | ||
131 | #define _op_blend_pt_pan_can_dp _op_blend_pt_p_c_dp | ||
132 | #define _op_blend_pt_p_caa_dp _op_blend_pt_p_c_dp | ||
133 | #define _op_blend_pt_pas_caa_dp _op_blend_pt_p_c_dp | ||
134 | #define _op_blend_pt_pan_caa_dp _op_blend_pt_p_c_dp | ||
135 | |||
136 | #define _op_blend_pt_p_c_dpan _op_blend_pt_p_c_dp | ||
137 | #define _op_blend_pt_pas_c_dpan _op_blend_pt_pas_c_dp | ||
138 | #define _op_blend_pt_pan_c_dpan _op_blend_pt_pan_c_dp | ||
139 | #define _op_blend_pt_p_can_dpan _op_blend_pt_p_can_dp | ||
140 | #define _op_blend_pt_pas_can_dpan _op_blend_pt_pas_can_dp | ||
141 | #define _op_blend_pt_pan_can_dpan _op_blend_pt_pan_can_dp | ||
142 | #define _op_blend_pt_p_caa_dpan _op_blend_pt_p_caa_dp | ||
143 | #define _op_blend_pt_pas_caa_dpan _op_blend_pt_pas_caa_dp | ||
144 | #define _op_blend_pt_pan_caa_dpan _op_blend_pt_pan_caa_dp | ||
145 | |||
146 | static void | ||
147 | init_blend_pixel_color_pt_funcs_c(void) | ||
148 | { | ||
149 | op_blend_pt_funcs[SP][SM_N][SC][DP][CPU_C] = _op_blend_pt_p_c_dp; | ||
150 | op_blend_pt_funcs[SP_AS][SM_N][SC][DP][CPU_C] = _op_blend_pt_pas_c_dp; | ||
151 | op_blend_pt_funcs[SP_AN][SM_N][SC][DP][CPU_C] = _op_blend_pt_pan_c_dp; | ||
152 | op_blend_pt_funcs[SP][SM_N][SC_AN][DP][CPU_C] = _op_blend_pt_p_can_dp; | ||
153 | op_blend_pt_funcs[SP_AS][SM_N][SC_AN][DP][CPU_C] = _op_blend_pt_pas_can_dp; | ||
154 | op_blend_pt_funcs[SP_AN][SM_N][SC_AN][DP][CPU_C] = _op_blend_pt_pan_can_dp; | ||
155 | op_blend_pt_funcs[SP][SM_N][SC_AA][DP][CPU_C] = _op_blend_pt_p_caa_dp; | ||
156 | op_blend_pt_funcs[SP_AS][SM_N][SC_AA][DP][CPU_C] = _op_blend_pt_pas_caa_dp; | ||
157 | op_blend_pt_funcs[SP_AN][SM_N][SC_AA][DP][CPU_C] = _op_blend_pt_pan_caa_dp; | ||
158 | |||
159 | op_blend_pt_funcs[SP][SM_N][SC][DP_AN][CPU_C] = _op_blend_pt_p_c_dpan; | ||
160 | op_blend_pt_funcs[SP_AS][SM_N][SC][DP_AN][CPU_C] = _op_blend_pt_pas_c_dpan; | ||
161 | op_blend_pt_funcs[SP_AN][SM_N][SC][DP_AN][CPU_C] = _op_blend_pt_pan_c_dpan; | ||
162 | op_blend_pt_funcs[SP][SM_N][SC_AN][DP_AN][CPU_C] = _op_blend_pt_p_can_dpan; | ||
163 | op_blend_pt_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_C] = _op_blend_pt_pas_can_dpan; | ||
164 | op_blend_pt_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_C] = _op_blend_pt_pan_can_dpan; | ||
165 | op_blend_pt_funcs[SP][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_pt_p_caa_dpan; | ||
166 | op_blend_pt_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_pt_pas_caa_dpan; | ||
167 | op_blend_pt_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_pt_pan_caa_dpan; | ||
168 | } | ||
169 | #endif | ||
170 | |||
171 | /*-----*/ | ||
172 | |||
173 | /* blend_rel pixel x color -> dst */ | ||
174 | |||
175 | #ifdef BUILD_C | ||
176 | static void | ||
177 | _op_blend_rel_p_c_dp(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
178 | DATA32 *e; | ||
179 | int alpha; | ||
180 | UNROLL8_PLD_WHILE(d, l, e, | ||
181 | { | ||
182 | DATA32 sc = MUL4_SYM(c, *s); | ||
183 | alpha = 256 - (sc >> 24); | ||
184 | *d = MUL_SYM(*d >> 24, sc) + MUL_256(alpha, *d); | ||
185 | d++; | ||
186 | s++; | ||
187 | }); | ||
188 | } | ||
189 | |||
190 | #define _op_blend_rel_pas_c_dp _op_blend_rel_p_c_dp | ||
191 | #define _op_blend_rel_pan_c_dp _op_blend_rel_p_c_dp | ||
192 | #define _op_blend_rel_p_can_dp _op_blend_rel_p_c_dp | ||
193 | #define _op_blend_rel_pas_can_dp _op_blend_rel_p_c_dp | ||
194 | #define _op_blend_rel_pan_can_dp _op_blend_rel_p_c_dp | ||
195 | #define _op_blend_rel_p_caa_dp _op_blend_rel_p_c_dp | ||
196 | #define _op_blend_rel_pas_caa_dp _op_blend_rel_p_c_dp | ||
197 | #define _op_blend_rel_pan_caa_dp _op_blend_rel_p_c_dp | ||
198 | |||
199 | #define _op_blend_rel_p_c_dpan _op_blend_p_c_dpan | ||
200 | #define _op_blend_rel_pas_c_dpan _op_blend_pas_c_dpan | ||
201 | #define _op_blend_rel_pan_c_dpan _op_blend_pan_c_dpan | ||
202 | #define _op_blend_rel_p_can_dpan _op_blend_p_can_dpan | ||
203 | #define _op_blend_rel_pas_can_dpan _op_blend_pas_can_dpan | ||
204 | #define _op_blend_rel_pan_can_dpan _op_blend_pan_can_dpan | ||
205 | #define _op_blend_rel_p_caa_dpan _op_blend_p_caa_dpan | ||
206 | #define _op_blend_rel_pas_caa_dpan _op_blend_pas_caa_dpan | ||
207 | #define _op_blend_rel_pan_caa_dpan _op_blend_pan_caa_dpan | ||
208 | |||
209 | static void | ||
210 | init_blend_rel_pixel_color_span_funcs_c(void) | ||
211 | { | ||
212 | op_blend_rel_span_funcs[SP][SM_N][SC][DP][CPU_C] = _op_blend_rel_p_c_dp; | ||
213 | op_blend_rel_span_funcs[SP_AS][SM_N][SC][DP][CPU_C] = _op_blend_rel_pas_c_dp; | ||
214 | op_blend_rel_span_funcs[SP_AN][SM_N][SC][DP][CPU_C] = _op_blend_rel_pan_c_dp; | ||
215 | op_blend_rel_span_funcs[SP][SM_N][SC_AN][DP][CPU_C] = _op_blend_rel_p_can_dp; | ||
216 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AN][DP][CPU_C] = _op_blend_rel_pas_can_dp; | ||
217 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AN][DP][CPU_C] = _op_blend_rel_pan_can_dp; | ||
218 | op_blend_rel_span_funcs[SP][SM_N][SC_AA][DP][CPU_C] = _op_blend_rel_p_caa_dp; | ||
219 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AA][DP][CPU_C] = _op_blend_rel_pas_caa_dp; | ||
220 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AA][DP][CPU_C] = _op_blend_rel_pan_caa_dp; | ||
221 | |||
222 | op_blend_rel_span_funcs[SP][SM_N][SC][DP_AN][CPU_C] = _op_blend_rel_p_c_dpan; | ||
223 | op_blend_rel_span_funcs[SP_AS][SM_N][SC][DP_AN][CPU_C] = _op_blend_rel_pas_c_dpan; | ||
224 | op_blend_rel_span_funcs[SP_AN][SM_N][SC][DP_AN][CPU_C] = _op_blend_rel_pan_c_dpan; | ||
225 | op_blend_rel_span_funcs[SP][SM_N][SC_AN][DP_AN][CPU_C] = _op_blend_rel_p_can_dpan; | ||
226 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_C] = _op_blend_rel_pas_can_dpan; | ||
227 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_C] = _op_blend_rel_pan_can_dpan; | ||
228 | op_blend_rel_span_funcs[SP][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_rel_p_caa_dpan; | ||
229 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_rel_pas_caa_dpan; | ||
230 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_rel_pan_caa_dpan; | ||
231 | } | ||
232 | #endif | ||
233 | |||
234 | #ifdef BUILD_C | ||
235 | static void | ||
236 | _op_blend_rel_pt_p_c_dp(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
237 | s = MUL4_SYM(c, s); | ||
238 | c = 256 - (s >> 24); | ||
239 | *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d); | ||
240 | } | ||
241 | |||
242 | #define _op_blend_rel_pt_pas_c_dp _op_blend_rel_pt_p_c_dp | ||
243 | #define _op_blend_rel_pt_pan_c_dp _op_blend_rel_pt_p_c_dp | ||
244 | #define _op_blend_rel_pt_p_can_dp _op_blend_rel_pt_p_c_dp | ||
245 | #define _op_blend_rel_pt_pas_can_dp _op_blend_rel_pt_p_c_dp | ||
246 | #define _op_blend_rel_pt_pan_can_dp _op_blend_rel_pt_p_c_dp | ||
247 | #define _op_blend_rel_pt_p_caa_dp _op_blend_rel_pt_p_c_dp | ||
248 | #define _op_blend_rel_pt_pas_caa_dp _op_blend_rel_pt_p_c_dp | ||
249 | #define _op_blend_rel_pt_pan_caa_dp _op_blend_rel_pt_p_c_dp | ||
250 | |||
251 | #define _op_blend_rel_pt_p_c_dpan _op_blend_pt_p_c_dpan | ||
252 | #define _op_blend_rel_pt_pas_c_dpan _op_blend_pt_pas_c_dpan | ||
253 | #define _op_blend_rel_pt_pan_c_dpan _op_blend_pt_pan_c_dpan | ||
254 | #define _op_blend_rel_pt_p_can_dpan _op_blend_pt_p_can_dpan | ||
255 | #define _op_blend_rel_pt_pas_can_dpan _op_blend_pt_pas_can_dpan | ||
256 | #define _op_blend_rel_pt_pan_can_dpan _op_blend_pt_pan_can_dpan | ||
257 | #define _op_blend_rel_pt_p_caa_dpan _op_blend_pt_p_caa_dpan | ||
258 | #define _op_blend_rel_pt_pas_caa_dpan _op_blend_pt_pas_caa_dpan | ||
259 | #define _op_blend_rel_pt_pan_caa_dpan _op_blend_pt_pan_caa_dpan | ||
260 | |||
261 | static void | ||
262 | init_blend_rel_pixel_color_pt_funcs_c(void) | ||
263 | { | ||
264 | op_blend_rel_pt_funcs[SP][SM_N][SC][DP][CPU_C] = _op_blend_rel_pt_p_c_dp; | ||
265 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC][DP][CPU_C] = _op_blend_rel_pt_pas_c_dp; | ||
266 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC][DP][CPU_C] = _op_blend_rel_pt_pan_c_dp; | ||
267 | op_blend_rel_pt_funcs[SP][SM_N][SC_AN][DP][CPU_C] = _op_blend_rel_pt_p_can_dp; | ||
268 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AN][DP][CPU_C] = _op_blend_rel_pt_pas_can_dp; | ||
269 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AN][DP][CPU_C] = _op_blend_rel_pt_pan_can_dp; | ||
270 | op_blend_rel_pt_funcs[SP][SM_N][SC_AA][DP][CPU_C] = _op_blend_rel_pt_p_caa_dp; | ||
271 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AA][DP][CPU_C] = _op_blend_rel_pt_pas_caa_dp; | ||
272 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AA][DP][CPU_C] = _op_blend_rel_pt_pan_caa_dp; | ||
273 | |||
274 | op_blend_rel_pt_funcs[SP][SM_N][SC][DP_AN][CPU_C] = _op_blend_rel_pt_p_c_dpan; | ||
275 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC][DP_AN][CPU_C] = _op_blend_rel_pt_pas_c_dpan; | ||
276 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC][DP_AN][CPU_C] = _op_blend_rel_pt_pan_c_dpan; | ||
277 | op_blend_rel_pt_funcs[SP][SM_N][SC_AN][DP_AN][CPU_C] = _op_blend_rel_pt_p_can_dpan; | ||
278 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_C] = _op_blend_rel_pt_pas_can_dpan; | ||
279 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_C] = _op_blend_rel_pt_pan_can_dpan; | ||
280 | op_blend_rel_pt_funcs[SP][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_rel_pt_p_caa_dpan; | ||
281 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_rel_pt_pas_caa_dpan; | ||
282 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_C] = _op_blend_rel_pt_pan_caa_dpan; | ||
283 | } | ||
284 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_i386.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_i386.c new file mode 100644 index 0000000..52751f4 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_i386.c | |||
@@ -0,0 +1,221 @@ | |||
1 | /* blend pixel x color --> dst */ | ||
2 | |||
3 | #ifdef BUILD_MMX | ||
4 | static void | ||
5 | _op_blend_p_c_dp_mmx(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
6 | DATA32 *e = d + l; | ||
7 | MOV_A2R(ALPHA_256, mm6) | ||
8 | MOV_A2R(ALPHA_255, mm5) | ||
9 | pxor_r2r(mm0, mm0); | ||
10 | MOV_P2R(c, mm2, mm0) | ||
11 | while (d < e) { | ||
12 | MOV_P2R(*s, mm3, mm0) | ||
13 | MUL4_SYM_R2R(mm2, mm3, mm5) | ||
14 | |||
15 | MOV_RA2R(mm3, mm1) | ||
16 | movq_r2r(mm6, mm4); | ||
17 | psubw_r2r(mm1, mm4); | ||
18 | |||
19 | MOV_P2R(*d, mm1, mm0) | ||
20 | MUL4_256_R2R(mm4, mm1) | ||
21 | |||
22 | paddw_r2r(mm3, mm1); | ||
23 | MOV_R2P(mm1, *d, mm0) | ||
24 | s++; d++; | ||
25 | } | ||
26 | } | ||
27 | |||
28 | static void | ||
29 | _op_blend_pan_can_dp_mmx(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
30 | DATA32 *e = d + l; | ||
31 | pxor_r2r(mm0, mm0); | ||
32 | MOV_P2R(c, mm2, mm0) | ||
33 | MOV_A2R(ALPHA_255, mm5) | ||
34 | while (d < e) { | ||
35 | MOV_P2R(*s, mm1, mm0) | ||
36 | MUL4_SYM_R2R(mm2, mm1, mm5) | ||
37 | MOV_R2P(mm1, *d, mm0) | ||
38 | s++; d++; | ||
39 | } | ||
40 | } | ||
41 | |||
42 | static void | ||
43 | _op_blend_pan_caa_dp_mmx(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
44 | DATA32 *e = d + l; | ||
45 | c = 1 + (c & 0xff); | ||
46 | MOV_A2R(c, mm2) | ||
47 | MOV_A2R(ALPHA_255, mm5) | ||
48 | pxor_r2r(mm0, mm0); | ||
49 | while (d < e) { | ||
50 | MOV_P2R(*s, mm3, mm0) | ||
51 | MOV_P2R(*d, mm1, mm0) | ||
52 | INTERP_256_R2R(mm2, mm3, mm1, mm5) | ||
53 | MOV_R2P(mm1, *d, mm0) | ||
54 | s++; d++; | ||
55 | } | ||
56 | } | ||
57 | |||
58 | #define _op_blend_pas_c_dp_mmx _op_blend_p_c_dp_mmx | ||
59 | #define _op_blend_pan_c_dp_mmx _op_blend_p_c_dp_mmx | ||
60 | #define _op_blend_p_can_dp_mmx _op_blend_p_c_dp_mmx | ||
61 | #define _op_blend_pas_can_dp_mmx _op_blend_p_c_dp_mmx | ||
62 | #define _op_blend_p_caa_dp_mmx _op_blend_p_c_dp_mmx | ||
63 | #define _op_blend_pas_caa_dp_mmx _op_blend_p_c_dp_mmx | ||
64 | |||
65 | #define _op_blend_p_c_dpan_mmx _op_blend_p_c_dp_mmx | ||
66 | #define _op_blend_pas_c_dpan_mmx _op_blend_pas_c_dp_mmx | ||
67 | #define _op_blend_pan_c_dpan_mmx _op_blend_pan_c_dp_mmx | ||
68 | #define _op_blend_p_can_dpan_mmx _op_blend_p_can_dp_mmx | ||
69 | #define _op_blend_pas_can_dpan_mmx _op_blend_pas_can_dp_mmx | ||
70 | #define _op_blend_pan_can_dpan_mmx _op_blend_pan_can_dp_mmx | ||
71 | #define _op_blend_p_caa_dpan_mmx _op_blend_p_caa_dp_mmx | ||
72 | #define _op_blend_pas_caa_dpan_mmx _op_blend_pas_caa_dp_mmx | ||
73 | #define _op_blend_pan_caa_dpan_mmx _op_blend_pan_caa_dp_mmx | ||
74 | |||
75 | |||
76 | static void | ||
77 | init_blend_pixel_color_span_funcs_mmx(void) | ||
78 | { | ||
79 | op_blend_span_funcs[SP][SM_N][SC][DP][CPU_MMX] = _op_blend_p_c_dp_mmx; | ||
80 | op_blend_span_funcs[SP_AS][SM_N][SC][DP][CPU_MMX] = _op_blend_pas_c_dp_mmx; | ||
81 | op_blend_span_funcs[SP_AN][SM_N][SC][DP][CPU_MMX] = _op_blend_pan_c_dp_mmx; | ||
82 | op_blend_span_funcs[SP][SM_N][SC_AN][DP][CPU_MMX] = _op_blend_p_can_dp_mmx; | ||
83 | op_blend_span_funcs[SP_AS][SM_N][SC_AN][DP][CPU_MMX] = _op_blend_pas_can_dp_mmx; | ||
84 | op_blend_span_funcs[SP_AN][SM_N][SC_AN][DP][CPU_MMX] = _op_blend_pan_can_dp_mmx; | ||
85 | op_blend_span_funcs[SP][SM_N][SC_AA][DP][CPU_MMX] = _op_blend_p_caa_dp_mmx; | ||
86 | op_blend_span_funcs[SP_AS][SM_N][SC_AA][DP][CPU_MMX] = _op_blend_pas_caa_dp_mmx; | ||
87 | op_blend_span_funcs[SP_AN][SM_N][SC_AA][DP][CPU_MMX] = _op_blend_pan_caa_dp_mmx; | ||
88 | |||
89 | op_blend_span_funcs[SP][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_p_c_dpan_mmx; | ||
90 | op_blend_span_funcs[SP_AS][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_pas_c_dpan_mmx; | ||
91 | op_blend_span_funcs[SP_AN][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_pan_c_dpan_mmx; | ||
92 | op_blend_span_funcs[SP][SM_N][SC_AN][DP_AN][CPU_MMX] = _op_blend_p_can_dpan_mmx; | ||
93 | op_blend_span_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_MMX] = _op_blend_pas_can_dpan_mmx; | ||
94 | op_blend_span_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_MMX] = _op_blend_pan_can_dpan_mmx; | ||
95 | op_blend_span_funcs[SP][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_p_caa_dpan_mmx; | ||
96 | op_blend_span_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_pas_caa_dpan_mmx; | ||
97 | op_blend_span_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_pan_caa_dpan_mmx; | ||
98 | } | ||
99 | #endif | ||
100 | |||
101 | #ifdef BUILD_MMX | ||
102 | static void | ||
103 | _op_blend_pt_p_c_dp_mmx(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
104 | MOV_A2R(ALPHA_256, mm4) | ||
105 | MOV_A2R(ALPHA_255, mm5) | ||
106 | pxor_r2r(mm0, mm0); | ||
107 | MOV_P2R(c, mm2, mm0) | ||
108 | MOV_P2R(s, mm3, mm0) | ||
109 | MUL4_SYM_R2R(mm2, mm3, mm5) | ||
110 | |||
111 | MOV_RA2R(mm3, mm1) | ||
112 | psubw_r2r(mm1, mm4); | ||
113 | |||
114 | MOV_P2R(*d, mm1, mm0) | ||
115 | MUL4_256_R2R(mm4, mm1) | ||
116 | |||
117 | paddw_r2r(mm3, mm1); | ||
118 | MOV_R2P(mm1, *d, mm0) | ||
119 | } | ||
120 | |||
121 | #define _op_blend_pt_pas_c_dp_mmx _op_blend_pt_p_c_dp_mmx | ||
122 | #define _op_blend_pt_pan_c_dp_mmx _op_blend_pt_p_c_dp_mmx | ||
123 | #define _op_blend_pt_p_can_dp_mmx _op_blend_pt_p_c_dp_mmx | ||
124 | #define _op_blend_pt_pas_can_dp_mmx _op_blend_pt_p_c_dp_mmx | ||
125 | #define _op_blend_pt_pan_can_dp_mmx _op_blend_pt_p_c_dp_mmx | ||
126 | #define _op_blend_pt_p_caa_dp_mmx _op_blend_pt_p_c_dp_mmx | ||
127 | #define _op_blend_pt_pas_caa_dp_mmx _op_blend_pt_p_c_dp_mmx | ||
128 | #define _op_blend_pt_pan_caa_dp_mmx _op_blend_pt_p_c_dp_mmx | ||
129 | |||
130 | #define _op_blend_pt_p_c_dpan_mmx _op_blend_pt_p_c_dp_mmx | ||
131 | #define _op_blend_pt_pas_c_dpan_mmx _op_blend_pt_p_c_dp_mmx | ||
132 | #define _op_blend_pt_pan_c_dpan_mmx _op_blend_pt_p_c_dp_mmx | ||
133 | #define _op_blend_pt_p_can_dpan_mmx _op_blend_pt_p_c_dp_mmx | ||
134 | #define _op_blend_pt_pas_can_dpan_mmx _op_blend_pt_p_c_dp_mmx | ||
135 | #define _op_blend_pt_pan_can_dpan_mmx _op_blend_pt_p_c_dp_mmx | ||
136 | #define _op_blend_pt_p_caa_dpan_mmx _op_blend_pt_p_c_dp_mmx | ||
137 | #define _op_blend_pt_pas_caa_dpan_mmx _op_blend_pt_p_c_dp_mmx | ||
138 | #define _op_blend_pt_pan_caa_dpan_mmx _op_blend_pt_p_c_dp_mmx | ||
139 | |||
140 | static void | ||
141 | init_blend_pixel_color_pt_funcs_mmx(void) | ||
142 | { | ||
143 | op_blend_pt_funcs[SP][SM_N][SC][DP][CPU_MMX] = _op_blend_pt_p_c_dp_mmx; | ||
144 | op_blend_pt_funcs[SP_AS][SM_N][SC][DP][CPU_MMX] = _op_blend_pt_pas_c_dp_mmx; | ||
145 | op_blend_pt_funcs[SP_AN][SM_N][SC][DP][CPU_MMX] = _op_blend_pt_pan_c_dp_mmx; | ||
146 | op_blend_pt_funcs[SP][SM_N][SC_AN][DP][CPU_MMX] = _op_blend_pt_p_can_dp_mmx; | ||
147 | op_blend_pt_funcs[SP_AS][SM_N][SC_AN][DP][CPU_MMX] = _op_blend_pt_pas_can_dp_mmx; | ||
148 | op_blend_pt_funcs[SP_AN][SM_N][SC_AN][DP][CPU_MMX] = _op_blend_pt_pan_can_dp_mmx; | ||
149 | op_blend_pt_funcs[SP][SM_N][SC_AA][DP][CPU_MMX] = _op_blend_pt_p_caa_dp_mmx; | ||
150 | op_blend_pt_funcs[SP_AS][SM_N][SC_AA][DP][CPU_MMX] = _op_blend_pt_pas_caa_dp_mmx; | ||
151 | op_blend_pt_funcs[SP_AN][SM_N][SC_AA][DP][CPU_MMX] = _op_blend_pt_pan_caa_dp_mmx; | ||
152 | |||
153 | op_blend_pt_funcs[SP][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_pt_p_c_dpan_mmx; | ||
154 | op_blend_pt_funcs[SP_AS][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_pt_pas_c_dpan_mmx; | ||
155 | op_blend_pt_funcs[SP_AN][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_pt_pan_c_dpan_mmx; | ||
156 | op_blend_pt_funcs[SP][SM_N][SC_AN][DP_AN][CPU_MMX] = _op_blend_pt_p_can_dpan_mmx; | ||
157 | op_blend_pt_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_MMX] = _op_blend_pt_pas_can_dpan_mmx; | ||
158 | op_blend_pt_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_MMX] = _op_blend_pt_pan_can_dpan_mmx; | ||
159 | op_blend_pt_funcs[SP][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_pt_p_caa_dpan_mmx; | ||
160 | op_blend_pt_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_pt_pas_caa_dpan_mmx; | ||
161 | op_blend_pt_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_pt_pan_caa_dpan_mmx; | ||
162 | } | ||
163 | #endif | ||
164 | |||
165 | /*-----*/ | ||
166 | |||
167 | /* blend_rel pixel x color -> dst */ | ||
168 | |||
169 | #ifdef BUILD_MMX | ||
170 | |||
171 | #define _op_blend_rel_p_c_dpan_mmx _op_blend_p_c_dpan_mmx | ||
172 | #define _op_blend_rel_pas_c_dpan_mmx _op_blend_pas_c_dpan_mmx | ||
173 | #define _op_blend_rel_pan_c_dpan_mmx _op_blend_pan_c_dpan_mmx | ||
174 | #define _op_blend_rel_p_can_dpan_mmx _op_blend_p_can_dpan_mmx | ||
175 | #define _op_blend_rel_pas_can_dpan_mmx _op_blend_pas_can_dpan_mmx | ||
176 | #define _op_blend_rel_pan_can_dpan_mmx _op_blend_pan_can_dpan_mmx | ||
177 | #define _op_blend_rel_p_caa_dpan_mmx _op_blend_p_caa_dpan_mmx | ||
178 | #define _op_blend_rel_pas_caa_dpan_mmx _op_blend_pas_caa_dpan_mmx | ||
179 | #define _op_blend_rel_pan_caa_dpan_mmx _op_blend_pan_caa_dpan_mmx | ||
180 | |||
181 | static void | ||
182 | init_blend_rel_pixel_color_span_funcs_mmx(void) | ||
183 | { | ||
184 | op_blend_rel_span_funcs[SP][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_rel_p_c_dpan_mmx; | ||
185 | op_blend_rel_span_funcs[SP_AS][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_rel_pas_c_dpan_mmx; | ||
186 | op_blend_rel_span_funcs[SP_AN][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_rel_pan_c_dpan_mmx; | ||
187 | op_blend_rel_span_funcs[SP][SM_N][SC_AN][DP_AN][CPU_MMX] = _op_blend_rel_p_can_dpan_mmx; | ||
188 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_MMX] = _op_blend_rel_pas_can_dpan_mmx; | ||
189 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_MMX] = _op_blend_rel_pan_can_dpan_mmx; | ||
190 | op_blend_rel_span_funcs[SP][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_rel_p_caa_dpan_mmx; | ||
191 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_rel_pas_caa_dpan_mmx; | ||
192 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_rel_pan_caa_dpan_mmx; | ||
193 | } | ||
194 | #endif | ||
195 | |||
196 | #ifdef BUILD_MMX | ||
197 | |||
198 | #define _op_blend_rel_pt_p_c_dpan_mmx _op_blend_pt_p_c_dpan_mmx | ||
199 | #define _op_blend_rel_pt_pas_c_dpan_mmx _op_blend_pt_pas_c_dpan_mmx | ||
200 | #define _op_blend_rel_pt_pan_c_dpan_mmx _op_blend_pt_pan_c_dpan_mmx | ||
201 | #define _op_blend_rel_pt_p_can_dpan_mmx _op_blend_pt_p_can_dpan_mmx | ||
202 | #define _op_blend_rel_pt_pas_can_dpan_mmx _op_blend_pt_pas_can_dpan_mmx | ||
203 | #define _op_blend_rel_pt_pan_can_dpan_mmx _op_blend_pt_pan_can_dpan_mmx | ||
204 | #define _op_blend_rel_pt_p_caa_dpan_mmx _op_blend_pt_p_caa_dpan_mmx | ||
205 | #define _op_blend_rel_pt_pas_caa_dpan_mmx _op_blend_pt_pas_caa_dpan_mmx | ||
206 | #define _op_blend_rel_pt_pan_caa_dpan_mmx _op_blend_pt_pan_caa_dpan_mmx | ||
207 | |||
208 | static void | ||
209 | init_blend_rel_pixel_color_pt_funcs_mmx(void) | ||
210 | { | ||
211 | op_blend_rel_pt_funcs[SP][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_rel_pt_p_c_dpan_mmx; | ||
212 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_rel_pt_pas_c_dpan_mmx; | ||
213 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC][DP_AN][CPU_MMX] = _op_blend_rel_pt_pan_c_dpan_mmx; | ||
214 | op_blend_rel_pt_funcs[SP][SM_N][SC_AN][DP_AN][CPU_MMX] = _op_blend_rel_pt_p_can_dpan_mmx; | ||
215 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_MMX] = _op_blend_rel_pt_pas_can_dpan_mmx; | ||
216 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_MMX] = _op_blend_rel_pt_pan_can_dpan_mmx; | ||
217 | op_blend_rel_pt_funcs[SP][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_rel_pt_p_caa_dpan_mmx; | ||
218 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_rel_pt_pas_caa_dpan_mmx; | ||
219 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_MMX] = _op_blend_rel_pt_pan_caa_dpan_mmx; | ||
220 | } | ||
221 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_neon.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_neon.c new file mode 100644 index 0000000..6e35970 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_neon.c | |||
@@ -0,0 +1,570 @@ | |||
1 | /* blend pixel x color --> dst */ | ||
2 | #ifdef BUILD_NEON | ||
3 | /* Note: Optimisation is based on keeping _dest_ aligned: else it's a pair of | ||
4 | * reads, then two writes, a miss on read is 'just' two reads */ | ||
5 | static void | ||
6 | _op_blend_p_c_dp_neon(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
7 | #define AP "blend_p_c_dp_" | ||
8 | asm volatile ( | ||
9 | ".fpu neon \n\t" | ||
10 | // Load 'c' | ||
11 | "vdup.u32 q7, %[c] \n\t" | ||
12 | "vmov.i8 q6, #1 \n\t" | ||
13 | |||
14 | // Choose a loop | ||
15 | "andS %[tmp], %[d], $0xf \n\t" | ||
16 | "beq "AP"quadstart \n\t" | ||
17 | |||
18 | "andS %[tmp],%[d], $0x4 \n\t" | ||
19 | "beq "AP"dualloop \n\t" | ||
20 | |||
21 | AP"singleloop:" | ||
22 | "vld1.32 d0[0], [%[s]]! \n\t" | ||
23 | "vld1.32 d2[0], [%[d]] \n\t" | ||
24 | // Mulitply s * c (= sc) | ||
25 | "vmull.u8 q4, d0,d14 \n\t" | ||
26 | // sc in d8 | ||
27 | "vqrshrn.u16 d4, q4, #8 \n\t" | ||
28 | |||
29 | // sca in d9 | ||
30 | "vmvn.u32 d6, d4 \n\t" | ||
31 | "vshr.u32 d6, d6, #24 \n\t" | ||
32 | |||
33 | "vmul.u32 d6, d12, d6 \n\t" | ||
34 | |||
35 | /* d * alpha */ | ||
36 | "vmull.u8 q4, d6, d2 \n\t" | ||
37 | "vqrshrn.u16 d0, q4, #8 \n\t" | ||
38 | |||
39 | "vqadd.u8 d2, d0, d4 \n\t" | ||
40 | |||
41 | // Save dsc + sc | ||
42 | "vst1.32 d2[0], [%[d]]! \n\t" | ||
43 | |||
44 | // Now where? | ||
45 | // Can we go the fast path? | ||
46 | "andS %[tmp], %[d],$0xf \n\t" | ||
47 | "beq "AP"quadstart \n\t" | ||
48 | |||
49 | AP"dualloop: \n\t" | ||
50 | // Check we have enough to bother with! | ||
51 | "sub %[tmp], %[e], %[d] \n\t" | ||
52 | "cmp %[tmp], #16 \n\t" | ||
53 | "blt "AP"loopout \n\t" | ||
54 | |||
55 | // load 's' -> q0, 'd' -> q1 | ||
56 | "vldm %[s]!, {d0} \n\t" | ||
57 | "vldm %[d], {d2} \n\t" | ||
58 | // Mulitply s * c (= sc) | ||
59 | "vmull.u8 q4, d0,d14 \n\t" | ||
60 | // sc in d8 | ||
61 | "vqrshrn.u16 d4, q4, #8 \n\t" | ||
62 | |||
63 | // sca in d9 | ||
64 | "vmvn.u32 d6, d4 \n\t" | ||
65 | "vshr.u32 d6, d6, #24 \n\t" | ||
66 | |||
67 | "vmul.u32 d6, d12, d6 \n\t" | ||
68 | |||
69 | /* d * alpha */ | ||
70 | "vmull.u8 q4, d6, d2 \n\t" | ||
71 | "vqrshrn.u16 d0, q4, #8 \n\t" | ||
72 | |||
73 | "vqadd.u8 d2, d0, d4 \n\t" | ||
74 | |||
75 | // Save dsc + sc | ||
76 | "vst1.32 d2, [%[d]]! \n\t" | ||
77 | |||
78 | AP"quadstart: \n\t" | ||
79 | "sub %[tmp], %[e], %[d] \n\t" | ||
80 | "cmp %[tmp], #16 \n\t" | ||
81 | "blt "AP"loopout \n\t" | ||
82 | |||
83 | "sub %[tmp], %[e], #15 \n\t" | ||
84 | |||
85 | AP"quadloop:\n\t" | ||
86 | // load 's' -> q0, 'd' -> q1 | ||
87 | "vldm %[s]!, {d0,d1} \n\t" | ||
88 | "vldm %[d], {d2,d3} \n\t" | ||
89 | // Mulitply s * c (= sc) | ||
90 | "vmull.u8 q4, d0,d14 \n\t" | ||
91 | "vmull.u8 q5, d1,d14 \n\t" | ||
92 | |||
93 | // Get sc & sc alpha | ||
94 | "vqrshrn.u16 d4, q4, #8 \n\t" | ||
95 | "vqrshrn.u16 d5, q5, #8 \n\t" | ||
96 | // sc is now in q2, 8bpp | ||
97 | // Shift out, then spread alpha for q2 | ||
98 | "vmvn.u32 q3, q2 \n\t" | ||
99 | "vshr.u32 q3, q3, $0x18 \n\t" | ||
100 | "vmul.u32 q3, q6,q3 \n\t" | ||
101 | |||
102 | // Multiply 'd' by sc.alpha (dsca) | ||
103 | "vmull.u8 q4, d6,d2 \n\t" | ||
104 | "vmull.u8 q5, d7,d3 \n\t" | ||
105 | |||
106 | "vqrshrn.u16 d0, q4, #8 \n\t" | ||
107 | "vqrshrn.u16 d1, q5, #8 \n\t" | ||
108 | |||
109 | "vqadd.u8 q1, q0, q2 \n\t" | ||
110 | |||
111 | // Save dsc + sc | ||
112 | "vstm %[d]!, {d2,d3} \n\t" | ||
113 | |||
114 | "cmp %[tmp], %[d] \n\t" | ||
115 | |||
116 | "bhi "AP"quadloop \n\t" | ||
117 | |||
118 | /* Trailing stuff */ | ||
119 | AP"loopout: \n\t" | ||
120 | |||
121 | "cmp %[d], %[e] \n\t" | ||
122 | "beq "AP"done\n\t" | ||
123 | "sub %[tmp],%[e], %[d] \n\t" | ||
124 | "cmp %[tmp],$0x04 \n\t" | ||
125 | "beq "AP"singleloop2 \n\t" | ||
126 | |||
127 | "sub %[tmp], %[e], #7 \n\t" | ||
128 | /* Dual loop */ | ||
129 | AP"dualloop2: \n\t" | ||
130 | "vldm %[s]!, {d0} \n\t" | ||
131 | "vldm %[d], {d2} \n\t" | ||
132 | // Mulitply s * c (= sc) | ||
133 | "vmull.u8 q4, d0,d14 \n\t" | ||
134 | // sc in d8 | ||
135 | "vqrshrn.u16 d4, q4, #8 \n\t" | ||
136 | |||
137 | // sca in d9 | ||
138 | // XXX: I can probably squash one of these 3 | ||
139 | "vmvn.u32 d6, d4 \n\t" | ||
140 | "vshr.u32 d6, d6, #24 \n\t" | ||
141 | "vmul.u32 d6, d6, d12 \n\t" | ||
142 | |||
143 | /* d * alpha */ | ||
144 | "vmull.u8 q4, d6, d2 \n\t" | ||
145 | "vqrshrn.u16 d0, q4, #8 \n\t" | ||
146 | |||
147 | "vqadd.u8 d2, d0, d4 \n\t" | ||
148 | |||
149 | // Save dsc + sc | ||
150 | "vstm %[d]!, {d2} \n\t" | ||
151 | |||
152 | "cmp %[tmp], %[d] \n\t" | ||
153 | "bhi "AP"dualloop2 \n\t" | ||
154 | |||
155 | "cmp %[d], %[e] \n\t" | ||
156 | "beq "AP"done \n\t" | ||
157 | |||
158 | AP"singleloop2: \n\t" | ||
159 | "vld1.32 d0[0], [%[s]]! \n\t" | ||
160 | "vld1.32 d2[0], [%[d]] \n\t" | ||
161 | // Mulitply s * c (= sc) | ||
162 | "vmull.u8 q4, d0,d14 \n\t" | ||
163 | // sc in d8 | ||
164 | "vqrshrn.u16 d4, q4, #8 \n\t" | ||
165 | |||
166 | // sca in d6 | ||
167 | "vmvn.u32 d6, d4 \n\t" | ||
168 | "vshr.u32 d6, d6, #24 \n\t" | ||
169 | "vmul.u32 d6, d12,d6 \n\t" | ||
170 | |||
171 | /* d * alpha */ | ||
172 | "vmull.u8 q4, d6, d2 \n\t" | ||
173 | "vqrshrn.u16 d0, q4, #8 \n\t" | ||
174 | |||
175 | "vqadd.u8 d2, d0, d4 \n\t" | ||
176 | |||
177 | // Save dsc + sc | ||
178 | "vst1.32 d2[0], [%[d]]! \n\t" | ||
179 | |||
180 | |||
181 | AP"done:" | ||
182 | : // No output | ||
183 | // | ||
184 | : [s] "r" (s), [e] "r" (d + l), [d] "r" (d), [c] "r" (c), | ||
185 | [tmp] "r" (12) | ||
186 | : "q0","q1","q2","q3","q4","q5","q6","q7","memory" | ||
187 | ); | ||
188 | #undef AP | ||
189 | } | ||
190 | |||
191 | static void | ||
192 | _op_blend_pan_can_dp_neon(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
193 | DATA32 *e; | ||
194 | UNROLL8_PLD_WHILE(d, l, e, | ||
195 | { | ||
196 | *d++ = 0xff000000 + MUL3_SYM(c, *s); | ||
197 | s++; | ||
198 | }); | ||
199 | } | ||
200 | |||
201 | static void | ||
202 | _op_blend_pan_caa_dp_neon(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
203 | #if 1 | ||
204 | DATA32 *e; | ||
205 | int alpha; | ||
206 | c = 1 + (c & 0xff); | ||
207 | UNROLL8_PLD_WHILE(d, l, e, | ||
208 | { | ||
209 | DATA32 sc = MUL_256(c, *s); | ||
210 | alpha = 256 - (sc >> 24); | ||
211 | *d = sc + MUL_256(alpha, *d); | ||
212 | d++; | ||
213 | s++; | ||
214 | }); | ||
215 | #else // the below neon is buggy!! misses rendering of spans, i think with alignment. quick - just disable this. | ||
216 | #define AP "_op_blend_pan_caa_dp_" | ||
217 | DATA32 *e = d + l, *tmp = (void*)73; | ||
218 | asm volatile ( | ||
219 | ".fpu neon \n\t" | ||
220 | /* Set up 'c' */ | ||
221 | "vdup.u8 d14, %[c] \n\t" | ||
222 | "vmov.i8 d15, #1 \n\t" | ||
223 | "vaddl.u8 q15, d14, d15 \n\t" | ||
224 | "vshr.u8 q15,#1 \n\t" | ||
225 | |||
226 | // Pick a loop | ||
227 | "andS %[tmp], %[d], $0xf \n\t" | ||
228 | "beq "AP"quadstart \n\t" | ||
229 | |||
230 | "andS %[tmp], %[d], $0x4 \n\t" | ||
231 | "beq "AP"dualstart \n\t" | ||
232 | |||
233 | AP"singleloop: \n\t" | ||
234 | "vld1.32 d4[0], [%[d]] \n\t" | ||
235 | "vld1.32 d0[0], [%[s]]! \n\t" | ||
236 | |||
237 | // Long version of 'd' | ||
238 | "vmovl.u8 q8, d4 \n\t" | ||
239 | |||
240 | // Long version of 's' | ||
241 | "vmovl.u8 q6, d0 \n\t" | ||
242 | |||
243 | // d8 = s -d | ||
244 | "vsub.s16 d8, d12, d16 \n\t" | ||
245 | |||
246 | // Multiply | ||
247 | "vmul.s16 d8, d8, d30 \n\t" | ||
248 | |||
249 | // Shift down | ||
250 | "vshr.s16 d8, #7 \n\t" | ||
251 | |||
252 | // Add 'd' | ||
253 | "vqadd.s16 d8, d8, d16 \n\t" | ||
254 | |||
255 | // Shrink to save | ||
256 | "vqmovun.s16 d0, q4 \n\t" | ||
257 | "vst1.32 d0[0], [%[d]]! \n\t" | ||
258 | |||
259 | // Now where? | ||
260 | "andS %[tmp], %[d], $0xf \n\t" | ||
261 | "beq "AP"quadstart \n\t" | ||
262 | |||
263 | AP"dualstart: \n\t" | ||
264 | // Check we have enough | ||
265 | "sub %[tmp], %[e], %[d] \n\t" | ||
266 | "cmp %[tmp], #16 \n\t" | ||
267 | "blt "AP"loopout \n\t" | ||
268 | |||
269 | AP"dualloop:" | ||
270 | "vldm %[d], {d4} \n\t" | ||
271 | "vldm %[s]!, {d0} \n\t" | ||
272 | |||
273 | // Long version of d | ||
274 | "vmovl.u8 q8, d4 \n\t" | ||
275 | |||
276 | // Long version of s | ||
277 | "vmovl.u8 q6, d0 \n\t" | ||
278 | |||
279 | // q4/q5 = s-d | ||
280 | "vsub.s16 q4, q6, q8 \n\t" | ||
281 | |||
282 | // Multiply | ||
283 | "vmul.s16 q4, q4,q15 \n\t" | ||
284 | |||
285 | // Shift down | ||
286 | "vshr.s16 q4, #7 \n\t" | ||
287 | |||
288 | // Add d | ||
289 | "vqadd.s16 q4, q4, q8 \n\t" | ||
290 | |||
291 | // Shrink to save | ||
292 | "vqmovun.s16 d0, q4 \n\t" | ||
293 | |||
294 | "vstm %[d]!, {d0} \n\t" | ||
295 | AP"quadstart: \n\t" | ||
296 | "sub %[tmp], %[e], %[d] \n\t" | ||
297 | "cmp %[tmp], #16 \n\t" | ||
298 | "blt "AP"loopout \n\t" | ||
299 | |||
300 | "sub %[tmp], %[e], #15 \n\t" | ||
301 | |||
302 | AP"quadloop: \n\t" | ||
303 | // load 's' -> q0, 'd' -> q2 | ||
304 | "vldm %[d], {d4,d5} \n\t" | ||
305 | "vldm %[s]!, {d0,d1} \n\t" | ||
306 | |||
307 | // Long version of d | ||
308 | "vmovl.u8 q8, d4 \n\t" | ||
309 | "vmovl.u8 q9, d5 \n\t" | ||
310 | |||
311 | // Long version of s | ||
312 | "vmovl.u8 q6, d0 \n\t" | ||
313 | "vmovl.u8 q7, d1 \n\t" | ||
314 | |||
315 | // q4/q5 = s-d | ||
316 | "vsub.s16 q4, q6, q8 \n\t" | ||
317 | "vsub.s16 q5, q7, q9 \n\t" | ||
318 | |||
319 | // Multiply | ||
320 | "vmul.s16 q4, q4,q15 \n\t" | ||
321 | "vmul.s16 q5, q5,q15 \n\t" | ||
322 | |||
323 | // Shift down | ||
324 | "vshr.s16 q4, #7 \n\t" | ||
325 | "vshr.s16 q5, #7 \n\t" | ||
326 | |||
327 | // Add d | ||
328 | "vqadd.s16 q4, q4, q8 \n\t" | ||
329 | "vqadd.s16 q5, q5, q9 \n\t" | ||
330 | |||
331 | // Shrink to save | ||
332 | "vqmovun.s16 d0, q4 \n\t" | ||
333 | "vqmovun.s16 d1, q5 \n\t" | ||
334 | "vstm %[d]!, {d0,d1} \n\t" | ||
335 | "cmp %[tmp], %[d] \n\t" | ||
336 | |||
337 | "bhi "AP"quadloop\n\t" | ||
338 | |||
339 | |||
340 | "b "AP"done\n\t" | ||
341 | AP"loopout: \n\t" | ||
342 | "cmp %[d], %[e] \n\t" | ||
343 | "beq "AP"done\n\t" | ||
344 | "sub %[tmp],%[e], %[d] \n\t" | ||
345 | "cmp %[tmp],$0x04 \n\t" | ||
346 | "beq "AP"singleloop2 \n\t" | ||
347 | |||
348 | AP"dualloop2: \n\t" | ||
349 | "vldm %[d], {d4} \n\t" | ||
350 | "vldm %[s]!, {d0} \n\t" | ||
351 | |||
352 | // Long version of d | ||
353 | "vmovl.u8 q8, d4 \n\t" | ||
354 | |||
355 | // Long version of s | ||
356 | "vmovl.u8 q6, d0 \n\t" | ||
357 | |||
358 | // q4/q5 = s-d | ||
359 | "vsub.s16 q4, q6, q8 \n\t" | ||
360 | |||
361 | // Multiply | ||
362 | "vmul.s16 q4, q4,q15 \n\t" | ||
363 | |||
364 | // Shift down | ||
365 | "vshr.s16 q4, #7 \n\t" | ||
366 | |||
367 | // Add d | ||
368 | "vqadd.s16 q4, q4, q8 \n\t" | ||
369 | |||
370 | // Shrink to save | ||
371 | "vqmovun.s16 d0, q4 \n\t" | ||
372 | |||
373 | "vstm %[d]!, {d0} \n\t" | ||
374 | |||
375 | "cmp %[d], %[e] \n\t" | ||
376 | "beq "AP"done \n\t" | ||
377 | |||
378 | AP"singleloop2: \n\t" | ||
379 | "vld1.32 d4[0], [%[d]] \n\t" | ||
380 | "vld1.32 d0[0], [%[s]]! \n\t" | ||
381 | |||
382 | // Long version of 'd' | ||
383 | "vmovl.u8 q8, d4 \n\t" | ||
384 | |||
385 | // Long version of 's' | ||
386 | "vmovl.u8 q6, d0 \n\t" | ||
387 | |||
388 | // d8 = s -d | ||
389 | "vsub.s16 d8, d12, d16 \n\t" | ||
390 | |||
391 | // Multiply | ||
392 | "vmul.s16 d8, d8, d30 \n\t" | ||
393 | |||
394 | // Shift down | ||
395 | "vshr.s16 d8, #7 \n\t" | ||
396 | |||
397 | // Add 'd' | ||
398 | "vqadd.s16 d8, d8, d16 \n\t" | ||
399 | |||
400 | // Shrink to save | ||
401 | "vqmovun.s16 d0, q4 \n\t" | ||
402 | |||
403 | "vst1.32 d0[0], [%[d]] \n\t" | ||
404 | |||
405 | |||
406 | AP"done: \n\t" | ||
407 | |||
408 | // No output | ||
409 | : | ||
410 | // Input | ||
411 | : [s] "r" (s), [d] "r" (d), [e] "r" (e), [c] "r" (c), [tmp] "r" (tmp) | ||
412 | // Clobbered | ||
413 | : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "memory" | ||
414 | ); | ||
415 | #undef AP | ||
416 | #endif | ||
417 | } | ||
418 | |||
419 | #define _op_blend_pas_c_dp_neon _op_blend_p_c_dp_neon | ||
420 | #define _op_blend_pan_c_dp_neon _op_blend_p_c_dp_neon | ||
421 | #define _op_blend_p_can_dp_neon _op_blend_p_c_dp_neon | ||
422 | #define _op_blend_pas_can_dp_neon _op_blend_p_c_dp_neon | ||
423 | #define _op_blend_p_caa_dp_neon _op_blend_p_c_dp_neon | ||
424 | #define _op_blend_pas_caa_dp_neon _op_blend_p_c_dp_neon | ||
425 | |||
426 | #define _op_blend_p_c_dpan_neon _op_blend_p_c_dp_neon | ||
427 | #define _op_blend_pas_c_dpan_neon _op_blend_pas_c_dp_neon | ||
428 | #define _op_blend_pan_c_dpan_neon _op_blend_pan_c_dp_neon | ||
429 | #define _op_blend_p_can_dpan_neon _op_blend_p_can_dp_neon | ||
430 | #define _op_blend_pas_can_dpan_neon _op_blend_pas_can_dp_neon | ||
431 | #define _op_blend_pan_can_dpan_neon _op_blend_pan_can_dp_neon | ||
432 | #define _op_blend_p_caa_dpan_neon _op_blend_p_caa_dp_neon | ||
433 | #define _op_blend_pas_caa_dpan_neon _op_blend_pas_caa_dp_neon | ||
434 | #define _op_blend_pan_caa_dpan_neon _op_blend_pan_caa_dp_neon | ||
435 | |||
436 | |||
437 | static void | ||
438 | init_blend_pixel_color_span_funcs_neon(void) | ||
439 | { | ||
440 | op_blend_span_funcs[SP][SM_N][SC][DP][CPU_NEON] = _op_blend_p_c_dp_neon; | ||
441 | op_blend_span_funcs[SP_AS][SM_N][SC][DP][CPU_NEON] = _op_blend_pas_c_dp_neon; | ||
442 | op_blend_span_funcs[SP_AN][SM_N][SC][DP][CPU_NEON] = _op_blend_pan_c_dp_neon; | ||
443 | op_blend_span_funcs[SP][SM_N][SC_AN][DP][CPU_NEON] = _op_blend_p_can_dp_neon; | ||
444 | op_blend_span_funcs[SP_AS][SM_N][SC_AN][DP][CPU_NEON] = _op_blend_pas_can_dp_neon; | ||
445 | op_blend_span_funcs[SP_AN][SM_N][SC_AN][DP][CPU_NEON] = _op_blend_pan_can_dp_neon; | ||
446 | op_blend_span_funcs[SP][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_p_caa_dp_neon; | ||
447 | op_blend_span_funcs[SP_AS][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pas_caa_dp_neon; | ||
448 | op_blend_span_funcs[SP_AN][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pan_caa_dp_neon; | ||
449 | |||
450 | op_blend_span_funcs[SP][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_p_c_dpan_neon; | ||
451 | op_blend_span_funcs[SP_AS][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pas_c_dpan_neon; | ||
452 | op_blend_span_funcs[SP_AN][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pan_c_dpan_neon; | ||
453 | op_blend_span_funcs[SP][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_p_can_dpan_neon; | ||
454 | op_blend_span_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_pas_can_dpan_neon; | ||
455 | op_blend_span_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_pan_can_dpan_neon; | ||
456 | op_blend_span_funcs[SP][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_p_caa_dpan_neon; | ||
457 | op_blend_span_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pas_caa_dpan_neon; | ||
458 | op_blend_span_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pan_caa_dpan_neon; | ||
459 | } | ||
460 | #endif | ||
461 | |||
462 | #ifdef BUILD_NEON | ||
463 | static void | ||
464 | _op_blend_pt_p_c_dp_neon(DATA32 s, DATA8 m __UNUSED__, DATA32 c, DATA32 *d) { | ||
465 | s = MUL4_SYM(c, s); | ||
466 | c = 256 - (s >> 24); | ||
467 | *d = s + MUL_256(c, *d); | ||
468 | } | ||
469 | |||
/* All other NEON "pixel x color" point names, for both the dp and dpan
 * groups, alias the single routine _op_blend_pt_p_c_dp_neon. */
#define _op_blend_pt_pas_c_dp_neon       _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pan_c_dp_neon       _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_p_can_dp_neon       _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pas_can_dp_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pan_can_dp_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_p_caa_dp_neon       _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pas_caa_dp_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pan_caa_dp_neon     _op_blend_pt_p_c_dp_neon

#define _op_blend_pt_p_c_dpan_neon       _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pas_c_dpan_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pan_c_dpan_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_p_can_dpan_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pas_can_dpan_neon   _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pan_can_dpan_neon   _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_p_caa_dpan_neon     _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pas_caa_dpan_neon   _op_blend_pt_p_c_dp_neon
#define _op_blend_pt_pan_caa_dpan_neon   _op_blend_pt_p_c_dp_neon
488 | |||
489 | static void | ||
490 | init_blend_pixel_color_pt_funcs_neon(void) | ||
491 | { | ||
492 | op_blend_pt_funcs[SP][SM_N][SC][DP][CPU_NEON] = _op_blend_pt_p_c_dp_neon; | ||
493 | op_blend_pt_funcs[SP_AS][SM_N][SC][DP][CPU_NEON] = _op_blend_pt_pas_c_dp_neon; | ||
494 | op_blend_pt_funcs[SP_AN][SM_N][SC][DP][CPU_NEON] = _op_blend_pt_pan_c_dp_neon; | ||
495 | op_blend_pt_funcs[SP][SM_N][SC_AN][DP][CPU_NEON] = _op_blend_pt_p_can_dp_neon; | ||
496 | op_blend_pt_funcs[SP_AS][SM_N][SC_AN][DP][CPU_NEON] = _op_blend_pt_pas_can_dp_neon; | ||
497 | op_blend_pt_funcs[SP_AN][SM_N][SC_AN][DP][CPU_NEON] = _op_blend_pt_pan_can_dp_neon; | ||
498 | op_blend_pt_funcs[SP][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pt_p_caa_dp_neon; | ||
499 | op_blend_pt_funcs[SP_AS][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pt_pas_caa_dp_neon; | ||
500 | op_blend_pt_funcs[SP_AN][SM_N][SC_AA][DP][CPU_NEON] = _op_blend_pt_pan_caa_dp_neon; | ||
501 | |||
502 | op_blend_pt_funcs[SP][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pt_p_c_dpan_neon; | ||
503 | op_blend_pt_funcs[SP_AS][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pt_pas_c_dpan_neon; | ||
504 | op_blend_pt_funcs[SP_AN][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_pt_pan_c_dpan_neon; | ||
505 | op_blend_pt_funcs[SP][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_pt_p_can_dpan_neon; | ||
506 | op_blend_pt_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_pt_pas_can_dpan_neon; | ||
507 | op_blend_pt_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_pt_pan_can_dpan_neon; | ||
508 | op_blend_pt_funcs[SP][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pt_p_caa_dpan_neon; | ||
509 | op_blend_pt_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pt_pas_caa_dpan_neon; | ||
510 | op_blend_pt_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_pt_pan_caa_dpan_neon; | ||
511 | } | ||
512 | #endif | ||
513 | |||
514 | /*-----*/ | ||
515 | |||
516 | /* blend_rel pixel x color -> dst */ | ||
517 | |||
518 | #ifdef BUILD_NEON | ||
519 | |||
/* The blend_rel span names for the dpan destination are aliases of the
 * ordinary blend dpan span names. */
#define _op_blend_rel_p_c_dpan_neon       _op_blend_p_c_dpan_neon
#define _op_blend_rel_pas_c_dpan_neon     _op_blend_pas_c_dpan_neon
#define _op_blend_rel_pan_c_dpan_neon     _op_blend_pan_c_dpan_neon
#define _op_blend_rel_p_can_dpan_neon     _op_blend_p_can_dpan_neon
#define _op_blend_rel_pas_can_dpan_neon   _op_blend_pas_can_dpan_neon
#define _op_blend_rel_pan_can_dpan_neon   _op_blend_pan_can_dpan_neon
#define _op_blend_rel_p_caa_dpan_neon     _op_blend_p_caa_dpan_neon
#define _op_blend_rel_pas_caa_dpan_neon   _op_blend_pas_caa_dpan_neon
#define _op_blend_rel_pan_caa_dpan_neon   _op_blend_pan_caa_dpan_neon
529 | |||
530 | static void | ||
531 | init_blend_rel_pixel_color_span_funcs_neon(void) | ||
532 | { | ||
533 | op_blend_rel_span_funcs[SP][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_p_c_dpan_neon; | ||
534 | op_blend_rel_span_funcs[SP_AS][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pas_c_dpan_neon; | ||
535 | op_blend_rel_span_funcs[SP_AN][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pan_c_dpan_neon; | ||
536 | op_blend_rel_span_funcs[SP][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_p_can_dpan_neon; | ||
537 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_pas_can_dpan_neon; | ||
538 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_pan_can_dpan_neon; | ||
539 | op_blend_rel_span_funcs[SP][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_p_caa_dpan_neon; | ||
540 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pas_caa_dpan_neon; | ||
541 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pan_caa_dpan_neon; | ||
542 | } | ||
543 | #endif | ||
544 | |||
545 | #ifdef BUILD_NEON | ||
546 | |||
/* The blend_rel point names for the dpan destination are aliases of the
 * ordinary blend dpan point names. */
#define _op_blend_rel_pt_p_c_dpan_neon       _op_blend_pt_p_c_dpan_neon
#define _op_blend_rel_pt_pas_c_dpan_neon     _op_blend_pt_pas_c_dpan_neon
#define _op_blend_rel_pt_pan_c_dpan_neon     _op_blend_pt_pan_c_dpan_neon
#define _op_blend_rel_pt_p_can_dpan_neon     _op_blend_pt_p_can_dpan_neon
#define _op_blend_rel_pt_pas_can_dpan_neon   _op_blend_pt_pas_can_dpan_neon
#define _op_blend_rel_pt_pan_can_dpan_neon   _op_blend_pt_pan_can_dpan_neon
#define _op_blend_rel_pt_p_caa_dpan_neon     _op_blend_pt_p_caa_dpan_neon
#define _op_blend_rel_pt_pas_caa_dpan_neon   _op_blend_pt_pas_caa_dpan_neon
#define _op_blend_rel_pt_pan_caa_dpan_neon   _op_blend_pt_pan_caa_dpan_neon
556 | |||
557 | static void | ||
558 | init_blend_rel_pixel_color_pt_funcs_neon(void) | ||
559 | { | ||
560 | op_blend_rel_pt_funcs[SP][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pt_p_c_dpan_neon; | ||
561 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pt_pas_c_dpan_neon; | ||
562 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC][DP_AN][CPU_NEON] = _op_blend_rel_pt_pan_c_dpan_neon; | ||
563 | op_blend_rel_pt_funcs[SP][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_pt_p_can_dpan_neon; | ||
564 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_pt_pas_can_dpan_neon; | ||
565 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_NEON] = _op_blend_rel_pt_pan_can_dpan_neon; | ||
566 | op_blend_rel_pt_funcs[SP][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pt_p_caa_dpan_neon; | ||
567 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pt_pas_caa_dpan_neon; | ||
568 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_NEON] = _op_blend_rel_pt_pan_caa_dpan_neon; | ||
569 | } | ||
570 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_sse3.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_sse3.c new file mode 100644 index 0000000..4ee31f5 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_color_sse3.c | |||
@@ -0,0 +1,543 @@ | |||
1 | /* blend pixel x color --> dst */ | ||
2 | |||
3 | #ifdef BUILD_SSE3 | ||
4 | |||
5 | static void | ||
6 | _op_blend_p_c_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
7 | |||
8 | DATA32 alpha; | ||
9 | |||
10 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
11 | |||
12 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
13 | { /* UOP */ | ||
14 | |||
15 | DATA32 sc = MUL4_SYM(c, *s); | ||
16 | alpha = 256 - (sc >> 24); | ||
17 | *d = sc + MUL_256(alpha, *d); | ||
18 | d++; s++; l--; | ||
19 | }, | ||
20 | { /* A4OP */ | ||
21 | |||
22 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
23 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
24 | |||
25 | __m128i sc0 = mul4_sym_sse3(c_packed, s0); | ||
26 | __m128i a0 = sub4_alpha_sse3(sc0); | ||
27 | __m128i mul0 = mul_256_sse3(a0, d0); | ||
28 | |||
29 | d0 = _mm_add_epi32(sc0, mul0); | ||
30 | |||
31 | _mm_store_si128((__m128i *)d, d0); | ||
32 | |||
33 | d += 4; s += 4; l -= 4; | ||
34 | }, | ||
35 | { /* A8OP */ | ||
36 | |||
37 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
38 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
39 | |||
40 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
41 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
42 | |||
43 | __m128i sc0 = mul4_sym_sse3(c_packed, s0); | ||
44 | __m128i sc1 = mul4_sym_sse3(c_packed, s1); | ||
45 | |||
46 | __m128i a0 = sub4_alpha_sse3(sc0); | ||
47 | __m128i a1 = sub4_alpha_sse3(sc1); | ||
48 | |||
49 | __m128i mul0 = mul_256_sse3(a0, d0); | ||
50 | __m128i mul1 = mul_256_sse3(a1, d1); | ||
51 | |||
52 | d0 = _mm_add_epi32(sc0, mul0); | ||
53 | d1 = _mm_add_epi32(sc1, mul1); | ||
54 | |||
55 | _mm_store_si128((__m128i *)d, d0); | ||
56 | _mm_store_si128((__m128i *)(d+4), d1); | ||
57 | |||
58 | d += 8; s += 8; l -= 8; | ||
59 | }) | ||
60 | } | ||
61 | |||
62 | static void | ||
63 | _op_blend_pan_c_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
64 | |||
65 | DATA32 c_a = c & 0xFF000000; | ||
66 | DATA32 alpha = 256 - (c >> 24); | ||
67 | |||
68 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
69 | const __m128i c_alpha = _mm_set_epi32(c_a, c_a, c_a, c_a); | ||
70 | const __m128i a0 = _mm_set_epi32(alpha, alpha, alpha, alpha); | ||
71 | |||
72 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
73 | { /* UOP */ | ||
74 | |||
75 | *d = ((c & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(alpha, *d); | ||
76 | d++; s++; l--; | ||
77 | }, | ||
78 | { /* A4OP */ | ||
79 | |||
80 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
81 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
82 | |||
83 | __m128i r0 = _mm_add_epi32(mul3_sym_sse3(c_packed, s0), | ||
84 | mul_256_sse3(a0, d0)); | ||
85 | |||
86 | r0 = _mm_add_epi32(r0, c_alpha); | ||
87 | |||
88 | _mm_store_si128((__m128i *)d, r0); | ||
89 | |||
90 | d += 4; s += 4; l -= 4; | ||
91 | }, | ||
92 | { /* A8OP */ | ||
93 | |||
94 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
95 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
96 | |||
97 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
98 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
99 | |||
100 | __m128i r0 = _mm_add_epi32(mul3_sym_sse3(c_packed, s0), | ||
101 | mul_256_sse3(a0, d0)); | ||
102 | |||
103 | __m128i r1 = _mm_add_epi32(mul3_sym_sse3(c_packed, s1), | ||
104 | mul_256_sse3(a0, d1)); | ||
105 | |||
106 | r0 = _mm_add_epi32(r0, c_alpha); | ||
107 | r1 = _mm_add_epi32(r1, c_alpha); | ||
108 | |||
109 | _mm_store_si128((__m128i *)d, r0); | ||
110 | _mm_store_si128((__m128i *)(d+4), r1); | ||
111 | |||
112 | d += 8; s += 8; l -= 8; | ||
113 | }) | ||
114 | } | ||
115 | |||
116 | static void | ||
117 | _op_blend_p_can_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
118 | |||
119 | int alpha; | ||
120 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
121 | |||
122 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
123 | { /* UOP */ | ||
124 | |||
125 | alpha = 256 - (*s >> 24); | ||
126 | *d = ((*s & 0xff000000) + MUL3_SYM(c, *s)) + MUL_256(alpha, *d); | ||
127 | d++; s++; l--; | ||
128 | }, | ||
129 | { /* A4OP */ | ||
130 | |||
131 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
132 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
133 | |||
134 | __m128i a0 = sub4_alpha_sse3(s0); | ||
135 | |||
136 | __m128i r0 = _mm_add_epi32(mul3_sym_sse3(c_packed, s0), | ||
137 | mul_256_sse3(a0, d0)); | ||
138 | |||
139 | r0 = _mm_add_epi32(r0, _mm_and_si128(s0, A_MASK_SSE3)); | ||
140 | |||
141 | _mm_store_si128((__m128i *)d, r0); | ||
142 | |||
143 | d += 4; s += 4; l -= 4; | ||
144 | }, | ||
145 | { | ||
146 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
147 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
148 | |||
149 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
150 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
151 | |||
152 | __m128i a0 = sub4_alpha_sse3(s0); | ||
153 | __m128i a1 = sub4_alpha_sse3(s1); | ||
154 | |||
155 | __m128i r0 = _mm_add_epi32(mul3_sym_sse3(c_packed, s0), | ||
156 | mul_256_sse3(a0, d0)); | ||
157 | |||
158 | __m128i r1 = _mm_add_epi32(mul3_sym_sse3(c_packed, s1), | ||
159 | mul_256_sse3(a1, d1)); | ||
160 | |||
161 | r0 = _mm_add_epi32(r0, _mm_and_si128(s0, A_MASK_SSE3)); | ||
162 | r1 = _mm_add_epi32(r1, _mm_and_si128(s1, A_MASK_SSE3)); | ||
163 | |||
164 | _mm_store_si128((__m128i *)d, r0); | ||
165 | _mm_store_si128((__m128i *)(d+4), r1); | ||
166 | |||
167 | d += 8; s += 8; l -= 8; | ||
168 | }) | ||
169 | } | ||
170 | |||
171 | static void | ||
172 | _op_blend_pan_can_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
173 | |||
174 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
175 | |||
176 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
177 | { /* UOP */ | ||
178 | |||
179 | *d++ = 0xff000000 + MUL3_SYM(c, *s); | ||
180 | s++; l--; | ||
181 | }, | ||
182 | { /* A4OP */ | ||
183 | |||
184 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
185 | |||
186 | __m128i r0 = mul3_sym_sse3(c_packed, s0); | ||
187 | r0 = _mm_add_epi32(r0, A_MASK_SSE3); | ||
188 | |||
189 | _mm_store_si128((__m128i *)d, r0); | ||
190 | |||
191 | d += 4; s += 4; l -= 4; | ||
192 | }, | ||
193 | { /* A8OP */ | ||
194 | |||
195 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
196 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
197 | |||
198 | __m128i r0 = mul3_sym_sse3(c_packed, s0); | ||
199 | __m128i r1 = mul3_sym_sse3(c_packed, s1); | ||
200 | |||
201 | r0 = _mm_add_epi32(r0, A_MASK_SSE3); | ||
202 | r1 = _mm_add_epi32(r1, A_MASK_SSE3); | ||
203 | |||
204 | _mm_store_si128((__m128i *)d, r0); | ||
205 | _mm_store_si128((__m128i *)(d+4), r1); | ||
206 | |||
207 | d += 8; s += 8; l -= 8; | ||
208 | }) | ||
209 | } | ||
210 | |||
211 | static void | ||
212 | _op_blend_p_caa_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
213 | |||
214 | int alpha; | ||
215 | c = 1 + (c & 0xff); | ||
216 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
217 | |||
218 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
219 | { /* UOP */ | ||
220 | |||
221 | DATA32 sc = MUL_256(c, *s); | ||
222 | alpha = 256 - (sc >> 24); | ||
223 | *d = sc + MUL_256(alpha, *d); | ||
224 | d++; | ||
225 | s++; | ||
226 | l--; | ||
227 | }, | ||
228 | { /* A4OP */ | ||
229 | |||
230 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
231 | __m128i d0 = _mm_load_si128 ((__m128i *)d); | ||
232 | |||
233 | __m128i sc0 = mul_256_sse3(c_packed, s0); | ||
234 | __m128i a0 = sub4_alpha_sse3(sc0); | ||
235 | |||
236 | __m128i r0 = _mm_add_epi32(mul_256_sse3(a0, d0), sc0); | ||
237 | |||
238 | _mm_store_si128((__m128i *)d, r0); | ||
239 | |||
240 | d += 4; s += 4; l -= 4; | ||
241 | }, | ||
242 | { | ||
243 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
244 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
245 | |||
246 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
247 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
248 | |||
249 | __m128i sc0 = mul_256_sse3(c_packed, s0); | ||
250 | __m128i sc1 = mul_256_sse3(c_packed, s1); | ||
251 | |||
252 | __m128i a0 = sub4_alpha_sse3(sc0); | ||
253 | __m128i a1 = sub4_alpha_sse3(sc1); | ||
254 | |||
255 | __m128i r0 = _mm_add_epi32(mul_256_sse3(a0, d0), sc0); | ||
256 | __m128i r1 = _mm_add_epi32(mul_256_sse3(a1, d1), sc1); | ||
257 | |||
258 | _mm_store_si128((__m128i *)d, r0); | ||
259 | _mm_store_si128((__m128i *)(d+4), r1); | ||
260 | |||
261 | d += 8; s += 8; l -= 8; | ||
262 | }) | ||
263 | } | ||
264 | |||
265 | static void | ||
266 | _op_blend_pan_caa_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
267 | |||
268 | c = 1 + (c & 0xff); | ||
269 | const __m128i c_packed = _mm_set_epi32(c, c, c,c); | ||
270 | |||
271 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
272 | { /* UOP */ | ||
273 | |||
274 | *d = INTERP_256(c, *s, *d); | ||
275 | d++; s++; l--; | ||
276 | }, | ||
277 | { /* A4OP */ | ||
278 | |||
279 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
280 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
281 | |||
282 | __m128i r0 = interp4_256_sse3(c_packed, s0, d0); | ||
283 | |||
284 | _mm_store_si128((__m128i *)d, r0); | ||
285 | |||
286 | d += 4; s += 4; l -= 4; | ||
287 | }, | ||
288 | { | ||
289 | |||
290 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
291 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
292 | |||
293 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
294 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
295 | |||
296 | __m128i r0 = interp4_256_sse3(c_packed, s0, d0); | ||
297 | __m128i r1 = interp4_256_sse3(c_packed, s1, d1); | ||
298 | |||
299 | _mm_store_si128((__m128i *)d, r0); | ||
300 | _mm_store_si128((__m128i *)(d+4), r1); | ||
301 | |||
302 | d += 8; s += 8; l -= 8; | ||
303 | }) | ||
304 | } | ||
305 | |||
/* The "pas" source names reuse the corresponding "p" routines. */
#define _op_blend_pas_c_dp_sse3       _op_blend_p_c_dp_sse3
#define _op_blend_pas_can_dp_sse3     _op_blend_p_can_dp_sse3
#define _op_blend_pas_caa_dp_sse3     _op_blend_p_caa_dp_sse3

/* Every *_dpan_sse3 span name is an alias of the matching *_dp_sse3 name. */
#define _op_blend_p_c_dpan_sse3       _op_blend_p_c_dp_sse3
#define _op_blend_pas_c_dpan_sse3     _op_blend_pas_c_dp_sse3
#define _op_blend_pan_c_dpan_sse3     _op_blend_pan_c_dp_sse3
#define _op_blend_p_can_dpan_sse3     _op_blend_p_can_dp_sse3
#define _op_blend_pas_can_dpan_sse3   _op_blend_pas_can_dp_sse3
#define _op_blend_pan_can_dpan_sse3   _op_blend_pan_can_dp_sse3
#define _op_blend_p_caa_dpan_sse3     _op_blend_p_caa_dp_sse3
#define _op_blend_pas_caa_dpan_sse3   _op_blend_pas_caa_dp_sse3
#define _op_blend_pan_caa_dpan_sse3   _op_blend_pan_caa_dp_sse3
319 | |||
320 | static void | ||
321 | init_blend_pixel_color_span_funcs_sse3(void) | ||
322 | { | ||
323 | op_blend_span_funcs[SP][SM_N][SC][DP][CPU_SSE3] = _op_blend_p_c_dp_sse3; | ||
324 | op_blend_span_funcs[SP_AS][SM_N][SC][DP][CPU_SSE3] = _op_blend_pas_c_dp_sse3; | ||
325 | op_blend_span_funcs[SP_AN][SM_N][SC][DP][CPU_SSE3] = _op_blend_pan_c_dp_sse3; | ||
326 | op_blend_span_funcs[SP][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_p_can_dp_sse3; | ||
327 | op_blend_span_funcs[SP_AS][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_pas_can_dp_sse3; | ||
328 | op_blend_span_funcs[SP_AN][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_pan_can_dp_sse3; | ||
329 | op_blend_span_funcs[SP][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_p_caa_dp_sse3; | ||
330 | op_blend_span_funcs[SP_AS][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_pas_caa_dp_sse3; | ||
331 | op_blend_span_funcs[SP_AN][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_pan_caa_dp_sse3; | ||
332 | |||
333 | op_blend_span_funcs[SP][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_p_c_dpan_sse3; | ||
334 | op_blend_span_funcs[SP_AS][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_pas_c_dpan_sse3; | ||
335 | op_blend_span_funcs[SP_AN][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_pan_c_dpan_sse3; | ||
336 | op_blend_span_funcs[SP][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_p_can_dpan_sse3; | ||
337 | op_blend_span_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pas_can_dpan_sse3; | ||
338 | op_blend_span_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pan_can_dpan_sse3; | ||
339 | op_blend_span_funcs[SP][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_p_caa_dpan_sse3; | ||
340 | op_blend_span_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pas_caa_dpan_sse3; | ||
341 | op_blend_span_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pan_caa_dpan_sse3; | ||
342 | } | ||
343 | |||
/* No SSE3 single-pixel routine is provided for "pixel x color": the base
 * name is NULL and every other dp/dpan point name below resolves to it.
 * (How the dispatcher treats a NULL slot is not visible here.) */
#define _op_blend_pt_p_c_dp_sse3         NULL

#define _op_blend_pt_pas_c_dp_sse3       _op_blend_pt_p_c_dp_sse3
#define _op_blend_pt_pan_c_dp_sse3       _op_blend_pt_p_c_dp_sse3
#define _op_blend_pt_p_can_dp_sse3       _op_blend_pt_p_c_dp_sse3
#define _op_blend_pt_pas_can_dp_sse3     _op_blend_pt_p_c_dp_sse3
#define _op_blend_pt_pan_can_dp_sse3     _op_blend_pt_p_c_dp_sse3
#define _op_blend_pt_p_caa_dp_sse3       _op_blend_pt_p_c_dp_sse3
#define _op_blend_pt_pas_caa_dp_sse3     _op_blend_pt_p_c_dp_sse3
#define _op_blend_pt_pan_caa_dp_sse3     _op_blend_pt_p_c_dp_sse3

#define _op_blend_pt_p_c_dpan_sse3       _op_blend_pt_p_c_dp_sse3
#define _op_blend_pt_pas_c_dpan_sse3     _op_blend_pt_pas_c_dp_sse3
#define _op_blend_pt_pan_c_dpan_sse3     _op_blend_pt_pan_c_dp_sse3
#define _op_blend_pt_p_can_dpan_sse3     _op_blend_pt_p_can_dp_sse3
#define _op_blend_pt_pas_can_dpan_sse3   _op_blend_pt_pas_can_dp_sse3
#define _op_blend_pt_pan_can_dpan_sse3   _op_blend_pt_pan_can_dp_sse3
#define _op_blend_pt_p_caa_dpan_sse3     _op_blend_pt_p_caa_dp_sse3
#define _op_blend_pt_pas_caa_dpan_sse3   _op_blend_pt_pas_caa_dp_sse3
#define _op_blend_pt_pan_caa_dpan_sse3   _op_blend_pt_pan_caa_dp_sse3
364 | |||
365 | static void | ||
366 | init_blend_pixel_color_pt_funcs_sse3(void) | ||
367 | { | ||
368 | op_blend_pt_funcs[SP][SM_N][SC][DP][CPU_SSE3] = _op_blend_pt_p_c_dp_sse3; | ||
369 | op_blend_pt_funcs[SP_AS][SM_N][SC][DP][CPU_SSE3] = _op_blend_pt_pas_c_dp_sse3; | ||
370 | op_blend_pt_funcs[SP_AN][SM_N][SC][DP][CPU_SSE3] = _op_blend_pt_pan_c_dp_sse3; | ||
371 | op_blend_pt_funcs[SP][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_pt_p_can_dp_sse3; | ||
372 | op_blend_pt_funcs[SP_AS][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_pt_pas_can_dp_sse3; | ||
373 | op_blend_pt_funcs[SP_AN][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_pt_pan_can_dp_sse3; | ||
374 | op_blend_pt_funcs[SP][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_pt_p_caa_dp_sse3; | ||
375 | op_blend_pt_funcs[SP_AS][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_pt_pas_caa_dp_sse3; | ||
376 | op_blend_pt_funcs[SP_AN][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_pt_pan_caa_dp_sse3; | ||
377 | |||
378 | op_blend_pt_funcs[SP][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_pt_p_c_dpan_sse3; | ||
379 | op_blend_pt_funcs[SP_AS][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_pt_pas_c_dpan_sse3; | ||
380 | op_blend_pt_funcs[SP_AN][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_pt_pan_c_dpan_sse3; | ||
381 | op_blend_pt_funcs[SP][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pt_p_can_dpan_sse3; | ||
382 | op_blend_pt_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pt_pas_can_dpan_sse3; | ||
383 | op_blend_pt_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_pt_pan_can_dpan_sse3; | ||
384 | op_blend_pt_funcs[SP][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pt_p_caa_dpan_sse3; | ||
385 | op_blend_pt_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pt_pas_caa_dpan_sse3; | ||
386 | op_blend_pt_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_pt_pan_caa_dpan_sse3; | ||
387 | } | ||
388 | |||
389 | /*-----*/ | ||
390 | |||
391 | /* blend_rel pixel x color -> dst */ | ||
392 | |||
393 | static void | ||
394 | _op_blend_rel_p_c_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
395 | |||
396 | int alpha; | ||
397 | |||
398 | const __m128i c_packed = _mm_set_epi32(c, c, c, c); | ||
399 | |||
400 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
401 | { /* UOP */ | ||
402 | |||
403 | DATA32 sc = MUL4_SYM(c, *s); | ||
404 | alpha = 256 - (sc >> 24); | ||
405 | *d = MUL_SYM(*d >> 24, sc) + MUL_256(alpha, *d); | ||
406 | d++; s++; l--; | ||
407 | }, | ||
408 | { /* A4OP */ | ||
409 | |||
410 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
411 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
412 | |||
413 | __m128i sc0 = mul4_sym_sse3(c_packed, s0); | ||
414 | __m128i a0 = sub4_alpha_sse3(sc0); | ||
415 | |||
416 | __m128i l0 = mul_sym_sse3(_mm_srli_epi32(d0, 24), sc0); | ||
417 | __m128i r0 = mul_256_sse3(a0, d0); | ||
418 | |||
419 | r0 = _mm_add_epi32(l0, r0); | ||
420 | |||
421 | _mm_store_si128((__m128i *)d, r0); | ||
422 | |||
423 | d += 4; s += 4; l -= 4; | ||
424 | }, | ||
425 | { /* A8OP */ | ||
426 | |||
427 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
428 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
429 | |||
430 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
431 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
432 | |||
433 | __m128i sc0 = mul4_sym_sse3(c_packed, s0); | ||
434 | __m128i sc1 = mul4_sym_sse3(c_packed, s1); | ||
435 | |||
436 | __m128i a0 = sub4_alpha_sse3(sc0); | ||
437 | __m128i a1 = sub4_alpha_sse3(sc1); | ||
438 | |||
439 | __m128i l0 = mul_sym_sse3(_mm_srli_epi32(d0, 24), sc0); | ||
440 | __m128i r0 = mul_256_sse3(a0, d0); | ||
441 | |||
442 | __m128i l1 = mul_sym_sse3(_mm_srli_epi32(d1, 24), sc1); | ||
443 | __m128i r1 = mul_256_sse3(a1, d1); | ||
444 | |||
445 | r0 = _mm_add_epi32(l0, r0); | ||
446 | r1 = _mm_add_epi32(l1, r1); | ||
447 | |||
448 | _mm_store_si128((__m128i *)d, r0); | ||
449 | _mm_store_si128((__m128i *)(d+4), r1); | ||
450 | |||
451 | d += 8; s += 8; l -= 8; | ||
452 | }) | ||
453 | } | ||
454 | |||
/* For the dp destination, every blend_rel span name aliases the single
 * routine _op_blend_rel_p_c_dp_sse3. */
#define _op_blend_rel_pas_c_dp_sse3       _op_blend_rel_p_c_dp_sse3
#define _op_blend_rel_pan_c_dp_sse3       _op_blend_rel_p_c_dp_sse3
#define _op_blend_rel_p_can_dp_sse3       _op_blend_rel_p_c_dp_sse3
#define _op_blend_rel_pas_can_dp_sse3     _op_blend_rel_p_c_dp_sse3
#define _op_blend_rel_pan_can_dp_sse3     _op_blend_rel_p_c_dp_sse3
#define _op_blend_rel_p_caa_dp_sse3       _op_blend_rel_p_c_dp_sse3
#define _op_blend_rel_pas_caa_dp_sse3     _op_blend_rel_p_c_dp_sse3
#define _op_blend_rel_pan_caa_dp_sse3     _op_blend_rel_p_c_dp_sse3

/* For the dpan destination, blend_rel names alias the ordinary blend
 * dpan span names. */
#define _op_blend_rel_p_c_dpan_sse3       _op_blend_p_c_dpan_sse3
#define _op_blend_rel_pas_c_dpan_sse3     _op_blend_pas_c_dpan_sse3
#define _op_blend_rel_pan_c_dpan_sse3     _op_blend_pan_c_dpan_sse3
#define _op_blend_rel_p_can_dpan_sse3     _op_blend_p_can_dpan_sse3
#define _op_blend_rel_pas_can_dpan_sse3   _op_blend_pas_can_dpan_sse3
#define _op_blend_rel_pan_can_dpan_sse3   _op_blend_pan_can_dpan_sse3
#define _op_blend_rel_p_caa_dpan_sse3     _op_blend_p_caa_dpan_sse3
#define _op_blend_rel_pas_caa_dpan_sse3   _op_blend_pas_caa_dpan_sse3
#define _op_blend_rel_pan_caa_dpan_sse3   _op_blend_pan_caa_dpan_sse3
473 | |||
474 | static void | ||
475 | init_blend_rel_pixel_color_span_funcs_sse3(void) | ||
476 | { | ||
477 | op_blend_rel_span_funcs[SP][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_p_c_dp_sse3; | ||
478 | op_blend_rel_span_funcs[SP_AS][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_pas_c_dp_sse3; | ||
479 | op_blend_rel_span_funcs[SP_AN][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_pan_c_dp_sse3; | ||
480 | op_blend_rel_span_funcs[SP][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_rel_p_can_dp_sse3; | ||
481 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pas_can_dp_sse3; | ||
482 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pan_can_dp_sse3; | ||
483 | op_blend_rel_span_funcs[SP][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_p_caa_dp_sse3; | ||
484 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pas_caa_dp_sse3; | ||
485 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pan_caa_dp_sse3; | ||
486 | |||
487 | op_blend_rel_span_funcs[SP][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_p_c_dpan_sse3; | ||
488 | op_blend_rel_span_funcs[SP_AS][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pas_c_dpan_sse3; | ||
489 | op_blend_rel_span_funcs[SP_AN][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pan_c_dpan_sse3; | ||
490 | op_blend_rel_span_funcs[SP][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_p_can_dpan_sse3; | ||
491 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pas_can_dpan_sse3; | ||
492 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pan_can_dpan_sse3; | ||
493 | op_blend_rel_span_funcs[SP][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_p_caa_dpan_sse3; | ||
494 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pas_caa_dpan_sse3; | ||
495 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pan_caa_dpan_sse3; | ||
496 | } | ||
497 | |||
/* No SSE3 single-pixel blend_rel routine is provided: the dp base name is
 * NULL and the remaining dp names alias it. The dpan names alias the
 * ordinary blend dpan point names. */
#define _op_blend_rel_pt_p_c_dp_sse3         NULL

#define _op_blend_rel_pt_pas_c_dp_sse3       _op_blend_rel_pt_p_c_dp_sse3
#define _op_blend_rel_pt_pan_c_dp_sse3       _op_blend_rel_pt_p_c_dp_sse3
#define _op_blend_rel_pt_p_can_dp_sse3       _op_blend_rel_pt_p_c_dp_sse3
#define _op_blend_rel_pt_pas_can_dp_sse3     _op_blend_rel_pt_p_c_dp_sse3
#define _op_blend_rel_pt_pan_can_dp_sse3     _op_blend_rel_pt_p_c_dp_sse3
#define _op_blend_rel_pt_p_caa_dp_sse3       _op_blend_rel_pt_p_c_dp_sse3
#define _op_blend_rel_pt_pas_caa_dp_sse3     _op_blend_rel_pt_p_c_dp_sse3
#define _op_blend_rel_pt_pan_caa_dp_sse3     _op_blend_rel_pt_p_c_dp_sse3

#define _op_blend_rel_pt_p_c_dpan_sse3       _op_blend_pt_p_c_dpan_sse3
#define _op_blend_rel_pt_pas_c_dpan_sse3     _op_blend_pt_pas_c_dpan_sse3
#define _op_blend_rel_pt_pan_c_dpan_sse3     _op_blend_pt_pan_c_dpan_sse3
#define _op_blend_rel_pt_p_can_dpan_sse3     _op_blend_pt_p_can_dpan_sse3
#define _op_blend_rel_pt_pas_can_dpan_sse3   _op_blend_pt_pas_can_dpan_sse3
#define _op_blend_rel_pt_pan_can_dpan_sse3   _op_blend_pt_pan_can_dpan_sse3
#define _op_blend_rel_pt_p_caa_dpan_sse3     _op_blend_pt_p_caa_dpan_sse3
#define _op_blend_rel_pt_pas_caa_dpan_sse3   _op_blend_pt_pas_caa_dpan_sse3
#define _op_blend_rel_pt_pan_caa_dpan_sse3   _op_blend_pt_pan_caa_dpan_sse3
518 | |||
519 | static void | ||
520 | init_blend_rel_pixel_color_pt_funcs_sse3(void) | ||
521 | { | ||
522 | op_blend_rel_pt_funcs[SP][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_pt_p_c_dp_sse3; | ||
523 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_pt_pas_c_dp_sse3; | ||
524 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC][DP][CPU_SSE3] = _op_blend_rel_pt_pan_c_dp_sse3; | ||
525 | op_blend_rel_pt_funcs[SP][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pt_p_can_dp_sse3; | ||
526 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pt_pas_can_dp_sse3; | ||
527 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AN][DP][CPU_SSE3] = _op_blend_rel_pt_pan_can_dp_sse3; | ||
528 | op_blend_rel_pt_funcs[SP][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pt_p_caa_dp_sse3; | ||
529 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pt_pas_caa_dp_sse3; | ||
530 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AA][DP][CPU_SSE3] = _op_blend_rel_pt_pan_caa_dp_sse3; | ||
531 | |||
532 | op_blend_rel_pt_funcs[SP][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pt_p_c_dpan_sse3; | ||
533 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pas_c_dpan_sse3; | ||
534 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pan_c_dpan_sse3; | ||
535 | op_blend_rel_pt_funcs[SP][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pt_p_can_dpan_sse3; | ||
536 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pas_can_dpan_sse3; | ||
537 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AN][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pan_can_dpan_sse3; | ||
538 | op_blend_rel_pt_funcs[SP][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pt_p_caa_dpan_sse3; | ||
539 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pas_caa_dpan_sse3; | ||
540 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_AA][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pan_caa_dpan_sse3; | ||
541 | } | ||
542 | |||
543 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_i386.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_i386.c new file mode 100644 index 0000000..9b5abe6 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_i386.c | |||
@@ -0,0 +1,217 @@ | |||
1 | /* blend pixel --> dst */ | ||
2 | |||
3 | #ifdef BUILD_MMX | ||
4 | /* MMX span routine: blends l source pixels from s onto d in place; the mask m and colour c arguments are unused. Register notes below follow the macro names; the macros themselves are defined outside this view. */ static void | ||
5 | _op_blend_p_dp_mmx(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
6 | DATA32 *e = d + l; /* one past the last destination pixel */ | ||
7 | pxor_r2r(mm0, mm0); /* mm0 xor mm0, presumably zero; handed to the MOV_P2R / MOV_R2P macros below */ | ||
8 | MOV_A2R(ALPHA_256, mm6) /* loop-invariant ALPHA_256 value kept in mm6 */ | ||
9 | while (d < e) | ||
10 | { | ||
11 | MOV_P2R(*s, mm2, mm0) /* mm2: source pixel */ | ||
12 | MOV_RA2R(mm2, mm1) /* mm1: presumably the alpha of mm2 - confirm against the macro */ | ||
13 | movq_r2r(mm6, mm3); | ||
14 | psubw_r2r(mm1, mm3); /* mm3 = ALPHA_256 value - mm1 */ | ||
15 | |||
16 | MOV_P2R(*d, mm1, mm0) /* mm1: destination pixel */ | ||
17 | MUL4_256_R2R(mm3, mm1) /* presumably scales the destination channels by mm3 / 256 - confirm */ | ||
18 | |||
19 | paddw_r2r(mm2, mm1); /* add the source pixel on top */ | ||
20 | MOV_R2P(mm1, *d, mm0) /* store the result to *d */ | ||
21 | s++; d++; | ||
22 | } | ||
23 | } | ||
24 | |||
25 | /* MMX span routine registered for the SP_AS source variant. As written it only forwards to _op_blend_p_dp_mmx. */ static void | ||
26 | _op_blend_pas_dp_mmx(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
27 | _op_blend_p_dp_mmx(s, m, c, d, l); /* all work is delegated to the general routine */ | ||
28 | return; /* NOTE(review): unconditional return - every statement below is unreachable */ | ||
29 | DATA32 *e = d + l; | ||
30 | pxor_r2r(mm0, mm0); | ||
31 | MOV_A2R(ALPHA_256, mm6) | ||
32 | while (d < e) | ||
33 | { | ||
34 | switch (*s & 0xff000000) /* (dead code) dispatch on the top byte of the source pixel */ | ||
35 | { | ||
36 | case 0: /* top byte 0: destination left untouched */ | ||
37 | break; | ||
38 | case 0xff000000: /* top byte 0xff: source copied over the destination */ | ||
39 | *d = *s; | ||
40 | break; | ||
41 | default : /* anything else: same macro sequence as _op_blend_p_dp_mmx */ | ||
42 | MOV_P2R(*s, mm2, mm0) | ||
43 | MOV_RA2R(mm2, mm1) | ||
44 | movq_r2r(mm6, mm3); | ||
45 | psubw_r2r(mm1, mm3); | ||
46 | |||
47 | MOV_P2R(*d, mm1, mm0) | ||
48 | MUL4_256_R2R(mm3, mm1) | ||
49 | |||
50 | paddw_r2r(mm2, mm1); | ||
51 | MOV_R2P(mm1, *d, mm0) | ||
52 | break; | ||
53 | } | ||
54 | s++; d++; | ||
55 | } | ||
56 | } | ||
57 | |||
58 | #define _op_blend_pan_dp_mmx NULL /* no MMX routine for the SP_AN source variant; the table slot is set to NULL */ | ||
59 | |||
60 | #define _op_blend_p_dpan_mmx _op_blend_p_dp_mmx /* the DP_AN entries reuse the DP routines */ | ||
61 | #define _op_blend_pas_dpan_mmx _op_blend_pas_dp_mmx | ||
62 | #define _op_blend_pan_dpan_mmx _op_blend_pan_dp_mmx /* expands to NULL through the alias above */ | ||
63 | |||
64 | /* Fills the CPU_MMX slots of op_blend_span_funcs for the SM_N / SC_N case, one entry per source variant (SP, SP_AS, SP_AN) and destination variant (DP, DP_AN). */ static void | ||
65 | init_blend_pixel_span_funcs_mmx(void) | ||
66 | { | ||
67 | op_blend_span_funcs[SP][SM_N][SC_N][DP][CPU_MMX] = _op_blend_p_dp_mmx; | ||
68 | op_blend_span_funcs[SP_AS][SM_N][SC_N][DP][CPU_MMX] = _op_blend_pas_dp_mmx; | ||
69 | op_blend_span_funcs[SP_AN][SM_N][SC_N][DP][CPU_MMX] = _op_blend_pan_dp_mmx; /* NULL */ | ||
70 | |||
71 | op_blend_span_funcs[SP][SM_N][SC_N][DP_AN][CPU_MMX] = _op_blend_p_dpan_mmx; | ||
72 | op_blend_span_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_MMX] = _op_blend_pas_dpan_mmx; | ||
73 | op_blend_span_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_MMX] = _op_blend_pan_dpan_mmx; /* NULL */ | ||
74 | } | ||
75 | #endif | ||
76 | |||
77 | #ifdef BUILD_MMX | ||
78 | /* MMX single-pixel routine: blends the pixel value s onto *d using the same macro sequence as the loop body of _op_blend_p_dp_mmx. m and c are unused. Macro semantics are defined outside this view. */ static void | ||
79 | _op_blend_pt_p_dp_mmx(DATA32 s, DATA8 m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d) { | ||
80 | pxor_r2r(mm0, mm0); /* mm0 xor mm0, presumably zero; passed to MOV_P2R / MOV_R2P */ | ||
81 | MOV_A2R(ALPHA_256, mm6) | ||
82 | MOV_P2R(s, mm2, mm0) /* mm2: source pixel */ | ||
83 | MOV_RA2R(mm2, mm1) /* mm1: presumably the alpha of mm2 - confirm against the macro */ | ||
84 | movq_r2r(mm6, mm3); | ||
85 | psubw_r2r(mm1, mm3); /* mm3 = ALPHA_256 value - mm1 */ | ||
86 | |||
87 | MOV_P2R(*d, mm1, mm0) /* mm1: destination pixel */ | ||
88 | MUL4_256_R2R(mm3, mm1) /* presumably scales the destination by mm3 / 256 - confirm */ | ||
89 | |||
90 | paddw_r2r(mm2, mm1); /* add the source pixel */ | ||
91 | MOV_R2P(mm1, *d, mm0) /* store the result to *d */ | ||
92 | } | ||
93 | |||
94 | |||
95 | #define _op_blend_pt_pan_dp_mmx NULL /* no MMX point routine for the SP_AN source variant */ | ||
96 | #define _op_blend_pt_pas_dp_mmx _op_blend_pt_p_dp_mmx /* SP_AS reuses the general point routine */ | ||
97 | |||
98 | #define _op_blend_pt_p_dpan_mmx _op_blend_pt_p_dp_mmx /* the DP_AN entries reuse the DP routines */ | ||
99 | #define _op_blend_pt_pan_dpan_mmx _op_blend_pt_pan_dp_mmx /* expands to NULL */ | ||
100 | #define _op_blend_pt_pas_dpan_mmx _op_blend_pt_pas_dp_mmx | ||
101 | |||
102 | /* Fills the CPU_MMX slots of op_blend_pt_funcs for the SM_N / SC_N case. */ static void | ||
103 | init_blend_pixel_pt_funcs_mmx(void) | ||
104 | { | ||
105 | op_blend_pt_funcs[SP][SM_N][SC_N][DP][CPU_MMX] = _op_blend_pt_p_dp_mmx; | ||
106 | op_blend_pt_funcs[SP_AS][SM_N][SC_N][DP][CPU_MMX] = _op_blend_pt_pas_dp_mmx; | ||
107 | op_blend_pt_funcs[SP_AN][SM_N][SC_N][DP][CPU_MMX] = _op_blend_pt_pan_dp_mmx; /* NULL */ | ||
108 | |||
109 | op_blend_pt_funcs[SP][SM_N][SC_N][DP_AN][CPU_MMX] = _op_blend_pt_p_dpan_mmx; | ||
110 | op_blend_pt_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_MMX] = _op_blend_pt_pas_dpan_mmx; | ||
111 | op_blend_pt_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_MMX] = _op_blend_pt_pan_dpan_mmx; /* NULL */ | ||
112 | } | ||
113 | #endif | ||
114 | |||
115 | /*-----*/ | ||
116 | |||
117 | /* blend_rel pixel -> dst */ | ||
118 | |||
119 | #ifdef BUILD_MMX | ||
120 | /* MMX span routine for blend_rel: like _op_blend_p_dp_mmx, but the source term is additionally multiplied by a value taken from the destination before being added. m and c are unused. Macro semantics are defined outside this view. */ static void | ||
121 | _op_blend_rel_p_dp_mmx(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
122 | DATA32 *e = d + l; /* one past the last destination pixel */ | ||
123 | pxor_r2r(mm0, mm0); /* mm0 xor mm0, presumably zero; passed to MOV_P2R / MOV_R2P */ | ||
124 | MOV_A2R(ALPHA_256, mm6) | ||
125 | MOV_A2R(ALPHA_255, mm5) /* mm5 is the third operand of MUL4_SYM_R2R below */ | ||
126 | while (d < e) | ||
127 | { | ||
128 | MOV_P2R(*s, mm2, mm0) /* mm2: source pixel */ | ||
129 | MOV_RA2R(mm2, mm1) /* mm1: presumably the source alpha - confirm */ | ||
130 | movq_r2r(mm6, mm3); | ||
131 | psubw_r2r(mm1, mm3); /* mm3 = ALPHA_256 value - mm1 */ | ||
132 | |||
133 | MOV_P2R(*d, mm1, mm0) /* mm1: destination pixel */ | ||
134 | MOV_RA2R(mm1, mm4) /* mm4: presumably the destination alpha, captured before mm1 is scaled */ | ||
135 | MUL4_256_R2R(mm3, mm1) /* presumably destination * mm3 / 256 - confirm */ | ||
136 | |||
137 | MUL4_SYM_R2R(mm4, mm2, mm5) /* presumably source * mm4, result left in mm2 - confirm */ | ||
138 | paddw_r2r(mm2, mm1); /* sum of the two terms */ | ||
139 | MOV_R2P(mm1, *d, mm0) /* store the result to *d */ | ||
140 | s++; d++; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | /* MMX span routine for blend_rel registered for the SP_AN source variant: each destination pixel is replaced by the product of the source pixel and a value loaded from the destination with MOV_PA2R. m and c are unused. Macro semantics are defined outside this view. */ static void | ||
145 | _op_blend_rel_pan_dp_mmx(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
146 | DATA32 *e = d + l; /* one past the last destination pixel */ | ||
147 | pxor_r2r(mm0, mm0); | ||
148 | MOV_A2R(ALPHA_256, mm6) /* NOTE(review): mm6 is not named again in this function; unused unless a macro reads it implicitly */ | ||
149 | MOV_A2R(ALPHA_255, mm5) /* third operand of MUL4_SYM_R2R below */ | ||
150 | while (d < e) | ||
151 | { | ||
152 | MOV_P2R(*s, mm2, mm0) /* mm2: source pixel */ | ||
153 | MOV_PA2R(*d, mm1) /* mm1: presumably the destination alpha - confirm against the macro */ | ||
154 | MUL4_SYM_R2R(mm2, mm1, mm5) /* presumably mm1 = mm1 * mm2 - confirm */ | ||
155 | MOV_R2P(mm1, *d, mm0) /* store the product to *d */ | ||
156 | s++; d++; | ||
157 | } | ||
158 | } | ||
159 | |||
160 | #define _op_blend_rel_pas_dp_mmx _op_blend_rel_p_dp_mmx /* SP_AS reuses the general blend_rel routine */ | ||
161 | |||
162 | #define _op_blend_rel_p_dpan_mmx _op_blend_p_dpan_mmx /* for DP_AN the blend_rel entries reuse the plain blend routines */ | ||
163 | #define _op_blend_rel_pan_dpan_mmx _op_blend_pan_dpan_mmx /* resolves to NULL through the plain-blend aliases */ | ||
164 | #define _op_blend_rel_pas_dpan_mmx _op_blend_pas_dpan_mmx | ||
165 | |||
166 | /* Fills the CPU_MMX slots of op_blend_rel_span_funcs for the SM_N / SC_N case. */ static void | ||
167 | init_blend_rel_pixel_span_funcs_mmx(void) | ||
168 | { | ||
169 | op_blend_rel_span_funcs[SP][SM_N][SC_N][DP][CPU_MMX] = _op_blend_rel_p_dp_mmx; | ||
170 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_N][DP][CPU_MMX] = _op_blend_rel_pas_dp_mmx; | ||
171 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_N][DP][CPU_MMX] = _op_blend_rel_pan_dp_mmx; | ||
172 | |||
173 | op_blend_rel_span_funcs[SP][SM_N][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_p_dpan_mmx; | ||
174 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_pas_dpan_mmx; | ||
175 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_pan_dpan_mmx; /* NULL */ | ||
176 | } | ||
177 | #endif | ||
178 | |||
179 | #ifdef BUILD_MMX | ||
180 | /* MMX single-pixel routine for blend_rel: same macro sequence as the loop body of _op_blend_rel_p_dp_mmx, applied to the pixel value s and *d. m and c are unused. Macro semantics are defined outside this view. */ static void | ||
181 | _op_blend_rel_pt_p_dp_mmx(DATA32 s, DATA8 m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d) { | ||
182 | pxor_r2r(mm0, mm0); | ||
183 | MOV_A2R(ALPHA_256, mm6) | ||
184 | MOV_A2R(ALPHA_255, mm5) /* third operand of MUL4_SYM_R2R below */ | ||
185 | |||
186 | MOV_P2R(s, mm2, mm0) /* mm2: source pixel */ | ||
187 | MOV_RA2R(mm2, mm1) /* mm1: presumably the source alpha - confirm */ | ||
188 | psubw_r2r(mm1, mm6); /* subtracts directly from mm6; no copy into mm3 because there is no loop to preserve it for */ | ||
189 | |||
190 | MOV_P2R(*d, mm1, mm0) /* mm1: destination pixel */ | ||
191 | MOV_RA2R(mm1, mm4) /* mm4: presumably the destination alpha, captured before mm1 is scaled */ | ||
192 | MUL4_256_R2R(mm6, mm1) /* presumably destination * mm6 / 256 - confirm */ | ||
193 | |||
194 | MUL4_SYM_R2R(mm4, mm2, mm5) /* presumably source * mm4, result left in mm2 - confirm */ | ||
195 | paddw_r2r(mm2, mm1); | ||
196 | MOV_R2P(mm1, *d, mm0) /* store the result to *d */ | ||
197 | } | ||
198 | |||
199 | #define _op_blend_rel_pt_pas_dp_mmx _op_blend_rel_pt_p_dp_mmx /* SP_AS and SP_AN both reuse the general blend_rel point routine */ | ||
200 | #define _op_blend_rel_pt_pan_dp_mmx _op_blend_rel_pt_p_dp_mmx | ||
201 | |||
202 | #define _op_blend_rel_pt_p_dpan_mmx _op_blend_pt_p_dpan_mmx /* for DP_AN the blend_rel entries reuse the plain blend point routines */ | ||
203 | #define _op_blend_rel_pt_pas_dpan_mmx _op_blend_pt_pas_dpan_mmx | ||
204 | #define _op_blend_rel_pt_pan_dpan_mmx _op_blend_pt_pan_dpan_mmx /* resolves to NULL through the plain-blend aliases */ | ||
205 | |||
206 | /* Fills the CPU_MMX slots of op_blend_rel_pt_funcs for the SM_N / SC_N case. */ static void | ||
207 | init_blend_rel_pixel_pt_funcs_mmx(void) | ||
208 | { | ||
209 | op_blend_rel_pt_funcs[SP][SM_N][SC_N][DP][CPU_MMX] = _op_blend_rel_pt_p_dp_mmx; | ||
210 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_N][DP][CPU_MMX] = _op_blend_rel_pt_pas_dp_mmx; | ||
211 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_N][DP][CPU_MMX] = _op_blend_rel_pt_pan_dp_mmx; | ||
212 | |||
213 | op_blend_rel_pt_funcs[SP][SM_N][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_pt_p_dpan_mmx; | ||
214 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_pt_pas_dpan_mmx; | ||
215 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_pt_pan_dpan_mmx; /* NULL */ | ||
216 | } | ||
217 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_.c new file mode 100644 index 0000000..a9d0f4b --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_.c | |||
@@ -0,0 +1,189 @@ | |||
1 | /* blend pixel x mask --> dst */ | ||
2 | |||
3 | #ifdef BUILD_C | ||
4 | /* C span routine: blends l source pixels from s onto d, each weighted by the corresponding mask byte in m. The incoming value of c is never read; c is reused as scratch. The iteration itself is supplied by the UNROLL8_PLD_WHILE macro (defined elsewhere), which receives d, l, e and the per-pixel body. */ static void | ||
5 | _op_blend_p_mas_dp(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
6 | DATA32 *e; | ||
7 | int alpha; | ||
8 | UNROLL8_PLD_WHILE(d, l, e, | ||
9 | { | ||
10 | alpha = *m; | ||
11 | switch(alpha) | ||
12 | { | ||
13 | case 0: /* mask 0: destination left untouched */ | ||
14 | break; | ||
15 | case 255: /* mask 255: blend the source as-is */ | ||
16 | alpha = 256 - (*s >> 24); /* 256 minus the top byte of the source pixel */ | ||
17 | *d = *s + MUL_256(alpha, *d); | ||
18 | break; | ||
19 | default: /* partial mask: scale the source by the mask first, then blend */ | ||
20 | c = MUL_SYM(alpha, *s); /* c: mask-scaled source (scratch use of the parameter) */ | ||
21 | alpha = 256 - (c >> 24); | ||
22 | *d = c + MUL_256(alpha, *d); | ||
23 | break; | ||
24 | } | ||
25 | m++; s++; d++; | ||
26 | }); | ||
27 | } | ||
28 | |||
29 | /* C span routine registered for the SP_AS source variant with a mask: per pixel, the mask byte alone decides between skip, copy and interpolation; the source's own top byte is not consulted. c is unused. Iteration is supplied by UNROLL8_PLD_WHILE (defined elsewhere). */ static void | ||
30 | _op_blend_pas_mas_dp(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
31 | DATA32 *e; | ||
32 | int alpha; | ||
33 | UNROLL8_PLD_WHILE(d, l, e, | ||
34 | { | ||
35 | alpha = *m; | ||
36 | switch(alpha) | ||
37 | { | ||
38 | case 0: /* mask 0: destination left untouched */ | ||
39 | break; | ||
40 | case 255: /* mask 255: source copied over the destination */ | ||
41 | *d = *s; | ||
42 | break; | ||
43 | default: | ||
44 | alpha++; /* weight in 2..255 passed to INTERP_256 */ | ||
45 | *d = INTERP_256(alpha, *s, *d); /* presumably interpolates between *d and *s by alpha / 256 - confirm against the macro */ | ||
46 | break; | ||
47 | } | ||
48 | m++; s++; d++; | ||
49 | }); | ||
50 | } | ||
51 | |||
52 | /* C span routine registered for the SP_AN source variant with a mask. The body is statement-for-statement the same as _op_blend_pas_mas_dp: the mask byte selects skip, copy or interpolation. c is unused. */ static void | ||
53 | _op_blend_pan_mas_dp(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
54 | DATA32 *e; | ||
55 | int alpha; | ||
56 | UNROLL8_PLD_WHILE(d, l, e, | ||
57 | { | ||
58 | alpha = *m; | ||
59 | switch(alpha) | ||
60 | { | ||
61 | case 0: /* mask 0: destination left untouched */ | ||
62 | break; | ||
63 | case 255: /* mask 255: source copied over the destination */ | ||
64 | *d = *s; | ||
65 | break; | ||
66 | default: | ||
67 | alpha++; /* weight in 2..255 passed to INTERP_256 */ | ||
68 | *d = INTERP_256(alpha, *s, *d); /* presumably interpolates between *d and *s by alpha / 256 - confirm */ | ||
69 | break; | ||
70 | } | ||
71 | m++; s++; d++; | ||
72 | }); | ||
73 | } | ||
74 | |||
75 | |||
76 | #define _op_blend_p_mas_dpan _op_blend_p_mas_dp /* the DP_AN entries reuse the DP routines */ | ||
77 | #define _op_blend_pas_mas_dpan _op_blend_pas_mas_dp | ||
78 | #define _op_blend_pan_mas_dpan _op_blend_pan_mas_dp | ||
79 | |||
80 | /* Fills the CPU_C slots of op_blend_span_funcs for the SM_AS / SC_N case (pixel source with a mask, no colour). */ static void | ||
81 | init_blend_pixel_mask_span_funcs_c(void) | ||
82 | { | ||
83 | op_blend_span_funcs[SP][SM_AS][SC_N][DP][CPU_C] = _op_blend_p_mas_dp; | ||
84 | op_blend_span_funcs[SP_AS][SM_AS][SC_N][DP][CPU_C] = _op_blend_pas_mas_dp; | ||
85 | op_blend_span_funcs[SP_AN][SM_AS][SC_N][DP][CPU_C] = _op_blend_pan_mas_dp; | ||
86 | |||
87 | op_blend_span_funcs[SP][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_p_mas_dpan; | ||
88 | op_blend_span_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_pas_mas_dpan; | ||
89 | op_blend_span_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_pan_mas_dpan; | ||
90 | } | ||
91 | #endif | ||
92 | |||
93 | #ifdef BUILD_C | ||
94 | /* C single-pixel routine: scales the pixel value s by the mask value m, then blends the result onto *d. The incoming value of c is never read; c is reused as scratch. */ static void | ||
95 | _op_blend_pt_p_mas_dp(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) { | ||
96 | s = MUL_SYM(m, s); /* mask-scaled source */ | ||
97 | c = 256 - (s >> 24); /* 256 minus the top byte of the scaled source */ | ||
98 | *d = s + MUL_256(c, *d); | ||
99 | } | ||
100 | |||
101 | /* C single-pixel routine registered for the SP_AN source variant: writes INTERP_256(m + 1, s, *d) to *d, so only the mask value weights the result. c is unused. */ static void | ||
102 | _op_blend_pt_pan_mas_dp(DATA32 s, DATA8 m, DATA32 c __UNUSED__, DATA32 *d) { | ||
103 | *d = INTERP_256(m + 1, s, *d); /* weight m + 1 lies in 1..256 */ | ||
104 | } | ||
105 | |||
106 | #define _op_blend_pt_pas_mas_dp _op_blend_pt_p_mas_dp /* SP_AS reuses the general point routine */ | ||
107 | |||
108 | #define _op_blend_pt_p_mas_dpan _op_blend_pt_p_mas_dp /* the DP_AN entries reuse the DP routines */ | ||
109 | #define _op_blend_pt_pas_mas_dpan _op_blend_pt_pas_mas_dp | ||
110 | #define _op_blend_pt_pan_mas_dpan _op_blend_pt_pan_mas_dp | ||
111 | |||
112 | /* Fills the CPU_C slots of op_blend_pt_funcs for the SM_AS / SC_N case. */ static void | ||
113 | init_blend_pixel_mask_pt_funcs_c(void) | ||
114 | { | ||
115 | op_blend_pt_funcs[SP][SM_AS][SC_N][DP][CPU_C] = _op_blend_pt_p_mas_dp; | ||
116 | op_blend_pt_funcs[SP_AS][SM_AS][SC_N][DP][CPU_C] = _op_blend_pt_pas_mas_dp; | ||
117 | op_blend_pt_funcs[SP_AN][SM_AS][SC_N][DP][CPU_C] = _op_blend_pt_pan_mas_dp; | ||
118 | |||
119 | op_blend_pt_funcs[SP][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_pt_p_mas_dpan; | ||
120 | op_blend_pt_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_pt_pas_mas_dpan; | ||
121 | op_blend_pt_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_pt_pan_mas_dpan; | ||
122 | } | ||
123 | #endif | ||
124 | |||
125 | /*-----*/ | ||
126 | |||
127 | /* blend_rel pixel x mask -> dst */ | ||
128 | |||
129 | #ifdef BUILD_C | ||
130 | /* C span routine for blend_rel with a mask: per pixel, the source is scaled by the mask byte, then scaled again by the top byte of the destination, and added to the attenuated destination. The incoming value of c is never read; c is reused as scratch. Iteration is supplied by UNROLL8_PLD_WHILE (defined elsewhere). */ static void | ||
131 | _op_blend_rel_p_mas_dp(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
132 | DATA32 *e; | ||
133 | int alpha; | ||
134 | UNROLL8_PLD_WHILE(d, l, e, | ||
135 | { | ||
136 | c = MUL_SYM(*m, *s); /* mask-scaled source */ | ||
137 | alpha = 256 - (c >> 24); /* 256 minus the top byte of the scaled source */ | ||
138 | *d = MUL_SYM(*d >> 24, c) + MUL_256(alpha, *d); /* both terms read the old *d before it is overwritten */ | ||
139 | d++; m++; s++; | ||
140 | }); | ||
141 | } | ||
142 | |||
143 | #define _op_blend_rel_pas_mas_dp _op_blend_rel_p_mas_dp /* SP_AS and SP_AN both reuse the general blend_rel routine */ | ||
144 | #define _op_blend_rel_pan_mas_dp _op_blend_rel_p_mas_dp | ||
145 | |||
146 | #define _op_blend_rel_p_mas_dpan _op_blend_p_mas_dpan /* for DP_AN the blend_rel entries reuse the plain blend routines */ | ||
147 | #define _op_blend_rel_pas_mas_dpan _op_blend_pas_mas_dpan | ||
148 | #define _op_blend_rel_pan_mas_dpan _op_blend_pan_mas_dpan | ||
149 | |||
150 | /* Fills the CPU_C slots of op_blend_rel_span_funcs for the SM_AS / SC_N case. */ static void | ||
151 | init_blend_rel_pixel_mask_span_funcs_c(void) | ||
152 | { | ||
153 | op_blend_rel_span_funcs[SP][SM_AS][SC_N][DP][CPU_C] = _op_blend_rel_p_mas_dp; | ||
154 | op_blend_rel_span_funcs[SP_AS][SM_AS][SC_N][DP][CPU_C] = _op_blend_rel_pas_mas_dp; | ||
155 | op_blend_rel_span_funcs[SP_AN][SM_AS][SC_N][DP][CPU_C] = _op_blend_rel_pan_mas_dp; | ||
156 | |||
157 | op_blend_rel_span_funcs[SP][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_rel_p_mas_dpan; | ||
158 | op_blend_rel_span_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_rel_pas_mas_dpan; | ||
159 | op_blend_rel_span_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_rel_pan_mas_dpan; | ||
160 | } | ||
161 | #endif | ||
162 | |||
163 | #ifdef BUILD_C | ||
164 | /* C single-pixel routine for blend_rel with a mask: same arithmetic as one iteration of _op_blend_rel_p_mas_dp, applied to the values s and m. The incoming value of c is never read; c is reused as scratch. */ static void | ||
165 | _op_blend_rel_pt_p_mas_dp(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) { | ||
166 | s = MUL_SYM(m, s); /* mask-scaled source */ | ||
167 | c = 256 - (s >> 24); /* 256 minus the top byte of the scaled source */ | ||
168 | *d = MUL_SYM(*d >> 24, s) + MUL_256(c, *d); /* both terms read the old *d */ | ||
169 | } | ||
170 | |||
171 | #define _op_blend_rel_pt_pas_mas_dp _op_blend_rel_pt_p_mas_dp /* SP_AS and SP_AN both reuse the general blend_rel point routine */ | ||
172 | #define _op_blend_rel_pt_pan_mas_dp _op_blend_rel_pt_p_mas_dp | ||
173 | |||
174 | #define _op_blend_rel_pt_p_mas_dpan _op_blend_pt_p_mas_dpan /* for DP_AN the blend_rel entries reuse the plain blend point routines */ | ||
175 | #define _op_blend_rel_pt_pas_mas_dpan _op_blend_pt_pas_mas_dpan | ||
176 | #define _op_blend_rel_pt_pan_mas_dpan _op_blend_pt_pan_mas_dpan | ||
177 | |||
178 | /* Fills the CPU_C slots of op_blend_rel_pt_funcs for the SM_AS / SC_N case. */ static void | ||
179 | init_blend_rel_pixel_mask_pt_funcs_c(void) | ||
180 | { | ||
181 | op_blend_rel_pt_funcs[SP][SM_AS][SC_N][DP][CPU_C] = _op_blend_rel_pt_p_mas_dp; | ||
182 | op_blend_rel_pt_funcs[SP_AS][SM_AS][SC_N][DP][CPU_C] = _op_blend_rel_pt_pas_mas_dp; | ||
183 | op_blend_rel_pt_funcs[SP_AN][SM_AS][SC_N][DP][CPU_C] = _op_blend_rel_pt_pan_mas_dp; | ||
184 | |||
185 | op_blend_rel_pt_funcs[SP][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_rel_pt_p_mas_dpan; | ||
186 | op_blend_rel_pt_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_rel_pt_pas_mas_dpan; | ||
187 | op_blend_rel_pt_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_C] = _op_blend_rel_pt_pan_mas_dpan; | ||
188 | } | ||
189 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_i386.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_i386.c new file mode 100644 index 0000000..4fa50a9 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_i386.c | |||
@@ -0,0 +1,157 @@ | |||
1 | /* blend pixel x mask --> dst */ | ||
2 | |||
3 | #ifdef BUILD_MMX | ||
4 | /* MMX span routine for a masked pixel source (also aliased as the general _op_blend_p_mas_dp_mmx). Per pixel it dispatches on the bitwise AND of the mask byte and the source's top byte. c is unused; l is reused as scratch once e has been computed. Macro semantics are defined outside this view. */ static void | ||
5 | _op_blend_pas_mas_dp_mmx(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
6 | DATA32 *e = d + l; /* one past the last destination pixel; l is free for reuse after this */ | ||
7 | pxor_r2r(mm0, mm0); | ||
8 | MOV_A2R(ALPHA_256, mm6) | ||
9 | while (d < e) { | ||
10 | l = (*s >> 24); /* top byte of the source pixel */ | ||
11 | switch(*m & l) /* NOTE(review): bitwise AND is also 0 when both operands are non-zero but share no set bit (e.g. mask 0x0f, alpha 0xf0), so such pixels are skipped - confirm this is intended */ | ||
12 | { | ||
13 | case 0: /* destination left untouched */ | ||
14 | break; | ||
15 | case 255: /* only reached when both the mask and the top byte are 255: plain copy */ | ||
16 | *d = *s; | ||
17 | break; | ||
18 | default: | ||
19 | l = 1 + *m; /* mask weight in 1..256 */ | ||
20 | MOV_A2R(l, mm3) | ||
21 | MOV_P2R(*s, mm2, mm0) /* mm2: source pixel */ | ||
22 | MUL4_256_R2R(mm3, mm2) /* presumably source * (mask + 1) / 256 - confirm */ | ||
23 | |||
24 | MOV_RA2R(mm2, mm1) /* mm1: presumably the alpha of the scaled source */ | ||
25 | movq_r2r(mm6, mm3); | ||
26 | psubw_r2r(mm1, mm3); /* mm3 = ALPHA_256 value - mm1 */ | ||
27 | |||
28 | MOV_P2R(*d, mm1, mm0) /* mm1: destination pixel */ | ||
29 | MUL4_256_R2R(mm3, mm1) | ||
30 | |||
31 | paddw_r2r(mm2, mm1); /* add the scaled source */ | ||
32 | MOV_R2P(mm1, *d, mm0) /* store the result to *d */ | ||
33 | break; | ||
34 | } | ||
35 | m++; s++; d++; | ||
36 | } | ||
37 | } | ||
38 | |||
39 | /* MMX span routine registered for the SP_AN source variant with a mask: the mask byte selects skip, copy, or interpolation via INTERP_256_R2R. c is unused; l is reused as scratch once e has been computed. Macro semantics are defined outside this view. */ static void | ||
40 | _op_blend_pan_mas_dp_mmx(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
41 | DATA32 *e = d + l; /* one past the last destination pixel */ | ||
42 | MOV_A2R(ALPHA_255, mm5) /* fourth operand of INTERP_256_R2R below */ | ||
43 | pxor_r2r(mm0, mm0); | ||
44 | while (d < e) { | ||
45 | l = *m; | ||
46 | switch(l) | ||
47 | { | ||
48 | case 0: /* mask 0: destination left untouched */ | ||
49 | break; | ||
50 | case 255: /* mask 255: source copied over the destination */ | ||
51 | *d = *s; | ||
52 | break; | ||
53 | default: | ||
54 | l++; /* weight in 2..255 */ | ||
55 | MOV_A2R(l, mm3) | ||
56 | MOV_P2R(*s, mm2, mm0) /* mm2: source pixel */ | ||
57 | MOV_P2R(*d, mm1, mm0) /* mm1: destination pixel */ | ||
58 | INTERP_256_R2R(mm3, mm2, mm1, mm5) /* presumably interpolates mm1 toward mm2 by mm3 / 256, result in mm1 - confirm */ | ||
59 | MOV_R2P(mm1, *d, mm0) /* store the result to *d */ | ||
60 | break; | ||
61 | } | ||
62 | m++; s++; d++; | ||
63 | } | ||
64 | } | ||
65 | |||
66 | #define _op_blend_p_mas_dp_mmx _op_blend_pas_mas_dp_mmx /* the general SP entry reuses the SP_AS routine */ | ||
67 | |||
68 | #define _op_blend_p_mas_dpan_mmx _op_blend_p_mas_dp_mmx /* the DP_AN entries reuse the DP routines */ | ||
69 | #define _op_blend_pan_mas_dpan_mmx _op_blend_pan_mas_dp_mmx | ||
70 | #define _op_blend_pas_mas_dpan_mmx _op_blend_pas_mas_dp_mmx | ||
71 | |||
72 | /* Fills the CPU_MMX slots of op_blend_span_funcs for the SM_AS / SC_N case. */ static void | ||
73 | init_blend_pixel_mask_span_funcs_mmx(void) | ||
74 | { | ||
75 | op_blend_span_funcs[SP][SM_AS][SC_N][DP][CPU_MMX] = _op_blend_p_mas_dp_mmx; | ||
76 | op_blend_span_funcs[SP_AS][SM_AS][SC_N][DP][CPU_MMX] = _op_blend_pas_mas_dp_mmx; | ||
77 | op_blend_span_funcs[SP_AN][SM_AS][SC_N][DP][CPU_MMX] = _op_blend_pan_mas_dp_mmx; | ||
78 | |||
79 | op_blend_span_funcs[SP][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_p_mas_dpan_mmx; | ||
80 | op_blend_span_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_pas_mas_dpan_mmx; | ||
81 | op_blend_span_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_pan_mas_dpan_mmx; | ||
82 | } | ||
83 | #endif | ||
84 | |||
85 | #ifdef BUILD_MMX | ||
86 | /* MMX single-pixel routine for a masked pixel source: scales s by (m + 1), then blends the result onto *d. The incoming value of c is never read; c is reused to hold m + 1. Macro semantics are defined outside this view. */ static void | ||
87 | _op_blend_pt_p_mas_dp_mmx(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) { | ||
88 | c = m + 1; /* mask weight in 1..256 */ | ||
89 | MOV_A2R(c, mm3) | ||
90 | pxor_r2r(mm0, mm0); | ||
91 | MOV_A2R(ALPHA_256, mm6) | ||
92 | MOV_P2R(s, mm2, mm0) /* mm2: source pixel */ | ||
93 | MUL4_256_R2R(mm3, mm2) /* presumably source * (m + 1) / 256 - confirm */ | ||
94 | |||
95 | MOV_RA2R(mm2, mm1) /* mm1: presumably the alpha of the scaled source */ | ||
96 | psubw_r2r(mm1, mm6); /* subtracts directly from mm6; there is no loop to preserve it for */ | ||
97 | |||
98 | MOV_P2R(*d, mm1, mm0) /* mm1: destination pixel */ | ||
99 | MUL4_256_R2R(mm6, mm1) | ||
100 | |||
101 | paddw_r2r(mm2, mm1); /* add the scaled source */ | ||
102 | MOV_R2P(mm1, *d, mm0) /* store the result to *d */ | ||
103 | } | ||
104 | |||
105 | #define _op_blend_pt_pan_mas_dp_mmx _op_blend_pt_p_mas_dp_mmx /* SP_AN and SP_AS both reuse the general point routine */ | ||
106 | #define _op_blend_pt_pas_mas_dp_mmx _op_blend_pt_p_mas_dp_mmx | ||
107 | |||
108 | #define _op_blend_pt_p_mas_dpan_mmx _op_blend_pt_p_mas_dp_mmx /* the DP_AN entries reuse the DP routines */ | ||
109 | #define _op_blend_pt_pas_mas_dpan_mmx _op_blend_pt_pas_mas_dp_mmx | ||
110 | #define _op_blend_pt_pan_mas_dpan_mmx _op_blend_pt_pan_mas_dp_mmx | ||
111 | |||
112 | /* Fills the CPU_MMX slots of op_blend_pt_funcs for the SM_AS / SC_N case; every slot ends up pointing at _op_blend_pt_p_mas_dp_mmx. */ static void | ||
113 | init_blend_pixel_mask_pt_funcs_mmx(void) | ||
114 | { | ||
115 | op_blend_pt_funcs[SP][SM_AS][SC_N][DP][CPU_MMX] = _op_blend_pt_p_mas_dp_mmx; | ||
116 | op_blend_pt_funcs[SP_AS][SM_AS][SC_N][DP][CPU_MMX] = _op_blend_pt_pas_mas_dp_mmx; | ||
117 | op_blend_pt_funcs[SP_AN][SM_AS][SC_N][DP][CPU_MMX] = _op_blend_pt_pan_mas_dp_mmx; | ||
118 | |||
119 | op_blend_pt_funcs[SP][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_pt_p_mas_dpan_mmx; | ||
120 | op_blend_pt_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_pt_pas_mas_dpan_mmx; | ||
121 | op_blend_pt_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_pt_pan_mas_dpan_mmx; | ||
122 | } | ||
123 | #endif | ||
124 | |||
125 | /*-----*/ | ||
126 | |||
127 | /* blend_rel pixel x mask -> dst */ | ||
128 | |||
129 | #ifdef BUILD_MMX | ||
130 | |||
131 | #define _op_blend_rel_p_mas_dpan_mmx _op_blend_p_mas_dpan_mmx /* for DP_AN the blend_rel entries reuse the plain blend routines */ | ||
132 | #define _op_blend_rel_pas_mas_dpan_mmx _op_blend_pas_mas_dpan_mmx | ||
133 | #define _op_blend_rel_pan_mas_dpan_mmx _op_blend_pan_mas_dpan_mmx | ||
134 | |||
135 | /* Fills the CPU_MMX slots of op_blend_rel_span_funcs for the SM_AS / SC_N case. Only the DP_AN entries are assigned here; the DP entries are not touched by this function. */ static void | ||
136 | init_blend_rel_pixel_mask_span_funcs_mmx(void) | ||
137 | { | ||
138 | op_blend_rel_span_funcs[SP][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_p_mas_dpan_mmx; | ||
139 | op_blend_rel_span_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_pas_mas_dpan_mmx; | ||
140 | op_blend_rel_span_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_pan_mas_dpan_mmx; | ||
141 | } | ||
142 | #endif | ||
143 | |||
144 | #ifdef BUILD_MMX | ||
145 | |||
146 | #define _op_blend_rel_pt_p_mas_dpan_mmx _op_blend_pt_p_mas_dpan_mmx /* for DP_AN the blend_rel entries reuse the plain blend point routines */ | ||
147 | #define _op_blend_rel_pt_pas_mas_dpan_mmx _op_blend_pt_pas_mas_dpan_mmx | ||
148 | #define _op_blend_rel_pt_pan_mas_dpan_mmx _op_blend_pt_pan_mas_dpan_mmx | ||
149 | |||
150 | /* Fills the CPU_MMX slots of op_blend_rel_pt_funcs for the SM_AS / SC_N case. Only the DP_AN entries are assigned here. */ static void | ||
151 | init_blend_rel_pixel_mask_pt_funcs_mmx(void) | ||
152 | { | ||
153 | op_blend_rel_pt_funcs[SP][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_pt_p_mas_dpan_mmx; | ||
154 | op_blend_rel_pt_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_pt_pas_mas_dpan_mmx; | ||
155 | op_blend_rel_pt_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_MMX] = _op_blend_rel_pt_pan_mas_dpan_mmx; | ||
156 | } | ||
157 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_neon.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_neon.c new file mode 100644 index 0000000..b252a67 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_neon.c | |||
@@ -0,0 +1,129 @@ | |||
1 | /* blend pixel x mask --> dst */ | ||
2 | |||
3 | #ifdef BUILD_NEON | ||
4 | /* Span routine registered under CPU_NEON for a masked pixel source. The body is plain C with no NEON-specific code: the mask byte selects skip, copy or INTERP_256. c is unused. Iteration is supplied by UNROLL8_PLD_WHILE (defined elsewhere). */ static void | ||
5 | _op_blend_pas_mas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
6 | DATA32 *e; | ||
7 | int alpha; | ||
8 | UNROLL8_PLD_WHILE(d, l, e, | ||
9 | { | ||
10 | alpha = *m; | ||
11 | switch(alpha) | ||
12 | { | ||
13 | case 0: /* mask 0: destination left untouched */ | ||
14 | break; | ||
15 | case 255: /* mask 255: source copied over the destination */ | ||
16 | *d = *s; | ||
17 | break; | ||
18 | default: | ||
19 | alpha++; /* weight in 2..255 passed to INTERP_256 */ | ||
20 | *d = INTERP_256(alpha, *s, *d); /* presumably interpolates between *d and *s by alpha / 256 - confirm */ | ||
21 | break; | ||
22 | } | ||
23 | m++; s++; d++; | ||
24 | }); | ||
25 | } | ||
26 | |||
27 | /* Span routine registered under CPU_NEON for the SP_AN source variant with a mask. The body is statement-for-statement the same as _op_blend_pas_mas_dp_neon and contains no NEON-specific code. c is unused. */ static void | ||
28 | _op_blend_pan_mas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
29 | DATA32 *e; | ||
30 | int alpha; | ||
31 | UNROLL8_PLD_WHILE(d, l, e, | ||
32 | { | ||
33 | alpha = *m; | ||
34 | switch(alpha) | ||
35 | { | ||
36 | case 0: /* mask 0: destination left untouched */ | ||
37 | break; | ||
38 | case 255: /* mask 255: source copied over the destination */ | ||
39 | *d = *s; | ||
40 | break; | ||
41 | default: | ||
42 | alpha++; /* weight in 2..255 passed to INTERP_256 */ | ||
43 | *d = INTERP_256(alpha, *s, *d); /* presumably interpolates between *d and *s by alpha / 256 - confirm */ | ||
44 | break; | ||
45 | } | ||
46 | m++; s++; d++; | ||
47 | }); | ||
48 | } | ||
49 | |||
50 | #define _op_blend_p_mas_dp_neon _op_blend_pas_mas_dp_neon /* NOTE(review): the general SP entry reuses the SP_AS routine, which copies or interpolates by mask only; the C routine _op_blend_p_mas_dp also uses the source's top byte - confirm the difference is intended */ | ||
51 | |||
52 | #define _op_blend_p_mas_dpan_neon _op_blend_p_mas_dp_neon /* the DP_AN entries reuse the DP routines */ | ||
53 | #define _op_blend_pan_mas_dpan_neon _op_blend_pan_mas_dp_neon | ||
54 | #define _op_blend_pas_mas_dpan_neon _op_blend_pas_mas_dp_neon | ||
55 | |||
56 | /* Fills the CPU_NEON slots of op_blend_span_funcs for the SM_AS / SC_N case. */ static void | ||
57 | init_blend_pixel_mask_span_funcs_neon(void) | ||
58 | { | ||
59 | op_blend_span_funcs[SP][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_p_mas_dp_neon; | ||
60 | op_blend_span_funcs[SP_AS][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_pas_mas_dp_neon; | ||
61 | op_blend_span_funcs[SP_AN][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_pan_mas_dp_neon; | ||
62 | |||
63 | op_blend_span_funcs[SP][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_p_mas_dpan_neon; | ||
64 | op_blend_span_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_pas_mas_dpan_neon; | ||
65 | op_blend_span_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_pan_mas_dpan_neon; | ||
66 | } | ||
67 | #endif | ||
68 | |||
69 | #ifdef BUILD_NEON | ||
70 | /* Single-pixel routine registered under CPU_NEON: plain C, same three statements as _op_blend_pt_p_mas_dp. Scales s by the mask value m, then blends onto *d. The incoming value of c is never read; c is reused as scratch. */ static void | ||
71 | _op_blend_pt_p_mas_dp_neon(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) { | ||
72 | s = MUL_SYM(m, s); /* mask-scaled source */ | ||
73 | c = 256 - (s >> 24); /* 256 minus the top byte of the scaled source */ | ||
74 | *d = s + MUL_256(c, *d); | ||
75 | } | ||
76 | |||
77 | #define _op_blend_pt_pan_mas_dp_neon _op_blend_pt_p_mas_dp_neon /* SP_AN and SP_AS both reuse the general point routine */ | ||
78 | #define _op_blend_pt_pas_mas_dp_neon _op_blend_pt_p_mas_dp_neon | ||
79 | |||
80 | #define _op_blend_pt_p_mas_dpan_neon _op_blend_pt_p_mas_dp_neon /* the DP_AN entries reuse the DP routines */ | ||
81 | #define _op_blend_pt_pas_mas_dpan_neon _op_blend_pt_pas_mas_dp_neon | ||
82 | #define _op_blend_pt_pan_mas_dpan_neon _op_blend_pt_pan_mas_dp_neon | ||
83 | |||
84 | /* Fills the CPU_NEON slots of op_blend_pt_funcs for the SM_AS / SC_N case; every slot ends up pointing at _op_blend_pt_p_mas_dp_neon. */ static void | ||
85 | init_blend_pixel_mask_pt_funcs_neon(void) | ||
86 | { | ||
87 | op_blend_pt_funcs[SP][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_pt_p_mas_dp_neon; | ||
88 | op_blend_pt_funcs[SP_AS][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_pt_pas_mas_dp_neon; | ||
89 | op_blend_pt_funcs[SP_AN][SM_AS][SC_N][DP][CPU_NEON] = _op_blend_pt_pan_mas_dp_neon; | ||
90 | |||
91 | op_blend_pt_funcs[SP][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_pt_p_mas_dpan_neon; | ||
92 | op_blend_pt_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_pt_pas_mas_dpan_neon; | ||
93 | op_blend_pt_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_pt_pan_mas_dpan_neon; | ||
94 | } | ||
95 | #endif | ||
96 | |||
97 | /*-----*/ | ||
98 | |||
99 | /* blend_rel pixel x mask -> dst */ | ||
100 | |||
101 | #ifdef BUILD_NEON | ||
102 | |||
103 | #define _op_blend_rel_p_mas_dpan_neon _op_blend_p_mas_dpan_neon /* for DP_AN the blend_rel entries reuse the plain blend routines */ | ||
104 | #define _op_blend_rel_pas_mas_dpan_neon _op_blend_pas_mas_dpan_neon | ||
105 | #define _op_blend_rel_pan_mas_dpan_neon _op_blend_pan_mas_dpan_neon | ||
106 | |||
107 | /* Fills the CPU_NEON slots of op_blend_rel_span_funcs for the SM_AS / SC_N case. Only the DP_AN entries are assigned here. */ static void | ||
108 | init_blend_rel_pixel_mask_span_funcs_neon(void) | ||
109 | { | ||
110 | op_blend_rel_span_funcs[SP][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_p_mas_dpan_neon; | ||
111 | op_blend_rel_span_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_pas_mas_dpan_neon; | ||
112 | op_blend_rel_span_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_pan_mas_dpan_neon; | ||
113 | } | ||
114 | #endif | ||
115 | |||
116 | #ifdef BUILD_NEON | ||
117 | |||
118 | #define _op_blend_rel_pt_p_mas_dpan_neon _op_blend_pt_p_mas_dpan_neon /* for DP_AN the blend_rel entries reuse the plain blend point routines */ | ||
119 | #define _op_blend_rel_pt_pas_mas_dpan_neon _op_blend_pt_pas_mas_dpan_neon | ||
120 | #define _op_blend_rel_pt_pan_mas_dpan_neon _op_blend_pt_pan_mas_dpan_neon | ||
121 | |||
122 | /* Fills the CPU_NEON slots of op_blend_rel_pt_funcs for the SM_AS / SC_N case. Only the DP_AN entries are assigned here. */ static void | ||
123 | init_blend_rel_pixel_mask_pt_funcs_neon(void) | ||
124 | { | ||
125 | op_blend_rel_pt_funcs[SP][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_pt_p_mas_dpan_neon; | ||
126 | op_blend_rel_pt_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_pt_pas_mas_dpan_neon; | ||
127 | op_blend_rel_pt_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_pt_pan_mas_dpan_neon; | ||
128 | } | ||
129 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c new file mode 100644 index 0000000..617b9e2 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_mask_sse3.c | |||
@@ -0,0 +1,300 @@ | |||
1 | /* blend pixel x mask --> dst */ | ||
2 | |||
3 | #ifdef BUILD_SSE3 | ||
4 | |||
5 | /* SSE3 span routine for a masked pixel source: per pixel, scales the source by the mask byte and blends it onto the destination. The three brace blocks are handed to LOOP_ALIGNED_U1_A48_SSE3 (defined elsewhere) as the 1-pixel, 4-pixel and 8-pixel bodies; each body advances m, s, d and decrements l itself. The incoming value of c is never read; c is reused as scratch in the 1-pixel body. */ static void | ||
6 | _op_blend_p_mas_dp_sse3(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
7 | |||
8 | int alpha; | ||
9 | |||
10 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
11 | { /* UOP */ | ||
12 | |||
13 | alpha = *m; | ||
14 | c = MUL_SYM(alpha, *s); /* mask-scaled source */ | ||
15 | alpha = 256 - (c >> 24); /* 256 minus the top byte of the scaled source */ | ||
16 | *d = c + MUL_256(alpha, *d); | ||
17 | m++; s++; d++; l--; | ||
18 | }, | ||
19 | { /* A4OP */ | ||
20 | |||
21 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); /* unaligned load: s need not be 16-byte aligned */ | ||
22 | __m128i d0 = _mm_load_si128((__m128i *)d); /* aligned load: presumably the loop macro guarantees d is 16-byte aligned here - confirm */ | ||
23 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); /* widen four mask bytes to 32-bit lanes, m[0] in the lowest lane */ | ||
24 | |||
25 | __m128i c0 = mul_sym_sse3(m0, s0); /* vector counterpart of the MUL_SYM step above (helper defined elsewhere) */ | ||
26 | __m128i a0 = sub4_alpha_sse3(c0); /* presumably 256 - alpha per lane, as in the scalar body - confirm */ | ||
27 | __m128i r0 = mul_256_sse3(a0, d0); | ||
28 | |||
29 | r0 = _mm_add_epi32(r0, c0); | ||
30 | |||
31 | _mm_store_si128((__m128i *)d, r0); /* aligned store */ | ||
32 | |||
33 | m += 4; s += 4; d += 4; l -= 4; | ||
34 | }, | ||
35 | { /* A8OP */ | ||
36 | |||
37 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
38 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
39 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
40 | |||
41 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); /* second group of four pixels */ | ||
42 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
43 | __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]); | ||
44 | |||
45 | __m128i c0 = mul_sym_sse3(m0, s0); /* same steps as the 4-pixel body, applied to both groups */ | ||
46 | __m128i c1 = mul_sym_sse3(m1, s1); | ||
47 | |||
48 | __m128i a0 = sub4_alpha_sse3(c0); | ||
49 | __m128i a1 = sub4_alpha_sse3(c1); | ||
50 | |||
51 | __m128i r0 = mul_256_sse3(a0, d0); | ||
52 | __m128i r1 = mul_256_sse3(a1, d1); | ||
53 | |||
54 | r0 = _mm_add_epi32(r0, c0); | ||
55 | r1 = _mm_add_epi32(r1, c1); | ||
56 | |||
57 | _mm_store_si128((__m128i *)d, r0); | ||
58 | _mm_store_si128((__m128i *)(d+4), r1); | ||
59 | |||
60 | m += 8; s += 8; d += 8; l -= 8; | ||
61 | }) | ||
62 | } | ||
63 | |||
64 | /* SSE3 span routine for a masked pixel source where the mask byte alone weights the result (also aliased for the SP_AN variant). The three brace blocks are handed to LOOP_ALIGNED_U1_A48_SSE3 (defined elsewhere) as the 1-pixel, 4-pixel and 8-pixel bodies; each body advances m, s, d and decrements l itself. c is unused. */ static void | ||
65 | _op_blend_pas_mas_dp_sse3(DATA32 *s, DATA8 *m, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
66 | |||
67 | const __m128i ones = _mm_set_epi32(1, 1, 1, 1); /* added to the mask lanes to form the weight mask + 1 */ | ||
68 | int alpha; | ||
69 | |||
70 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
71 | { /* UOP */ | ||
72 | |||
73 | alpha = *m; | ||
74 | switch(alpha) | ||
75 | { | ||
76 | case 0: /* mask 0: destination left untouched */ | ||
77 | break; | ||
78 | case 255: /* mask 255: source copied over the destination */ | ||
79 | *d = *s; | ||
80 | break; | ||
81 | default: | ||
82 | alpha++; /* weight in 2..255 passed to INTERP_256 */ | ||
83 | *d = INTERP_256(alpha, *s, *d); | ||
84 | break; | ||
85 | } | ||
86 | m++; s++; d++; l--; | ||
87 | }, | ||
88 | { /*A4OP */ | ||
89 | |||
90 | if ((m[3] | m[2] | m[1] | m[0]) == 0) { /* all four mask bytes are zero: nothing to write */ | ||
91 | m += 4; s += 4; d += 4; l -= 4; | ||
92 | continue; /* relies on the loop macro expanding this body inside a loop - confirm against the macro */ | ||
93 | } | ||
94 | |||
95 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); /* unaligned load of four source pixels */ | ||
96 | __m128i d0 = _mm_load_si128((__m128i *)d); /* aligned load of four destination pixels */ | ||
97 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); /* widen four mask bytes to 32-bit lanes, m[0] in the lowest lane */ | ||
98 | |||
99 | __m128i zm0 = _mm_cmpeq_epi32(m0, _mm_setzero_si128()); /* all-ones in lanes whose mask is 0, taken before the +1 */ | ||
100 | |||
101 | m0 = _mm_add_epi32(m0, ones); /* weights mask + 1, in 1..256 */ | ||
102 | |||
103 | __m128i r0 = interp4_256_sse3(m0, s0, d0); /* vector counterpart of INTERP_256 (helper defined elsewhere) */ | ||
104 | |||
105 | r0 = _mm_and_si128(~zm0, r0); /* keep the interpolated value only where mask != 0; operator ~ on __m128i relies on compiler vector extensions */ | ||
106 | d0 = _mm_and_si128(zm0, d0); /* keep the original destination where mask == 0 */ | ||
107 | |||
108 | d0 = _mm_add_epi32(r0, d0); /* the two operands are non-zero in disjoint lanes, so the add merges them */ | ||
109 | |||
110 | _mm_store_si128((__m128i *)d, d0); | ||
111 | |||
112 | m += 4; s += 4; d += 4; l -= 4; | ||
113 | }, | ||
114 | { /* A8OP */ | ||
115 | |||
116 | if ((m[7] | m[6] | m[5] | m[4] | m[3] | m[2] | m[1] | m[0]) == 0) { /* all eight mask bytes are zero: nothing to write */ | ||
117 | m += 8; s += 8; d += 8; l -= 8; | ||
118 | continue; | ||
119 | } | ||
120 | |||
121 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
122 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
123 | __m128i m0 = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
124 | |||
125 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); /* second group of four pixels */ | ||
126 | __m128i d1 = _mm_load_si128((__m128i *)(d+4)); | ||
127 | __m128i m1 = _mm_set_epi32(m[7], m[6], m[5], m[4]); | ||
128 | |||
129 | __m128i zm0 = _mm_cmpeq_epi32(m0, _mm_setzero_si128()); /* zero-mask lane selectors, taken before the +1 */ | ||
130 | __m128i zm1 = _mm_cmpeq_epi32(m1, _mm_setzero_si128()); | ||
131 | |||
132 | m0 = _mm_add_epi32(m0, ones); | ||
133 | m1 = _mm_add_epi32(m1, ones); | ||
134 | |||
135 | __m128i r0 = interp4_256_sse3(m0, s0, d0); | ||
136 | __m128i r1 = interp4_256_sse3(m1, s1, d1); | ||
137 | |||
138 | r0 = _mm_and_si128(~zm0, r0); /* same lane selection as the 4-pixel body, for both groups */ | ||
139 | d0 = _mm_and_si128(zm0, d0); | ||
140 | |||
141 | r1 = _mm_and_si128(~zm1, r1); | ||
142 | d1 = _mm_and_si128(zm1, d1); | ||
143 | |||
144 | d0 = _mm_add_epi32(d0, r0); | ||
145 | d1 = _mm_add_epi32(d1, r1); | ||
146 | |||
147 | _mm_store_si128((__m128i *)d, d0); | ||
148 | _mm_store_si128((__m128i *)(d+4), d1); | ||
149 | |||
150 | m += 8; s += 8; d += 8; l -= 8; | ||
151 | }) | ||
152 | } | ||
153 | |||
154 | #define _op_blend_pan_mas_dp_sse3 _op_blend_pas_mas_dp_sse3 /* SP_AN reuses the SP_AS routine */ | ||
155 | |||
156 | #define _op_blend_p_mas_dpan_sse3 _op_blend_p_mas_dp_sse3 /* the DP_AN entries reuse the DP routines */ | ||
157 | #define _op_blend_pas_mas_dpan_sse3 _op_blend_pas_mas_dp_sse3 | ||
158 | #define _op_blend_pan_mas_dpan_sse3 _op_blend_pan_mas_dp_sse3 | ||
159 | |||
160 | /* Fills the CPU_SSE3 slots of op_blend_span_funcs for the SM_AS / SC_N case. */ static void | ||
161 | init_blend_pixel_mask_span_funcs_sse3(void) | ||
162 | { | ||
163 | op_blend_span_funcs[SP][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_p_mas_dp_sse3; | ||
164 | op_blend_span_funcs[SP_AS][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pas_mas_dp_sse3; | ||
165 | op_blend_span_funcs[SP_AN][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pan_mas_dp_sse3; | ||
166 | |||
167 | op_blend_span_funcs[SP][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_p_mas_dpan_sse3; | ||
168 | op_blend_span_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pas_mas_dpan_sse3; | ||
169 | op_blend_span_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pan_mas_dpan_sse3; | ||
170 | } | ||
171 | /* The two base pt names below are NULL; every other pt name in this group aliases one of them, so all of them expand to NULL. */ |||
172 | #define _op_blend_pt_p_mas_dp_sse3 NULL | ||
173 | #define _op_blend_pt_pan_mas_dp_sse3 NULL | ||
174 | |||
175 | #define _op_blend_pt_pas_mas_dp_sse3 _op_blend_pt_p_mas_dp_sse3 | ||
176 | |||
177 | #define _op_blend_pt_p_mas_dpan_sse3 _op_blend_pt_p_mas_dp_sse3 | ||
178 | #define _op_blend_pt_pas_mas_dpan_sse3 _op_blend_pt_pas_mas_dp_sse3 | ||
179 | #define _op_blend_pt_pan_mas_dpan_sse3 _op_blend_pt_pan_mas_dp_sse3 | ||
180 | |||
181 | static void | ||
182 | init_blend_pixel_mask_pt_funcs_sse3(void) | ||
183 | { | ||
184 | /* CPU_SSE3 slots of op_blend_pt_funcs for [source][SM_AS][SC_N]: per source kind, DP entry first, then DP_AN. */ | ||
185 | op_blend_pt_funcs[SP][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pt_p_mas_dp_sse3; | ||
186 | op_blend_pt_funcs[SP][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_p_mas_dpan_sse3; | ||
187 | op_blend_pt_funcs[SP_AS][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pt_pas_mas_dp_sse3; | ||
188 | op_blend_pt_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_pas_mas_dpan_sse3; | ||
189 | op_blend_pt_funcs[SP_AN][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_pt_pan_mas_dp_sse3; | ||
190 | op_blend_pt_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_pan_mas_dpan_sse3; | ||
191 | } | ||
192 | |||
193 | /*-----*/ | ||
194 | |||
195 | /* blend_rel pixel x mask -> dst */ | ||
196 | |||
197 | static void | ||
198 | _op_blend_rel_p_mas_dp_sse3(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
199 | /* blend_rel of (pixel x mask) onto dst for l pixels. Scalar form: c = MUL_SYM(*m, *s) then | ||
200 | * *d = MUL_SYM(*d >> 24, c) + MUL_256(256 - (c >> 24), *d). The incoming value of c is overwritten. */ | ||
201 | |||
202 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
203 | { /* UOP: one pixel */ | ||
204 | int inv_a; | ||
205 | c = MUL_SYM(*m, *s); | ||
206 | inv_a = 256 - (c >> 24); | ||
207 | *d = MUL_SYM(*d >> 24, c) + MUL_256(inv_a, *d); | ||
208 | d++; m++; s++; l--; | ||
209 | }, | ||
210 | { /* A4OP: four pixels per pass; d uses aligned load/store */ | ||
211 | |||
212 | __m128i dst = _mm_load_si128((__m128i *)d); | ||
213 | __m128i msk = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
214 | __m128i src = _mm_lddqu_si128((__m128i *)s); | ||
215 | |||
216 | __m128i col = mul_sym_sse3(msk, src); | ||
217 | __m128i inv = sub4_alpha_sse3(col); | ||
218 | |||
219 | __m128i rel = mul_sym_sse3(_mm_srli_epi32(dst, 24), col); | ||
220 | __m128i keep = mul_256_sse3(inv, dst); | ||
221 | |||
222 | dst = _mm_add_epi32(rel, keep); | ||
223 | |||
224 | _mm_store_si128((__m128i *)d, dst); | ||
225 | |||
226 | d += 4; m += 4; s += 4; l -= 4; | ||
227 | }, | ||
228 | { /* A8OP: eight pixels per pass, handled as two independent 4-pixel halves */ | ||
229 | |||
230 | __m128i dst_lo = _mm_load_si128((__m128i *)d); | ||
231 | __m128i msk_lo = _mm_set_epi32(m[3], m[2], m[1], m[0]); | ||
232 | __m128i src_lo = _mm_lddqu_si128((__m128i *)s); | ||
233 | |||
234 | __m128i dst_hi = _mm_load_si128((__m128i *)(d+4)); | ||
235 | __m128i msk_hi = _mm_set_epi32(m[7], m[6], m[5], m[4]); | ||
236 | __m128i src_hi = _mm_lddqu_si128((__m128i *)(s+4)); | ||
237 | |||
238 | __m128i col_lo = mul_sym_sse3(msk_lo, src_lo); | ||
239 | __m128i col_hi = mul_sym_sse3(msk_hi, src_hi); | ||
240 | |||
241 | __m128i inv_lo = sub4_alpha_sse3(col_lo); | ||
242 | __m128i inv_hi = sub4_alpha_sse3(col_hi); | ||
243 | |||
244 | __m128i rel_lo = mul_sym_sse3(_mm_srli_epi32(dst_lo, 24), col_lo); | ||
245 | __m128i keep_lo = mul_256_sse3(inv_lo, dst_lo); | ||
246 | |||
247 | __m128i rel_hi = mul_sym_sse3(_mm_srli_epi32(dst_hi, 24), col_hi); | ||
248 | __m128i keep_hi = mul_256_sse3(inv_hi, dst_hi); | ||
249 | |||
250 | dst_lo = _mm_add_epi32(rel_lo, keep_lo); | ||
251 | dst_hi = _mm_add_epi32(rel_hi, keep_hi); | ||
252 | |||
253 | _mm_store_si128((__m128i *)d, dst_lo); | ||
254 | _mm_store_si128((__m128i *)(d+4), dst_hi); | ||
255 | |||
256 | d += 8; m += 8; s += 8; l -= 8; | ||
257 | }) | ||
258 | } | ||
259 | /* Name aliases only: rel pas/pan expand to the rel_p routine; the rel *_dpan names expand to the plain (non-rel) blend *_dpan names. */ |||
260 | #define _op_blend_rel_pas_mas_dp_sse3 _op_blend_rel_p_mas_dp_sse3 | ||
261 | #define _op_blend_rel_pan_mas_dp_sse3 _op_blend_rel_p_mas_dp_sse3 | ||
262 | |||
263 | #define _op_blend_rel_p_mas_dpan_sse3 _op_blend_p_mas_dpan_sse3 | ||
264 | #define _op_blend_rel_pas_mas_dpan_sse3 _op_blend_pas_mas_dpan_sse3 | ||
265 | #define _op_blend_rel_pan_mas_dpan_sse3 _op_blend_pan_mas_dpan_sse3 | ||
266 | |||
267 | static void | ||
268 | init_blend_rel_pixel_mask_span_funcs_sse3(void) | ||
269 | { | ||
270 | /* CPU_SSE3 slots of op_blend_rel_span_funcs for [source][SM_AS][SC_N]: per source kind, DP entry first, then DP_AN. */ | ||
271 | op_blend_rel_span_funcs[SP][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_p_mas_dp_sse3; | ||
272 | op_blend_rel_span_funcs[SP][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_p_mas_dpan_sse3; | ||
273 | op_blend_rel_span_funcs[SP_AS][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pas_mas_dp_sse3; | ||
274 | op_blend_rel_span_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pas_mas_dpan_sse3; | ||
275 | op_blend_rel_span_funcs[SP_AN][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pan_mas_dp_sse3; | ||
276 | op_blend_rel_span_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pan_mas_dpan_sse3; | ||
277 | } | ||
278 | /* The base rel pt name is NULL and pas/pan alias it; the rel pt *_dpan names expand to the plain (non-rel) blend pt *_dpan names. */ |||
279 | #define _op_blend_rel_pt_p_mas_dp_sse3 NULL | ||
280 | |||
281 | #define _op_blend_rel_pt_pas_mas_dp_sse3 _op_blend_rel_pt_p_mas_dp_sse3 | ||
282 | #define _op_blend_rel_pt_pan_mas_dp_sse3 _op_blend_rel_pt_p_mas_dp_sse3 | ||
283 | |||
284 | #define _op_blend_rel_pt_p_mas_dpan_sse3 _op_blend_pt_p_mas_dpan_sse3 | ||
285 | #define _op_blend_rel_pt_pas_mas_dpan_sse3 _op_blend_pt_pas_mas_dpan_sse3 | ||
286 | #define _op_blend_rel_pt_pan_mas_dpan_sse3 _op_blend_pt_pan_mas_dpan_sse3 | ||
287 | |||
288 | static void | ||
289 | init_blend_rel_pixel_mask_pt_funcs_sse3(void) | ||
290 | { | ||
291 | /* CPU_SSE3 slots of op_blend_rel_pt_funcs for [source][SM_AS][SC_N]: per source kind, DP entry first, then DP_AN. */ | ||
292 | op_blend_rel_pt_funcs[SP][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_p_mas_dp_sse3; | ||
293 | op_blend_rel_pt_funcs[SP][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_p_mas_dpan_sse3; | ||
294 | op_blend_rel_pt_funcs[SP_AS][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_pas_mas_dp_sse3; | ||
295 | op_blend_rel_pt_funcs[SP_AS][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pas_mas_dpan_sse3; | ||
296 | op_blend_rel_pt_funcs[SP_AN][SM_AS][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_pan_mas_dp_sse3; | ||
297 | op_blend_rel_pt_funcs[SP_AN][SM_AS][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pan_mas_dpan_sse3; | ||
298 | } | ||
299 | |||
300 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_neon.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_neon.c new file mode 100644 index 0000000..1cb50b6 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_neon.c | |||
@@ -0,0 +1,530 @@ | |||
1 | /* blend pixel --> dst */ | ||
2 | |||
3 | #ifdef BUILD_NEON | ||
4 | static void | ||
5 | _op_blend_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
6 | /* NEON span blend over l pixels: every byte of *d becomes | ||
7 | * sat(s + round(d * (255 - s.alpha) / 256)), where s.alpha is the top byte of the source pixel. | ||
8 | * m is unused and c is never referenced by the assembly. Returns immediately when l <= 0. */ | ||
9 | DATA32 *e = d + l, *tmp = e; /* e: one past the last pixel; tmp: scratch register, initial value never read */ | ||
10 | if (l <= 0) return; /* the alignment prologue below stores a pixel before any length check */ | ||
11 | #define AP "blend_p_dp_" | ||
12 | asm volatile ( | ||
13 | ".fpu neon \n\t" | ||
14 | //** init | ||
15 | "vmov.i8 q8, $0x1 \n\t" | ||
16 | |||
17 | AP "loopchoose: \n\t" | ||
18 | // If aligned already - straight to octs | ||
19 | "andS %[tmp], %[d],$0x1f \n\t" | ||
20 | "beq "AP"octloops \n\t" | ||
21 | |||
22 | "andS %[tmp], %[d],$0xf \n\t" | ||
23 | "beq "AP"quadloops \n\t" | ||
24 | |||
25 | "andS %[tmp], %[d],$0x4 \n\t" | ||
26 | "beq "AP"dualloop \n\t" | ||
27 | |||
28 | // Only ever executes once, fall through to dual | ||
29 | AP "singleloop: \n\t" | ||
30 | "vld1.32 d0[0], [%[s]]! \n\t" | ||
31 | "vld1.32 d4[0], [%[d]] \n\t" | ||
32 | |||
33 | "vmvn.u8 d8, d0 \n\t" | ||
34 | "vshr.u32 d8, d8, #24 \n\t" | ||
35 | |||
36 | "vmul.u32 d8, d16, d8 \n\t" | ||
37 | |||
38 | "vmull.u8 q6, d4,d8 \n\t" | ||
39 | "vqrshrn.u16 d8, q6, #8 \n\t" | ||
40 | // Add to 's' | ||
41 | "vqadd.u8 q2, q4,q0 \n\t" | ||
42 | |||
43 | "vst1.32 d4[0], [%[d]] \n\t" | ||
44 | "add %[d], #4 \n\t" | ||
45 | |||
46 | // Can we go the fast path? | ||
47 | "andS %[tmp], %[d],$0x1f \n\t" | ||
48 | "beq "AP"octloops \n\t" | ||
49 | |||
50 | "andS %[tmp], %[d],$0x0f \n\t" | ||
51 | "beq "AP"quadloops \n\t" | ||
52 | |||
53 | AP "dualloop: \n\t" | ||
54 | "sub %[tmp], %[e], %[d] \n\t" | ||
55 | "cmp %[tmp], #32 \n\t" | ||
56 | "blt "AP"loopout \n\t" | ||
57 | |||
58 | AP "dualloopint: \n\t" | ||
59 | //** Dual Loop | ||
60 | "vldm %[s]!, {d0} \n\t" | ||
61 | "vldr d4, [%[d]] \n\t" | ||
62 | |||
63 | "vmvn.u8 d8, d0 \n\t" | ||
64 | "vshr.u32 d8, d8, #24 \n\t" | ||
65 | |||
66 | "vmul.u32 d8, d16, d8 \n\t" | ||
67 | |||
68 | "vmull.u8 q6, d4,d8 \n\t" | ||
69 | "vqrshrn.u16 d8, q6, #8 \n\t" | ||
70 | // Add to 's' | ||
71 | "vqadd.u8 d4, d8,d0 \n\t" | ||
72 | "vstr d4, [%[d]] \n\t" | ||
73 | "add %[d], #8 \n\t" | ||
74 | |||
75 | "ands %[tmp], %[d], $0x1f \n\t" | ||
76 | "beq "AP"octloops \n\t" | ||
77 | |||
78 | AP"quadloops: \n\t" | ||
79 | "sub %[tmp], %[e], %[d] \n\t" | ||
80 | "cmp %[tmp], #32 \n\t" | ||
81 | "blt "AP"loopout \n\t" | ||
82 | |||
83 | "vldm %[s]!, {d0,d1} \n\t" | ||
84 | "vldm %[d], {d4,d5} \n\t" | ||
85 | |||
86 | |||
87 | // Invert s (255 - x per byte) and shift the alpha byte down (>> 24) into q4 | ||
88 | "vmvn.u8 q4, q0 \n\t" | ||
89 | "vshr.u32 q4, q4,$0x18 \n\t" | ||
90 | |||
91 | // Multiply into all fields | ||
92 | "vmul.u32 q4, q8,q4 \n\t" | ||
93 | |||
94 | // a * d (clobbering 'd'/q7) | ||
95 | "vmull.u8 q6, d4,d8 \n\t" | ||
96 | "vmull.u8 q2, d5,d9 \n\t" | ||
97 | |||
98 | // Shift & narrow it | ||
99 | "vqrshrn.u16 d8, q6, #8 \n\t" | ||
100 | "vqrshrn.u16 d9, q2, #8 \n\t" | ||
101 | |||
102 | // Add to s | ||
103 | "vqadd.u8 q2, q4,q0 \n\t" | ||
104 | |||
105 | // Write it | ||
106 | "vstm %[d]!, {d4,d5} \n\t" | ||
107 | |||
108 | AP "octloops: \n\t" | ||
109 | "sub %[tmp], %[e], %[d] \n\t" | ||
110 | "cmp %[tmp], #32 \n\t" | ||
111 | "ble "AP"loopout \n\t" | ||
112 | |||
113 | "sub %[tmp],%[e],#64 \n\t" | ||
114 | |||
115 | |||
116 | AP "octloopint:\n\t" | ||
117 | //** Oct loop | ||
118 | "vldm %[s]!, {d0,d1,d2,d3} \n\t" | ||
119 | "vldm %[d], {d4,d5,d6,d7} \n\t" | ||
120 | "pld [%[s], #64] \n\t" | ||
121 | |||
122 | |||
123 | // Invert s (255 - x per byte) and shift the alpha byte down (>> 24) into q4/q5 | ||
124 | "vmvn.u8 q4, q0 \n\t" | ||
125 | "vmvn.u8 q5, q1 \n\t" | ||
126 | "vshr.u32 q4, q4,$0x18 \n\t" | ||
127 | "vshr.u32 q5, q5,$0x18\n\t" | ||
128 | |||
129 | // Multiply into all fields | ||
130 | "vmul.u32 q4, q8,q4 \n\t" | ||
131 | "vmul.u32 q5, q8,q5 \n\t" | ||
132 | |||
133 | |||
134 | // a * d (clobbering 'd'/q7) | ||
135 | "vmull.u8 q6, d4,d8 \n\t" | ||
136 | "vmull.u8 q2, d5,d9 \n\t" | ||
137 | "vmull.u8 q7, d6,d10 \n\t" | ||
138 | "vmull.u8 q3, d7,d11 \n\t" | ||
139 | |||
140 | "cmp %[tmp], %[d]\n\t" | ||
141 | |||
142 | // Shift & narrow it | ||
143 | "vqrshrn.u16 d8, q6, #8 \n\t" | ||
144 | "vqrshrn.u16 d9, q2, #8 \n\t" | ||
145 | "vqrshrn.u16 d10, q7, #8 \n\t" | ||
146 | "vqrshrn.u16 d11, q3, #8 \n\t" | ||
147 | |||
148 | |||
149 | // Add to s | ||
150 | "vqadd.u8 q2, q4,q0 \n\t" | ||
151 | "vqadd.u8 q3, q5,q1 \n\t" | ||
152 | |||
153 | // Write it | ||
154 | "vstm %[d]!, {d4,d5,d6,d7} \n\t" | ||
155 | |||
156 | "bhi "AP"octloopint\n\t" | ||
157 | |||
158 | AP "loopout: \n\t" | ||
159 | "cmp %[d], %[e] \n\t" | ||
160 | "beq "AP"done \n\t" | ||
161 | "sub %[tmp],%[e], %[d] \n\t" | ||
162 | "cmp %[tmp],$0x04 \n\t" | ||
163 | "ble "AP"singleloop2 \n\t" | ||
164 | |||
165 | AP "dualloop2: \n\t" | ||
166 | "sub %[tmp],%[e],$0x7 \n\t" | ||
167 | AP "dualloop2int: \n\t" | ||
168 | //** Trailing double | ||
169 | |||
170 | "vldm %[s]!, {d0} \n\t" | ||
171 | "vldm %[d], {d4} \n\t" | ||
172 | |||
173 | "vmvn.u8 d8, d0 \n\t" | ||
174 | "vshr.u32 d8, d8, #24 \n\t" | ||
175 | |||
176 | "vmul.u32 d8, d16, d8 \n\t" | ||
177 | |||
178 | "vmull.u8 q6, d4,d8 \n\t" | ||
179 | "vqrshrn.u16 d8, q6, #8 \n\t" | ||
180 | // Add to 's' | ||
181 | "vqadd.u8 d4, d8,d0 \n\t" | ||
182 | |||
183 | "vstr.32 d4, [%[d]] \n\t" | ||
184 | "add %[d], #8 \n\t" | ||
185 | |||
186 | "cmp %[tmp], %[d] \n\t" | ||
187 | "bhi "AP"dualloop2int \n\t" | ||
188 | |||
189 | // Single ?? | ||
190 | "cmp %[e], %[d] \n\t" | ||
191 | "beq "AP"done \n\t" | ||
192 | |||
193 | AP"singleloop2: \n\t" | ||
194 | "vld1.32 d0[0], [%[s]] \n\t" | ||
195 | "vld1.32 d4[0], [%[d]] \n\t" | ||
196 | |||
197 | "vmvn.u8 d8, d0 \n\t" | ||
198 | "vshr.u32 d8, d8, #24 \n\t" | ||
199 | |||
200 | "vmul.u32 d8, d8, d16 \n\t" | ||
201 | |||
202 | "vmull.u8 q6, d8,d4 \n\t" | ||
203 | "vqrshrn.u16 d8, q6, #8 \n\t" | ||
204 | // Add to 's' | ||
205 | "vqadd.u8 d0, d0,d8 \n\t" | ||
206 | "vst1.32 d0[0], [%[d]] \n\t" | ||
207 | |||
208 | //** Trailing single | ||
209 | |||
210 | AP"done:\n\t" | ||
211 | |||
212 | |||
213 | : [d] "+r" (d), [s] "+r" (s), [tmp] "+r" (tmp) // read-write: the code above advances d/s and overwrites tmp | ||
214 | // Input | ||
215 | : [e] "r" (e), [c] "r" (c) | ||
216 | : "q0", "q1", "q2","q3", "q4","q5","q6", "q7","q8","cc","memory" // clobbered (andS/cmp change the flags) | ||
217 | ); | ||
218 | #undef AP | ||
219 | |||
220 | } | ||
221 | |||
222 | static void | ||
223 | _op_blend_pas_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
224 | /* NEON span blend over l pixels: every byte of *d becomes | ||
225 | * sat(s + round(d * (255 - s.alpha) / 256)), where s.alpha is the top byte of the source pixel. | ||
226 | * m and c are unused. Returns immediately when l <= 0. tmp and pl are scratch for the assembly. */ | ||
227 | DATA32 *e = d + l, *tmp = e, *pl = e; /* initial values of tmp/pl are never read by the assembly */ | ||
228 | if (l <= 0) return; /* the single-pixel prologue below stores a pixel before any length check */ | ||
229 | #define AP "blend_pas_dp_" | ||
230 | asm volatile ( | ||
231 | ".fpu neon \n\t" | ||
232 | "vmov.i8 q8, #1 \n\t" | ||
233 | AP"loopchoose: \n\t" | ||
234 | // If aligned - go as fast we can | ||
235 | "andS %[tmp], %[d], #31 \n\t" | ||
236 | "beq "AP"quadstart \n\t" | ||
237 | |||
238 | // See if we can at least do our double loop | ||
239 | "andS %[tmp], %[d], $0x7 \n\t" | ||
240 | "beq "AP"dualstart \n\t" | ||
241 | |||
242 | // Ugly single word version | ||
243 | AP "singleloop: \n\t" | ||
244 | "vld1.32 d0[0], [%[s]]! \n\t" | ||
245 | "vld1.32 d4[0], [%[d]] \n\t" | ||
246 | |||
247 | "vmvn.u8 d8, d0 \n\t" | ||
248 | "vshr.u32 d8, d8,$0x18 \n\t" | ||
249 | |||
250 | // Multiply into all fields | ||
251 | "vmul.u32 d8, d8, d16 \n\t" | ||
252 | |||
253 | // Multiply out | ||
254 | "vmull.u8 q6, d8, d4 \n\t" | ||
255 | |||
256 | "vqrshrn.u16 d8, q6, #8 \n\t" | ||
257 | |||
258 | // Add to s | ||
259 | "vqadd.u8 d0, d0,d8 \n\t" | ||
260 | "vst1.32 d0[0], [%[d]]! \n\t" | ||
261 | |||
262 | AP"dualstart: \n\t" | ||
263 | "sub %[tmp], %[e], %[d] \n\t" | ||
264 | "cmp %[tmp], #32 \n\t" | ||
265 | "blt "AP"loopout \n\t" | ||
266 | |||
267 | // If aligned - go as fast we can | ||
268 | "andS %[tmp], %[d], #31 \n\t" | ||
269 | "beq "AP"quadstart \n\t" | ||
270 | |||
271 | AP"dualloop: \n\t" | ||
272 | |||
273 | "vldm %[s]!, {d0} \n\t" | ||
274 | "vldm %[d], {d4} \n\t" | ||
275 | |||
276 | // Subtract from 255 (ie negate) and extract alpha channel | ||
277 | "vmvn.u8 d8, d0 \n\t" | ||
278 | "vshr.u32 d8, d8,$0x18 \n\t" | ||
279 | |||
280 | // Multiply into all fields | ||
281 | "vmul.u32 d8, d8, d16 \n\t" | ||
282 | |||
283 | // Multiply out | ||
284 | "vmull.u8 q6, d8, d4 \n\t" | ||
285 | |||
286 | "vqrshrn.u16 d8, q6, #8 \n\t" | ||
287 | |||
288 | // Add to s | ||
289 | "vqadd.u8 d0, d0,d8 \n\t" | ||
290 | "vstm %[d]!, {d0} \n\t" | ||
291 | |||
292 | "andS %[tmp], %[d], $0x1f \n\t" | ||
293 | "bne "AP"dualloop \n\t" | ||
294 | |||
295 | AP"quadstart: \n\t" | ||
296 | "sub %[tmp], %[e], %[d] \n\t" | ||
297 | "cmp %[tmp], #32 \n\t" | ||
298 | "blt "AP"loopout \n\t" | ||
299 | |||
300 | "sub %[tmp], %[e], #31 \n\t" | ||
301 | |||
302 | AP"quadloop:\n\t" | ||
303 | "vldm %[s]!, {d0,d1,d2,d3} \n\t" | ||
304 | "vldm %[d], {d4,d5,d6,d7} \n\t" | ||
305 | |||
306 | // Subtract from 255 (ie negate) and extract alpha channel | ||
307 | "vmvn.u8 q4, q0 \n\t" | ||
308 | "vmvn.u8 q5, q1 \n\t" | ||
309 | "vshr.u32 q4, q4,$0x18 \n\t" | ||
310 | "vshr.u32 q5, q5,$0x18 \n\t" | ||
311 | |||
312 | // Prepare to preload | ||
313 | "add %[pl], %[s], #32 \n\t" | ||
314 | |||
315 | // Multiply into all fields | ||
316 | "vmul.u32 q4, q4, q8 \n\t" | ||
317 | "vmul.u32 q5, q5, q8 \n\t" | ||
318 | "pld [%[pl]] \n\t" | ||
319 | |||
320 | // Multiply out | ||
321 | "vmull.u8 q6, d8, d4 \n\t" | ||
322 | "vmull.u8 q7, d10, d6 \n\t" | ||
323 | "vmull.u8 q2, d9, d5 \n\t" | ||
324 | "vmull.u8 q3, d11, d7 \n\t" | ||
325 | |||
326 | "add %[pl], %[d], #32 \n\t" | ||
327 | |||
328 | "vqrshrn.u16 d8, q6, #8 \n\t" | ||
329 | "vqrshrn.u16 d10, q7, #8 \n\t" | ||
330 | "vqrshrn.u16 d9, q2, #8 \n\t" | ||
331 | "vqrshrn.u16 d11, q3, #8 \n\t" | ||
332 | "pld [%[pl]] \n\t" | ||
333 | |||
334 | "cmp %[tmp], %[pl] \n\t" | ||
335 | // Add to s | ||
336 | "vqadd.u8 q0, q0,q4 \n\t" | ||
337 | "vqadd.u8 q1, q1,q5 \n\t" | ||
338 | |||
339 | "vstm %[d]!, {d0,d1,d2,d3} \n\t" | ||
340 | |||
341 | "bhi "AP"quadloop \n\t" | ||
342 | |||
343 | AP "loopout: \n\t" | ||
344 | "cmp %[d], %[e] \n\t" | ||
345 | "beq "AP"done \n\t" | ||
346 | |||
347 | "sub %[tmp],%[e], %[d] \n\t" | ||
348 | "cmp %[tmp],$0x04 \n\t" | ||
349 | "beq "AP"singleloop2 \n\t" | ||
350 | |||
351 | "sub %[tmp],%[e],$0x7 \n\t" | ||
352 | |||
353 | AP"dualloop2: \n\t" | ||
354 | "vldm %[s]!, {d0} \n\t" | ||
355 | "vldm %[d], {d4} \n\t" | ||
356 | |||
357 | // Subtract from 255 (ie negate) and extract alpha channel | ||
358 | "vmvn.u8 d8, d0 \n\t" | ||
359 | "vshr.u32 d8, d8,$0x18 \n\t" | ||
360 | |||
361 | // Multiply into all fields | ||
362 | "vmul.u32 d8, d8, d16 \n\t" | ||
363 | |||
364 | // Multiply out | ||
365 | "vmull.u8 q6, d8, d4 \n\t" | ||
366 | |||
367 | "vqrshrn.u16 d8, q6, #8 \n\t" | ||
368 | |||
369 | // Add to s | ||
370 | "vqadd.u8 d0, d0,d8 \n\t" | ||
371 | |||
372 | "vstm %[d]!, {d0} \n\t" | ||
373 | "cmp %[tmp], %[d] \n\t" | ||
374 | |||
375 | "bhi "AP"dualloop2 \n\t" | ||
376 | |||
377 | // Single ?? | ||
378 | "cmp %[e], %[d] \n\t" | ||
379 | "beq "AP"done \n\t" | ||
380 | |||
381 | AP "singleloop2: \n\t" | ||
382 | "vld1.32 d0[0], [%[s]] \n\t" | ||
383 | "vld1.32 d4[0], [%[d]] \n\t" | ||
384 | |||
385 | "vmvn.u8 d8, d0 \n\t" | ||
386 | |||
387 | "vshr.u32 d8, d8,$0x18 \n\t" | ||
388 | |||
389 | // Multiply into all fields | ||
390 | "vmul.u32 d8, d8, d16 \n\t" | ||
391 | |||
392 | // Multiply out | ||
393 | "vmull.u8 q6, d8, d4 \n\t" | ||
394 | |||
395 | "vqrshrn.u16 d8, q6, #8 \n\t" | ||
396 | |||
397 | // Add to s | ||
398 | "vqadd.u8 d0, d0,d8 \n\t" | ||
399 | |||
400 | "vst1.32 d0[0], [%[d]] \n\t" | ||
401 | AP "done:\n\t" | ||
402 | |||
403 | : /* Out (read-write: all four are written by the code above) */ [s] "+r" (s), [d] "+r" (d), [tmp] "+r" (tmp), | ||
404 | [pl] "+r" (pl) | ||
405 | : /* In */ [e] "r" (e) | ||
406 | : /* Clobbered (andS/cmp change the flags, hence "cc") */ | ||
407 | "q0","q1","q2","q3","q4","q5","q6","q7","q8","cc","memory" | ||
408 | ); | ||
409 | #undef AP | ||
410 | } | ||
411 | /* _op_blend_pan_dp_neon is NULL (no routine under that name); each *_dpan name expands to the matching *_dp name. */ |||
412 | #define _op_blend_pan_dp_neon NULL | ||
413 | |||
414 | #define _op_blend_p_dpan_neon _op_blend_p_dp_neon | ||
415 | #define _op_blend_pas_dpan_neon _op_blend_pas_dp_neon | ||
416 | #define _op_blend_pan_dpan_neon _op_blend_pan_dp_neon | ||
417 | |||
418 | static void | ||
419 | init_blend_pixel_span_funcs_neon(void) | ||
420 | { | ||
421 | /* CPU_NEON slots of op_blend_span_funcs for [source][SM_N][SC_N]: per source kind, DP entry first, then DP_AN. */ | ||
422 | op_blend_span_funcs[SP][SM_N][SC_N][DP][CPU_NEON] = _op_blend_p_dp_neon; | ||
423 | op_blend_span_funcs[SP][SM_N][SC_N][DP_AN][CPU_NEON] = _op_blend_p_dpan_neon; | ||
424 | op_blend_span_funcs[SP_AS][SM_N][SC_N][DP][CPU_NEON] = _op_blend_pas_dp_neon; | ||
425 | op_blend_span_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_NEON] = _op_blend_pas_dpan_neon; | ||
426 | op_blend_span_funcs[SP_AN][SM_N][SC_N][DP][CPU_NEON] = _op_blend_pan_dp_neon; | ||
427 | op_blend_span_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_NEON] = _op_blend_pan_dpan_neon; | ||
428 | } | ||
429 | #endif | ||
430 | |||
431 | #ifdef BUILD_NEON | ||
432 | static void | ||
433 | _op_blend_pt_p_dp_neon(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) { /* one pixel, plain C: s over *d; m and the incoming c are not read */ | ||
434 | DATA32 inv_alpha = 256 - (s >> 24); | ||
435 | *d = MUL_256(inv_alpha, *d) + s; | ||
436 | } | ||
437 | |||
438 | /* pt aliases: the pan name is NULL, pas expands to the p routine, and each *_dpan name expands to the matching *_dp name. */ |||
439 | #define _op_blend_pt_pan_dp_neon NULL | ||
440 | #define _op_blend_pt_pas_dp_neon _op_blend_pt_p_dp_neon | ||
441 | |||
442 | #define _op_blend_pt_p_dpan_neon _op_blend_pt_p_dp_neon | ||
443 | #define _op_blend_pt_pan_dpan_neon _op_blend_pt_pan_dp_neon | ||
444 | #define _op_blend_pt_pas_dpan_neon _op_blend_pt_pas_dp_neon | ||
445 | |||
446 | static void | ||
447 | init_blend_pixel_pt_funcs_neon(void) | ||
448 | { | ||
449 | /* CPU_NEON slots of op_blend_pt_funcs for [source][SM_N][SC_N]: per source kind, DP entry first, then DP_AN. */ | ||
450 | op_blend_pt_funcs[SP][SM_N][SC_N][DP][CPU_NEON] = _op_blend_pt_p_dp_neon; | ||
451 | op_blend_pt_funcs[SP][SM_N][SC_N][DP_AN][CPU_NEON] = _op_blend_pt_p_dpan_neon; | ||
452 | op_blend_pt_funcs[SP_AS][SM_N][SC_N][DP][CPU_NEON] = _op_blend_pt_pas_dp_neon; | ||
453 | op_blend_pt_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_NEON] = _op_blend_pt_pas_dpan_neon; | ||
454 | op_blend_pt_funcs[SP_AN][SM_N][SC_N][DP][CPU_NEON] = _op_blend_pt_pan_dp_neon; | ||
455 | op_blend_pt_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_NEON] = _op_blend_pt_pan_dpan_neon; | ||
456 | } | ||
457 | #endif | ||
458 | |||
459 | /*-----*/ | ||
460 | |||
461 | /* blend_rel pixel -> dst */ | ||
462 | |||
463 | #ifdef BUILD_NEON | ||
464 | static void | ||
465 | _op_blend_rel_p_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
466 | /* Plain C loop (no NEON instructions) over l pixels: | ||
467 | * *d = MUL_256(1 + (*d >> 24), *s) + MUL_256(256 - (*s >> 24), *d). m and the incoming c are not read. */ | ||
468 | DATA32 *const end = d + l; | ||
469 | for (; d < end; d++, s++) { | ||
470 | const int inv_sa = 256 - (*s >> 24); | ||
471 | const DATA32 da1 = 1 + (*d >> 24); | ||
472 | *d = MUL_256(da1, *s) + MUL_256(inv_sa, *d); | ||
473 | } | ||
474 | } | ||
475 | |||
476 | static void | ||
477 | _op_blend_rel_pan_dp_neon(DATA32 *s, DATA8 *m, DATA32 c, DATA32 *d, int l) { | ||
478 | /* Plain C loop over l pixels: *d = MUL_256(1 + (*d >> 24), *s). m and the incoming c are not read. */ | ||
479 | DATA32 *const end = d + l; | ||
480 | for (; d < end; d++, s++) { | ||
481 | const DATA32 da1 = 1 + (*d >> 24); | ||
482 | *d = MUL_256(da1, *s); | ||
483 | } | ||
484 | } | ||
485 | /* Name aliases only: rel pas expands to the rel_p routine; the rel *_dpan names expand to the plain (non-rel) blend *_dpan names. */ |||
486 | #define _op_blend_rel_pas_dp_neon _op_blend_rel_p_dp_neon | ||
487 | |||
488 | #define _op_blend_rel_p_dpan_neon _op_blend_p_dpan_neon | ||
489 | #define _op_blend_rel_pan_dpan_neon _op_blend_pan_dpan_neon | ||
490 | #define _op_blend_rel_pas_dpan_neon _op_blend_pas_dpan_neon | ||
491 | |||
492 | static void | ||
493 | init_blend_rel_pixel_span_funcs_neon(void) | ||
494 | { | ||
495 | /* CPU_NEON slots of op_blend_rel_span_funcs for [source][SM_N][SC_N]: per source kind, DP entry first, then DP_AN. */ | ||
496 | op_blend_rel_span_funcs[SP][SM_N][SC_N][DP][CPU_NEON] = _op_blend_rel_p_dp_neon; | ||
497 | op_blend_rel_span_funcs[SP][SM_N][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_p_dpan_neon; | ||
498 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_N][DP][CPU_NEON] = _op_blend_rel_pas_dp_neon; | ||
499 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_pas_dpan_neon; | ||
500 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_N][DP][CPU_NEON] = _op_blend_rel_pan_dp_neon; | ||
501 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_pan_dpan_neon; | ||
502 | } | ||
503 | #endif | ||
504 | |||
505 | #ifdef BUILD_NEON | ||
506 | static void | ||
507 | _op_blend_rel_pt_p_dp_neon(DATA32 s, DATA8 m, DATA32 c, DATA32 *d) { /* one pixel, plain C; m and the incoming c are not read */ | ||
508 | DATA32 inv_alpha = 256 - (s >> 24); | ||
509 | *d = MUL_256(inv_alpha, *d) + MUL_SYM(*d >> 24, s); | ||
510 | } | ||
511 | /* Name aliases only: rel pt pas/pan expand to the rel_pt_p routine; the rel pt *_dpan names expand to the plain (non-rel) blend pt *_dpan names. */ |||
512 | #define _op_blend_rel_pt_pas_dp_neon _op_blend_rel_pt_p_dp_neon | ||
513 | #define _op_blend_rel_pt_pan_dp_neon _op_blend_rel_pt_p_dp_neon | ||
514 | |||
515 | #define _op_blend_rel_pt_p_dpan_neon _op_blend_pt_p_dpan_neon | ||
516 | #define _op_blend_rel_pt_pas_dpan_neon _op_blend_pt_pas_dpan_neon | ||
517 | #define _op_blend_rel_pt_pan_dpan_neon _op_blend_pt_pan_dpan_neon | ||
518 | |||
519 | static void | ||
520 | init_blend_rel_pixel_pt_funcs_neon(void) | ||
521 | { | ||
522 | /* CPU_NEON slots of op_blend_rel_pt_funcs for [source][SM_N][SC_N]: per source kind, DP entry first, then DP_AN. */ | ||
523 | op_blend_rel_pt_funcs[SP][SM_N][SC_N][DP][CPU_NEON] = _op_blend_rel_pt_p_dp_neon; | ||
524 | op_blend_rel_pt_funcs[SP][SM_N][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_pt_p_dpan_neon; | ||
525 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_N][DP][CPU_NEON] = _op_blend_rel_pt_pas_dp_neon; | ||
526 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_pt_pas_dpan_neon; | ||
527 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_N][DP][CPU_NEON] = _op_blend_rel_pt_pan_dp_neon; | ||
528 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_NEON] = _op_blend_rel_pt_pan_dpan_neon; | ||
529 | } | ||
530 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_sse3.c b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_sse3.c new file mode 100644 index 0000000..2e72fec --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend/op_blend_pixel_sse3.c | |||
@@ -0,0 +1,316 @@ | |||
1 | /* blend pixel --> dst */ | ||
2 | |||
3 | #ifdef BUILD_SSE3 | ||
4 | |||
5 | static void | ||
6 | _op_blend_p_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
7 | /* Blend s over d for l pixels; scalar form: *d = *s + MUL_256(256 - (*s >> 24), *d). m and c are unused. */ | ||
8 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
9 | { /* UOP: one pixel */ | ||
10 | |||
11 | int inv_a = 256 - (*s >> 24); | ||
12 | *d = MUL_256(inv_a, *d) + *s; | ||
13 | s++; d++; l--; | ||
14 | }, | ||
15 | { /* A4OP: four pixels per pass; d uses aligned load/store */ | ||
16 | |||
17 | __m128i dst = _mm_load_si128((__m128i *)d); | ||
18 | __m128i src = _mm_lddqu_si128((__m128i *)s); | ||
19 | |||
20 | __m128i inv = sub4_alpha_sse3(src); | ||
21 | __m128i keep = mul_256_sse3(inv, dst); | ||
22 | dst = _mm_add_epi32(keep, src); | ||
23 | |||
24 | _mm_store_si128((__m128i *)d, dst); | ||
25 | |||
26 | s += 4; d += 4; l -= 4; | ||
27 | }, | ||
28 | { /* A8OP: eight pixels per pass, handled as two independent 4-pixel halves */ | ||
29 | |||
30 | __m128i dst_lo = _mm_load_si128((__m128i *)d); | ||
31 | __m128i src_lo = _mm_lddqu_si128((__m128i *)s); | ||
32 | |||
33 | __m128i dst_hi = _mm_load_si128((__m128i *)(d+4)); | ||
34 | __m128i src_hi = _mm_lddqu_si128((__m128i *)(s+4)); | ||
35 | |||
36 | __m128i inv_lo = sub4_alpha_sse3(src_lo); | ||
37 | __m128i inv_hi = sub4_alpha_sse3(src_hi); | ||
38 | |||
39 | __m128i keep_lo = mul_256_sse3(inv_lo, dst_lo); | ||
40 | __m128i keep_hi = mul_256_sse3(inv_hi, dst_hi); | ||
41 | |||
42 | dst_lo = _mm_add_epi32(keep_lo, src_lo); | ||
43 | dst_hi = _mm_add_epi32(keep_hi, src_hi); | ||
44 | |||
45 | _mm_store_si128((__m128i *)d, dst_lo); | ||
46 | _mm_store_si128((__m128i *)(d+4), dst_hi); | ||
47 | |||
48 | s += 8; d += 8; l -= 8; | ||
49 | }) | ||
50 | } | ||
51 | |||
52 | static void | ||
53 | _op_blend_pas_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c __UNUSED__, DATA32 *d, int l) { | ||
54 | /* Blend s over d for l pixels. The scalar step skips alpha 0 and copies alpha 255 outright; | ||
55 | * the vector steps blend every lane, then restore the destination in lanes whose source alpha is 0. */ | ||
56 | |||
57 | const __m128i zero = _mm_setzero_si128(); | ||
58 | |||
59 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
60 | { /* UOP: one pixel */ | ||
61 | const DATA32 sa = *s >> 24; | ||
62 | |||
63 | if (sa == 0xff) | ||
64 | { | ||
65 | *d = *s; /* top byte 0xff: plain copy */ | ||
66 | } | ||
67 | else if (sa != 0) | ||
68 | { | ||
69 | int inv_a = 256 - sa; | ||
70 | *d = *s + MUL_256(inv_a, *d); | ||
71 | } | ||
72 | /* sa == 0: destination left untouched */ | ||
73 | s++; d++; l--; | ||
74 | }, | ||
75 | { /* A4OP: four pixels per pass */ | ||
76 | |||
77 | __m128i dst = _mm_load_si128((__m128i *)d); | ||
78 | __m128i src = _mm_lddqu_si128((__m128i *)s); | ||
79 | |||
80 | __m128i inv = sub4_alpha_sse3(src); | ||
81 | __m128i mixed = mul_256_sse3(inv, dst); | ||
82 | |||
83 | mixed = _mm_add_epi32(src, mixed); | ||
84 | |||
85 | __m128i skip = _mm_cmpeq_epi32(_mm_srli_epi32(src, 24), zero); | ||
86 | __m128i take = ~skip; | ||
87 | |||
88 | mixed = _mm_and_si128(take, mixed); | ||
89 | dst = _mm_and_si128(skip, dst); | ||
90 | |||
91 | dst = _mm_add_epi32(mixed, dst); | ||
92 | |||
93 | _mm_store_si128((__m128i *)d, dst); | ||
94 | |||
95 | s += 4; d += 4; l -= 4; | ||
96 | }, | ||
97 | { /* A8OP: eight pixels per pass, handled as two independent 4-pixel halves */ | ||
98 | |||
99 | __m128i dst_lo = _mm_load_si128((__m128i *)d); | ||
100 | __m128i src_lo = _mm_lddqu_si128((__m128i *)s); | ||
101 | |||
102 | __m128i dst_hi = _mm_load_si128((__m128i *)(d+4)); | ||
103 | __m128i src_hi = _mm_lddqu_si128((__m128i *)(s+4)); | ||
104 | |||
105 | __m128i inv_lo = sub4_alpha_sse3(src_lo); | ||
106 | __m128i inv_hi = sub4_alpha_sse3(src_hi); | ||
107 | |||
108 | __m128i mixed_lo = mul_256_sse3(inv_lo, dst_lo); | ||
109 | __m128i mixed_hi = mul_256_sse3(inv_hi, dst_hi); | ||
110 | |||
111 | mixed_lo = _mm_add_epi32(src_lo, mixed_lo); | ||
112 | mixed_hi = _mm_add_epi32(src_hi, mixed_hi); | ||
113 | |||
114 | __m128i skip_lo = _mm_cmpeq_epi32(_mm_srli_epi32(src_lo, 24), zero); | ||
115 | __m128i skip_hi = _mm_cmpeq_epi32(_mm_srli_epi32(src_hi, 24), zero); | ||
116 | |||
117 | __m128i take_lo = ~skip_lo; | ||
118 | __m128i take_hi = ~skip_hi; | ||
119 | |||
120 | mixed_lo = _mm_and_si128(take_lo, mixed_lo); | ||
121 | dst_lo = _mm_and_si128(skip_lo, dst_lo); | ||
122 | |||
123 | mixed_hi = _mm_and_si128(take_hi, mixed_hi); | ||
124 | dst_hi = _mm_and_si128(skip_hi, dst_hi); | ||
125 | |||
126 | dst_lo = _mm_add_epi32(mixed_lo, dst_lo); | ||
127 | dst_hi = _mm_add_epi32(mixed_hi, dst_hi); | ||
128 | |||
129 | _mm_store_si128((__m128i *)d, dst_lo); | ||
130 | _mm_store_si128((__m128i *)(d+4), dst_hi); | ||
131 | |||
132 | s += 8; d += 8; l -= 8; | ||
133 | }) | ||
134 | } | ||
135 | /* _op_blend_pan_dp_sse3 is NULL (no routine under that name); each *_dpan name expands to the matching *_dp name. */ |||
136 | #define _op_blend_pan_dp_sse3 NULL | ||
137 | |||
138 | #define _op_blend_p_dpan_sse3 _op_blend_p_dp_sse3 | ||
139 | #define _op_blend_pas_dpan_sse3 _op_blend_pas_dp_sse3 | ||
140 | #define _op_blend_pan_dpan_sse3 _op_blend_pan_dp_sse3 | ||
141 | |||
142 | static void | ||
143 | init_blend_pixel_span_funcs_sse3(void) | ||
144 | { | ||
145 | /* CPU_SSE3 slots of op_blend_span_funcs for [source][SM_N][SC_N]; the SP/DP_AN slot is deliberately not assigned (see FIXME). */ | ||
146 | op_blend_span_funcs[SP][SM_N][SC_N][DP][CPU_SSE3] = _op_blend_p_dp_sse3; | ||
147 | // FIXME: BUGGY BUGGY Core i5 750 (32bit), 4.5.2 (Ubuntu/Linaro 4.5.2-8ubuntu4), ello (text and rectangle) | ||
148 | // op_blend_span_funcs[SP][SM_N][SC_N][DP_AN][CPU_SSE3] = _op_blend_p_dpan_sse3; | ||
149 | |||
150 | op_blend_span_funcs[SP_AS][SM_N][SC_N][DP][CPU_SSE3] = _op_blend_pas_dp_sse3; | ||
151 | op_blend_span_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_SSE3] = _op_blend_pas_dpan_sse3; | ||
152 | op_blend_span_funcs[SP_AN][SM_N][SC_N][DP][CPU_SSE3] = _op_blend_pan_dp_sse3; | ||
153 | op_blend_span_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_SSE3] = _op_blend_pan_dpan_sse3; | ||
154 | } | ||
155 | /* The p and pan pt names are NULL; every other pt name in this group aliases one of them, so all of them expand to NULL. */ |||
156 | #define _op_blend_pt_p_dp_sse3 NULL | ||
157 | |||
158 | #define _op_blend_pt_pas_dp_sse3 _op_blend_pt_p_dp_sse3 | ||
159 | #define _op_blend_pt_pan_dp_sse3 NULL | ||
160 | |||
161 | #define _op_blend_pt_p_dpan_sse3 _op_blend_pt_p_dp_sse3 | ||
162 | #define _op_blend_pt_pan_dpan_sse3 _op_blend_pt_pan_dp_sse3 | ||
163 | #define _op_blend_pt_pas_dpan_sse3 _op_blend_pt_pas_dp_sse3 | ||
164 | |||
165 | static void | ||
166 | init_blend_pixel_pt_funcs_sse3(void) | ||
167 | { | ||
168 | /* CPU_SSE3 slots of op_blend_pt_funcs for [source][SM_N][SC_N]: per source kind, DP entry first, then DP_AN. */ | ||
169 | op_blend_pt_funcs[SP][SM_N][SC_N][DP][CPU_SSE3] = _op_blend_pt_p_dp_sse3; | ||
170 | op_blend_pt_funcs[SP][SM_N][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_p_dpan_sse3; | ||
171 | op_blend_pt_funcs[SP_AS][SM_N][SC_N][DP][CPU_SSE3] = _op_blend_pt_pas_dp_sse3; | ||
172 | op_blend_pt_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_pas_dpan_sse3; | ||
173 | op_blend_pt_funcs[SP_AN][SM_N][SC_N][DP][CPU_SSE3] = _op_blend_pt_pan_dp_sse3; | ||
174 | op_blend_pt_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_SSE3] = _op_blend_pt_pan_dpan_sse3; | ||
175 | } | ||
176 | |||
177 | /*-----*/ | ||
178 | |||
179 | /* blend_rel pixel -> dst */ | ||
180 | |||
181 | static void | ||
182 | _op_blend_rel_p_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
183 | /* blend_rel for l pixels; scalar form: *d = MUL_256(1 + (*d >> 24), *s) + MUL_256(256 - (*s >> 24), *d). The incoming c is overwritten. */ | ||
184 | const __m128i one4 = _mm_set_epi32(1, 1, 1, 1); | ||
185 | |||
186 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
187 | { /* UOP: one pixel */ | ||
188 | |||
189 | int inv_sa = 256 - (*s >> 24); | ||
190 | c = 1 + (*d >> 24); | ||
191 | *d = MUL_256(c, *s) + MUL_256(inv_sa, *d); | ||
192 | d++; s++; l--; | ||
193 | }, | ||
194 | { /* A4OP: four pixels per pass */ | ||
195 | |||
196 | __m128i dst = _mm_load_si128((__m128i *)d); | ||
197 | __m128i src = _mm_lddqu_si128((__m128i *)s); | ||
198 | |||
199 | __m128i da1 = _mm_add_epi32(_mm_srli_epi32(dst, 24), one4); | ||
200 | __m128i inv = sub4_alpha_sse3(src); | ||
201 | |||
202 | dst = _mm_add_epi32(mul_256_sse3(da1, src), mul_256_sse3(inv, dst)); | ||
203 | |||
204 | _mm_store_si128((__m128i *)d, dst); | ||
205 | |||
206 | d += 4; s += 4; l -= 4; | ||
207 | }, | ||
208 | { /* A8OP: eight pixels per pass, handled as two independent 4-pixel halves */ | ||
209 | |||
210 | __m128i dst_lo = _mm_load_si128 ((__m128i *)d); | ||
211 | __m128i src_lo = _mm_lddqu_si128((__m128i *)s); | ||
212 | |||
213 | __m128i dst_hi = _mm_load_si128 ((__m128i *)(d+4)); | ||
214 | __m128i src_hi = _mm_lddqu_si128((__m128i *)(s+4)); | ||
215 | |||
216 | __m128i da1_lo = _mm_add_epi32(_mm_srli_epi32(dst_lo, 24), one4); | ||
217 | __m128i da1_hi = _mm_add_epi32(_mm_srli_epi32(dst_hi, 24), one4); | ||
218 | |||
219 | __m128i inv_lo = sub4_alpha_sse3(src_lo); | ||
220 | __m128i inv_hi = sub4_alpha_sse3(src_hi); | ||
221 | |||
222 | dst_lo = _mm_add_epi32(mul_256_sse3(da1_lo, src_lo), mul_256_sse3(inv_lo, dst_lo)); | ||
223 | dst_hi = _mm_add_epi32(mul_256_sse3(da1_hi, src_hi), mul_256_sse3(inv_hi, dst_hi)); | ||
224 | |||
225 | _mm_store_si128((__m128i *)d, dst_lo); | ||
226 | _mm_store_si128((__m128i *)(d+4), dst_hi); | ||
227 | |||
228 | d += 8; s += 8; l -= 8; | ||
229 | }) | ||
230 | } | ||
231 | |||
232 | static void | ||
233 | _op_blend_rel_pan_dp_sse3(DATA32 *s, DATA8 *m __UNUSED__, DATA32 c, DATA32 *d, int l) { | ||
234 | |||
235 | const __m128i ones = _mm_set_epi32(1, 1, 1, 1); | ||
236 | |||
237 | LOOP_ALIGNED_U1_A48_SSE3(d, l, | ||
238 | { /* UOP */ | ||
239 | |||
240 | c = 1 + (*d >> 24); | ||
241 | *d++ = MUL_256(c, *s); | ||
242 | s++; l--; | ||
243 | }, | ||
244 | { /* A4OP */ | ||
245 | |||
246 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
247 | __m128i d0 = _mm_load_si128((__m128i *)d); | ||
248 | |||
249 | __m128i c0 = _mm_add_epi32(_mm_srli_epi32(d0, 24), ones); | ||
250 | d0 = mul_256_sse3(c0, s0); | ||
251 | |||
252 | _mm_store_si128((__m128i *)d, d0); | ||
253 | |||
254 | d += 4; s += 4; l -= 4; | ||
255 | }, | ||
256 | { /* A8OP */ | ||
257 | |||
258 | __m128i s0 = _mm_lddqu_si128((__m128i *)s); | ||
259 | __m128i d0 = _mm_load_si128 ((__m128i *)d); | ||
260 | |||
261 | __m128i s1 = _mm_lddqu_si128((__m128i *)(s+4)); | ||
262 | __m128i d1 = _mm_load_si128 ((__m128i *)(d+4)); | ||
263 | |||
264 | __m128i c0 = _mm_add_epi32(_mm_srli_epi32(d0, 24), ones); | ||
265 | __m128i c1 = _mm_add_epi32(_mm_srli_epi32(d1, 24), ones); | ||
266 | |||
267 | d0 = mul_256_sse3(c0, s0); | ||
268 | d1 = mul_256_sse3(c1, s1); | ||
269 | |||
270 | _mm_store_si128((__m128i *)d, d0); | ||
271 | _mm_store_si128((__m128i *)(d+4), d1); | ||
272 | |||
273 | d += 8; s += 8; l -= 8; | ||
274 | }) | ||
275 | } | ||
276 | |||
277 | #define _op_blend_rel_pas_dp_sse3 _op_blend_rel_p_dp_sse3 | ||
278 | |||
279 | #define _op_blend_rel_p_dpan_sse3 _op_blend_p_dpan_sse3 | ||
280 | #define _op_blend_rel_pan_dpan_sse3 _op_blend_pan_dpan_sse3 | ||
281 | #define _op_blend_rel_pas_dpan_sse3 _op_blend_pas_dpan_sse3 | ||
282 | |||
283 | static void | ||
284 | init_blend_rel_pixel_span_funcs_sse3(void) | ||
285 | { | ||
286 | op_blend_rel_span_funcs[SP][SM_N][SC_N][DP][CPU_SSE3] = _op_blend_rel_p_dp_sse3; | ||
287 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_N][DP][CPU_SSE3] = _op_blend_rel_pas_dp_sse3; | ||
288 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_N][DP][CPU_SSE3] = _op_blend_rel_pan_dp_sse3; | ||
289 | |||
290 | op_blend_rel_span_funcs[SP][SM_N][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_p_dpan_sse3; | ||
291 | op_blend_rel_span_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pas_dpan_sse3; | ||
292 | op_blend_rel_span_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pan_dpan_sse3; | ||
293 | } | ||
294 | |||
295 | #define _op_blend_rel_pt_p_dp_sse3 NULL | ||
296 | #define _op_blend_rel_pt_pan_dp_sse3 NULL | ||
297 | |||
298 | #define _op_blend_rel_pt_pas_dp_sse3 _op_blend_rel_pt_p_dp_sse3 | ||
299 | |||
300 | #define _op_blend_rel_pt_p_dpan_sse3 _op_blend_pt_p_dpan_sse3 | ||
301 | #define _op_blend_rel_pt_pan_dpan_sse3 _op_blend_pt_pan_dpan_sse3 | ||
302 | #define _op_blend_rel_pt_pas_dpan_sse3 _op_blend_pt_pas_dpan_sse3 | ||
303 | |||
304 | static void | ||
305 | init_blend_rel_pixel_pt_funcs_sse3(void) | ||
306 | { | ||
307 | op_blend_rel_pt_funcs[SP][SM_N][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_p_dp_sse3; | ||
308 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_pas_dp_sse3; | ||
309 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_N][DP][CPU_SSE3] = _op_blend_rel_pt_pan_dp_sse3; | ||
310 | |||
311 | op_blend_rel_pt_funcs[SP][SM_N][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_p_dpan_sse3; | ||
312 | op_blend_rel_pt_funcs[SP_AS][SM_N][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pas_dpan_sse3; | ||
313 | op_blend_rel_pt_funcs[SP_AN][SM_N][SC_N][DP_AN][CPU_SSE3] = _op_blend_rel_pt_pan_dpan_sse3; | ||
314 | } | ||
315 | |||
316 | #endif | ||
diff --git a/libraries/evas/src/lib/engines/common/evas_op_blend_main_.c b/libraries/evas/src/lib/engines/common/evas_op_blend_main_.c new file mode 100644 index 0000000..a1a5de8 --- /dev/null +++ b/libraries/evas/src/lib/engines/common/evas_op_blend_main_.c | |||
@@ -0,0 +1,671 @@ | |||
1 | #include "evas_common.h" | ||
2 | |||
3 | RGBA_Gfx_Func op_blend_span_funcs[SP_LAST][SM_LAST][SC_LAST][DP_LAST][CPU_LAST]; | ||
4 | RGBA_Gfx_Pt_Func op_blend_pt_funcs[SP_LAST][SM_LAST][SC_LAST][DP_LAST][CPU_LAST]; | ||
5 | |||
6 | static void op_blend_init(void); | ||
7 | static void op_blend_shutdown(void); | ||
8 | |||
9 | static RGBA_Gfx_Func op_blend_pixel_span_get(RGBA_Image *src, RGBA_Image *dst, int pixels); | ||
10 | static RGBA_Gfx_Func op_blend_color_span_get(DATA32 col, RGBA_Image *dst, int pixels); | ||
11 | static RGBA_Gfx_Func op_blend_pixel_color_span_get(RGBA_Image *src, DATA32 col, RGBA_Image *dst, int pixels); | ||
12 | static RGBA_Gfx_Func op_blend_mask_color_span_get(DATA32 col, RGBA_Image *dst, int pixels); | ||
13 | static RGBA_Gfx_Func op_blend_pixel_mask_span_get(RGBA_Image *src, RGBA_Image *dst, int pixels); | ||
14 | |||
15 | static RGBA_Gfx_Pt_Func op_blend_pixel_pt_get(Image_Entry_Flags src_flags, RGBA_Image *dst); | ||
16 | static RGBA_Gfx_Pt_Func op_blend_color_pt_get(DATA32 col, RGBA_Image *dst); | ||
17 | static RGBA_Gfx_Pt_Func op_blend_pixel_color_pt_get(Image_Entry_Flags src_flags, DATA32 col, RGBA_Image *dst); | ||
18 | static RGBA_Gfx_Pt_Func op_blend_mask_color_pt_get(DATA32 col, RGBA_Image *dst); | ||
19 | static RGBA_Gfx_Pt_Func op_blend_pixel_mask_pt_get(Image_Entry_Flags src_flags, RGBA_Image *dst); | ||
20 | |||
21 | static RGBA_Gfx_Compositor _composite_blend = { "blend", | ||
22 | op_blend_init, op_blend_shutdown, | ||
23 | op_blend_pixel_span_get, op_blend_color_span_get, | ||
24 | op_blend_pixel_color_span_get, op_blend_mask_color_span_get, | ||
25 | op_blend_pixel_mask_span_get, | ||
26 | op_blend_pixel_pt_get, op_blend_color_pt_get, | ||
27 | op_blend_pixel_color_pt_get, op_blend_mask_color_pt_get, | ||
28 | op_blend_pixel_mask_pt_get | ||
29 | }; | ||
30 | |||
31 | RGBA_Gfx_Compositor * | ||
32 | evas_common_gfx_compositor_blend_get(void) | ||
33 | { | ||
34 | return &(_composite_blend); | ||
35 | } | ||
36 | |||
37 | |||
38 | RGBA_Gfx_Func op_blend_rel_span_funcs[SP_LAST][SM_LAST][SC_LAST][DP_LAST][CPU_LAST]; | ||
39 | RGBA_Gfx_Pt_Func op_blend_rel_pt_funcs[SP_LAST][SM_LAST][SC_LAST][DP_LAST][CPU_LAST]; | ||
40 | |||
41 | static void op_blend_rel_init(void); | ||
42 | static void op_blend_rel_shutdown(void); | ||
43 | |||
44 | static RGBA_Gfx_Func op_blend_rel_pixel_span_get(RGBA_Image *src, RGBA_Image *dst, int pixels); | ||
45 | static RGBA_Gfx_Func op_blend_rel_color_span_get(DATA32 col, RGBA_Image *dst, int pixels); | ||
46 | static RGBA_Gfx_Func op_blend_rel_pixel_color_span_get(RGBA_Image *src, DATA32 col, RGBA_Image *dst, int pixels); | ||
47 | static RGBA_Gfx_Func op_blend_rel_mask_color_span_get(DATA32 col, RGBA_Image *dst, int pixels); | ||
48 | static RGBA_Gfx_Func op_blend_rel_pixel_mask_span_get(RGBA_Image *src, RGBA_Image *dst, int pixels); | ||
49 | |||
50 | static RGBA_Gfx_Pt_Func op_blend_rel_pixel_pt_get(Image_Entry_Flags src_flags, RGBA_Image *dst); | ||
51 | static RGBA_Gfx_Pt_Func op_blend_rel_color_pt_get(DATA32 col, RGBA_Image *dst); | ||
52 | static RGBA_Gfx_Pt_Func op_blend_rel_pixel_color_pt_get(Image_Entry_Flags src_flags, DATA32 col, RGBA_Image *dst); | ||
53 | static RGBA_Gfx_Pt_Func op_blend_rel_mask_color_pt_get(DATA32 col, RGBA_Image *dst); | ||
54 | static RGBA_Gfx_Pt_Func op_blend_rel_pixel_mask_pt_get(Image_Entry_Flags src_flags, RGBA_Image *dst); | ||
55 | |||
56 | static RGBA_Gfx_Compositor _composite_blend_rel = { "blend_rel", | ||
57 | op_blend_rel_init, op_blend_rel_shutdown, | ||
58 | op_blend_rel_pixel_span_get, op_blend_rel_color_span_get, | ||
59 | op_blend_rel_pixel_color_span_get, op_blend_rel_mask_color_span_get, | ||
60 | op_blend_rel_pixel_mask_span_get, | ||
61 | op_blend_rel_pixel_pt_get, op_blend_rel_color_pt_get, | ||
62 | op_blend_rel_pixel_color_pt_get, op_blend_rel_mask_color_pt_get, | ||
63 | op_blend_rel_pixel_mask_pt_get | ||
64 | }; | ||
65 | |||
66 | RGBA_Gfx_Compositor * | ||
67 | evas_common_gfx_compositor_blend_rel_get(void) | ||
68 | { | ||
69 | return &(_composite_blend_rel); | ||
70 | } | ||
71 | |||
72 | |||
73 | # include "./evas_op_blend/op_blend_pixel_.c" | ||
74 | # include "./evas_op_blend/op_blend_color_.c" | ||
75 | # include "./evas_op_blend/op_blend_pixel_color_.c" | ||
76 | # include "./evas_op_blend/op_blend_pixel_mask_.c" | ||
77 | # include "./evas_op_blend/op_blend_mask_color_.c" | ||
78 | //# include "./evas_op_blend/op_blend_pixel_mask_color_.c" | ||
79 | |||
80 | # include "./evas_op_blend/op_blend_pixel_i386.c" | ||
81 | # include "./evas_op_blend/op_blend_color_i386.c" | ||
82 | # include "./evas_op_blend/op_blend_pixel_color_i386.c" | ||
83 | # include "./evas_op_blend/op_blend_pixel_mask_i386.c" | ||
84 | # include "./evas_op_blend/op_blend_mask_color_i386.c" | ||
85 | //# include "./evas_op_blend/op_blend_pixel_mask_color_i386.c" | ||
86 | |||
87 | # include "./evas_op_blend/op_blend_pixel_neon.c" | ||
88 | # include "./evas_op_blend/op_blend_color_neon.c" | ||
89 | # include "./evas_op_blend/op_blend_pixel_color_neon.c" | ||
90 | # include "./evas_op_blend/op_blend_pixel_mask_neon.c" | ||
91 | # include "./evas_op_blend/op_blend_mask_color_neon.c" | ||
92 | //# include "./evas_op_blend/op_blend_pixel_mask_color_neon.c" | ||
93 | |||
94 | #ifdef BUILD_SSE3 | ||
95 | void evas_common_op_blend_init_sse3(void); | ||
96 | #endif | ||
97 | |||
98 | static void | ||
99 | op_blend_init(void) | ||
100 | { | ||
101 | memset(op_blend_span_funcs, 0, sizeof(op_blend_span_funcs)); | ||
102 | memset(op_blend_pt_funcs, 0, sizeof(op_blend_pt_funcs)); | ||
103 | #ifdef BUILD_SSE3 | ||
104 | evas_common_op_blend_init_sse3(); | ||
105 | #endif | ||
106 | #ifdef BUILD_MMX | ||
107 | init_blend_pixel_span_funcs_mmx(); | ||
108 | init_blend_pixel_color_span_funcs_mmx(); | ||
109 | init_blend_pixel_mask_span_funcs_mmx(); | ||
110 | init_blend_color_span_funcs_mmx(); | ||
111 | init_blend_mask_color_span_funcs_mmx(); | ||
112 | |||
113 | init_blend_pixel_pt_funcs_mmx(); | ||
114 | init_blend_pixel_color_pt_funcs_mmx(); | ||
115 | init_blend_pixel_mask_pt_funcs_mmx(); | ||
116 | init_blend_color_pt_funcs_mmx(); | ||
117 | init_blend_mask_color_pt_funcs_mmx(); | ||
118 | #endif | ||
119 | #ifdef BUILD_NEON | ||
120 | init_blend_pixel_span_funcs_neon(); | ||
121 | init_blend_pixel_color_span_funcs_neon(); | ||
122 | init_blend_pixel_mask_span_funcs_neon(); | ||
123 | init_blend_color_span_funcs_neon(); | ||
124 | init_blend_mask_color_span_funcs_neon(); | ||
125 | |||
126 | init_blend_pixel_pt_funcs_neon(); | ||
127 | init_blend_pixel_color_pt_funcs_neon(); | ||
128 | init_blend_pixel_mask_pt_funcs_neon(); | ||
129 | init_blend_color_pt_funcs_neon(); | ||
130 | init_blend_mask_color_pt_funcs_neon(); | ||
131 | #endif | ||
132 | #ifdef BUILD_C | ||
133 | init_blend_pixel_span_funcs_c(); | ||
134 | init_blend_pixel_color_span_funcs_c(); | ||
135 | init_blend_pixel_mask_span_funcs_c(); | ||
136 | init_blend_color_span_funcs_c(); | ||
137 | init_blend_mask_color_span_funcs_c(); | ||
138 | |||
139 | init_blend_pixel_pt_funcs_c(); | ||
140 | init_blend_pixel_color_pt_funcs_c(); | ||
141 | init_blend_pixel_mask_pt_funcs_c(); | ||
142 | init_blend_color_pt_funcs_c(); | ||
143 | init_blend_mask_color_pt_funcs_c(); | ||
144 | #endif | ||
145 | } | ||
146 | |||
/* Tear-down hook of the "blend" compositor; there is nothing to release. */
static void
op_blend_shutdown(void)
{
   /* intentionally empty */
}
151 | |||
152 | static RGBA_Gfx_Func | ||
153 | blend_gfx_span_func_cpu(int s, int m, int c, int d) | ||
154 | { | ||
155 | RGBA_Gfx_Func func = NULL; | ||
156 | int cpu = CPU_N; | ||
157 | #ifdef BUILD_SSE3 | ||
158 | if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3)) | ||
159 | { | ||
160 | cpu = CPU_SSE3; | ||
161 | func = op_blend_span_funcs[s][m][c][d][cpu]; | ||
162 | if(func) return func; | ||
163 | } | ||
164 | #endif | ||
165 | #ifdef BUILD_MMX | ||
166 | if (evas_common_cpu_has_feature(CPU_FEATURE_MMX)) | ||
167 | { | ||
168 | cpu = CPU_MMX; | ||
169 | func = op_blend_span_funcs[s][m][c][d][cpu]; | ||
170 | if (func) return func; | ||
171 | } | ||
172 | #endif | ||
173 | #ifdef BUILD_NEON | ||
174 | if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) | ||
175 | { | ||
176 | cpu = CPU_NEON; | ||
177 | func = op_blend_span_funcs[s][m][c][d][cpu]; | ||
178 | if (func) return func; | ||
179 | } | ||
180 | #endif | ||
181 | #ifdef BUILD_C | ||
182 | cpu = CPU_C; | ||
183 | func = op_blend_span_funcs[s][m][c][d][cpu]; | ||
184 | if (func) return func; | ||
185 | #endif | ||
186 | return func; | ||
187 | } | ||
188 | |||
189 | static RGBA_Gfx_Func | ||
190 | op_blend_pixel_span_get(RGBA_Image *src, RGBA_Image *dst, int pixels __UNUSED__) | ||
191 | { | ||
192 | int s = SP_AN, m = SM_N, c = SC_N, d = DP_AN; | ||
193 | |||
194 | if (src && src->cache_entry.flags.alpha) | ||
195 | { | ||
196 | s = SP; | ||
197 | if (src->cache_entry.flags.alpha_sparse) | ||
198 | s = SP_AS; | ||
199 | } | ||
200 | if (dst && dst->cache_entry.flags.alpha) | ||
201 | d = DP; | ||
202 | return blend_gfx_span_func_cpu(s, m, c, d); | ||
203 | } | ||
204 | |||
205 | static RGBA_Gfx_Func | ||
206 | op_blend_color_span_get(DATA32 col, RGBA_Image *dst, int pixels __UNUSED__) | ||
207 | { | ||
208 | int s = SP_N, m = SM_N, c = SC_AN, d = DP_AN; | ||
209 | |||
210 | if ((col >> 24) < 255) | ||
211 | c = SC; | ||
212 | if (col == ((col >> 24) * 0x01010101)) | ||
213 | c = SC_AA; | ||
214 | if (col == 0xffffffff) | ||
215 | c = SC_N; | ||
216 | if (dst && dst->cache_entry.flags.alpha) | ||
217 | d = DP; | ||
218 | return blend_gfx_span_func_cpu(s, m, c, d); | ||
219 | } | ||
220 | |||
221 | static RGBA_Gfx_Func | ||
222 | op_blend_pixel_color_span_get(RGBA_Image *src, DATA32 col, RGBA_Image *dst, int pixels __UNUSED__) | ||
223 | { | ||
224 | int s = SP_AN, m = SM_N, c = SC_AN, d = DP_AN; | ||
225 | |||
226 | if (src && src->cache_entry.flags.alpha) | ||
227 | { | ||
228 | s = SP; | ||
229 | if (src->cache_entry.flags.alpha_sparse) | ||
230 | s = SP_AS; | ||
231 | } | ||
232 | if ((col >> 24) < 255) | ||
233 | c = SC; | ||
234 | if (col == ((col >> 24) * 0x01010101)) | ||
235 | c = SC_AA; | ||
236 | if (col == 0xffffffff) | ||
237 | c = SC_N; | ||
238 | if (dst && dst->cache_entry.flags.alpha) | ||
239 | d = DP; | ||
240 | return blend_gfx_span_func_cpu(s, m, c, d); | ||
241 | } | ||
242 | |||
243 | static RGBA_Gfx_Func | ||
244 | op_blend_mask_color_span_get(DATA32 col, RGBA_Image *dst, int pixels __UNUSED__) | ||
245 | { | ||
246 | int s = SP_N, m = SM_AS, c = SC_AN, d = DP_AN; | ||
247 | |||
248 | if ((col >> 24) < 255) | ||
249 | c = SC; | ||
250 | if (col == ((col >> 24) * 0x01010101)) | ||
251 | c = SC_AA; | ||
252 | if (col == 0xffffffff) | ||
253 | c = SC_N; | ||
254 | if (dst && dst->cache_entry.flags.alpha) | ||
255 | d = DP; | ||
256 | return blend_gfx_span_func_cpu(s, m, c, d); | ||
257 | } | ||
258 | |||
259 | static RGBA_Gfx_Func | ||
260 | op_blend_pixel_mask_span_get(RGBA_Image *src, RGBA_Image *dst, int pixels __UNUSED__) | ||
261 | { | ||
262 | int s = SP_AN, m = SM_AS, c = SC_N, d = DP_AN; | ||
263 | |||
264 | if (src && src->cache_entry.flags.alpha) | ||
265 | { | ||
266 | s = SP; | ||
267 | if (src->cache_entry.flags.alpha_sparse) | ||
268 | s = SP_AS; | ||
269 | } | ||
270 | if (dst && dst->cache_entry.flags.alpha) | ||
271 | d = DP; | ||
272 | return blend_gfx_span_func_cpu(s, m, c, d); | ||
273 | } | ||
274 | |||
275 | |||
276 | static RGBA_Gfx_Pt_Func | ||
277 | blend_gfx_pt_func_cpu(int s, int m, int c, int d) | ||
278 | { | ||
279 | RGBA_Gfx_Pt_Func func = NULL; | ||
280 | int cpu = CPU_N; | ||
281 | #ifdef BUILD_SSE3 | ||
282 | if(evas_common_cpu_has_feature(CPU_FEATURE_SSE3)) | ||
283 | { | ||
284 | cpu = CPU_SSE3; | ||
285 | func = op_blend_pt_funcs[s][m][c][d][cpu]; | ||
286 | if(func) return func; | ||
287 | } | ||
288 | #endif | ||
289 | #ifdef BUILD_MMX | ||
290 | if (evas_common_cpu_has_feature(CPU_FEATURE_MMX)) | ||
291 | { | ||
292 | cpu = CPU_MMX; | ||
293 | func = op_blend_pt_funcs[s][m][c][d][cpu]; | ||
294 | if (func) return func; | ||
295 | } | ||
296 | #endif | ||
297 | #ifdef BUILD_NEON | ||
298 | if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) | ||
299 | { | ||
300 | cpu = CPU_NEON; | ||
301 | func = op_blend_pt_funcs[s][m][c][d][cpu]; | ||
302 | if (func) return func; | ||
303 | } | ||
304 | #endif | ||
305 | #ifdef BUILD_C | ||
306 | cpu = CPU_C; | ||
307 | func = op_blend_pt_funcs[s][m][c][d][cpu]; | ||
308 | if (func) return func; | ||
309 | #endif | ||
310 | return func; | ||
311 | } | ||
312 | |||
313 | static RGBA_Gfx_Pt_Func | ||
314 | op_blend_pixel_pt_get(Image_Entry_Flags src_flags, RGBA_Image *dst) | ||
315 | { | ||
316 | int s = SP_AN, m = SM_N, c = SC_N, d = DP_AN; | ||
317 | |||
318 | if (src_flags.alpha) | ||
319 | s = SP; | ||
320 | if (dst && dst->cache_entry.flags.alpha) | ||
321 | d = DP; | ||
322 | return blend_gfx_pt_func_cpu(s, m, c, d); | ||
323 | } | ||
324 | |||
325 | static RGBA_Gfx_Pt_Func | ||
326 | op_blend_color_pt_get(DATA32 col, RGBA_Image *dst) | ||
327 | { | ||
328 | int s = SP_N, m = SM_N, c = SC_AN, d = DP_AN; | ||
329 | |||
330 | if ((col >> 24) < 255) | ||
331 | c = SC; | ||
332 | if (col == ((col >> 24) * 0x01010101)) | ||
333 | c = SC_AA; | ||
334 | if (col == 0xffffffff) | ||
335 | c = SC_N; | ||
336 | if (dst && dst->cache_entry.flags.alpha) | ||
337 | d = DP; | ||
338 | return blend_gfx_pt_func_cpu(s, m, c, d); | ||
339 | } | ||
340 | |||
341 | static RGBA_Gfx_Pt_Func | ||
342 | op_blend_pixel_color_pt_get(Image_Entry_Flags src_flags, DATA32 col, RGBA_Image *dst) | ||
343 | { | ||
344 | int s = SP_AN, m = SM_N, c = SC_AN, d = DP_AN; | ||
345 | |||
346 | if (src_flags.alpha) | ||
347 | s = SP; | ||
348 | if ((col >> 24) < 255) | ||
349 | c = SC; | ||
350 | if (col == ((col >> 24) * 0x01010101)) | ||
351 | c = SC_AA; | ||
352 | if (col == 0xffffffff) | ||
353 | c = SC_N; | ||
354 | if (dst && dst->cache_entry.flags.alpha) | ||
355 | d = DP; | ||
356 | return blend_gfx_pt_func_cpu(s, m, c, d); | ||
357 | } | ||
358 | |||
359 | static RGBA_Gfx_Pt_Func | ||
360 | op_blend_mask_color_pt_get(DATA32 col, RGBA_Image *dst) | ||
361 | { | ||
362 | int s = SP_N, m = SM_AS, c = SC_AN, d = DP_AN; | ||
363 | |||
364 | if ((col >> 24) < 255) | ||
365 | c = SC; | ||
366 | if (col == ((col >> 24) * 0x01010101)) | ||
367 | c = SC_AA; | ||
368 | if (col == 0xffffffff) | ||
369 | c = SC_N; | ||
370 | if (dst && dst->cache_entry.flags.alpha) | ||
371 | d = DP; | ||
372 | return blend_gfx_pt_func_cpu(s, m, c, d); | ||
373 | } | ||
374 | |||
375 | static RGBA_Gfx_Pt_Func | ||
376 | op_blend_pixel_mask_pt_get(Image_Entry_Flags src_flags, RGBA_Image *dst) | ||
377 | { | ||
378 | int s = SP_AN, m = SM_AS, c = SC_N, d = DP_AN; | ||
379 | |||
380 | if (src_flags.alpha) | ||
381 | s = SP; | ||
382 | if (dst && dst->cache_entry.flags.alpha) | ||
383 | d = DP; | ||
384 | return blend_gfx_pt_func_cpu(s, m, c, d); | ||
385 | } | ||
386 | |||
387 | void evas_common_op_blend_rel_init_sse3(void); | ||
388 | |||
389 | static void | ||
390 | op_blend_rel_init(void) | ||
391 | { | ||
392 | memset(op_blend_rel_span_funcs, 0, sizeof(op_blend_rel_span_funcs)); | ||
393 | memset(op_blend_rel_pt_funcs, 0, sizeof(op_blend_rel_pt_funcs)); | ||
394 | #ifdef BUILD_SSE3 | ||
395 | evas_common_op_blend_rel_init_sse3(); | ||
396 | #endif | ||
397 | #ifdef BUILD_MMX | ||
398 | init_blend_rel_pixel_span_funcs_mmx(); | ||
399 | init_blend_rel_pixel_color_span_funcs_mmx(); | ||
400 | init_blend_rel_pixel_mask_span_funcs_mmx(); | ||
401 | init_blend_rel_color_span_funcs_mmx(); | ||
402 | init_blend_rel_mask_color_span_funcs_mmx(); | ||
403 | |||
404 | init_blend_rel_pixel_pt_funcs_mmx(); | ||
405 | init_blend_rel_pixel_color_pt_funcs_mmx(); | ||
406 | init_blend_rel_pixel_mask_pt_funcs_mmx(); | ||
407 | init_blend_rel_color_pt_funcs_mmx(); | ||
408 | init_blend_rel_mask_color_pt_funcs_mmx(); | ||
409 | #endif | ||
410 | #ifdef BUILD_NEON | ||
411 | init_blend_rel_pixel_span_funcs_neon(); | ||
412 | init_blend_rel_pixel_color_span_funcs_neon(); | ||
413 | init_blend_rel_pixel_mask_span_funcs_neon(); | ||
414 | init_blend_rel_color_span_funcs_neon(); | ||
415 | init_blend_rel_mask_color_span_funcs_neon(); | ||
416 | |||
417 | init_blend_rel_pixel_pt_funcs_neon(); | ||
418 | init_blend_rel_pixel_color_pt_funcs_neon(); | ||
419 | init_blend_rel_pixel_mask_pt_funcs_neon(); | ||
420 | init_blend_rel_color_pt_funcs_neon(); | ||
421 | init_blend_rel_mask_color_pt_funcs_neon(); | ||
422 | #endif | ||
423 | #ifdef BUILD_C | ||
424 | init_blend_rel_pixel_span_funcs_c(); | ||
425 | init_blend_rel_pixel_color_span_funcs_c(); | ||
426 | init_blend_rel_pixel_mask_span_funcs_c(); | ||
427 | init_blend_rel_color_span_funcs_c(); | ||
428 | init_blend_rel_mask_color_span_funcs_c(); | ||
429 | |||
430 | init_blend_rel_pixel_pt_funcs_c(); | ||
431 | init_blend_rel_pixel_color_pt_funcs_c(); | ||
432 | init_blend_rel_pixel_mask_pt_funcs_c(); | ||
433 | init_blend_rel_color_pt_funcs_c(); | ||
434 | init_blend_rel_mask_color_pt_funcs_c(); | ||
435 | #endif | ||
436 | } | ||
437 | |||
/* Tear-down hook of the "blend_rel" compositor; there is nothing to release. */
static void
op_blend_rel_shutdown(void)
{
   /* intentionally empty */
}
442 | |||
443 | static RGBA_Gfx_Func | ||
444 | blend_rel_gfx_span_func_cpu(int s, int m, int c, int d) | ||
445 | { | ||
446 | RGBA_Gfx_Func func = NULL; | ||
447 | int cpu = CPU_N; | ||
448 | #ifdef BUILD_SSE3 | ||
449 | if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3)) | ||
450 | { | ||
451 | cpu = CPU_SSE3; | ||
452 | func = op_blend_rel_span_funcs[s][m][c][d][cpu]; | ||
453 | if(func) return func; | ||
454 | } | ||
455 | #endif | ||
456 | #ifdef BUILD_MMX | ||
457 | if (evas_common_cpu_has_feature(CPU_FEATURE_MMX)) | ||
458 | { | ||
459 | cpu = CPU_MMX; | ||
460 | func = op_blend_rel_span_funcs[s][m][c][d][cpu]; | ||
461 | if (func) return func; | ||
462 | } | ||
463 | #endif | ||
464 | #ifdef BUILD_NEON | ||
465 | if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) | ||
466 | { | ||
467 | cpu = CPU_NEON; | ||
468 | func = op_blend_rel_span_funcs[s][m][c][d][cpu]; | ||
469 | if (func) return func; | ||
470 | } | ||
471 | #endif | ||
472 | #ifdef BUILD_C | ||
473 | cpu = CPU_C; | ||
474 | func = op_blend_rel_span_funcs[s][m][c][d][cpu]; | ||
475 | if (func) return func; | ||
476 | #endif | ||
477 | return func; | ||
478 | } | ||
479 | |||
480 | static RGBA_Gfx_Func | ||
481 | op_blend_rel_pixel_span_get(RGBA_Image *src, RGBA_Image *dst, int pixels __UNUSED__) | ||
482 | { | ||
483 | int s = SP_AN, m = SM_N, c = SC_N, d = DP_AN; | ||
484 | |||
485 | if (src && src->cache_entry.flags.alpha) | ||
486 | { | ||
487 | s = SP; | ||
488 | if (src->cache_entry.flags.alpha_sparse) | ||
489 | s = SP_AS; | ||
490 | } | ||
491 | if (dst && dst->cache_entry.flags.alpha) | ||
492 | d = DP; | ||
493 | return blend_rel_gfx_span_func_cpu(s, m, c, d); | ||
494 | } | ||
495 | |||
496 | static RGBA_Gfx_Func | ||
497 | op_blend_rel_color_span_get(DATA32 col, RGBA_Image *dst, int pixels __UNUSED__) | ||
498 | { | ||
499 | int s = SP_N, m = SM_N, c = SC_AN, d = DP_AN; | ||
500 | |||
501 | if ((col >> 24) < 255) | ||
502 | c = SC; | ||
503 | if (col == ((col >> 24) * 0x01010101)) | ||
504 | c = SC_AA; | ||
505 | if (col == 0xffffffff) | ||
506 | c = SC_N; | ||
507 | if (dst && dst->cache_entry.flags.alpha) | ||
508 | d = DP; | ||
509 | return blend_rel_gfx_span_func_cpu(s, m, c, d); | ||
510 | } | ||
511 | |||
512 | static RGBA_Gfx_Func | ||
513 | op_blend_rel_pixel_color_span_get(RGBA_Image *src, DATA32 col, RGBA_Image *dst, int pixels __UNUSED__) | ||
514 | { | ||
515 | int s = SP_AN, m = SM_N, c = SC_AN, d = DP_AN; | ||
516 | |||
517 | if (src && src->cache_entry.flags.alpha) | ||
518 | s = SP; | ||
519 | if ((col >> 24) < 255) | ||
520 | c = SC; | ||
521 | if (col == ((col >> 24) * 0x01010101)) | ||
522 | c = SC_AA; | ||
523 | if (col == 0xffffffff) | ||
524 | c = SC_N; | ||
525 | if (dst && dst->cache_entry.flags.alpha) | ||
526 | d = DP; | ||
527 | return blend_rel_gfx_span_func_cpu(s, m, c, d); | ||
528 | } | ||
529 | |||
530 | static RGBA_Gfx_Func | ||
531 | op_blend_rel_mask_color_span_get(DATA32 col, RGBA_Image *dst, int pixels __UNUSED__) | ||
532 | { | ||
533 | int s = SP_N, m = SM_AS, c = SC_AN, d = DP_AN; | ||
534 | |||
535 | if ((col >> 24) < 255) | ||
536 | c = SC; | ||
537 | if (col == ((col >> 24) * 0x01010101)) | ||
538 | c = SC_AA; | ||
539 | if (col == 0xffffffff) | ||
540 | c = SC_N; | ||
541 | if (dst && dst->cache_entry.flags.alpha) | ||
542 | d = DP; | ||
543 | return blend_rel_gfx_span_func_cpu(s, m, c, d); | ||
544 | } | ||
545 | |||
546 | static RGBA_Gfx_Func | ||
547 | op_blend_rel_pixel_mask_span_get(RGBA_Image *src, RGBA_Image *dst, int pixels __UNUSED__) | ||
548 | { | ||
549 | int s = SP_AN, m = SM_AS, c = SC_N, d = DP_AN; | ||
550 | |||
551 | if (src && src->cache_entry.flags.alpha) | ||
552 | { | ||
553 | s = SP; | ||
554 | if (src->cache_entry.flags.alpha_sparse) | ||
555 | s = SP_AS; | ||
556 | } | ||
557 | if (dst && dst->cache_entry.flags.alpha) | ||
558 | d = DP; | ||
559 | return blend_rel_gfx_span_func_cpu(s, m, c, d); | ||
560 | } | ||
561 | |||
562 | static RGBA_Gfx_Pt_Func | ||
563 | blend_rel_gfx_pt_func_cpu(int s, int m, int c, int d) | ||
564 | { | ||
565 | RGBA_Gfx_Pt_Func func = NULL; | ||
566 | int cpu = CPU_N; | ||
567 | #ifdef BUILD_SSE3 | ||
568 | if (evas_common_cpu_has_feature(CPU_FEATURE_SSE3)) | ||
569 | { | ||
570 | cpu = CPU_SSE3; | ||
571 | func = op_blend_rel_pt_funcs[s][m][c][d][cpu]; | ||
572 | if(func) return func; | ||
573 | } | ||
574 | #endif | ||
575 | #ifdef BUILD_MMX | ||
576 | if (evas_common_cpu_has_feature(CPU_FEATURE_MMX)) | ||
577 | { | ||
578 | cpu = CPU_MMX; | ||
579 | func = op_blend_rel_pt_funcs[s][m][c][d][cpu]; | ||
580 | if (func) return func; | ||
581 | } | ||
582 | #endif | ||
583 | #ifdef BUILD_NEON | ||
584 | if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) | ||
585 | { | ||
586 | cpu = CPU_NEON; | ||
587 | func = op_blend_rel_pt_funcs[s][m][c][d][cpu]; | ||
588 | if (func) return func; | ||
589 | } | ||
590 | #endif | ||
591 | #ifdef BUILD_C | ||
592 | cpu = CPU_C; | ||
593 | func = op_blend_rel_pt_funcs[s][m][c][d][cpu]; | ||
594 | if (func) return func; | ||
595 | #endif | ||
596 | return func; | ||
597 | } | ||
598 | |||
599 | static RGBA_Gfx_Pt_Func | ||
600 | op_blend_rel_pixel_pt_get(Image_Entry_Flags src_flags, RGBA_Image *dst) | ||
601 | { | ||
602 | int s = SP_AN, m = SM_N, c = SC_N, d = DP_AN; | ||
603 | |||
604 | if (src_flags.alpha) | ||
605 | s = SP; | ||
606 | if (dst && dst->cache_entry.flags.alpha) | ||
607 | d = DP; | ||
608 | return blend_rel_gfx_pt_func_cpu(s, m, c, d); | ||
609 | } | ||
610 | |||
611 | static RGBA_Gfx_Pt_Func | ||
612 | op_blend_rel_color_pt_get(DATA32 col, RGBA_Image *dst) | ||
613 | { | ||
614 | int s = SP_N, m = SM_N, c = SC_AN, d = DP_AN; | ||
615 | |||
616 | if ((col >> 24) < 255) | ||
617 | c = SC; | ||
618 | if (col == ((col >> 24) * 0x01010101)) | ||
619 | c = SC_AA; | ||
620 | if (col == 0xffffffff) | ||
621 | c = SC_N; | ||
622 | if (dst && dst->cache_entry.flags.alpha) | ||
623 | d = DP; | ||
624 | return blend_rel_gfx_pt_func_cpu(s, m, c, d); | ||
625 | } | ||
626 | |||
627 | static RGBA_Gfx_Pt_Func | ||
628 | op_blend_rel_pixel_color_pt_get(Image_Entry_Flags src_flags, DATA32 col, RGBA_Image *dst) | ||
629 | { | ||
630 | int s = SP_AN, m = SM_N, c = SC_AN, d = DP_AN; | ||
631 | |||
632 | if (src_flags.alpha) | ||
633 | s = SP; | ||
634 | if ((col >> 24) < 255) | ||
635 | c = SC; | ||
636 | if (col == ((col >> 24) * 0x01010101)) | ||
637 | c = SC_AA; | ||
638 | if (col == 0xffffffff) | ||
639 | c = SC_N; | ||
640 | if (dst && dst->cache_entry.flags.alpha) | ||
641 | d = DP; | ||
642 | return blend_rel_gfx_pt_func_cpu(s, m, c, d); | ||
643 | } | ||
644 | |||
645 | static RGBA_Gfx_Pt_Func | ||
646 | op_blend_rel_mask_color_pt_get(DATA32 col, RGBA_Image *dst) | ||
647 | { | ||
648 | int s = SP_N, m = SM_AS, c = SC_AN, d = DP_AN; | ||
649 | |||
650 | if ((col >> 24) < 255) | ||
651 | c = SC; | ||
652 | if (col == ((col >> 24) * 0x01010101)) | ||
653 | c = SC_AA; | ||
654 | if (col == 0xffffffff) | ||
655 | c = SC_N; | ||
656 | if (dst && dst->cache_entry.flags.alpha) | ||
657 | d = DP; | ||
658 | return blend_rel_gfx_pt_func_cpu(s, m, c, d); | ||
659 | } | ||
660 | |||
661 | static RGBA_Gfx_Pt_Func | ||
662 | op_blend_rel_pixel_mask_pt_get(Image_Entry_Flags src_flags, RGBA_Image *dst) | ||
663 | { | ||
664 | int s = SP_AN, m = SM_AS, c = SC_N, d = DP_AN; | ||
665 | |||
666 | if (src_flags.alpha) | ||
667 | s = SP; | ||
668 | if (dst && dst->cache_entry.flags.alpha) | ||
669 | d = DP; | ||
670 | return blend_rel_gfx_pt_func_cpu(s, m, c, d); | ||
671 | } | ||