diff options
author | David Walter Seikel | 2012-01-23 23:36:30 +1000 |
---|---|---|
committer | David Walter Seikel | 2012-01-23 23:36:30 +1000 |
commit | 6523585c66c04cea54df50013df8886b589847d8 (patch) | |
tree | 0b22aee7064166d88595eda260ca2d17c0773da5 /libraries/luajit-2.0/src/lj_opt_split.c | |
parent | Update the EFL to what I'm actually using, coz I'm using some stuff not yet r... (diff) | |
download | SledjHamr-6523585c66c04cea54df50013df8886b589847d8.zip SledjHamr-6523585c66c04cea54df50013df8886b589847d8.tar.gz SledjHamr-6523585c66c04cea54df50013df8886b589847d8.tar.bz2 SledjHamr-6523585c66c04cea54df50013df8886b589847d8.tar.xz |
Add luaproc and LuaJIT libraries.
Two versions of LuaJIT, the stable release, and the dev version. Try the dev version first, until it fails badly.
Diffstat (limited to '')
-rw-r--r-- | libraries/luajit-2.0/src/lj_opt_split.c | 723 |
1 files changed, 723 insertions, 0 deletions
diff --git a/libraries/luajit-2.0/src/lj_opt_split.c b/libraries/luajit-2.0/src/lj_opt_split.c new file mode 100644 index 0000000..913a7a0 --- /dev/null +++ b/libraries/luajit-2.0/src/lj_opt_split.c | |||
@@ -0,0 +1,723 @@ | |||
1 | /* | ||
2 | ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions. | ||
3 | ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_opt_split_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI)) | ||
12 | |||
13 | #include "lj_err.h" | ||
14 | #include "lj_str.h" | ||
15 | #include "lj_ir.h" | ||
16 | #include "lj_jit.h" | ||
17 | #include "lj_ircall.h" | ||
18 | #include "lj_iropt.h" | ||
19 | #include "lj_vm.h" | ||
20 | |||
21 | /* SPLIT pass: | ||
22 | ** | ||
23 | ** This pass splits up 64 bit IR instructions into multiple 32 bit IR | ||
24 | ** instructions. It's only active for soft-float targets or for 32 bit CPUs | ||
25 | ** which lack native 64 bit integer operations (the FFI is currently the | ||
26 | ** only emitter for 64 bit integer instructions). | ||
27 | ** | ||
28 | ** Splitting the IR in a separate pass keeps each 32 bit IR assembler | ||
29 | ** backend simple. Only a small amount of extra functionality needs to be | ||
30 | ** implemented. This is much easier than adding support for allocating | ||
31 | ** register pairs to each backend (believe me, I tried). A few simple, but | ||
32 | ** important optimizations can be performed by the SPLIT pass, which would | ||
33 | ** be tedious to do in the backend. | ||
34 | ** | ||
35 | ** The basic idea is to replace each 64 bit IR instruction with its 32 bit | ||
36 | ** equivalent plus an extra HIOP instruction. The splitted IR is not passed | ||
37 | ** through FOLD or any other optimizations, so each HIOP is guaranteed to | ||
38 | ** immediately follow its counterpart. The actual functionality of HIOP is | ||
39 | ** inferred from the previous instruction. | ||
40 | ** | ||
41 | ** The operands of HIOP hold the hiword input references. The output of HIOP | ||
42 | ** is the hiword output reference, which is also used to hold the hiword | ||
43 | ** register or spill slot information. The register allocator treats this | ||
44 | ** instruction independently of any other instruction, which improves code | ||
45 | ** quality compared to using fixed register pairs. | ||
46 | ** | ||
47 | ** It's easier to split up some instructions into two regular 32 bit | ||
48 | ** instructions. E.g. XLOAD is split up into two XLOADs with two different | ||
49 | ** addresses. Obviously 64 bit constants need to be split up into two 32 bit | ||
50 | ** constants, too. Some hiword instructions can be entirely omitted, e.g. | ||
51 | ** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls | ||
52 | ** are split up into two 32 bit arguments each. | ||
53 | ** | ||
54 | ** On soft-float targets, floating-point instructions are directly converted | ||
55 | ** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX). | ||
56 | ** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump). | ||
57 | ** | ||
58 | ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with | ||
59 | ** two int64_t fields: | ||
60 | ** | ||
61 | ** 0100 p32 ADD base +8 | ||
62 | ** 0101 i64 XLOAD 0100 | ||
63 | ** 0102 i64 ADD 0101 +1 | ||
64 | ** 0103 p32 ADD base +16 | ||
65 | ** 0104 i64 XSTORE 0103 0102 | ||
66 | ** | ||
67 | ** mov rax, [esi+0x8] | ||
68 | ** add rax, +0x01 | ||
69 | ** mov [esi+0x10], rax | ||
70 | ** | ||
71 | ** Here's the transformed IR and the x86 machine code after the SPLIT pass: | ||
72 | ** | ||
73 | ** 0100 p32 ADD base +8 | ||
74 | ** 0101 int XLOAD 0100 | ||
75 | ** 0102 p32 ADD base +12 | ||
76 | ** 0103 int XLOAD 0102 | ||
77 | ** 0104 int ADD 0101 +1 | ||
78 | ** 0105 int HIOP 0103 +0 | ||
79 | ** 0106 p32 ADD base +16 | ||
80 | ** 0107 int XSTORE 0106 0104 | ||
81 | ** 0108 p32 ADD base +20 | ||
82 | ** 0109 int XSTORE 0108 0105 | ||
83 | ** | ||
84 | ** mov eax, [esi+0x8] | ||
85 | ** mov ecx, [esi+0xc] | ||
86 | ** add eax, +0x01 | ||
87 | ** adc ecx, +0x00 | ||
88 | ** mov [esi+0x10], eax | ||
89 | ** mov [esi+0x14], ecx | ||
90 | ** | ||
91 | ** You may notice the reassociated hiword address computation, which is | ||
92 | ** later fused into the mov operands by the assembler. | ||
93 | */ | ||
94 | |||
95 | /* Some local macros to save typing. Undef'd at the end. */ | ||
96 | #define IR(ref) (&J->cur.ir[(ref)]) | ||
97 | |||
98 | /* Directly emit the transformed IR without updating chains etc. */ | ||
99 | static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2) | ||
100 | { | ||
101 | IRRef nref = lj_ir_nextins(J); | ||
102 | IRIns *ir = IR(nref); | ||
103 | ir->ot = ot; | ||
104 | ir->op1 = op1; | ||
105 | ir->op2 = op2; | ||
106 | return nref; | ||
107 | } | ||
108 | |||
109 | #if LJ_SOFTFP | ||
/* Emit a (checked) number to integer conversion. */
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
  /* Argument word order for the soft-float call depends on endianess. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    /* Check exactness: convert the int back to a number and compare both
    ** 32 bit words against the original. Guarded EQ/HIOP exit on mismatch.
    */
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);  /* hi ref = lo ref + 1. */
  }
  return res;
}
128 | |||
/* Emit a CALLN with one split 64 bit argument. */
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			  IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;  /* Overwrite the already copy-substituted instruction slot. */
  /* Lo/hi word argument order depends on target endianess. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  /* Record loword substitution in ir->prev; HIOP carries the hiword result. */
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
143 | |||
/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;  /* Overwrite the already copy-substituted instruction slot. */
  /* Lo/hi word argument order depends on target endianess. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  /* Append the plain 32 bit argument (loword substitution of op2). */
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
159 | #endif | ||
160 | |||
/* Emit a CALLN with two split 64 bit arguments. */
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;  /* Overwrite the already copy-substituted instruction slot. */
  /* Build the CARG chain; lo/hi word order per argument depends on endianess. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  /* Used by both soft-float FP ops and FFI 64 bit integer ops, so the
  ** HIOP result type depends on the original instruction's type.
  */
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}
181 | |||
/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;  /* Loword substitution of the address. */
  IRIns *ir = IR(nref);
  int32_t ofs = 4;  /* The other word lives at address + 4 bytes. */
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address: fold the constant offset into ours. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;  /* Reachable if the folded offset was -4. */
  }
  return split_emit(J, IRTI(IR_ADD), nref, lj_ir_kint(J, ofs));
}
196 | |||
197 | /* Transform the old IR to the new IR. */ | ||
198 | static void split_ir(jit_State *J) | ||
199 | { | ||
200 | IRRef nins = J->cur.nins, nk = J->cur.nk; | ||
201 | MSize irlen = nins - nk; | ||
202 | MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1)); | ||
203 | IRIns *oir = (IRIns *)lj_str_needbuf(J->L, &G(J->L)->tmpbuf, need); | ||
204 | IRRef1 *hisubst; | ||
205 | IRRef ref; | ||
206 | |||
207 | /* Copy old IR to buffer. */ | ||
208 | memcpy(oir, IR(nk), irlen*sizeof(IRIns)); | ||
209 | /* Bias hiword substitution table and old IR. Loword kept in field prev. */ | ||
210 | hisubst = (IRRef1 *)&oir[irlen] - nk; | ||
211 | oir -= nk; | ||
212 | |||
213 | /* Remove all IR instructions, but retain IR constants. */ | ||
214 | J->cur.nins = REF_FIRST; | ||
215 | J->loopref = 0; | ||
216 | |||
217 | /* Process constants and fixed references. */ | ||
218 | for (ref = nk; ref <= REF_BASE; ref++) { | ||
219 | IRIns *ir = &oir[ref]; | ||
220 | if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) { | ||
221 | /* Split up 64 bit constant. */ | ||
222 | TValue tv = *ir_k64(ir); | ||
223 | ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo); | ||
224 | hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi); | ||
225 | } else { | ||
226 | ir->prev = ref; /* Identity substitution for loword. */ | ||
227 | hisubst[ref] = 0; | ||
228 | } | ||
229 | } | ||
230 | |||
231 | /* Process old IR instructions. */ | ||
232 | for (ref = REF_FIRST; ref < nins; ref++) { | ||
233 | IRIns *ir = &oir[ref]; | ||
234 | IRRef nref = lj_ir_nextins(J); | ||
235 | IRIns *nir = IR(nref); | ||
236 | IRRef hi = 0; | ||
237 | |||
238 | /* Copy-substitute old instruction to new instruction. */ | ||
239 | nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev; | ||
240 | nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev; | ||
241 | ir->prev = nref; /* Loword substitution. */ | ||
242 | nir->o = ir->o; | ||
243 | nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI); | ||
244 | hisubst[ref] = 0; | ||
245 | |||
246 | /* Split 64 bit instructions. */ | ||
247 | #if LJ_SOFTFP | ||
248 | if (irt_isnum(ir->t)) { | ||
249 | nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ | ||
250 | /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */ | ||
251 | switch (ir->o) { | ||
252 | case IR_ADD: | ||
253 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add); | ||
254 | break; | ||
255 | case IR_SUB: | ||
256 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub); | ||
257 | break; | ||
258 | case IR_MUL: | ||
259 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul); | ||
260 | break; | ||
261 | case IR_DIV: | ||
262 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div); | ||
263 | break; | ||
264 | case IR_POW: | ||
265 | hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi); | ||
266 | break; | ||
267 | case IR_FPMATH: | ||
268 | /* Try to rejoin pow from EXP2, MUL and LOG2. */ | ||
269 | if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) { | ||
270 | IRIns *irp = IR(nir->op1); | ||
271 | if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) { | ||
272 | IRIns *irm4 = IR(irp->op1); | ||
273 | IRIns *irm3 = IR(irm4->op1); | ||
274 | IRIns *irm12 = IR(irm3->op1); | ||
275 | IRIns *irl1 = IR(irm12->op1); | ||
276 | if (irm12->op1 > J->loopref && irl1->o == IR_CALLN && | ||
277 | irl1->op2 == IRCALL_lj_vm_log2) { | ||
278 | IRRef tmp = irl1->op1; /* Recycle first two args from LOG2. */ | ||
279 | IRRef arg3 = irm3->op2, arg4 = irm4->op2; | ||
280 | J->cur.nins--; | ||
281 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3); | ||
282 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4); | ||
283 | ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow); | ||
284 | hi = split_emit(J, IRT(IR_HIOP, LJ_SOFTFP), tmp, tmp); | ||
285 | break; | ||
286 | } | ||
287 | } | ||
288 | } | ||
289 | hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2); | ||
290 | break; | ||
291 | case IR_ATAN2: | ||
292 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2); | ||
293 | break; | ||
294 | case IR_LDEXP: | ||
295 | hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp); | ||
296 | break; | ||
297 | case IR_NEG: case IR_ABS: | ||
298 | nir->o = IR_CONV; /* Pass through loword. */ | ||
299 | nir->op2 = (IRT_INT << 5) | IRT_INT; | ||
300 | hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP), | ||
301 | hisubst[ir->op1], hisubst[ir->op2]); | ||
302 | break; | ||
303 | case IR_SLOAD: | ||
304 | if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from int to number. */ | ||
305 | nir->op2 &= ~IRSLOAD_CONVERT; | ||
306 | ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref, | ||
307 | IRCALL_softfp_i2d); | ||
308 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); | ||
309 | break; | ||
310 | } | ||
311 | /* fallthrough */ | ||
312 | case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD: | ||
313 | case IR_STRTO: | ||
314 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); | ||
315 | break; | ||
316 | case IR_XLOAD: { | ||
317 | IRIns inslo = *nir; /* Save/undo the emit of the lo XLOAD. */ | ||
318 | J->cur.nins--; | ||
319 | hi = split_ptr(J, oir, ir->op1); /* Insert the hiref ADD. */ | ||
320 | nref = lj_ir_nextins(J); | ||
321 | nir = IR(nref); | ||
322 | *nir = inslo; /* Re-emit lo XLOAD immediately before hi XLOAD. */ | ||
323 | hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2); | ||
324 | #if LJ_LE | ||
325 | ir->prev = nref; | ||
326 | #else | ||
327 | ir->prev = hi; hi = nref; | ||
328 | #endif | ||
329 | break; | ||
330 | } | ||
331 | case IR_ASTORE: case IR_HSTORE: case IR_USTORE: | ||
332 | split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]); | ||
333 | break; | ||
334 | case IR_XSTORE: { | ||
335 | #if LJ_LE | ||
336 | IRRef hiref = hisubst[ir->op2]; | ||
337 | #else | ||
338 | IRRef hiref = nir->op2; nir->op2 = hisubst[ir->op2]; | ||
339 | #endif | ||
340 | split_emit(J, IRT(IR_XSTORE, IRT_SOFTFP), | ||
341 | split_ptr(J, oir, ir->op1), hiref); | ||
342 | break; | ||
343 | } | ||
344 | case IR_CONV: { /* Conversion to number. Others handled below. */ | ||
345 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | ||
346 | UNUSED(st); | ||
347 | #if LJ_32 && LJ_HASFFI | ||
348 | if (st == IRT_I64 || st == IRT_U64) { | ||
349 | hi = split_call_l(J, hisubst, oir, ir, | ||
350 | st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d); | ||
351 | break; | ||
352 | } | ||
353 | #endif | ||
354 | lua_assert(st == IRT_INT || | ||
355 | (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT))); | ||
356 | nir->o = IR_CALLN; | ||
357 | #if LJ_32 && LJ_HASFFI | ||
358 | nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d : | ||
359 | st == IRT_FLOAT ? IRCALL_softfp_f2d : | ||
360 | IRCALL_softfp_ui2d; | ||
361 | #else | ||
362 | nir->op2 = IRCALL_softfp_i2d; | ||
363 | #endif | ||
364 | hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); | ||
365 | break; | ||
366 | } | ||
367 | case IR_CALLS: | ||
368 | case IR_CALLXS: | ||
369 | goto split_call; | ||
370 | case IR_PHI: | ||
371 | if (nir->op1 == nir->op2) | ||
372 | J->cur.nins--; /* Drop useless PHIs. */ | ||
373 | if (hisubst[ir->op1] != hisubst[ir->op2]) | ||
374 | split_emit(J, IRT(IR_PHI, IRT_SOFTFP), | ||
375 | hisubst[ir->op1], hisubst[ir->op2]); | ||
376 | break; | ||
377 | default: | ||
378 | lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX); | ||
379 | hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), | ||
380 | hisubst[ir->op1], hisubst[ir->op2]); | ||
381 | break; | ||
382 | } | ||
383 | } else | ||
384 | #endif | ||
385 | #if LJ_32 && LJ_HASFFI | ||
386 | if (irt_isint64(ir->t)) { | ||
387 | IRRef hiref = hisubst[ir->op1]; | ||
388 | nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */ | ||
389 | switch (ir->o) { | ||
390 | case IR_ADD: | ||
391 | case IR_SUB: | ||
392 | /* Use plain op for hiword if loword cannot produce a carry/borrow. */ | ||
393 | if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) { | ||
394 | ir->prev = nir->op1; /* Pass through loword. */ | ||
395 | nir->op1 = hiref; nir->op2 = hisubst[ir->op2]; | ||
396 | hi = nref; | ||
397 | break; | ||
398 | } | ||
399 | /* fallthrough */ | ||
400 | case IR_NEG: | ||
401 | hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]); | ||
402 | break; | ||
403 | case IR_MUL: | ||
404 | hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64); | ||
405 | break; | ||
406 | case IR_DIV: | ||
407 | hi = split_call_ll(J, hisubst, oir, ir, | ||
408 | irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 : | ||
409 | IRCALL_lj_carith_divu64); | ||
410 | break; | ||
411 | case IR_MOD: | ||
412 | hi = split_call_ll(J, hisubst, oir, ir, | ||
413 | irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : | ||
414 | IRCALL_lj_carith_modu64); | ||
415 | break; | ||
416 | case IR_POW: | ||
417 | hi = split_call_ll(J, hisubst, oir, ir, | ||
418 | irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : | ||
419 | IRCALL_lj_carith_powu64); | ||
420 | break; | ||
421 | case IR_FLOAD: | ||
422 | lua_assert(ir->op2 == IRFL_CDATA_INT64); | ||
423 | hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4); | ||
424 | #if LJ_BE | ||
425 | ir->prev = hi; hi = nref; | ||
426 | #endif | ||
427 | break; | ||
428 | case IR_XLOAD: | ||
429 | hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2); | ||
430 | #if LJ_BE | ||
431 | ir->prev = hi; hi = nref; | ||
432 | #endif | ||
433 | break; | ||
434 | case IR_XSTORE: | ||
435 | #if LJ_LE | ||
436 | hiref = hisubst[ir->op2]; | ||
437 | #else | ||
438 | hiref = nir->op2; nir->op2 = hisubst[ir->op2]; | ||
439 | #endif | ||
440 | split_emit(J, IRTI(IR_XSTORE), split_ptr(J, oir, ir->op1), hiref); | ||
441 | break; | ||
442 | case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ | ||
443 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | ||
444 | #if LJ_SOFTFP | ||
445 | if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ | ||
446 | hi = split_call_l(J, hisubst, oir, ir, | ||
447 | irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul); | ||
448 | } else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */ | ||
449 | nir->o = IR_CALLN; | ||
450 | nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul; | ||
451 | hi = split_emit(J, IRTI(IR_HIOP), nref, nref); | ||
452 | } | ||
453 | #else | ||
454 | if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ | ||
455 | hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref); | ||
456 | } | ||
457 | #endif | ||
458 | else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */ | ||
459 | /* Drop cast, since assembler doesn't care. */ | ||
460 | goto fwdlo; | ||
461 | } else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */ | ||
462 | IRRef k31 = lj_ir_kint(J, 31); | ||
463 | nir = IR(nref); /* May have been reallocated. */ | ||
464 | ir->prev = nir->op1; /* Pass through loword. */ | ||
465 | nir->o = IR_BSAR; /* hi = bsar(lo, 31). */ | ||
466 | nir->op2 = k31; | ||
467 | hi = nref; | ||
468 | } else { /* Zero-extend to 64 bit. */ | ||
469 | hi = lj_ir_kint(J, 0); | ||
470 | goto fwdlo; | ||
471 | } | ||
472 | break; | ||
473 | } | ||
474 | case IR_CALLXS: | ||
475 | goto split_call; | ||
476 | case IR_PHI: { | ||
477 | IRRef hiref2; | ||
478 | if ((irref_isk(nir->op1) && irref_isk(nir->op2)) || | ||
479 | nir->op1 == nir->op2) | ||
480 | J->cur.nins--; /* Drop useless PHIs. */ | ||
481 | hiref2 = hisubst[ir->op2]; | ||
482 | if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2)) | ||
483 | split_emit(J, IRTI(IR_PHI), hiref, hiref2); | ||
484 | break; | ||
485 | } | ||
486 | default: | ||
487 | lua_assert(ir->o <= IR_NE); /* Comparisons. */ | ||
488 | split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]); | ||
489 | break; | ||
490 | } | ||
491 | } else | ||
492 | #endif | ||
493 | #if LJ_SOFTFP | ||
494 | if (ir->o == IR_SLOAD) { | ||
495 | if ((nir->op2 & IRSLOAD_CONVERT)) { /* Convert from number to int. */ | ||
496 | nir->op2 &= ~IRSLOAD_CONVERT; | ||
497 | if (!(nir->op2 & IRSLOAD_TYPECHECK)) | ||
498 | nir->t.irt = IRT_INT; /* Drop guard. */ | ||
499 | split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref); | ||
500 | ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t)); | ||
501 | } | ||
502 | } else if (ir->o == IR_TOBIT) { | ||
503 | IRRef tmp, op1 = ir->op1; | ||
504 | J->cur.nins--; | ||
505 | #if LJ_LE | ||
506 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]); | ||
507 | #else | ||
508 | tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev); | ||
509 | #endif | ||
510 | ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit); | ||
511 | } else if (ir->o == IR_TOSTR) { | ||
512 | if (hisubst[ir->op1]) { | ||
513 | if (irref_isk(ir->op1)) | ||
514 | nir->op1 = ir->op1; | ||
515 | else | ||
516 | split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref); | ||
517 | } | ||
518 | } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) { | ||
519 | if (irref_isk(ir->op2) && hisubst[ir->op2]) | ||
520 | nir->op2 = ir->op2; | ||
521 | } else | ||
522 | #endif | ||
523 | if (ir->o == IR_CONV) { /* See above, too. */ | ||
524 | IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); | ||
525 | #if LJ_32 && LJ_HASFFI | ||
526 | if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */ | ||
527 | #if LJ_SOFTFP | ||
528 | if (irt_isfloat(ir->t)) { | ||
529 | split_call_l(J, hisubst, oir, ir, | ||
530 | st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f); | ||
531 | J->cur.nins--; /* Drop unused HIOP. */ | ||
532 | } | ||
533 | #else | ||
534 | if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */ | ||
535 | ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)), | ||
536 | hisubst[ir->op1], nref); | ||
537 | } | ||
538 | #endif | ||
539 | else { /* Truncate to lower 32 bits. */ | ||
540 | fwdlo: | ||
541 | ir->prev = nir->op1; /* Forward loword. */ | ||
542 | /* Replace with NOP to avoid messing up the snapshot logic. */ | ||
543 | nir->ot = IRT(IR_NOP, IRT_NIL); | ||
544 | nir->op1 = nir->op2 = 0; | ||
545 | } | ||
546 | } | ||
547 | #endif | ||
548 | #if LJ_SOFTFP && LJ_32 && LJ_HASFFI | ||
549 | else if (irt_isfloat(ir->t)) { | ||
550 | if (st == IRT_NUM) { | ||
551 | split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f); | ||
552 | J->cur.nins--; /* Drop unused HIOP. */ | ||
553 | } else { | ||
554 | nir->o = IR_CALLN; | ||
555 | nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; | ||
556 | } | ||
557 | } else if (st == IRT_FLOAT) { | ||
558 | nir->o = IR_CALLN; | ||
559 | nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; | ||
560 | } else | ||
561 | #endif | ||
562 | #if LJ_SOFTFP | ||
563 | if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) { | ||
564 | if (irt_isguard(ir->t)) { | ||
565 | lua_assert(st == IRT_NUM && irt_isint(ir->t)); | ||
566 | J->cur.nins--; | ||
567 | ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1); | ||
568 | } else { | ||
569 | split_call_l(J, hisubst, oir, ir, | ||
570 | #if LJ_32 && LJ_HASFFI | ||
571 | st == IRT_NUM ? | ||
572 | (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) : | ||
573 | (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui) | ||
574 | #else | ||
575 | IRCALL_softfp_d2i | ||
576 | #endif | ||
577 | ); | ||
578 | J->cur.nins--; /* Drop unused HIOP. */ | ||
579 | } | ||
580 | } | ||
581 | #endif | ||
582 | } else if (ir->o == IR_CALLXS) { | ||
583 | IRRef hiref; | ||
584 | split_call: | ||
585 | hiref = hisubst[ir->op1]; | ||
586 | if (hiref) { | ||
587 | IROpT ot = nir->ot; | ||
588 | IRRef op2 = nir->op2; | ||
589 | nir->ot = IRT(IR_CARG, IRT_NIL); | ||
590 | #if LJ_LE | ||
591 | nir->op2 = hiref; | ||
592 | #else | ||
593 | nir->op2 = nir->op1; nir->op1 = hiref; | ||
594 | #endif | ||
595 | ir->prev = nref = split_emit(J, ot, nref, op2); | ||
596 | } | ||
597 | if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t)) | ||
598 | hi = split_emit(J, | ||
599 | IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT), | ||
600 | nref, nref); | ||
601 | } else if (ir->o == IR_CARG) { | ||
602 | IRRef hiref = hisubst[ir->op1]; | ||
603 | if (hiref) { | ||
604 | IRRef op2 = nir->op2; | ||
605 | #if LJ_LE | ||
606 | nir->op2 = hiref; | ||
607 | #else | ||
608 | nir->op2 = nir->op1; nir->op1 = hiref; | ||
609 | #endif | ||
610 | ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); | ||
611 | nir = IR(nref); | ||
612 | } | ||
613 | hiref = hisubst[ir->op2]; | ||
614 | if (hiref) { | ||
615 | #if !LJ_TARGET_X86 | ||
616 | int carg = 0; | ||
617 | IRIns *cir; | ||
618 | for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1)) | ||
619 | carg++; | ||
620 | if ((carg & 1) == 0) { /* Align 64 bit arguments. */ | ||
621 | IRRef op2 = nir->op2; | ||
622 | nir->op2 = REF_NIL; | ||
623 | nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2); | ||
624 | nir = IR(nref); | ||
625 | } | ||
626 | #endif | ||
627 | #if LJ_BE | ||
628 | { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; } | ||
629 | #endif | ||
630 | ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref); | ||
631 | } | ||
632 | } else if (ir->o == IR_CNEWI) { | ||
633 | if (hisubst[ir->op2]) | ||
634 | split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]); | ||
635 | } else if (ir->o == IR_LOOP) { | ||
636 | J->loopref = nref; /* Needed by assembler. */ | ||
637 | } | ||
638 | hisubst[ref] = hi; /* Store hiword substitution. */ | ||
639 | } | ||
640 | |||
641 | /* Add PHI marks. */ | ||
642 | for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) { | ||
643 | IRIns *ir = IR(ref); | ||
644 | if (ir->o != IR_PHI) break; | ||
645 | if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t); | ||
646 | if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t); | ||
647 | } | ||
648 | |||
649 | /* Substitute snapshot maps. */ | ||
650 | oir[nins].prev = J->cur.nins; /* Substitution for last snapshot. */ | ||
651 | { | ||
652 | SnapNo i, nsnap = J->cur.nsnap; | ||
653 | for (i = 0; i < nsnap; i++) { | ||
654 | SnapShot *snap = &J->cur.snap[i]; | ||
655 | SnapEntry *map = &J->cur.snapmap[snap->mapofs]; | ||
656 | MSize n, nent = snap->nent; | ||
657 | snap->ref = snap->ref == REF_FIRST ? REF_FIRST : oir[snap->ref].prev; | ||
658 | for (n = 0; n < nent; n++) { | ||
659 | SnapEntry sn = map[n]; | ||
660 | IRIns *ir = &oir[snap_ref(sn)]; | ||
661 | if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn)))) | ||
662 | map[n] = ((sn & 0xffff0000) | ir->prev); | ||
663 | } | ||
664 | } | ||
665 | } | ||
666 | } | ||
667 | |||
668 | /* Protected callback for split pass. */ | ||
669 | static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud) | ||
670 | { | ||
671 | jit_State *J = (jit_State *)ud; | ||
672 | split_ir(J); | ||
673 | UNUSED(L); UNUSED(dummy); | ||
674 | return NULL; | ||
675 | } | ||
676 | |||
677 | #if defined(LUA_USE_ASSERT) || LJ_SOFTFP | ||
678 | /* Slow, but sure way to check whether a SPLIT pass is needed. */ | ||
679 | static int split_needsplit(jit_State *J) | ||
680 | { | ||
681 | IRIns *ir, *irend; | ||
682 | IRRef ref; | ||
683 | for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++) | ||
684 | if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t)) | ||
685 | return 1; | ||
686 | if (LJ_SOFTFP) { | ||
687 | for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev) | ||
688 | if ((IR(ref)->op2 & IRSLOAD_CONVERT)) | ||
689 | return 1; | ||
690 | } | ||
691 | for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) { | ||
692 | IRType st = (IR(ref)->op2 & IRCONV_SRCMASK); | ||
693 | if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) || | ||
694 | st == IRT_I64 || st == IRT_U64) | ||
695 | return 1; | ||
696 | } | ||
697 | return 0; /* Nope. */ | ||
698 | } | ||
699 | #endif | ||
700 | |||
/* SPLIT pass: entry point. Runs split_ir() in a protected call if needed. */
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  /* On soft-float targets the flag may not be set yet; derive it here. */
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  lua_assert(J->needsplit >= split_needsplit(J));  /* Verify flag. */
#endif
  if (J->needsplit) {
    /* Run the pass in a protected call so errors can be handled here. */
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}
720 | |||
721 | #undef IR | ||
722 | |||
723 | #endif | ||