diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/luajit-2.0/src/lj_record.c | 2228 |
1 files changed, 2228 insertions, 0 deletions
diff --git a/libraries/luajit-2.0/src/lj_record.c b/libraries/luajit-2.0/src/lj_record.c new file mode 100644 index 0000000..2c27a71 --- /dev/null +++ b/libraries/luajit-2.0/src/lj_record.c | |||
@@ -0,0 +1,2228 @@ | |||
1 | /* | ||
2 | ** Trace recorder (bytecode -> SSA IR). | ||
3 | ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | #define lj_record_c | ||
7 | #define LUA_CORE | ||
8 | |||
9 | #include "lj_obj.h" | ||
10 | |||
11 | #if LJ_HASJIT | ||
12 | |||
13 | #include "lj_err.h" | ||
14 | #include "lj_str.h" | ||
15 | #include "lj_tab.h" | ||
16 | #include "lj_meta.h" | ||
17 | #include "lj_frame.h" | ||
18 | #include "lj_bc.h" | ||
19 | #include "lj_ff.h" | ||
20 | #include "lj_ir.h" | ||
21 | #include "lj_jit.h" | ||
22 | #include "lj_ircall.h" | ||
23 | #include "lj_iropt.h" | ||
24 | #include "lj_trace.h" | ||
25 | #include "lj_record.h" | ||
26 | #include "lj_ffrecord.h" | ||
27 | #include "lj_snap.h" | ||
28 | #include "lj_dispatch.h" | ||
29 | #include "lj_vm.h" | ||
30 | |||
/* Some local macros to save typing. Undef'd at the end. */
/* Access the IR instruction at reference 'ref' in the trace being recorded. */
#define IR(ref) (&J->cur.ir[(ref)])

/* Pass IR on to next optimization in chain (FOLD). */
/* Stages the instruction via lj_ir_set(), then lets the fold engine
** simplify/CSE it; the result may be an existing or constant-folded ref.
*/
#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
/* Used where the exact instruction must appear in the IR (e.g. SLOADs). */
#define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
39 | |||
40 | /* -- Sanity checks ------------------------------------------------------- */ | ||
41 | |||
42 | #ifdef LUA_USE_ASSERT | ||
/* Sanity check the whole IR -- sloooow. */
/* Walks all instructions (and constants) of the current trace and verifies
** the structural invariants of the IR: operand modes match lj_ir_mode[],
** references point in the right direction relative to REF_BIAS, and the
** per-opcode 'prev' chains only link instructions of the same opcode.
*/
static void rec_check_ir(jit_State *J)
{
  IRRef i, nins = J->cur.nins, nk = J->cur.nk;
  /* Constants grow downwards from REF_BIAS, instructions grow upwards. */
  lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536);
  for (i = nins-1; i >= nk; i--) {
    IRIns *ir = IR(i);
    uint32_t mode = lj_ir_mode[ir->o];
    IRRef op1 = ir->op1;
    IRRef op2 = ir->op2;
    switch (irm_op1(mode)) {
    case IRMnone: lua_assert(op1 == 0); break;
    case IRMref: lua_assert(op1 >= nk);
      /* Instruction operands point down, constant operands point up. */
      lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break;
    case IRMlit: break;  /* Literal operand: any value is fine. */
    case IRMcst: lua_assert(i < REF_BIAS); continue;  /* Constants live below the bias; skip op2/prev checks. */
    }
    switch (irm_op2(mode)) {
    case IRMnone: lua_assert(op2 == 0); break;
    case IRMref: lua_assert(op2 >= nk);
      lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break;
    case IRMlit: break;
    case IRMcst: lua_assert(0); break;  /* op2 is never a constant mode. */
    }
    if (ir->prev) {
      /* 'prev' links the chain of same-opcode instructions (for CSE). */
      lua_assert(ir->prev >= nk);
      lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i);
      lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o);
    }
  }
}
74 | |||
/* Compare stack slots and frames of the recorder and the VM. */
/* Cross-checks the recorder's shadow stack (J->slot[]) against the live
** interpreter stack: slot types must agree, frame/continuation markers must
** line up with actual frame links, and constants must compare equal to the
** runtime values. Also verifies J->framedepth matches the counted frames.
*/
static void rec_check_slots(jit_State *J)
{
  BCReg s, nslots = J->baseslot + J->maxslot;
  int32_t depth = 0;  /* Frames counted while scanning; compared at the end. */
  cTValue *base = J->L->base - J->baseslot;  /* Bottom of the traced stack. */
  lua_assert(J->baseslot >= 1 && J->baseslot < LJ_MAX_JSLOTS);
  lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME));
  lua_assert(nslots < LJ_MAX_JSLOTS);
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    if (tr) {  /* Unreferenced slots (tr == 0) are not checked. */
      cTValue *tv = &base[s];
      IRRef ref = tref_ref(tr);
      IRIns *ir;
      lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
      ir = IR(ref);
      lua_assert(irt_t(ir->t) == tref_t(tr));
      if (s == 0) {  /* Slot 0 always holds the starting function. */
        lua_assert(tref_isfunc(tr));
      } else if ((tr & TREF_FRAME)) {  /* Call-frame marker slot. */
        GCfunc *fn = gco2func(frame_gc(tv));
        BCReg delta = (BCReg)(tv - frame_prev(tv));
        lua_assert(tref_isfunc(tr));
        if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir));
        /* The previous frame marker must be exactly 'delta' slots below. */
        lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta));
        depth++;
      } else if ((tr & TREF_CONT)) {  /* Metamethod continuation slot. */
        lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void));
        lua_assert((J->slot[s+1] & TREF_FRAME));  /* Always paired with a frame. */
        depth++;
      } else {  /* Ordinary value slot. */
        if (tvisnumber(tv))
          lua_assert(tref_isnumber(tr));  /* Could be IRT_INT etc., too. */
        else
          lua_assert(itype2irt(tv) == tref_type(tr));
        if (tref_isk(tr)) {  /* Compare constants. */
          TValue tvk;
          lj_ir_kvalue(J->L, &tvk, ir);
          /* NaN never compares equal to itself, so it needs a special case. */
          if (!(tvisnum(&tvk) && tvisnan(&tvk)))
            lua_assert(lj_obj_equal(tv, &tvk));
          else
            lua_assert(tvisnum(tv) && tvisnan(tv));
        }
      }
    }
  }
  lua_assert(J->framedepth == depth);
}
124 | #endif | ||
125 | |||
126 | /* -- Type handling and specialization ------------------------------------ */ | ||
127 | |||
128 | /* Note: these functions return tagged references (TRef). */ | ||
129 | |||
/* Specialize a slot to a specific type. Note: slot can be negative! */
/* Emits a raw SLOAD of the given type/mode and caches the resulting
** reference in J->base[slot]. Raw emission is required: SLOADs must not
** be folded away, since they pin the slot's type for the trace.
*/
static TRef sloadt(jit_State *J, int32_t slot, IRType t, int mode)
{
  /* Caller may set IRT_GUARD in t. */
  TRef ref = emitir_raw(IRT(IR_SLOAD, t), (int32_t)J->baseslot+slot, mode);
  J->base[slot] = ref;  /* Cache reference so later uses skip the load. */
  return ref;
}
138 | |||
139 | /* Specialize a slot to the runtime type. Note: slot can be negative! */ | ||
140 | static TRef sload(jit_State *J, int32_t slot) | ||
141 | { | ||
142 | IRType t = itype2irt(&J->L->base[slot]); | ||
143 | TRef ref = emitir_raw(IRTG(IR_SLOAD, t), (int32_t)J->baseslot+slot, | ||
144 | IRSLOAD_TYPECHECK); | ||
145 | if (irtype_ispri(t)) ref = TREF_PRI(t); /* Canonicalize primitive refs. */ | ||
146 | J->base[slot] = ref; | ||
147 | return ref; | ||
148 | } | ||
149 | |||
/* Get TRef from slot. Load slot and specialize if not done already. */
#define getslot(J, s) (J->base[(s)] ? J->base[(s)] : sload(J, (int32_t)(s)))

/* Get TRef for current function. */
/* The function object lives one slot below the base (slot -1). It is loaded
** read-only and without a type guard, since the frame layout already
** guarantees a function there.
*/
static TRef getcurrf(jit_State *J)
{
  if (J->base[-1])
    return J->base[-1];  /* Already loaded earlier in this trace. */
  lua_assert(J->baseslot == 1);  /* Only the bottom frame may lazily load it. */
  return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY);
}
161 | |||
/* Compare for raw object equality.
** Returns 0 if the objects are the same.
** Returns 1 if they are different, but the same type.
** Returns 2 for two different types.
** Comparisons between primitives always return 1 -- no caller cares about it.
**
** Side effect: unless both operands are constants, a guard (EQ or NE,
** matching the runtime outcome) is emitted so the trace stays specialized
** to this comparison result.
*/
int lj_record_objcmp(jit_State *J, TRef a, TRef b, cTValue *av, cTValue *bv)
{
  int diff = !lj_obj_equal(av, bv);  /* Runtime truth of the comparison. */
  if (!tref_isk2(a, b)) {  /* Shortcut, also handles primitives. */
    IRType ta = tref_isinteger(a) ? IRT_INT : tref_type(a);
    IRType tb = tref_isinteger(b) ? IRT_INT : tref_type(b);
    if (ta != tb) {
      /* Widen mixed number/int comparisons to number/number comparison. */
      if (ta == IRT_INT && tb == IRT_NUM) {
	a = emitir(IRTN(IR_CONV), a, IRCONV_NUM_INT);
	ta = IRT_NUM;
      } else if (ta == IRT_NUM && tb == IRT_INT) {
	b = emitir(IRTN(IR_CONV), b, IRCONV_NUM_INT);
      } else {
	return 2;  /* Two different types are never equal. */
      }
    }
    /* Guard on the observed outcome so recorded code matches runtime. */
    emitir(IRTG(diff ? IR_NE : IR_EQ, ta), a, b);
  }
  return diff;
}
189 | |||
190 | /* -- Record loop ops ----------------------------------------------------- */ | ||
191 | |||
/* Loop event. */
/* Classifies what a loop instruction did at recording time; drives the
** decision to stop the trace, unroll, or link to another trace.
*/
typedef enum {
  LOOPEV_LEAVE,		/* Loop is left or not entered. */
  LOOPEV_ENTERLO,	/* Loop is entered with a low iteration count left. */
  LOOPEV_ENTER		/* Loop is entered. */
} LoopEvent;
198 | |||
/* Canonicalize slots: convert integers to numbers. */
/* Ensures all live slots hold IRT_NUM refs before a trace is linked, so the
** stack layout matches what other traces and the interpreter expect.
*/
static void canonicalize_slots(jit_State *J)
{
  BCReg s;
  if (LJ_DUALNUM) return;  /* Dual-number builds keep integer slots as-is. */
  for (s = J->baseslot+J->maxslot-1; s >= 1; s--) {
    TRef tr = J->slot[s];
    if (tref_isinteger(tr)) {
      IRIns *ir = IR(tref_ref(tr));
      /* Read-only SLOADs never materialize in a slot; leave them alone. */
      if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY)))
	J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
    }
  }
}
213 | |||
/* Stop recording. */
/* Finalizes the current trace: sets the link type/target, optionally
** canonicalizes slots (skipped for self-looping traces, where the narrowed
** integer slots must be preserved for the LOOP optimization), and adds the
** final snapshot.
*/
static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
{
  lj_trace_end(J);
  J->cur.linktype = (uint8_t)linktype;
  J->cur.link = (uint16_t)lnk;
  /* Looping back at the same stack level? */
  if (lnk == J->cur.traceno && J->framedepth + J->retdepth == 0) {
    if ((J->flags & JIT_F_OPT_LOOP))  /* Shall we try to create a loop? */
      goto nocanon;  /* Do not canonicalize or we lose the narrowing. */
    if (J->cur.root)  /* Otherwise ensure we always link to the root trace. */
      J->cur.link = J->cur.root;
  }
  canonicalize_slots(J);
nocanon:
  /* Note: all loop ops must set J->pc to the following instruction! */
  lj_snap_add(J);  /* Add loop snapshot. */
  J->needsnap = 0;
  J->mergesnap = 1;  /* In case recording continues. */
}
234 | |||
/* Search bytecode backwards for a int/num constant slot initializer.
** Returns the IR constant for the initializer converted to type 't', or 0
** if no unconditional constant initializer of the right type was found.
*/
static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t)
{
  /* This algorithm is rather simplistic and assumes quite a bit about
  ** how the bytecode is generated. It works fine for FORI initializers,
  ** but it won't necessarily work in other cases (e.g. iterator arguments).
  ** It doesn't do anything fancy, either (like backpropagating MOVs).
  */
  const BCIns *pc, *startpc = proto_bc(J->pt);
  for (pc = endpc-1; pc > startpc; pc--) {
    BCIns ins = *pc;
    BCOp op = bc_op(ins);
    /* First try to find the last instruction that stores to this slot. */
    if (bcmode_a(op) == BCMbase && bc_a(ins) <= slot) {
      return 0;  /* Multiple results, e.g. from a CALL or KNIL. */
    } else if (bcmode_a(op) == BCMdst && bc_a(ins) == slot) {
      if (op == BC_KSHORT || op == BC_KNUM) {  /* Found const. initializer. */
	/* Now try to verify there's no forward jump across it. */
	/* Note: this reuses 'pc' to keep scanning backwards to startpc. */
	const BCIns *kpc = pc;
	for (; pc > startpc; pc--)
	  if (bc_op(*pc) == BC_JMP) {
	    const BCIns *target = pc+bc_j(*pc)+1;
	    if (target > kpc && target <= endpc)
	      return 0;  /* Conditional assignment. */
	  }
	if (op == BC_KSHORT) {
	  int32_t k = (int32_t)(int16_t)bc_d(ins);
	  return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, (lua_Number)k);
	} else {
	  cTValue *tv = proto_knumtv(J->pt, bc_d(ins));
	  if (t == IRT_INT) {
	    int32_t k = numberVint(tv);
	    /* Only usable as an int if the number is integral. */
	    if (tvisint(tv) || numV(tv) == (lua_Number)k)  /* -0 is ok here. */
	      return lj_ir_kint(J, k);
	    return 0;  /* Type mismatch. */
	  } else {
	    return lj_ir_knum(J, numberVnum(tv));
	  }
	}
      }
      return 0;  /* Non-constant initializer. */
    }
  }
  return 0;  /* No assignment to this slot found? */
}
280 | |||
/* Load and optionally convert a FORI argument from a slot. */
/* A conversion is needed when the slot's runtime int-ness disagrees with
** the requested type 't'. A guard is added for typechecked loads, or for
** narrowing int conversions unless the caller's mode (bits >= 16, i.e. a
** known constant start value) says the conversion can't fail.
*/
static TRef fori_load(jit_State *J, BCReg slot, IRType t, int mode)
{
  int conv = (tvisint(&J->L->base[slot]) != (t==IRT_INT)) ? IRSLOAD_CONVERT : 0;
  return sloadt(J, (int32_t)slot,
		t + (((mode & IRSLOAD_TYPECHECK) ||
		      (conv && t == IRT_INT && !(mode >> 16))) ?
		     IRT_GUARD : 0),
		mode + conv);
}
291 | |||
292 | /* Peek before FORI to find a const initializer. Otherwise load from slot. */ | ||
293 | static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot, | ||
294 | IRType t, int mode) | ||
295 | { | ||
296 | TRef tr = J->base[slot]; | ||
297 | if (!tr) { | ||
298 | tr = find_kinit(J, fori, slot, t); | ||
299 | if (!tr) | ||
300 | tr = fori_load(J, slot, t, mode); | ||
301 | } | ||
302 | return tr; | ||
303 | } | ||
304 | |||
305 | /* Return the direction of the FOR loop iterator. | ||
306 | ** It's important to exactly reproduce the semantics of the interpreter. | ||
307 | */ | ||
308 | static int rec_for_direction(cTValue *o) | ||
309 | { | ||
310 | return (tvisint(o) ? intV(o) : (int32_t)o->u32.hi) >= 0; | ||
311 | } | ||
312 | |||
/* Simulate the runtime behavior of the FOR loop iterator. */
/* Computes whether the loop is (re-)entered or left, and which IR
** comparison opcode guards that outcome. 'isforl' distinguishes the
** loop-back case (FORL: index is incremented first) from loop entry (FORI).
** LOOPEV_ENTERLO is reported when fewer than ~2 iterations remain.
*/
static LoopEvent rec_for_iter(IROp *op, cTValue *o, int isforl)
{
  lua_Number stopv = numberVnum(&o[FORL_STOP]);
  lua_Number idxv = numberVnum(&o[FORL_IDX]);
  lua_Number stepv = numberVnum(&o[FORL_STEP]);
  if (isforl)
    idxv += stepv;  /* FORL increments before the check. */
  if (rec_for_direction(&o[FORL_STEP])) {  /* Forward loop. */
    if (idxv <= stopv) {
      *op = IR_LE;
      return idxv + 2*stepv > stopv ? LOOPEV_ENTERLO : LOOPEV_ENTER;
    }
    *op = IR_GT; return LOOPEV_LEAVE;
  } else {  /* Backward loop. */
    if (stopv <= idxv) {
      *op = IR_GE;
      return idxv + 2*stepv < stopv ? LOOPEV_ENTERLO : LOOPEV_ENTER;
    }
    *op = IR_LT; return LOOPEV_LEAVE;
  }
}
335 | |||
/* Record checks for FOR loop overflow and step direction. */
/* 'dir' is the runtime step direction, 'init' is non-zero when recording
** the loop entry (FORI), where overflow checks for a narrowed integer
** index can be hoisted out of the loop.
*/
static void rec_for_check(jit_State *J, IRType t, int dir,
			  TRef stop, TRef step, int init)
{
  if (!tref_isk(step)) {
    /* Non-constant step: need a guard for the direction. */
    TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J);
    emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero);
    /* Add hoistable overflow checks for a narrowed FORL index. */
    if (init && t == IRT_INT) {
      if (tref_isk(stop)) {
	/* Constant stop: optimize check away or to a range check for step. */
	int32_t k = IR(tref_ref(stop))->i;
	if (dir) {
	  if (k > 0)  /* step <= INT_MAX - stop prevents idx+step overflow. */
	    emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k));
	} else {
	  if (k < 0)  /* step >= INT_MIN - stop prevents idx+step underflow. */
	    emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k));
	}
      } else {
	/* Stop+step variable: need full overflow check. */
	TRef tr = emitir(IRTGI(IR_ADDOV), step, stop);
	emitir(IRTI(IR_USE), tr, 0); /* ADDOV is weak. Avoid dead result. */
      }
    }
  } else if (init && t == IRT_INT && !tref_isk(stop)) {
    /* Constant step: optimize overflow check to a range check for stop. */
    int32_t k = IR(tref_ref(step))->i;
    k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k;
    emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k));
  }
}
369 | |||
/* Record a FORL instruction. */
/* Sets up the scalar-evolution entry (scev) for the loop index, loads or
** finds constants for stop/step, emits direction/overflow checks, and
** (for the loop-back case, init == 0) records the index increment.
*/
static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev,
			 int init)
{
  BCReg ra = bc_a(*fori);
  cTValue *tv = &J->L->base[ra];
  TRef idx = J->base[ra+FORL_IDX];
  /* Pick the index type: keep an existing ref's type, else try narrowing. */
  IRType t = idx ? tref_type(idx) :
	     (init || LJ_DUALNUM) ? lj_opt_narrow_forl(J, tv) : IRT_NUM;
  int mode = IRSLOAD_INHERIT +
    ((!LJ_DUALNUM || tvisint(tv) == (t == IRT_INT)) ? IRSLOAD_READONLY : 0);
  TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode);
  TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode);
  int tc, dir = rec_for_direction(&tv[FORL_STEP]);
  lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI);
  scev->t.irt = t;
  scev->dir = dir;
  scev->stop = tref_ref(stop);
  scev->step = tref_ref(step);
  rec_for_check(J, t, dir, stop, step, init);
  scev->start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT));
  /* Dual-number mode may need a typecheck unless everything is constant
  ** and the runtime int-ness matches the chosen type.
  */
  tc = (LJ_DUALNUM &&
	!(scev->start && irref_isk(scev->stop) && irref_isk(scev->step) &&
	  tvisint(&tv[FORL_IDX]) == (t == IRT_INT))) ?
	IRSLOAD_TYPECHECK : 0;
  if (tc) {
    J->base[ra+FORL_STOP] = stop;
    J->base[ra+FORL_STEP] = step;
  }
  if (!idx)
    idx = fori_load(J, ra+FORL_IDX, t,
		    IRSLOAD_INHERIT + tc + (J->scev.start << 16));
  if (!init)  /* Loop-back: record the index increment. */
    J->base[ra+FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step);
  J->base[ra+FORL_EXT] = idx;  /* External (visible) copy of the index. */
  scev->idx = tref_ref(idx);
  J->maxslot = ra+FORL_EXT+1;
}
408 | |||
/* Record FORL/JFORL or FORI/JFORI. */
/* Records one iteration decision of a numeric for loop. The snapshot is
** taken with maxslot/pc set for the *exit* path of the guard, then both
** are flipped to the path actually taken at runtime -- hence the mirrored
** assignments before and after the guard emission.
*/
static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
{
  BCReg ra = bc_a(*fori);
  TValue *tv = &J->L->base[ra];
  TRef *tr = &J->base[ra];
  IROp op;
  LoopEvent ev;
  TRef stop;
  IRType t;
  if (isforl) {  /* Handle FORL/JFORL opcodes. */
    TRef idx = tr[FORL_IDX];
    if (tref_ref(idx) == J->scev.idx) {
      /* Fast path: reuse the scalar-evolution info from loop entry. */
      t = J->scev.t.irt;
      stop = J->scev.stop;
      idx = emitir(IRT(IR_ADD, t), idx, J->scev.step);
      tr[FORL_EXT] = tr[FORL_IDX] = idx;
    } else {
      ScEvEntry scev;  /* Local entry; J->scev stays untouched. */
      rec_for_loop(J, fori, &scev, 0);
      t = scev.t.irt;
      stop = scev.stop;
    }
  } else {  /* Handle FORI/JFORI opcodes. */
    BCReg i;
    lj_meta_for(J->L, tv);  /* Coerce loop args like the interpreter does. */
    t = (LJ_DUALNUM || tref_isint(tr[FORL_IDX])) ? lj_opt_narrow_forl(J, tv) :
						   IRT_NUM;
    for (i = FORL_IDX; i <= FORL_STEP; i++) {
      if (!tr[i]) sload(J, ra+i);
      lua_assert(tref_isnumber_str(tr[i]));
      if (tref_isstr(tr[i]))  /* Strings are coerced to numbers. */
	tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0);
      if (t == IRT_INT) {
	if (!tref_isinteger(tr[i]))
	  tr[i] = emitir(IRTGI(IR_CONV), tr[i], IRCONV_INT_NUM|IRCONV_CHECK);
      } else {
	if (!tref_isnum(tr[i]))
	  tr[i] = emitir(IRTN(IR_CONV), tr[i], IRCONV_NUM_INT);
      }
    }
    tr[FORL_EXT] = tr[FORL_IDX];
    stop = tr[FORL_STOP];
    rec_for_check(J, t, rec_for_direction(&tv[FORL_STEP]),
		  stop, tr[FORL_STEP], 1);
  }

  ev = rec_for_iter(&op, tv, isforl);
  /* Set the state for the guard's *exit* path before the snapshot. */
  if (ev == LOOPEV_LEAVE) {
    J->maxslot = ra+FORL_EXT+1;
    J->pc = fori+1;
  } else {
    J->maxslot = ra;
    J->pc = fori+bc_j(*fori)+1;
  }
  lj_snap_add(J);

  emitir(IRTG(op, t), tr[FORL_IDX], stop);  /* Guard on the loop condition. */

  /* Now set the state for the path actually taken at runtime. */
  if (ev == LOOPEV_LEAVE) {
    J->maxslot = ra;
    J->pc = fori+bc_j(*fori)+1;
  } else {
    J->maxslot = ra+FORL_EXT+1;
    J->pc = fori+1;
  }
  J->needsnap = 1;
  return ev;
}
478 | |||
/* Record ITERL/JITERL. */
/* The first iterator result in slot 'ra' decides the loop: non-nil loops
** back (copying it into the control variable), nil leaves the loop.
*/
static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
{
  BCReg ra = bc_a(iterins);
  lua_assert(J->base[ra] != 0);
  if (!tref_isnil(J->base[ra])) {  /* Looping back? */
    J->base[ra-1] = J->base[ra];  /* Copy result of ITERC to control var. */
    /* Live slots extend to the iterator results of the preceding ITERC. */
    J->maxslot = ra-1+bc_b(J->pc[-1]);
    J->pc += bc_j(iterins)+1;
    return LOOPEV_ENTER;
  } else {
    J->maxslot = ra-3;  /* Drop the iterator triple (func, state, ctl). */
    J->pc++;
    return LOOPEV_LEAVE;
  }
}
495 | |||
496 | /* Record LOOP/JLOOP. Now, that was easy. */ | ||
497 | static LoopEvent rec_loop(jit_State *J, BCReg ra) | ||
498 | { | ||
499 | if (ra < J->maxslot) J->maxslot = ra; | ||
500 | J->pc++; | ||
501 | return LOOPEV_ENTER; | ||
502 | } | ||
503 | |||
/* Check if a loop repeatedly failed to trace because it didn't loop back. */
/* Scans the trace-abort penalty cache for this pc; only the first matching
** entry is consulted (note the unconditional break after the match).
*/
static int innerloopleft(jit_State *J, const BCIns *pc)
{
  ptrdiff_t i;
  for (i = 0; i < PENALTY_SLOTS; i++)
    if (mref(J->penalty[i].pc, const BCIns) == pc) {
      /* A large accumulated penalty for leave/inner aborts indicates a
      ** low trip count, which makes unrolling attractive.
      */
      if ((J->penalty[i].reason == LJ_TRERR_LLEAVE ||
	   J->penalty[i].reason == LJ_TRERR_LINNER) &&
	  J->penalty[i].val >= 2*PENALTY_MIN)
	return 1;
      break;
    }
  return 0;
}
518 | |||
/* Handle the case when an interpreted loop op is hit. */
/* Decides between ending a root trace as a loop, aborting for an inner
** loop, or unrolling (with limits) in root and side traces.
*/
static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
{
  if (J->parent == 0) {  /* Recording a root trace. */
    if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
      /* Same loop? */
      if (ev == LOOPEV_LEAVE)  /* Must loop back to form a root trace. */
	lj_trace_err(J, LJ_TRERR_LLEAVE);
      rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno);  /* Looping root trace. */
    } else if (ev != LOOPEV_LEAVE) {  /* Entering inner loop? */
      /* It's usually better to abort here and wait until the inner loop
      ** is traced. But if the inner loop repeatedly didn't loop back,
      ** this indicates a low trip count. In this case try unrolling
      ** an inner loop even in a root trace. But it's better to be a bit
      ** more conservative here and only do it for very short loops.
      */
      if (!innerloopleft(J, pc))
	lj_trace_err(J, LJ_TRERR_LINNER);  /* Root trace hit an inner loop. */
      if ((ev != LOOPEV_ENTERLO &&
	   J->loopref && J->cur.nins - J->loopref > 24) || --J->loopunroll < 0)
	lj_trace_err(J, LJ_TRERR_LUNROLL);  /* Limit loop unrolling. */
      J->loopref = J->cur.nins;  /* Remember where this unroll started. */
    }
  } else if (ev != LOOPEV_LEAVE) {  /* Side trace enters an inner loop. */
    J->loopref = J->cur.nins;
    if (--J->loopunroll < 0)
      lj_trace_err(J, LJ_TRERR_LUNROLL);  /* Limit loop unrolling. */
  }  /* Side trace continues across a loop that's left or not entered. */
}
548 | |||
/* Handle the case when an already compiled loop op is hit. */
/* Root traces abort (the inner loop should side-trace back instead);
** side traces either close their own loop or link to the compiled one.
*/
static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
{
  if (J->parent == 0) {  /* Root trace hit an inner loop. */
    /* Better let the inner loop spawn a side trace back here. */
    lj_trace_err(J, LJ_TRERR_LINNER);
  } else if (ev != LOOPEV_LEAVE) {  /* Side trace enters a compiled loop. */
    J->instunroll = 0;  /* Cannot continue across a compiled loop op. */
    if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
      rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno);  /* Form an extra loop. */
    else
      rec_stop(J, LJ_TRLINK_ROOT, lnk);  /* Link to the loop. */
  }  /* Side trace continues across a loop that's left or not entered. */
}
563 | |||
564 | /* -- Record calls and returns -------------------------------------------- */ | ||
565 | |||
/* Specialize to the runtime value of the called function or its prototype. */
/* For Lua functions with many live closures, guard on the prototype's
** bytecode pointer instead of the closure identity, so one trace serves
** all closures of that prototype. Returns the (possibly constant) func ref.
*/
static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
{
  TRef kfunc;
  if (isluafunc(fn)) {
    GCproto *pt = funcproto(fn);
    /* 3 or more closures created? Probably not a monomorphic function. */
    if (pt->flags >= 3*PROTO_CLCOUNT) {  /* Specialize to prototype instead. */
      TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC);
      emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt)));
      (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);  /* Prevent GC of proto. */
      return tr;
    }
  }
  /* Otherwise specialize to the function (closure) value itself. */
  kfunc = lj_ir_kfunc(J, fn);
  emitir(IRTG(IR_EQ, IRT_FUNC), tr, kfunc);
  return kfunc;
}
585 | |||
/* Record call setup. */
/* Ensures func and arguments have refs, resolves a __call metamethod if
** the callee isn't a function (shifting args up by one), then specializes
** to the callee and marks its slot as a frame.
*/
static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
{
  RecordIndex ix;
  TValue *functv = &J->L->base[func];
  TRef *fbase = &J->base[func];
  ptrdiff_t i;
  for (i = 0; i <= nargs; i++)
    (void)getslot(J, func+i);  /* Ensure func and all args have a reference. */
  if (!tref_isfunc(fbase[0])) {  /* Resolve __call metamethod. */
    ix.tab = fbase[0];
    copyTV(J->L, &ix.tabv, functv);
    if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
      lj_trace_err(J, LJ_TRERR_NOMM);
    for (i = ++nargs; i > 0; i--)  /* Shift arguments up. */
      fbase[i] = fbase[i-1];
    fbase[0] = ix.mobj;  /* Replace function. */
    functv = &ix.mobjv;
  }
  fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]);
  J->maxslot = (BCReg)nargs;  /* Only the args are live in the new frame. */
}
608 | |||
609 | /* Record call. */ | ||
610 | void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs) | ||
611 | { | ||
612 | rec_call_setup(J, func, nargs); | ||
613 | /* Bump frame. */ | ||
614 | J->framedepth++; | ||
615 | J->base += func+1; | ||
616 | J->baseslot += func+1; | ||
617 | } | ||
618 | |||
/* Record tail call. */
/* Reuses the current frame: a vararg pseudo-frame below is dissolved
** first, then func+args are moved down over the old frame slots.
*/
void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs)
{
  rec_call_setup(J, func, nargs);
  if (frame_isvarg(J->L->base - 1)) {
    /* Fold away the vararg frame the tail call returns through. */
    BCReg cbase = (BCReg)frame_delta(J->L->base - 1);
    if (--J->framedepth < 0)
      lj_trace_err(J, LJ_TRERR_NYIRETL);
    J->baseslot -= (BCReg)cbase;
    J->base -= cbase;
    func += cbase;
  }
  /* Move func + args down. */
  memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1));
  /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */
  /* Tailcalls can form a loop, so count towards the loop unroll limit. */
  if (++J->tailcalled > J->loopunroll)
    lj_trace_err(J, LJ_TRERR_LUNROLL);
}
638 | |||
/* Check unroll limits for down-recursion. */
/* Counts RETF guards already emitted for this prototype. Returns 1 when
** the trace should be closed as a down-recursion loop; aborts recording
** when the return target varies (RETF to same proto but different pc).
*/
static int check_downrec_unroll(jit_State *J, GCproto *pt)
{
  IRRef ptref;
  /* Find the KGC constant for this prototype, if any. */
  for (ptref = J->chain[IR_KGC]; ptref; ptref = IR(ptref)->prev)
    if (ir_kgc(IR(ptref)) == obj2gco(pt)) {
      int count = 0;
      IRRef ref;
      /* Count RETF instructions returning to this prototype. */
      for (ref = J->chain[IR_RETF]; ref; ref = IR(ref)->prev)
	if (IR(ref)->op1 == ptref)
	  count++;
      if (count) {
	if (J->pc == J->startpc) {
	  if (count + J->tailcalled > J->param[JIT_P_recunroll])
	    return 1;  /* Unroll limit reached: form a down-recursion loop. */
	} else {
	  lj_trace_err(J, LJ_TRERR_DOWNREC);
	}
      }
    }
  return 0;
}
661 | |||
/* Record return. */
/* Handles all frame kinds in order: pcall pseudo-frames are resolved
** immediately (prepending 'true'), unhandled lower-frame returns end the
** trace, vararg frames are dissolved, then the return is recorded into a
** Lua frame, a continuation frame, or aborts for a C frame.
*/
void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
{
  TValue *frame = J->L->base - 1;
  ptrdiff_t i;
  for (i = 0; i < gotresults; i++)
    (void)getslot(J, rbase+i);  /* Ensure all results have a reference. */
  while (frame_ispcall(frame)) {  /* Immediately resolve pcall() returns. */
    BCReg cbase = (BCReg)frame_delta(frame);
    if (--J->framedepth < 0)
      lj_trace_err(J, LJ_TRERR_NYIRETL);
    lua_assert(J->baseslot > 1);
    gotresults++;
    rbase += cbase;
    J->baseslot -= (BCReg)cbase;
    J->base -= cbase;
    J->base[--rbase] = TREF_TRUE;  /* Prepend true to results. */
    frame = frame_prevd(frame);
  }
  /* Return to lower frame via interpreter for unhandled cases. */
  if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
       (!frame_islua(frame) ||
	(J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) {
    /* NYI: specialize to frame type and return directly, not via RET*. */
    for (i = -1; i < (ptrdiff_t)rbase; i++)
      J->base[i] = 0;  /* Purge dead slots. */
    J->maxslot = rbase + (BCReg)gotresults;
    rec_stop(J, LJ_TRLINK_RETURN, 0);  /* Return to interpreter. */
    return;
  }
  if (frame_isvarg(frame)) {
    BCReg cbase = (BCReg)frame_delta(frame);
    if (--J->framedepth < 0)  /* NYI: return of vararg func to lower frame. */
      lj_trace_err(J, LJ_TRERR_NYIRETL);
    lua_assert(J->baseslot > 1);
    rbase += cbase;
    J->baseslot -= (BCReg)cbase;
    J->base -= cbase;
    frame = frame_prevd(frame);
  }
  if (frame_islua(frame)) {  /* Return to Lua frame. */
    BCIns callins = *(frame_pc(frame)-1);  /* The CALL that created the frame. */
    ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 :gotresults;
    BCReg cbase = bc_a(callins);
    GCproto *pt = funcproto(frame_func(frame - (cbase+1)));
    if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
      /* Returning from the trace's bottom frame: possible down-recursion. */
      if (check_downrec_unroll(J, pt)) {
	J->maxslot = (BCReg)(rbase + gotresults);
	lj_snap_purge(J);
	rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno);  /* Down-recursion. */
	return;
      }
      lj_snap_add(J);
    }
    for (i = 0; i < nresults; i++)  /* Adjust results. */
      J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
    J->maxslot = cbase+(BCReg)nresults;
    if (J->framedepth > 0) {  /* Return to a frame that is part of the trace. */
      J->framedepth--;
      lua_assert(J->baseslot > cbase+1);
      J->baseslot -= cbase+1;
      J->base -= cbase+1;
    } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
      /* Return to lower frame would leave the loop in a root trace. */
      lj_trace_err(J, LJ_TRERR_LLEAVE);
    } else {  /* Return to lower frame. Guard for the target we return to. */
      TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
      TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame));
      emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc);
      J->retdepth++;
      J->needsnap = 1;
      lua_assert(J->baseslot == 1);
      /* Shift result slots up and clear the slots of the new frame below. */
      memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults);
      memset(J->base-1, 0, sizeof(TRef)*(cbase+1));
    }
  } else if (frame_iscont(frame)) {  /* Return to continuation frame. */
    ASMFunction cont = frame_contf(frame);
    BCReg cbase = (BCReg)frame_delta(frame);
    if ((J->framedepth -= 2) < 0)  /* Continuations use two frame slots. */
      lj_trace_err(J, LJ_TRERR_NYIRETL);
    J->baseslot -= (BCReg)cbase;
    J->base -= cbase;
    J->maxslot = cbase-2;
    if (cont == lj_cont_ra) {
      /* Copy result to destination slot. */
      BCReg dst = bc_a(*(frame_contpc(frame)-1));
      J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL;
      if (dst >= J->maxslot) J->maxslot = dst+1;
    } else if (cont == lj_cont_nop) {
      /* Nothing to do here. */
    } else if (cont == lj_cont_cat) {
      lua_assert(0);  /* Concat continuations are handled elsewhere. */
    } else {
      /* Result type already specialized. */
      lua_assert(cont == lj_cont_condf || cont == lj_cont_condt);
    }
  } else {
    lj_trace_err(J, LJ_TRERR_NYIRETL);  /* NYI: handle return to C frame. */
  }
  lua_assert(J->baseslot >= 1);
}
764 | |||
765 | /* -- Metamethod handling ------------------------------------------------- */ | ||
766 | |||
/* Prepare to record a call to a metamethod.
** Sets up a continuation frame on top of the current frame: the continuation
** function is stored both in the host stack (for the interpreter to resume
** after a trace exit) and as an IR constant in the matching trace slot.
** Returns the base slot (relative to J->base) for the metamethod call.
*/
static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
{
  BCReg s, top = curr_proto(J->L)->framesize;
  TRef trcont;
  setcont(&J->L->base[top], cont);
#if LJ_64
  /* On 64 bit the continuation is encoded relative to lj_vm_asm_begin. */
  trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin));
#else
  trcont = lj_ir_kptr(J, (void *)cont);
#endif
  J->base[top] = trcont | TREF_CONT;
  J->framedepth++;
  for (s = J->maxslot; s < top; s++)
    J->base[s] = 0;  /* Clear frame gap to avoid resurrecting previous refs. */
  return top+1;
}
784 | |||
/* Record metamethod lookup.
** Looks up metamethod mm for the object described by ix->tab/ix->tabv and
** emits the IR guards needed to make the lookup result stable. On success,
** ix->mobj/ix->mobjv hold the metamethod (function or index table) and
** 1 is returned; returns 0 if there is no applicable metamethod.
*/
int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
{
  RecordIndex mix;
  GCtab *mt;
  if (tref_istab(ix->tab)) {
    mt = tabref(tabV(&ix->tabv)->metatable);
    mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META);
  } else if (tref_isudata(ix->tab)) {
    int udtype = udataV(&ix->tabv)->udtype;
    mt = tabref(udataV(&ix->tabv)->metatable);
    /* The metatables of special userdata objects are treated as immutable. */
    if (udtype != UDTYPE_USERDATA) {
      cTValue *mo;
      if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) {
        /* Specialize to the C library namespace object. */
        emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv)));
      } else {
        /* Specialize to the type of userdata. */
        TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE);
        emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, udtype));
      }
  immutable_mt:
      /* Immutable metatable: fold the lookup at record time, no guards. */
      mo = lj_tab_getstr(mt, mmname_str(J2G(J), mm));
      if (!mo || tvisnil(mo))
        return 0;  /* No metamethod. */
      /* Treat metamethod or index table as immutable, too. */
      if (!(tvisfunc(mo) || tvistab(mo)))
        lj_trace_err(J, LJ_TRERR_BADTYPE);
      copyTV(J->L, &ix->mobjv, mo);
      ix->mobj = lj_ir_kgc(J, gcV(mo), tvisfunc(mo) ? IRT_FUNC : IRT_TAB);
      ix->mtv = mt;
      ix->mt = TREF_NIL;  /* Dummy value for comparison semantics. */
      return 1;  /* Got metamethod or index table. */
    }
    mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META);
  } else {
    /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */
    mt = tabref(basemt_obj(J2G(J), &ix->tabv));
    if (mt == NULL) {
      ix->mt = TREF_NIL;
      return 0;  /* No metamethod. */
    }
    /* The cdata metatable is treated as immutable. */
    if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
    ix->mt = mix.tab = lj_ir_ktab(J, mt);
    goto nocheck;
  }
  ix->mt = mt ? mix.tab : TREF_NIL;
  /* Guard for presence resp. absence of a metatable. */
  emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mix.tab, lj_ir_knull(J, IRT_TAB));
nocheck:
  if (mt) {
    GCstr *mmstr = mmname_str(J2G(J), mm);
    cTValue *mo = lj_tab_getstr(mt, mmstr);
    if (mo && !tvisnil(mo))
      copyTV(J->L, &ix->mobjv, mo);
    ix->mtv = mt;
    settabV(J->L, &mix.tabv, mt);
    setstrV(J->L, &mix.keyv, mmstr);
    mix.key = lj_ir_kstr(J, mmstr);
    mix.val = 0;
    mix.idxchain = 0;
    /* Record the raw lookup of the metamethod in the metatable itself. */
    ix->mobj = lj_record_idx(J, &mix);
    return !tref_isnil(ix->mobj);  /* 1 if metamethod found, 0 if not. */
  }
  return 0;  /* No metamethod. */
}
852 | |||
/* Record call to arithmetic metamethod.
** ix->tab/ix->key hold the two operands (only ix->tab is used for MM_unm).
** Tries the metamethod on the 1st operand, then the 2nd, and records the
** call. Always returns 0: the result is delivered through the lj_cont_ra
** continuation once the called function returns.
*/
static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
{
  /* Set up metamethod call first to save ix->tab and ix->tabv. */
  BCReg func = rec_mm_prep(J, lj_cont_ra);
  TRef *base = J->base + func;
  TValue *basev = J->L->base + func;
  base[1] = ix->tab; base[2] = ix->key;
  copyTV(J->L, basev+1, &ix->tabv);
  copyTV(J->L, basev+2, &ix->keyv);
  if (!lj_record_mm_lookup(J, ix, mm)) {  /* Lookup mm on 1st operand. */
    if (mm != MM_unm) {
      ix->tab = ix->key;
      copyTV(J->L, &ix->tabv, &ix->keyv);
      if (lj_record_mm_lookup(J, ix, mm))  /* Lookup mm on 2nd operand. */
        goto ok;
    }
    lj_trace_err(J, LJ_TRERR_NOMM);
  }
ok:
  base[0] = ix->mobj;
  copyTV(J->L, basev+0, &ix->mobjv);
  lj_record_call(J, func, 2);
  return 0;  /* No result yet. */
}
878 | |||
/* Record call to __len metamethod.
** tr/tv is the operand as trace ref and runtime value. Without a
** metamethod, tables get a direct length call only under LUA52COMPAT;
** other types abort the trace. Returns 0 on the metamethod path: the
** result arrives via the lj_cont_ra continuation.
*/
static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
{
  RecordIndex ix;
  ix.tab = tr;
  copyTV(J->L, &ix.tabv, tv);
  if (lj_record_mm_lookup(J, &ix, MM_len)) {
    BCReg func = rec_mm_prep(J, lj_cont_ra);
    TRef *base = J->base + func;
    TValue *basev = J->L->base + func;
    base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
    base[1] = tr; copyTV(J->L, basev+1, tv);
#ifdef LUAJIT_ENABLE_LUA52COMPAT
    /* Lua 5.2 passes the operand twice to __len. */
    base[2] = tr; copyTV(J->L, basev+2, tv);
#else
    base[2] = TREF_NIL; setnilV(basev+2);
#endif
    lj_record_call(J, func, 2);
  } else {
#ifdef LUAJIT_ENABLE_LUA52COMPAT
    if (tref_istab(tr))
      return lj_ir_call(J, IRCALL_lj_tab_len, tr);
#endif
    lj_trace_err(J, LJ_TRERR_NOMM);
  }
  return 0;  /* No result yet. */
}
906 | |||
/* Call a comparison metamethod.
** op bit 0 selects the negated (lj_cont_condf) vs. plain (lj_cont_condt)
** continuation. Records the call ix->mobj(ix->val, ix->key).
*/
static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
{
  BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
  TRef *base = J->base + func;
  TValue *tv = J->L->base + func;
  base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
  copyTV(J->L, tv+0, &ix->mobjv);
  copyTV(J->L, tv+1, &ix->valv);
  copyTV(J->L, tv+2, &ix->keyv);
  lj_record_call(J, func, 2);
}
919 | |||
/* Record call to equality comparison metamethod (for tab and udata only).
** ix->val/ix->key hold the two operands. The __eq metamethod is only
** invoked when both operands resolve to the same metamethod; otherwise
** nothing is recorded and the comparison falls back to plain inequality.
*/
static void rec_mm_equal(jit_State *J, RecordIndex *ix, int op)
{
  ix->tab = ix->val;
  copyTV(J->L, &ix->tabv, &ix->valv);
  if (lj_record_mm_lookup(J, ix, MM_eq)) {  /* Lookup mm on 1st operand. */
    cTValue *bv;
    TRef mo1 = ix->mobj;
    TValue mo1v;
    copyTV(J->L, &mo1v, &ix->mobjv);
    /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */
    bv = &ix->keyv;
    if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) {
      TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META);
      emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
    } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) {
      TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META);
      emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
    } else {  /* Lookup metamethod on 2nd operand and compare both. */
      ix->tab = ix->key;
      copyTV(J->L, &ix->tabv, bv);
      if (!lj_record_mm_lookup(J, ix, MM_eq) ||
          lj_record_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv))
        return;
    }
    rec_mm_callcomp(J, ix, op);
  }
}
948 | |||
/* Record call to ordered comparison metamethods (for arbitrary objects).
** op bit 1 requests __le; when that lookup fails, the operands are swapped
** and the condition negated (op ^= 3) to retry with __lt, per the Lua 5.1
** fallback rule. Gives up (interpreter will throw) when no applicable
** metamethod exists.
*/
static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op)
{
  ix->tab = ix->val;
  copyTV(J->L, &ix->tabv, &ix->valv);
  while (1) {
    MMS mm = (op & 2) ? MM_le : MM_lt;  /* Try __le + __lt or only __lt. */
    if (lj_record_mm_lookup(J, ix, mm)) {  /* Lookup mm on 1st operand. */
      cTValue *bv;
      TRef mo1 = ix->mobj;
      TValue mo1v;
      copyTV(J->L, &mo1v, &ix->mobjv);
      /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */
      bv = &ix->keyv;
      if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) {
        TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META);
        emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
      } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) {
        TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META);
        emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
      } else {  /* Lookup metamethod on 2nd operand and compare both. */
        ix->tab = ix->key;
        copyTV(J->L, &ix->tabv, bv);
        if (!lj_record_mm_lookup(J, ix, mm) ||
            lj_record_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv))
          goto nomatch;
      }
      rec_mm_callcomp(J, ix, op);
      return;
    }
  nomatch:
    /* First lookup failed. Retry with __lt and swapped operands. */
    if (!(op & 2)) break;  /* Already at __lt. Interpreter will throw. */
    ix->tab = ix->key; ix->key = ix->val; ix->val = ix->tab;
    copyTV(J->L, &ix->tabv, &ix->keyv);
    copyTV(J->L, &ix->keyv, &ix->valv);
    copyTV(J->L, &ix->valv, &ix->tabv);
    op ^= 3;
  }
}
989 | |||
#if LJ_HASFFI
/* Setup call to cdata comparison metamethod.
** At least one operand is a cdata object (asserted); the metamethod is
** looked up on it. A snapshot is taken first since the recorded call may
** exit the trace.
*/
static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm)
{
  lj_snap_add(J);
  if (tref_iscdata(ix->val)) {
    ix->tab = ix->val;
    copyTV(J->L, &ix->tabv, &ix->valv);
  } else {
    lua_assert(tref_iscdata(ix->key));
    ix->tab = ix->key;
    copyTV(J->L, &ix->tabv, &ix->keyv);
  }
  lj_record_mm_lookup(J, ix, mm);
  rec_mm_callcomp(J, ix, op);
}
#endif
1007 | |||
1008 | /* -- Indexed access ------------------------------------------------------ */ | ||
1009 | |||
/* Record bounds-check.
** asizeref is the IR ref of the table's array size, ikey the (narrowed)
** integer key, asize the current runtime array size. When loop and ABC
** optimizations are on and scalar evolution info is available for the key,
** a loop-invariant check on the loop bounds is emitted instead of a
** per-iteration check.
*/
static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
{
  /* Try to emit invariant bounds checks. */
  if ((J->flags & (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) ==
      (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) {
    IRRef ref = tref_ref(ikey);
    IRIns *ir = IR(ref);
    int32_t ofs = 0;
    IRRef ofsref = 0;
    /* Handle constant offsets. */
    if (ir->o == IR_ADD && irref_isk(ir->op2)) {
      ofsref = ir->op2;
      ofs = IR(ofsref)->i;
      ref = ir->op1;
      ir = IR(ref);
    }
    /* Got scalar evolution analysis results for this reference? */
    if (ref == J->scev.idx) {
      int32_t stop;
      lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD);
      stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]);
      /* Runtime value for stop of loop is within bounds? */
      if ((int64_t)stop + ofs < (int64_t)asize) {
        /* Emit invariant bounds check for stop. */
        emitir(IRTG(IR_ABC, IRT_P32), asizeref, ofs == 0 ? J->scev.stop :
               emitir(IRTI(IR_ADD), J->scev.stop, ofsref));
        /* Emit invariant bounds check for start, if not const or negative. */
        if (!(J->scev.dir && J->scev.start &&
              (int64_t)IR(J->scev.start)->i + ofs >= 0))
          emitir(IRTG(IR_ABC, IRT_P32), asizeref, ikey);
        return;
      }
    }
  }
  emitir(IRTGI(IR_ABC), asizeref, ikey);  /* Emit regular bounds check. */
}
1047 | |||
/* Record indexed key lookup.
** Returns an IR reference (AREF, HREFK, HREF or a KKPTR to niltv) that
** addresses the slot for ix->key inside the table ix->tab. As a side
** effect ix->oldv is set to the slot's current runtime value, which the
** caller uses for type specialization and nil checks.
*/
static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
{
  TRef key;
  GCtab *t = tabV(&ix->tabv);
  ix->oldv = lj_tab_get(J->L, t, &ix->keyv);  /* Lookup previous value. */

  /* Integer keys are looked up in the array part first. */
  key = ix->key;
  if (tref_isnumber(key)) {
    int32_t k = numberVint(&ix->keyv);
    if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k)
      k = LJ_MAX_ASIZE;  /* Non-integral number: force the hash path below. */
    if ((MSize)k < LJ_MAX_ASIZE) {  /* Potential array key? */
      TRef ikey = lj_opt_narrow_index(J, key);
      TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
      if ((MSize)k < t->asize) {  /* Currently an array key? */
        TRef arrayref;
        rec_idx_abc(J, asizeref, ikey, t->asize);
        arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY);
        return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey);
      } else {  /* Currently not in array (may be an array extension)? */
        emitir(IRTGI(IR_ULE), asizeref, ikey);  /* Inv. bounds check. */
        if (k == 0 && tref_isk(key))
          key = lj_ir_knum_zero(J);  /* Canonicalize 0 or +-0.0 to +0.0. */
        /* And continue with the hash lookup. */
      }
    } else if (!tref_isk(key)) {
      /* We can rule out const numbers which failed the integerness test
      ** above. But all other numbers are potential array keys.
      */
      if (t->asize == 0) {  /* True sparse tables have an empty array part. */
        /* Guard that the array part stays empty. */
        TRef tmp = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
        emitir(IRTGI(IR_EQ), tmp, lj_ir_kint(J, 0));
      } else {
        lj_trace_err(J, LJ_TRERR_NYITMIX);  /* NYI: mixed sparse/dense key. */
      }
    }
  }

  /* Otherwise the key is located in the hash part. */
  if (t->hmask == 0) {  /* Shortcut for empty hash part. */
    /* Guard that the hash part stays empty. */
    TRef tmp = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
    emitir(IRTGI(IR_EQ), tmp, lj_ir_kint(J, 0));
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  }
  if (tref_isinteger(key))  /* Hash keys are based on numbers, not ints. */
    ix->key = key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
  if (tref_isk(key)) {
    /* Optimize lookup of constant hash keys. */
    MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val);
    if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) &&
        hslot <= 65535*(MSize)sizeof(Node)) {
      TRef node, kslot;
      TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
      emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
      node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE);
      kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
      return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot);
    }
  }
  /* Fall back to a regular hash lookup. */
  return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key);
}
1114 | |||
1115 | /* Determine whether a key is NOT one of the fast metamethod names. */ | ||
1116 | static int nommstr(jit_State *J, TRef key) | ||
1117 | { | ||
1118 | if (tref_isstr(key)) { | ||
1119 | if (tref_isk(key)) { | ||
1120 | GCstr *str = ir_kstr(IR(tref_ref(key))); | ||
1121 | uint32_t mm; | ||
1122 | for (mm = 0; mm <= MM_FAST; mm++) | ||
1123 | if (mmname_str(J2G(J), mm) == str) | ||
1124 | return 0; /* MUST be one the fast metamethod names. */ | ||
1125 | } else { | ||
1126 | return 0; /* Variable string key MAY be a metamethod name. */ | ||
1127 | } | ||
1128 | } | ||
1129 | return 1; /* CANNOT be a metamethod name. */ | ||
1130 | } | ||
1131 | |||
/* Record indexed load/store.
** ix describes table, key and (for stores) value, both as trace refs and
** runtime values; ix->idxchain bounds __index/__newindex chaining. For a
** load, returns the loaded value ref (or TREF_NIL). For a store or a
** pending metamethod call, returns 0.
*/
TRef lj_record_idx(jit_State *J, RecordIndex *ix)
{
  TRef xref;
  IROp xrefop, loadop;
  cTValue *oldv;

  while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */
    /* Never call raw lj_record_idx() on non-table. */
    lua_assert(ix->idxchain != 0);
    if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index))
      lj_trace_err(J, LJ_TRERR_NOMM);
  handlemm:
    if (tref_isfunc(ix->mobj)) {  /* Handle metamethod call. */
      BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
      TRef *base = J->base + func;
      TValue *tv = J->L->base + func;
      base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
      setfuncV(J->L, tv+0, funcV(&ix->mobjv));
      copyTV(J->L, tv+1, &ix->tabv);
      copyTV(J->L, tv+2, &ix->keyv);
      if (ix->val) {
        base[3] = ix->val;
        copyTV(J->L, tv+3, &ix->valv);
        lj_record_call(J, func, 3);  /* mobj(tab, key, val) */
        return 0;
      } else {
        lj_record_call(J, func, 2);  /* res = mobj(tab, key) */
        return 0;  /* No result yet. */
      }
    }
    /* Otherwise retry lookup with metaobject. */
    ix->tab = ix->mobj;
    copyTV(J->L, &ix->tabv, &ix->mobjv);
    if (--ix->idxchain == 0)
      lj_trace_err(J, LJ_TRERR_IDXLOOP);
  }

  /* First catch nil and NaN keys for tables. */
  if (tvisnil(&ix->keyv) || (tvisnum(&ix->keyv) && tvisnan(&ix->keyv))) {
    if (ix->val)  /* Better fail early. */
      lj_trace_err(J, LJ_TRERR_STORENN);
    if (tref_isk(ix->key)) {
      if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_index))
        goto handlemm;
      return TREF_NIL;
    }
  }

  /* Record the key lookup. */
  xref = rec_idx_key(J, ix);
  xrefop = IR(tref_ref(xref))->o;
  loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD;
  /* The lj_meta_tset() inconsistency is gone, but better play safe. */
  oldv = xrefop == IR_KKPTR ? (cTValue *)ir_kptr(IR(tref_ref(xref))) : ix->oldv;

  if (ix->val == 0) {  /* Indexed load */
    IRType t = itype2irt(oldv);
    TRef res;
    if (oldv == niltvg(J2G(J))) {
      /* Key certainly absent: guard the reference stays niltv. */
      emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
      res = TREF_NIL;
    } else {
      res = emitir(IRTG(loadop, t), xref, 0);
    }
    if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index))
      goto handlemm;
    if (irtype_ispri(t)) res = TREF_PRI(t);  /* Canonicalize primitives. */
    return res;
  } else {  /* Indexed store. */
    GCtab *mt = tabref(tabV(&ix->tabv)->metatable);
    int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val);
    if (tvisnil(oldv)) {  /* Previous value was nil? */
      /* Need to duplicate the hasmm check for the early guards. */
      int hasmm = 0;
      if (ix->idxchain && mt) {
        cTValue *mo = lj_tab_getstr(mt, mmname_str(J2G(J), MM_newindex));
        hasmm = mo && !tvisnil(mo);
      }
      if (hasmm)
        emitir(IRTG(loadop, IRT_NIL), xref, 0);  /* Guard for nil value. */
      else if (xrefop == IR_HREF)
        emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32),
               xref, lj_ir_kkptr(J, niltvg(J2G(J))));
      if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) {
        lua_assert(hasmm);
        goto handlemm;
      }
      lua_assert(!hasmm);
      if (oldv == niltvg(J2G(J))) {  /* Need to insert a new key. */
        TRef key = ix->key;
        if (tref_isinteger(key))  /* NEWREF needs a TValue as a key. */
          key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
        xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key);
        keybarrier = 0;  /* NEWREF already takes care of the key barrier. */
      }
    } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) {
      /* Cannot derive that the previous value was non-nil, must do checks. */
      if (xrefop == IR_HREF)  /* Guard against store to niltv. */
        emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
      if (ix->idxchain) {  /* Metamethod lookup required? */
        /* A check for NULL metatable is cheaper (hoistable) than a load. */
        if (!mt) {
          TRef mtref = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META);
          emitir(IRTG(IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB));
        } else {
          IRType t = itype2irt(oldv);
          emitir(IRTG(loadop, t), xref, 0);  /* Guard for non-nil value. */
        }
      }
    } else {
      keybarrier = 0;  /* Previous non-nil value kept the key alive. */
    }
    /* Convert int to number before storing. */
    if (!LJ_DUALNUM && tref_isinteger(ix->val))
      ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT);
    emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val);
    if (keybarrier || tref_isgcv(ix->val))
      emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0);
    /* Invalidate neg. metamethod cache for stores with certain string keys. */
    if (!nommstr(J, ix->key)) {
      TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM);
      emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0));
    }
    J->needsnap = 1;
    return 0;
  }
}
1260 | |||
1261 | /* -- Upvalue access ------------------------------------------------------ */ | ||
1262 | |||
/* Record upvalue load/store.
** uv is the upvalue index of the current function; val is 0 for a load,
** otherwise the value ref to store. An open upvalue aliasing a slot of
** the current trace is accessed directly; all other upvalues go through
** guarded UREFO/UREFC references (closed ones need a write barrier).
*/
static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
{
  GCupval *uvp = &gcref(J->fn->l.uvptr[uv])->uv;
  TRef fn = getcurrf(J);
  IRRef uref;
  int needbarrier = 0;
  /* Note: this effectively limits LJ_MAX_UPVAL to 127. */
  uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
  if (!uvp->closed) {
    /* In current stack? */
    if (uvval(uvp) >= tvref(J->L->stack) &&
        uvval(uvp) < tvref(J->L->maxstack)) {
      int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
      if (slot >= 0) {  /* Aliases an SSA slot? */
        slot -= (int32_t)J->baseslot;  /* Note: slot number may be negative! */
        /* NYI: add IR to guard that it's still aliasing the same slot. */
        if (val == 0) {
          return getslot(J, slot);
        } else {
          J->base[slot] = val;
          if (slot >= (int32_t)J->maxslot) J->maxslot = (BCReg)(slot+1);
          return 0;
        }
      }
    }
    uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv));
  } else {
    needbarrier = 1;  /* Closed upvalue is a separate GC object. */
    uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv));
  }
  if (val == 0) {  /* Upvalue load */
    IRType t = itype2irt(uvval(uvp));
    TRef res = emitir(IRTG(IR_ULOAD, t), uref, 0);
    if (irtype_ispri(t)) res = TREF_PRI(t);  /* Canonicalize primitive refs. */
    return res;
  } else {  /* Upvalue store. */
    /* Convert int to number before storing. */
    if (!LJ_DUALNUM && tref_isinteger(val))
      val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
    emitir(IRT(IR_USTORE, tref_type(val)), uref, val);
    if (needbarrier && tref_isgcv(val))
      emitir(IRT(IR_OBAR, IRT_NIL), uref, val);
    J->needsnap = 1;
    return 0;
  }
}
1310 | |||
1311 | /* -- Record calls to Lua functions --------------------------------------- */ | ||
1312 | |||
/* Check unroll limits for calls.
** Counts active frames sharing the current function's prototype. At the
** trace-start PC an over-limit count stops the trace as tail- or
** up-recursion; elsewhere it aborts recording with CUNROLL, first flushing
** a return-only link trace (lnk) to enable a recursion retry.
*/
static void check_call_unroll(jit_State *J, TraceNo lnk)
{
  cTValue *frame = J->L->base - 1;
  void *pc = mref(frame_func(frame)->l.pc, void);
  int32_t depth = J->framedepth;
  int32_t count = 0;
  if ((J->pt->flags & PROTO_VARARG)) depth--;  /* Vararg frame still missing. */
  for (; depth > 0; depth--) {  /* Count frames with same prototype. */
    frame = frame_prev(frame);
    if (mref(frame_func(frame)->l.pc, void) == pc)
      count++;
  }
  if (J->pc == J->startpc) {
    if (count + J->tailcalled > J->param[JIT_P_recunroll]) {
      J->pc++;
      if (J->framedepth + J->retdepth == 0)
        rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno);  /* Tail-recursion. */
      else
        rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno);  /* Up-recursion. */
    }
  } else {
    if (count > J->param[JIT_P_callunroll]) {
      if (lnk) {  /* Possible tail- or up-recursion. */
        lj_trace_flush(J, lnk);  /* Flush trace that only returns. */
        /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */
        hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4));
      }
      lj_trace_err(J, LJ_TRERR_CUNROLL);
    }
  }
}
1345 | |||
1346 | /* Record Lua function setup. */ | ||
1347 | static void rec_func_setup(jit_State *J) | ||
1348 | { | ||
1349 | GCproto *pt = J->pt; | ||
1350 | BCReg s, numparams = pt->numparams; | ||
1351 | if ((pt->flags & PROTO_NOJIT)) | ||
1352 | lj_trace_err(J, LJ_TRERR_CJITOFF); | ||
1353 | if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS) | ||
1354 | lj_trace_err(J, LJ_TRERR_STACKOV); | ||
1355 | /* Fill up missing parameters with nil. */ | ||
1356 | for (s = J->maxslot; s < numparams; s++) | ||
1357 | J->base[s] = TREF_NIL; | ||
1358 | /* The remaining slots should never be read before they are written. */ | ||
1359 | J->maxslot = numparams; | ||
1360 | } | ||
1361 | |||
/* Record Lua vararg function setup.
** Builds the extra vararg frame: copies the function and the fixed
** arguments up into the new frame, nils out their original slots, and
** adjusts the trace base/frame bookkeeping accordingly.
*/
static void rec_func_vararg(jit_State *J)
{
  GCproto *pt = J->pt;
  BCReg s, fixargs, vframe = J->maxslot+1;
  lua_assert((pt->flags & PROTO_VARARG));
  if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)
    lj_trace_err(J, LJ_TRERR_STACKOV);
  J->base[vframe-1] = J->base[-1];  /* Copy function up. */
  /* Copy fixarg slots up and set their original slots to nil. */
  fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot;
  for (s = 0; s < fixargs; s++) {
    J->base[vframe+s] = J->base[s];
    J->base[s] = TREF_NIL;
  }
  J->maxslot = fixargs;
  J->framedepth++;
  J->base += vframe;
  J->baseslot += vframe;
}
1382 | |||
/* Record entry to a Lua function.
** Common setup plus recursion unroll check (no link trace known here,
** hence lnk == 0).
*/
static void rec_func_lua(jit_State *J)
{
  rec_func_setup(J);
  check_call_unroll(J, 0);
}
1389 | |||
/* Record entry to an already compiled function.
** If the target trace merely returns to the interpreter, its JFUNC*
** instruction is temporarily unpatched (saved in J->patchins/J->patchpc)
** so recording can continue across the call. Otherwise the current trace
** is stopped and linked: to itself for tail-recursion, or to lnk.
*/
static void rec_func_jit(jit_State *J, TraceNo lnk)
{
  GCtrace *T;
  rec_func_setup(J);
  T = traceref(J, lnk);
  if (T->linktype == LJ_TRLINK_RETURN) {  /* Trace returns to interpreter? */
    check_call_unroll(J, lnk);
    /* Temporarily unpatch JFUNC* to continue recording across function. */
    J->patchins = *J->pc;
    J->patchpc = (BCIns *)J->pc;
    *J->patchpc = T->startins;
    return;
  }
  J->instunroll = 0;  /* Cannot continue across a compiled function. */
  if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
    rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno);  /* Extra tail-recursion. */
  else
    rec_stop(J, LJ_TRLINK_ROOT, lnk);  /* Link to the function. */
}
1410 | |||
1411 | /* -- Vararg handling ----------------------------------------------------- */ | ||
1412 | |||
1413 | /* Detect y = select(x, ...) idiom. */ | ||
1414 | static int select_detect(jit_State *J) | ||
1415 | { | ||
1416 | BCIns ins = J->pc[1]; | ||
1417 | if (bc_op(ins) == BC_CALLM && bc_b(ins) == 2 && bc_c(ins) == 1) { | ||
1418 | cTValue *func = &J->L->base[bc_a(ins)]; | ||
1419 | if (tvisfunc(func) && funcV(func)->c.ffid == FF_select) | ||
1420 | return 1; | ||
1421 | } | ||
1422 | return 0; | ||
1423 | } | ||
1424 | |||
1425 | /* Record vararg instruction. */ | ||
1426 | static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults) | ||
1427 | { | ||
1428 | int32_t numparams = J->pt->numparams; | ||
1429 | ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1; | ||
1430 | lua_assert(frame_isvarg(J->L->base-1)); | ||
1431 | if (J->framedepth > 0) { /* Simple case: varargs defined on-trace. */ | ||
1432 | ptrdiff_t i; | ||
1433 | if (nvararg < 0) nvararg = 0; | ||
1434 | if (nresults == -1) { | ||
1435 | nresults = nvararg; | ||
1436 | J->maxslot = dst + (BCReg)nvararg; | ||
1437 | } else if (dst + nresults > J->maxslot) { | ||
1438 | J->maxslot = dst + (BCReg)nresults; | ||
1439 | } | ||
1440 | for (i = 0; i < nresults; i++) { | ||
1441 | J->base[dst+i] = i < nvararg ? J->base[i - nvararg - 1] : TREF_NIL; | ||
1442 | lua_assert(J->base[dst+i] != 0); | ||
1443 | } | ||
1444 | } else { /* Unknown number of varargs passed to trace. */ | ||
1445 | TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME); | ||
1446 | int32_t frofs = 8*(1+numparams)+FRAME_VARG; | ||
1447 | if (nresults >= 0) { /* Known fixed number of results. */ | ||
1448 | ptrdiff_t i; | ||
1449 | if (nvararg > 0) { | ||
1450 | ptrdiff_t nload = nvararg >= nresults ? nresults : nvararg; | ||
1451 | TRef vbase; | ||
1452 | if (nvararg >= nresults) | ||
1453 | emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults)); | ||
1454 | else | ||
1455 | emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1))); | ||
1456 | vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); | ||
1457 | vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); | ||
1458 | for (i = 0; i < nload; i++) { | ||
1459 | IRType t = itype2irt(&J->L->base[i-1-nvararg]); | ||
1460 | TRef aref = emitir(IRT(IR_AREF, IRT_P32), | ||
1461 | vbase, lj_ir_kint(J, (int32_t)i)); | ||
1462 | TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0); | ||
1463 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ | ||
1464 | J->base[dst+i] = tr; | ||
1465 | } | ||
1466 | } else { | ||
1467 | emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs)); | ||
1468 | nvararg = 0; | ||
1469 | } | ||
1470 | for (i = nvararg; i < nresults; i++) | ||
1471 | J->base[dst+i] = TREF_NIL; | ||
1472 | if (dst + (BCReg)nresults > J->maxslot) | ||
1473 | J->maxslot = dst + (BCReg)nresults; | ||
1474 | } else if (select_detect(J)) { /* y = select(x, ...) */ | ||
1475 | TRef tridx = J->base[dst-1]; | ||
1476 | TRef tr = TREF_NIL; | ||
1477 | ptrdiff_t idx = lj_ffrecord_select_mode(J, tridx, &J->L->base[dst-1]); | ||
1478 | if (idx < 0) goto nyivarg; | ||
1479 | if (idx != 0 && !tref_isinteger(tridx)) | ||
1480 | tridx = emitir(IRTGI(IR_CONV), tridx, IRCONV_INT_NUM|IRCONV_INDEX); | ||
1481 | if (idx != 0 && tref_isk(tridx)) { | ||
1482 | emitir(IRTGI(idx <= nvararg ? IR_GE : IR_LT), | ||
1483 | fr, lj_ir_kint(J, frofs+8*(int32_t)idx)); | ||
1484 | frofs -= 8; /* Bias for 1-based index. */ | ||
1485 | } else if (idx <= nvararg) { /* Compute size. */ | ||
1486 | TRef tmp = emitir(IRTI(IR_ADD), fr, lj_ir_kint(J, -frofs)); | ||
1487 | if (numparams) | ||
1488 | emitir(IRTGI(IR_GE), tmp, lj_ir_kint(J, 0)); | ||
1489 | tr = emitir(IRTI(IR_BSHR), tmp, lj_ir_kint(J, 3)); | ||
1490 | if (idx != 0) { | ||
1491 | tridx = emitir(IRTI(IR_ADD), tridx, lj_ir_kint(J, -1)); | ||
1492 | rec_idx_abc(J, tr, tridx, (uint32_t)nvararg); | ||
1493 | } | ||
1494 | } else { | ||
1495 | TRef tmp = lj_ir_kint(J, frofs); | ||
1496 | if (idx != 0) { | ||
1497 | TRef tmp2 = emitir(IRTI(IR_BSHL), tridx, lj_ir_kint(J, 3)); | ||
1498 | tmp = emitir(IRTI(IR_ADD), tmp2, tmp); | ||
1499 | } else { | ||
1500 | tr = lj_ir_kint(J, 0); | ||
1501 | } | ||
1502 | emitir(IRTGI(IR_LT), fr, tmp); | ||
1503 | } | ||
1504 | if (idx != 0 && idx <= nvararg) { | ||
1505 | IRType t; | ||
1506 | TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr); | ||
1507 | vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8)); | ||
1508 | t = itype2irt(&J->L->base[idx-2-nvararg]); | ||
1509 | aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx); | ||
1510 | tr = emitir(IRTG(IR_VLOAD, t), aref, 0); | ||
1511 | if (irtype_ispri(t)) tr = TREF_PRI(t); /* Canonicalize primitives. */ | ||
1512 | } | ||
1513 | J->base[dst-2] = tr; | ||
1514 | J->maxslot = dst-1; | ||
1515 | J->bcskip = 2; /* Skip CALLM + select. */ | ||
1516 | } else { | ||
1517 | nyivarg: | ||
1518 | setintV(&J->errinfo, BC_VARG); | ||
1519 | lj_trace_err_info(J, LJ_TRERR_NYIBC); | ||
1520 | } | ||
1521 | } | ||
1522 | } | ||
1523 | |||
1524 | /* -- Record allocations -------------------------------------------------- */ | ||
1525 | |||
1526 | static TRef rec_tnew(jit_State *J, uint32_t ah) | ||
1527 | { | ||
1528 | uint32_t asize = ah & 0x7ff; | ||
1529 | uint32_t hbits = ah >> 11; | ||
1530 | if (asize == 0x7ff) asize = 0x801; | ||
1531 | return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits); | ||
1532 | } | ||
1533 | |||
1534 | /* -- Record bytecode ops ------------------------------------------------- */ | ||
1535 | |||
1536 | /* Prepare for comparison. */ | ||
1537 | static void rec_comp_prep(jit_State *J) | ||
1538 | { | ||
1539 | /* Prevent merging with snapshot #0 (GC exit) since we fixup the PC. */ | ||
1540 | if (J->cur.nsnap == 1 && J->cur.snap[0].ref == J->cur.nins) | ||
1541 | emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); | ||
1542 | lj_snap_add(J); | ||
1543 | } | ||
1544 | |||
1545 | /* Fixup comparison. */ | ||
1546 | static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond) | ||
1547 | { | ||
1548 | BCIns jmpins = pc[1]; | ||
1549 | const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0); | ||
1550 | SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | ||
1551 | /* Set PC to opposite target to avoid re-recording the comp. in side trace. */ | ||
1552 | J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc); | ||
1553 | J->needsnap = 1; | ||
1554 | if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins); | ||
1555 | lj_snap_shrink(J); /* Shrink last snapshot if possible. */ | ||
1556 | } | ||
1557 | |||
/* Record the next bytecode instruction (_before_ it's executed).
** Main dispatch of the trace recorder: decodes the operands of the bytecode
** at J->pc, mirrors the interpreter's semantics as SSA IR specialized to the
** runtime values in J->L->base, and updates the shadow stack J->base and
** the high-water mark J->maxslot accordingly.
*/
void lj_record_ins(jit_State *J)
{
  cTValue *lbase;
  RecordIndex ix;
  const BCIns *pc;
  BCIns ins;
  BCOp op;
  TRef ra, rb, rc;

  /* Perform post-processing action before recording the next instruction. */
  if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) {
    switch (J->postproc) {
    case LJ_POST_FIXCOMP:  /* Fixup comparison. */
      pc = frame_pc(&J2G(J)->tmptv);
      rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1)));
      /* fallthrough */
    case LJ_POST_FIXGUARD:  /* Fixup and emit pending guard. */
    case LJ_POST_FIXGUARDSNAP:  /* Fixup and emit pending guard and snapshot. */
      if (!tvistruecond(&J2G(J)->tmptv2)) {
        J->fold.ins.o ^= 1;  /* Flip guard to opposite. */
        if (J->postproc == LJ_POST_FIXGUARDSNAP) {
          SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
          J->cur.snapmap[snap->mapofs+snap->nent-1]--;  /* False -> true. */
        }
      }
      lj_opt_fold(J);  /* Emit pending guard. */
      /* fallthrough */
    case LJ_POST_FIXBOOL:
      if (!tvistruecond(&J2G(J)->tmptv2)) {
        BCReg s;
        for (s = 0; s < J->maxslot; s++)  /* Fixup stack slot (if any). */
          if (J->base[s] == TREF_TRUE && tvisfalse(&J->L->base[s])) {
            J->base[s] = TREF_FALSE;
            break;
          }
      }
      break;
    case LJ_POST_FFRETRY:  /* Suppress recording of retried fast function. */
      if (bc_op(*J->pc) >= BC__MAX)
        return;
      break;
    default: lua_assert(0); break;
    }
    J->postproc = LJ_POST_NONE;
  }

  /* Need snapshot before recording next bytecode (e.g. after a store). */
  if (J->needsnap) {
    J->needsnap = 0;
    lj_snap_purge(J);
    lj_snap_add(J);
    J->mergesnap = 1;
  }

  /* Skip some bytecodes. */
  if (LJ_UNLIKELY(J->bcskip > 0)) {
    J->bcskip--;
    return;
  }

  /* Record only closed loops for root traces. */
  pc = J->pc;
  if (J->framedepth == 0 &&
     (MSize)((char *)pc - (char *)J->bc_min) >= J->bc_extent)
    lj_trace_err(J, LJ_TRERR_LLEAVE);

#ifdef LUA_USE_ASSERT
  rec_check_slots(J);
  rec_check_ir(J);
#endif

  /* Keep a copy of the runtime values of var/num/str operands. */
#define rav (&ix.valv)
#define rbv (&ix.tabv)
#define rcv (&ix.keyv)

  /* Decode the A/B/C(/D) operands according to the op's operand modes.
  ** Variable operands are resolved to slot TRefs; constants become IR consts.
  */
  lbase = J->L->base;
  ins = *pc;
  op = bc_op(ins);
  ra = bc_a(ins);
  ix.val = 0;
  switch (bcmode_a(op)) {
  case BCMvar:
    copyTV(J->L, rav, &lbase[ra]); ix.val = ra = getslot(J, ra); break;
  default: break;  /* Handled later. */
  }
  rb = bc_b(ins);
  rc = bc_c(ins);
  switch (bcmode_b(op)) {
  case BCMnone: rb = 0; rc = bc_d(ins); break;  /* Upgrade rc to 'rd'. */
  case BCMvar:
    copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break;
  default: break;  /* Handled later. */
  }
  switch (bcmode_c(op)) {
  case BCMvar:
    copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
  case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
  case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc);
    copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) :
    lj_ir_knumint(J, numV(tv)); } break;
  case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc));
    setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break;
  default: break;  /* Handled later. */
  }

  switch (op) {

  /* -- Comparison ops ---------------------------------------------------- */

  case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
#if LJ_HASFFI
    if (tref_iscdata(ra) || tref_iscdata(rc)) {
      rec_mm_comp_cdata(J, &ix, op, ((int)op & 2) ? MM_le : MM_lt);
      break;
    }
#endif
    /* Emit nothing for two numeric or string consts. */
    if (!(tref_isk2(ra,rc) && tref_isnumber_str(ra) && tref_isnumber_str(rc))) {
      IRType ta = tref_isinteger(ra) ? IRT_INT : tref_type(ra);
      IRType tc = tref_isinteger(rc) ? IRT_INT : tref_type(rc);
      int irop;
      if (ta != tc) {
        /* Widen mixed number/int comparisons to number/number comparison. */
        if (ta == IRT_INT && tc == IRT_NUM) {
          ra = emitir(IRTN(IR_CONV), ra, IRCONV_NUM_INT);
          ta = IRT_NUM;
        } else if (ta == IRT_NUM && tc == IRT_INT) {
          rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
        } else if (!((ta == IRT_FALSE || ta == IRT_TRUE) &&
                     (tc == IRT_FALSE || tc == IRT_TRUE))) {
          break;  /* Interpreter will throw for two different types. */
        }
      }
      rec_comp_prep(J);
      /* Derive the IR comparison op from the bytecode op, then flip it to
      ** match the *runtime* outcome: the trace specializes on the taken side.
      */
      irop = (int)op - (int)BC_ISLT + (int)IR_LT;
      if (ta == IRT_NUM) {
        if ((irop & 1)) irop ^= 4;  /* ISGE/ISGT are unordered. */
        if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop))
          irop ^= 5;
      } else if (ta == IRT_INT) {
        if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop))
          irop ^= 1;
      } else if (ta == IRT_STR) {
        if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1;
        ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc);
        rc = lj_ir_kint(J, 0);
        ta = IRT_INT;
      } else {
        rec_mm_comp(J, &ix, (int)op);
        break;
      }
      emitir(IRTG(irop, ta), ra, rc);
      rec_comp_fixup(J, J->pc, ((int)op ^ irop) & 1);
    }
    break;

  case BC_ISEQV: case BC_ISNEV:
  case BC_ISEQS: case BC_ISNES:
  case BC_ISEQN: case BC_ISNEN:
  case BC_ISEQP: case BC_ISNEP:
#if LJ_HASFFI
    if (tref_iscdata(ra) || tref_iscdata(rc)) {
      rec_mm_comp_cdata(J, &ix, op, MM_eq);
      break;
    }
#endif
    /* Emit nothing for two non-table, non-udata consts. */
    if (!(tref_isk2(ra, rc) && !(tref_istab(ra) || tref_isudata(ra)))) {
      int diff;
      rec_comp_prep(J);
      diff = lj_record_objcmp(J, ra, rc, rav, rcv);
      if (diff == 1 && (tref_istab(ra) || tref_isudata(ra))) {
        /* Only check __eq if different, but the same type (table or udata). */
        rec_mm_equal(J, &ix, (int)op);
        break;
      }
      rec_comp_fixup(J, J->pc, ((int)op & 1) == !diff);
    }
    break;

  /* -- Unary test and copy ops ------------------------------------------- */

  case BC_ISTC: case BC_ISFC:
    if ((op & 1) == tref_istruecond(rc))
      rc = 0;  /* Don't store if condition is not true. */
    /* fallthrough */
  case BC_IST: case BC_ISF:  /* Type specialization suffices. */
    if (bc_a(pc[1]) < J->maxslot)
      J->maxslot = bc_a(pc[1]);  /* Shrink used slots. */
    break;

  /* -- Unary ops --------------------------------------------------------- */

  case BC_NOT:
    /* Type specialization already forces const result. */
    rc = tref_istruecond(rc) ? TREF_FALSE : TREF_TRUE;
    break;

  case BC_LEN:
    if (tref_isstr(rc))
      rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
#ifndef LUAJIT_ENABLE_LUA52COMPAT
    else if (tref_istab(rc))
      rc = lj_ir_call(J, IRCALL_lj_tab_len, rc);
#endif
    else
      rc = rec_mm_len(J, rc, rcv);
    break;

  /* -- Arithmetic ops ---------------------------------------------------- */

  case BC_UNM:
    if (tref_isnumber_str(rc)) {
      rc = lj_opt_narrow_unm(J, rc, rcv);
    } else {
      ix.tab = rc;
      copyTV(J->L, &ix.tabv, rcv);
      rc = rec_mm_arith(J, &ix, MM_unm);
    }
    break;

  case BC_ADDNV: case BC_SUBNV: case BC_MULNV: case BC_DIVNV: case BC_MODNV:
    /* Swap rb/rc and rbv/rcv. rav is temp. */
    ix.tab = rc; ix.key = rc = rb; rb = ix.tab;
    copyTV(J->L, rav, rbv);
    copyTV(J->L, rbv, rcv);
    copyTV(J->L, rcv, rav);
    if (op == BC_MODNV)
      goto recmod;
    /* fallthrough */
  case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN:
  case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: {
    MMS mm = bcmode_mm(op);
    if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
      rc = lj_opt_narrow_arith(J, rb, rc, rbv, rcv,
                               (int)mm - (int)MM_add + (int)IR_ADD);
    else
      rc = rec_mm_arith(J, &ix, mm);
    break;
  }

  case BC_MODVN: case BC_MODVV:
  recmod:
    if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
      rc = lj_opt_narrow_mod(J, rb, rc, rcv);
    else
      rc = rec_mm_arith(J, &ix, MM_mod);
    break;

  case BC_POW:
    if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
      rc = lj_opt_narrow_pow(J, lj_ir_tonum(J, rb), rc, rcv);
    else
      rc = rec_mm_arith(J, &ix, MM_pow);
    break;

  /* -- Constant and move ops --------------------------------------------- */

  case BC_MOV:
    /* Clear gap of method call to avoid resurrecting previous refs. */
    if (ra > J->maxslot) J->base[ra-1] = 0;
    break;
  case BC_KSTR: case BC_KNUM: case BC_KPRI:
    break;
  case BC_KSHORT:
    rc = lj_ir_kint(J, (int32_t)(int16_t)rc);  /* Sign-extend 16 bit const. */
    break;
  case BC_KNIL:
    while (ra <= rc)
      J->base[ra++] = TREF_NIL;
    if (rc >= J->maxslot) J->maxslot = rc+1;
    break;
#if LJ_HASFFI
  case BC_KCDATA:
    rc = lj_ir_kgc(J, proto_kgc(J->pt, ~(ptrdiff_t)rc), IRT_CDATA);
    break;
#endif

  /* -- Upvalue and function ops ------------------------------------------ */

  case BC_UGET:
    rc = rec_upvalue(J, rc, 0);
    break;
  case BC_USETV: case BC_USETS: case BC_USETN: case BC_USETP:
    rec_upvalue(J, ra, rc);
    break;

  /* -- Table ops --------------------------------------------------------- */

  case BC_GGET: case BC_GSET:
    settabV(J->L, &ix.tabv, tabref(J->fn->l.env));
    ix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), getcurrf(J), IRFL_FUNC_ENV);
    ix.idxchain = LJ_MAX_IDXCHAIN;
    rc = lj_record_idx(J, &ix);
    break;

  case BC_TGETB: case BC_TSETB:
    setintV(&ix.keyv, (int32_t)rc);
    ix.key = lj_ir_kint(J, (int32_t)rc);
    /* fallthrough */
  case BC_TGETV: case BC_TGETS: case BC_TSETV: case BC_TSETS:
    ix.idxchain = LJ_MAX_IDXCHAIN;
    rc = lj_record_idx(J, &ix);
    break;

  case BC_TNEW:
    rc = rec_tnew(J, rc);
    break;
  case BC_TDUP:
    rc = emitir(IRTG(IR_TDUP, IRT_TAB),
                lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0);
    break;

  /* -- Calls and vararg handling ----------------------------------------- */

  case BC_ITERC:
    J->base[ra] = getslot(J, ra-3);
    J->base[ra+1] = getslot(J, ra-2);
    J->base[ra+2] = getslot(J, ra-1);
    { /* Do the actual copy now because lj_record_call needs the values. */
      TValue *b = &J->L->base[ra];
      copyTV(J->L, b, b-3);
      copyTV(J->L, b+1, b-2);
      copyTV(J->L, b+2, b-1);
    }
    lj_record_call(J, ra, (ptrdiff_t)rc-1);
    break;

  /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */
  case BC_CALLM:
    rc = (BCReg)(J->L->top - J->L->base) - ra;
    /* fallthrough */
  case BC_CALL:
    lj_record_call(J, ra, (ptrdiff_t)rc-1);
    break;

  case BC_CALLMT:
    rc = (BCReg)(J->L->top - J->L->base) - ra;
    /* fallthrough */
  case BC_CALLT:
    lj_record_tailcall(J, ra, (ptrdiff_t)rc-1);
    break;

  case BC_VARG:
    rec_varg(J, ra, (ptrdiff_t)rb-1);
    break;

  /* -- Returns ----------------------------------------------------------- */

  case BC_RETM:
    /* L->top is set to L->base+ra+rc+NRESULTS-1, see lj_dispatch_ins(). */
    rc = (BCReg)(J->L->top - J->L->base) - ra + 1;
    /* fallthrough */
  case BC_RET: case BC_RET0: case BC_RET1:
    lj_record_ret(J, ra, (ptrdiff_t)rc-1);
    break;

  /* -- Loops and branches ------------------------------------------------ */

  case BC_FORI:
    if (rec_for(J, pc, 0) != LOOPEV_LEAVE)
      J->loopref = J->cur.nins;
    break;
  case BC_JFORI:
    lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL);
    if (rec_for(J, pc, 0) != LOOPEV_LEAVE)  /* Link to existing loop. */
      rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
    /* Continue tracing if the loop is not entered. */
    break;

  case BC_FORL:
    rec_loop_interp(J, pc, rec_for(J, pc+((ptrdiff_t)rc-BCBIAS_J), 1));
    break;
  case BC_ITERL:
    rec_loop_interp(J, pc, rec_iterl(J, *pc));
    break;
  case BC_LOOP:
    rec_loop_interp(J, pc, rec_loop(J, ra));
    break;

  case BC_JFORL:
    rec_loop_jit(J, rc, rec_for(J, pc+bc_j(traceref(J, rc)->startins), 1));
    break;
  case BC_JITERL:
    rec_loop_jit(J, rc, rec_iterl(J, traceref(J, rc)->startins));
    break;
  case BC_JLOOP:
    rec_loop_jit(J, rc, rec_loop(J, ra));
    break;

  case BC_IFORL:
  case BC_IITERL:
  case BC_ILOOP:
  case BC_IFUNCF:
  case BC_IFUNCV:
    lj_trace_err(J, LJ_TRERR_BLACKL);  /* Blacklisted bytecode: abort trace. */
    break;

  case BC_JMP:
    if (ra < J->maxslot)
      J->maxslot = ra;  /* Shrink used slots. */
    break;

  /* -- Function headers -------------------------------------------------- */

  case BC_FUNCF:
    rec_func_lua(J);
    break;
  case BC_JFUNCF:
    rec_func_jit(J, rc);
    break;

  case BC_FUNCV:
    rec_func_vararg(J);
    rec_func_lua(J);
    break;
  case BC_JFUNCV:
    lua_assert(0);  /* Cannot happen. No hotcall counting for varag funcs. */
    break;

  case BC_FUNCC:
  case BC_FUNCCW:
    lj_ffrecord_func(J);
    break;

  default:
    if (op >= BC__MAX) {
      lj_ffrecord_func(J);
      break;
    }
    /* fallthrough */
  case BC_ITERN:
  case BC_ISNEXT:
  case BC_CAT:
  case BC_UCLO:
  case BC_FNEW:
  case BC_TSETM:
    /* Not-yet-implemented bytecodes abort the trace with NYIBC. */
    setintV(&J->errinfo, (int32_t)op);
    lj_trace_err_info(J, LJ_TRERR_NYIBC);
    break;
  }

  /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
  if (bcmode_a(op) == BCMdst && rc) {
    J->base[ra] = rc;
    if (ra >= J->maxslot) J->maxslot = ra+1;
  }

#undef rav
#undef rbv
#undef rcv

  /* Limit the number of recorded IR instructions. */
  if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord])
    lj_trace_err(J, LJ_TRERR_TRACEOV);
}
2016 | |||
2017 | /* -- Recording setup ----------------------------------------------------- */ | ||
2018 | |||
2019 | /* Setup recording for a root trace started by a hot loop. */ | ||
2020 | static const BCIns *rec_setup_root(jit_State *J) | ||
2021 | { | ||
2022 | /* Determine the next PC and the bytecode range for the loop. */ | ||
2023 | const BCIns *pcj, *pc = J->pc; | ||
2024 | BCIns ins = *pc; | ||
2025 | BCReg ra = bc_a(ins); | ||
2026 | switch (bc_op(ins)) { | ||
2027 | case BC_FORL: | ||
2028 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); | ||
2029 | pc += 1+bc_j(ins); | ||
2030 | J->bc_min = pc; | ||
2031 | break; | ||
2032 | case BC_ITERL: | ||
2033 | lua_assert(bc_op(pc[-1]) == BC_ITERC); | ||
2034 | J->maxslot = ra + bc_b(pc[-1]) - 1; | ||
2035 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); | ||
2036 | pc += 1+bc_j(ins); | ||
2037 | lua_assert(bc_op(pc[-1]) == BC_JMP); | ||
2038 | J->bc_min = pc; | ||
2039 | break; | ||
2040 | case BC_LOOP: | ||
2041 | /* Only check BC range for real loops, but not for "repeat until true". */ | ||
2042 | pcj = pc + bc_j(ins); | ||
2043 | ins = *pcj; | ||
2044 | if (bc_op(ins) == BC_JMP && bc_j(ins) < 0) { | ||
2045 | J->bc_min = pcj+1 + bc_j(ins); | ||
2046 | J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns); | ||
2047 | } | ||
2048 | J->maxslot = ra; | ||
2049 | pc++; | ||
2050 | break; | ||
2051 | case BC_RET: | ||
2052 | case BC_RET0: | ||
2053 | case BC_RET1: | ||
2054 | /* No bytecode range check for down-recursive root traces. */ | ||
2055 | J->maxslot = ra + bc_d(ins); | ||
2056 | break; | ||
2057 | case BC_FUNCF: | ||
2058 | /* No bytecode range check for root traces started by a hot call. */ | ||
2059 | J->maxslot = J->pt->numparams; | ||
2060 | pc++; | ||
2061 | break; | ||
2062 | default: | ||
2063 | lua_assert(0); | ||
2064 | break; | ||
2065 | } | ||
2066 | return pc; | ||
2067 | } | ||
2068 | |||
/* Setup recording for a side trace.
** Re-materializes the stack slots captured in the parent trace's exit
** snapshot (T->snap[J->exitno]), so the side trace starts with the same
** typed slot contents the parent had at the exit.
*/
static void rec_setup_side(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRRef ref = snap_ref(sn);
    BCReg s = snap_slot(sn);
    IRIns *ir = &T->ir[ref];
    IRType t = irt_type(ir->t);
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref)) {
      MSize j;
      for (j = 0; j < n; j++)
        if (snap_ref(map[j]) == ref) {
          tr = J->slot[snap_slot(map[j])];  /* Reuse already-emitted ref. */
          goto setslot;
        }
    }
    bloomset(seen, ref);
    switch ((IROp)ir->o) {
    /* Only have to deal with constants that can occur in stack slots. */
    case IR_KPRI: tr = TREF_PRI(t); break;
    case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
    case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
    case IR_KNUM: tr = lj_ir_k64(J, IR_KNUM, ir_knum(ir)); break;
    case IR_KINT64: tr = lj_ir_k64(J, IR_KINT64, ir_kint64(ir)); break;
    case IR_KPTR: tr = lj_ir_kptr(J, ir_kptr(ir)); break;  /* Continuation. */
    /* Inherited SLOADs don't need a guard or type check. */
    case IR_SLOAD:
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      tr = emitir_raw(IRT(IR_SLOAD, t), s,
            (ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
      break;
    /* Parent refs are already typed and don't need a guard. */
    default:
      if (LJ_SOFTFP && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      tr = emitir_raw(IRT(IR_SLOAD, t), s, IRSLOAD_INHERIT|IRSLOAD_PARENT);
      break;
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && s);
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;  /* Track the innermost frame's base slot. */
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
}
2125 | |||
/* Setup for recording a new trace.
** Resets all per-trace recorder state, seeds the fixed IR references and
** then branches on J->parent: side traces inherit the parent exit snapshot,
** root traces compute their starting PC and bytecode range.
*/
void lj_record_setup(jit_State *J)
{
  uint32_t i;

  /* Initialize state related to current trace. */
  memset(J->slot, 0, sizeof(J->slot));
  memset(J->chain, 0, sizeof(J->chain));
  memset(J->bpropcache, 0, sizeof(J->bpropcache));
  J->scev.idx = REF_NIL;

  J->baseslot = 1;  /* Invoking function is at base[-1]. */
  J->base = J->slot + J->baseslot;
  J->maxslot = 0;
  J->framedepth = 0;
  J->retdepth = 0;

  J->instunroll = J->param[JIT_P_instunroll];
  J->loopunroll = J->param[JIT_P_loopunroll];
  J->tailcalled = 0;
  J->loopref = 0;

  J->bc_min = NULL;  /* Means no limit. */
  J->bc_extent = ~(MSize)0;

  /* Emit instructions for fixed references. Also triggers initial IR alloc. */
  emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno);
  /* Pre-seed the KPRI constants nil/false/true at the fixed refs REF_NIL-i. */
  for (i = 0; i <= 2; i++) {
    IRIns *ir = IR(REF_NIL-i);
    ir->i = 0;
    ir->t.irt = (uint8_t)(IRT_NIL+i);
    ir->o = IR_KPRI;
    ir->prev = 0;
  }
  J->cur.nk = REF_TRUE;  /* Constant section starts just below REF_TRUE. */

  J->startpc = J->pc;
  setmref(J->cur.startpc, J->pc);
  if (J->parent) {  /* Side trace. */
    GCtrace *T = traceref(J, J->parent);
    TraceNo root = T->root ? T->root : J->parent;
    J->cur.root = (uint16_t)root;
    J->cur.startins = BCINS_AD(BC_JMP, 0, 0);
    /* Check whether we could at least potentially form an extra loop. */
    if (J->exitno == 0 && T->snap[0].nent == 0) {
      /* We can narrow a FORL for some side traces, too. */
      if (J->pc > proto_bc(J->pt) && bc_op(J->pc[-1]) == BC_JFORI &&
          bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) {
        lj_snap_add(J);
        rec_for_loop(J, J->pc-1, &J->scev, 1);
        goto sidecheck;  /* Skip snapshot inheritance for this case. */
      }
    } else {
      J->startpc = NULL;  /* Prevent forming an extra loop. */
    }
    rec_setup_side(J, T);
  sidecheck:
    /* Give up if the parent has too many side traces or this exit stays
    ** hot despite repeated attempts: link straight back to the interpreter.
    */
    if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
        T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
                                    J->param[JIT_P_tryside]) {
      rec_stop(J, LJ_TRLINK_INTERP, 0);
    }
  } else {  /* Root trace. */
    J->cur.root = 0;
    J->cur.startins = *J->pc;
    J->pc = rec_setup_root(J);
    /* Note: the loop instruction itself is recorded at the end and not
    ** at the start! So snapshot #0 needs to point to the *next* instruction.
    */
    lj_snap_add(J);
    if (bc_op(J->cur.startins) == BC_FORL)
      rec_for_loop(J, J->pc-1, &J->scev, 1);
    if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
      lj_trace_err(J, LJ_TRERR_STACKOV);
  }
#ifdef LUAJIT_ENABLE_CHECKHOOK
  /* Regularly check for instruction/line hooks from compiled code and
  ** exit to the interpreter if the hooks are set.
  **
  ** This is a compile-time option and disabled by default, since the
  ** hook checks may be quite expensive in tight loops.
  **
  ** Note this is only useful if hooks are *not* set most of the time.
  ** Use this only if you want to *asynchronously* interrupt the execution.
  **
  ** You can set the instruction hook via lua_sethook() with a count of 1
  ** from a signal handler or another native thread. Please have a look
  ** at the first few functions in luajit.c for an example (Ctrl-C handler).
  */
  {
    TRef tr = emitir(IRT(IR_XLOAD, IRT_U8),
                     lj_ir_kptr(J, &J2G(J)->hookmask), IRXLOAD_VOLATILE);
    tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (LUA_MASKLINE|LUA_MASKCOUNT)));
    emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, 0));
  }
#endif
}
2223 | |||
2224 | #undef IR | ||
2225 | #undef emitir_raw | ||
2226 | #undef emitir | ||
2227 | |||
2228 | #endif | ||