path: root/libraries/luajit-2.0/src/lj_opt_fold.c
author    David Walter Seikel    2012-01-23 23:36:30 +1000
committer David Walter Seikel    2012-01-23 23:36:30 +1000
commit    6523585c66c04cea54df50013df8886b589847d8 (patch)
tree      0b22aee7064166d88595eda260ca2d17c0773da5 /libraries/luajit-2.0/src/lj_opt_fold.c
parent    Update the EFL to what I'm actually using, coz I'm using some stuff not yet r... (diff)
download  SledjHamr-6523585c66c04cea54df50013df8886b589847d8.zip
          SledjHamr-6523585c66c04cea54df50013df8886b589847d8.tar.gz
          SledjHamr-6523585c66c04cea54df50013df8886b589847d8.tar.bz2
          SledjHamr-6523585c66c04cea54df50013df8886b589847d8.tar.xz
Add luaproc and LuaJIT libraries.
Two versions of LuaJIT, the stable release, and the dev version. Try the dev version first, until it fails badly.
Diffstat (limited to '')
-rw-r--r--  libraries/luajit-2.0/src/lj_opt_fold.c  2218
1 file changed, 2218 insertions, 0 deletions
diff --git a/libraries/luajit-2.0/src/lj_opt_fold.c b/libraries/luajit-2.0/src/lj_opt_fold.c
new file mode 100644
index 0000000..e7d4f9c
--- /dev/null
+++ b/libraries/luajit-2.0/src/lj_opt_fold.c
@@ -0,0 +1,2218 @@
1/*
2** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
3** ABCelim: Array Bounds Check Elimination.
4** CSE: Common-Subexpression Elimination.
5** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
6*/
7
8#define lj_opt_fold_c
9#define LUA_CORE
10
11#include <math.h>
12
13#include "lj_obj.h"
14
15#if LJ_HASJIT
16
17#include "lj_str.h"
18#include "lj_tab.h"
19#include "lj_ir.h"
20#include "lj_jit.h"
21#include "lj_iropt.h"
22#include "lj_trace.h"
23#include "lj_carith.h"
24#include "lj_vm.h"
25
26/* Here's a short description of how the FOLD engine processes instructions:
27**
28** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
29** The instruction and its operands are used to select matching fold rules.
30** These are applied iteratively until a fixed point is reached.
31**
32** The 8 bit opcode of the instruction itself plus the opcodes of the
33** two instructions referenced by its operands form a 24 bit key
34** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
35**
36** This key is used for partial matching against the fold rules. The
37** left/right operand fields of the key are successively masked with
38** the 'any' wildcard, from most specific to least specific:
39**
40** ins left right
41** ins any right
42** ins left any
43** ins any any
44**
45** The masked key is used to look up a matching fold rule in a semi-perfect
46** hash table. If a matching rule is found, the related fold function is run.
47** Multiple rules can share the same fold function. A fold rule may return
48** one of several special values:
49**
50** - NEXTFOLD means no folding was applied, because an additional test
51** inside the fold function failed. Matching continues against less
52** specific fold rules. Finally the instruction is passed on to CSE.
53**
54** - RETRYFOLD means the instruction was modified in-place. Folding is
55** retried as if this instruction had just been received.
56**
57** All other return values are terminal actions -- no further folding is
58** applied:
59**
60** - INTFOLD(i) returns a reference to the integer constant i.
61**
62** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
63** without emitting an instruction.
64**
65** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
66** it without passing through any further optimizations.
67**
68** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
69** no result (e.g. guarded assertions): FAILFOLD means the guard would
70** always fail, i.e. the current trace is pointless. DROPFOLD means
71** the guard is always true and has been eliminated. CONDFOLD is a
72** shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
73**
74** - Any other return value is interpreted as an IRRef or TRef. This
75** can be a reference to an existing or a newly created instruction.
76** Only the least-significant 16 bits (IRRef1) are used to form a TRef
77** which is finally returned to the caller.
78**
79** The FOLD engine receives instructions both from the trace recorder and
80** substituted instructions from LOOP unrolling. This means all types
81** of instructions may end up here, even though the recorder bypasses
82** FOLD in some cases. Thus all loads, stores and allocations must have
83** an any/any rule to avoid being passed on to CSE.
84**
85** Carefully read the following requirements before adding or modifying
86** any fold rules:
87**
88** Requirement #1: All fold rules must preserve their destination type.
89**
90** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
91** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
92**
93** Requirement #2: Fold rules should not create *new* instructions which
94** reference operands *across* PHIs.
95**
96** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
97** left operand is a PHI. Then fleft->op1 would point across the PHI
98** frontier to an invariant instruction. Adding a PHI for this instruction
99** would be counterproductive. The solution is to add a barrier which
100** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
101** The only exception is for recurrences with high latencies like
102** repeated int->num->int conversions.
103**
104** One could relax this condition a bit if the referenced instruction is
105** a PHI, too. But this often leads to worse code due to excessive
106** register shuffling.
107**
108** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
109** Even returning fleft->op1 would be ok, because a new PHI will be added,
110** if needed. But again, this leads to excessive register shuffling and
111** should be avoided.
112**
113** Requirement #3: The set of all fold rules must be monotonic to guarantee
114** termination.
115**
116** The goal is optimization, so one primarily wants to add strength-reducing
117** rules. This means eliminating an instruction or replacing an instruction
118** with one or more simpler instructions. Don't add fold rules which point
119** in the other direction.
120**
121** Some rules (like commutativity) do not directly reduce the strength of
122** an instruction, but enable other fold rules (e.g. by moving constants
123** to the right operand). These rules must be made unidirectional to avoid
124** cycles.
125**
126** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
127*/
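/* Editor's illustration -- a hedged sketch, not part of the original file and
** not the actual key/table layout used by the FOLD engine below. It only
** shows the idea described above: pack the three 8 bit opcodes into a 24 bit
** key and probe it with progressively wider wildcard masks.
*/
#if 0
static uint32_t example_fold_key(uint8_t ins, uint8_t left, uint8_t right)
{
  return ((uint32_t)ins << 16) | ((uint32_t)left << 8) | (uint32_t)right;
}

/* Probe order, from most to least specific ('any' = masked-out operand). */
static const uint32_t example_fold_masks[4] = {
  0x00ffffff,  /* ins left right */
  0x00ff00ff,  /* ins any  right */
  0x00ffff00,  /* ins left any   */
  0x00ff0000   /* ins any  any   */
};
#endif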
128
129/* Some local macros to save typing. Undef'd at the end. */
130#define IR(ref) (&J->cur.ir[(ref)])
131#define fins (&J->fold.ins)
132#define fleft (&J->fold.left)
133#define fright (&J->fold.right)
134#define knumleft (ir_knum(fleft)->n)
135#define knumright (ir_knum(fright)->n)
136
137/* Pass IR on to next optimization in chain (FOLD). */
138#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
139
140/* Fold function type. Fastcall on x86 significantly reduces their size. */
141typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);
142
143/* Macros for the fold specs, so buildvm can recognize them. */
144#define LJFOLD(x)
145#define LJFOLDX(x)
146#define LJFOLDF(name) static TRef LJ_FASTCALL fold_##name(jit_State *J)
147/* Note: They must be at the start of a line or buildvm ignores them! */
148
149/* Barrier to prevent using operands across PHIs. */
150#define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD
151
152/* Barrier to prevent folding across a GC step.
153** GC steps can only happen at the head of a trace and at LOOP.
154** And the GC is only driven forward if there is at least one allocation.
155*/
156#define gcstep_barrier(J, ref) \
157 ((ref) < J->chain[IR_LOOP] && \
158 (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
159 J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
160 J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))
161
162/* -- Constant folding for FP numbers ------------------------------------- */
163
164LJFOLD(ADD KNUM KNUM)
165LJFOLD(SUB KNUM KNUM)
166LJFOLD(MUL KNUM KNUM)
167LJFOLD(DIV KNUM KNUM)
168LJFOLD(NEG KNUM KNUM)
169LJFOLD(ABS KNUM KNUM)
170LJFOLD(ATAN2 KNUM KNUM)
171LJFOLD(LDEXP KNUM KNUM)
172LJFOLD(MIN KNUM KNUM)
173LJFOLD(MAX KNUM KNUM)
174LJFOLDF(kfold_numarith)
175{
176 lua_Number a = knumleft;
177 lua_Number b = knumright;
178 lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
179 return lj_ir_knum(J, y);
180}
181
182LJFOLD(LDEXP KNUM KINT)
183LJFOLDF(kfold_ldexp)
184{
185#if LJ_TARGET_X86ORX64
186 UNUSED(J);
187 return NEXTFOLD;
188#else
189 return lj_ir_knum(J, ldexp(knumleft, fright->i));
190#endif
191}
192
193LJFOLD(FPMATH KNUM any)
194LJFOLDF(kfold_fpmath)
195{
196 lua_Number a = knumleft;
197 lua_Number y = lj_vm_foldfpm(a, fins->op2);
198 return lj_ir_knum(J, y);
199}
200
201LJFOLD(POW KNUM KINT)
202LJFOLDF(kfold_numpow)
203{
204 lua_Number a = knumleft;
205 lua_Number b = (lua_Number)fright->i;
206 lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
207 return lj_ir_knum(J, y);
208}
209
210/* Must not use kfold_kref for numbers (could be NaN). */
211LJFOLD(EQ KNUM KNUM)
212LJFOLD(NE KNUM KNUM)
213LJFOLD(LT KNUM KNUM)
214LJFOLD(GE KNUM KNUM)
215LJFOLD(LE KNUM KNUM)
216LJFOLD(GT KNUM KNUM)
217LJFOLD(ULT KNUM KNUM)
218LJFOLD(UGE KNUM KNUM)
219LJFOLD(ULE KNUM KNUM)
220LJFOLD(UGT KNUM KNUM)
221LJFOLDF(kfold_numcomp)
222{
223 return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
224}
225
226/* -- Constant folding for 32 bit integers -------------------------------- */
227
228static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
229{
230 switch (op) {
231 case IR_ADD: k1 += k2; break;
232 case IR_SUB: k1 -= k2; break;
233 case IR_MUL: k1 *= k2; break;
234 case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
235 case IR_NEG: k1 = -k1; break;
236 case IR_BAND: k1 &= k2; break;
237 case IR_BOR: k1 |= k2; break;
238 case IR_BXOR: k1 ^= k2; break;
239 case IR_BSHL: k1 <<= (k2 & 31); break;
240 case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
241 case IR_BSAR: k1 >>= (k2 & 31); break;
242 case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
243 case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
244 case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
245 case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
246 default: lua_assert(0); break;
247 }
248 return k1;
249}
250
251LJFOLD(ADD KINT KINT)
252LJFOLD(SUB KINT KINT)
253LJFOLD(MUL KINT KINT)
254LJFOLD(MOD KINT KINT)
255LJFOLD(NEG KINT KINT)
256LJFOLD(BAND KINT KINT)
257LJFOLD(BOR KINT KINT)
258LJFOLD(BXOR KINT KINT)
259LJFOLD(BSHL KINT KINT)
260LJFOLD(BSHR KINT KINT)
261LJFOLD(BSAR KINT KINT)
262LJFOLD(BROL KINT KINT)
263LJFOLD(BROR KINT KINT)
264LJFOLD(MIN KINT KINT)
265LJFOLD(MAX KINT KINT)
266LJFOLDF(kfold_intarith)
267{
268 return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
269}
270
271LJFOLD(ADDOV KINT KINT)
272LJFOLD(SUBOV KINT KINT)
273LJFOLD(MULOV KINT KINT)
274LJFOLDF(kfold_intovarith)
275{
276 lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
277 fins->o - IR_ADDOV);
278 int32_t k = lj_num2int(n);
279 if (n != (lua_Number)k)
280 return FAILFOLD;
281 return INTFOLD(k);
282}
283
284LJFOLD(BNOT KINT)
285LJFOLDF(kfold_bnot)
286{
287 return INTFOLD(~fleft->i);
288}
289
290LJFOLD(BSWAP KINT)
291LJFOLDF(kfold_bswap)
292{
293 return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
294}
295
296LJFOLD(LT KINT KINT)
297LJFOLD(GE KINT KINT)
298LJFOLD(LE KINT KINT)
299LJFOLD(GT KINT KINT)
300LJFOLD(ULT KINT KINT)
301LJFOLD(UGE KINT KINT)
302LJFOLD(ULE KINT KINT)
303LJFOLD(UGT KINT KINT)
304LJFOLD(ABC KINT KINT)
305LJFOLDF(kfold_intcomp)
306{
307 int32_t a = fleft->i, b = fright->i;
308 switch ((IROp)fins->o) {
309 case IR_LT: return CONDFOLD(a < b);
310 case IR_GE: return CONDFOLD(a >= b);
311 case IR_LE: return CONDFOLD(a <= b);
312 case IR_GT: return CONDFOLD(a > b);
313 case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
314 case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
315 case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
316 case IR_ABC:
317 case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
318 default: lua_assert(0); return FAILFOLD;
319 }
320}
321
322LJFOLD(UGE any KINT)
323LJFOLDF(kfold_intcomp0)
324{
325 if (fright->i == 0)
326 return DROPFOLD;
327 return NEXTFOLD;
328}
329
330/* -- Constant folding for 64 bit integers -------------------------------- */
331
332static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
333{
334 switch (op) {
335#if LJ_64 || LJ_HASFFI
336 case IR_ADD: k1 += k2; break;
337 case IR_SUB: k1 -= k2; break;
338#endif
339#if LJ_HASFFI
340 case IR_MUL: k1 *= k2; break;
341 case IR_BAND: k1 &= k2; break;
342 case IR_BOR: k1 |= k2; break;
343 case IR_BXOR: k1 ^= k2; break;
344#endif
345 default: UNUSED(k2); lua_assert(0); break;
346 }
347 return k1;
348}
349
350LJFOLD(ADD KINT64 KINT64)
351LJFOLD(SUB KINT64 KINT64)
352LJFOLD(MUL KINT64 KINT64)
353LJFOLD(BAND KINT64 KINT64)
354LJFOLD(BOR KINT64 KINT64)
355LJFOLD(BXOR KINT64 KINT64)
356LJFOLDF(kfold_int64arith)
357{
358 return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
359 ir_k64(fright)->u64, (IROp)fins->o));
360}
361
362LJFOLD(DIV KINT64 KINT64)
363LJFOLD(MOD KINT64 KINT64)
364LJFOLD(POW KINT64 KINT64)
365LJFOLDF(kfold_int64arith2)
366{
367#if LJ_HASFFI
368 uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
369 if (irt_isi64(fins->t)) {
370 k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
371 fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
372 lj_carith_powi64((int64_t)k1, (int64_t)k2);
373 } else {
374 k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
375 fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
376 lj_carith_powu64(k1, k2);
377 }
378 return INT64FOLD(k1);
379#else
380 UNUSED(J); lua_assert(0); return FAILFOLD;
381#endif
382}
383
384LJFOLD(BSHL KINT64 KINT)
385LJFOLD(BSHR KINT64 KINT)
386LJFOLD(BSAR KINT64 KINT)
387LJFOLD(BROL KINT64 KINT)
388LJFOLD(BROR KINT64 KINT)
389LJFOLDF(kfold_int64shift)
390{
391#if LJ_HASFFI || LJ_64
392 uint64_t k = ir_k64(fleft)->u64;
393 int32_t sh = (fright->i & 63);
394 switch ((IROp)fins->o) {
395 case IR_BSHL: k <<= sh; break;
396#if LJ_HASFFI
397 case IR_BSHR: k >>= sh; break;
398 case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
399 case IR_BROL: k = lj_rol(k, sh); break;
400 case IR_BROR: k = lj_ror(k, sh); break;
401#endif
402 default: lua_assert(0); break;
403 }
404 return INT64FOLD(k);
405#else
406 UNUSED(J); lua_assert(0); return FAILFOLD;
407#endif
408}
409
410LJFOLD(BNOT KINT64)
411LJFOLDF(kfold_bnot64)
412{
413#if LJ_HASFFI
414 return INT64FOLD(~ir_k64(fleft)->u64);
415#else
416 UNUSED(J); lua_assert(0); return FAILFOLD;
417#endif
418}
419
420LJFOLD(BSWAP KINT64)
421LJFOLDF(kfold_bswap64)
422{
423#if LJ_HASFFI
424 return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
425#else
426 UNUSED(J); lua_assert(0); return FAILFOLD;
427#endif
428}
429
430LJFOLD(LT KINT64 KINT)
431LJFOLD(GE KINT64 KINT)
432LJFOLD(LE KINT64 KINT)
433LJFOLD(GT KINT64 KINT)
434LJFOLD(ULT KINT64 KINT)
435LJFOLD(UGE KINT64 KINT)
436LJFOLD(ULE KINT64 KINT)
437LJFOLD(UGT KINT64 KINT)
438LJFOLDF(kfold_int64comp)
439{
440#if LJ_HASFFI
441 uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64;
442 switch ((IROp)fins->o) {
443 case IR_LT: return CONDFOLD(a < b);
444 case IR_GE: return CONDFOLD(a >= b);
445 case IR_LE: return CONDFOLD(a <= b);
446 case IR_GT: return CONDFOLD(a > b);
447 case IR_ULT: return CONDFOLD((uint64_t)a < (uint64_t)b);
448 case IR_UGE: return CONDFOLD((uint64_t)a >= (uint64_t)b);
449 case IR_ULE: return CONDFOLD((uint64_t)a <= (uint64_t)b);
450 case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b);
451 default: lua_assert(0); return FAILFOLD;
452 }
453#else
454 UNUSED(J); lua_assert(0); return FAILFOLD;
455#endif
456}
457
458LJFOLD(UGE any KINT64)
459LJFOLDF(kfold_int64comp0)
460{
461#if LJ_HASFFI
462 if (ir_k64(fright)->u64 == 0)
463 return DROPFOLD;
464 return NEXTFOLD;
465#else
466 UNUSED(J); lua_assert(0); return FAILFOLD;
467#endif
468}
469
470/* -- Constant folding for strings ---------------------------------------- */
471
472LJFOLD(SNEW KKPTR KINT)
473LJFOLDF(kfold_snew_kptr)
474{
475 GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
476 return lj_ir_kstr(J, s);
477}
478
479LJFOLD(SNEW any KINT)
480LJFOLDF(kfold_snew_empty)
481{
482 if (fright->i == 0)
483 return lj_ir_kstr(J, &J2G(J)->strempty);
484 return NEXTFOLD;
485}
486
487LJFOLD(STRREF KGC KINT)
488LJFOLDF(kfold_strref)
489{
490 GCstr *str = ir_kstr(fleft);
491 lua_assert((MSize)fright->i < str->len);
492 return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
493}
494
495LJFOLD(STRREF SNEW any)
496LJFOLDF(kfold_strref_snew)
497{
498 PHIBARRIER(fleft);
499 if (irref_isk(fins->op2) && fright->i == 0) {
500 return fleft->op1; /* strref(snew(ptr, len), 0) ==> ptr */
501 } else {
502 /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
503 IRIns *ir = IR(fleft->op1);
504 IRRef1 str = ir->op1; /* IRIns * is not valid across emitir. */
505 lua_assert(ir->o == IR_STRREF);
506 PHIBARRIER(ir);
507 fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
508 fins->op1 = str;
509 fins->ot = IRT(IR_STRREF, IRT_P32);
510 return RETRYFOLD;
511 }
512 return NEXTFOLD;
513}
514
515LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
516LJFOLDF(kfold_strcmp)
517{
518 if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
519 GCstr *a = ir_kstr(IR(fleft->op1));
520 GCstr *b = ir_kstr(IR(fleft->op2));
521 return INTFOLD(lj_str_cmp(a, b));
522 }
523 return NEXTFOLD;
524}
525
526/* -- Constant folding of pointer arithmetic ------------------------------ */
527
528LJFOLD(ADD KGC KINT)
529LJFOLD(ADD KGC KINT64)
530LJFOLDF(kfold_add_kgc)
531{
532 GCobj *o = ir_kgc(fleft);
533#if LJ_64
534 ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
535#else
536 ptrdiff_t ofs = fright->i;
537#endif
538 return lj_ir_kkptr(J, (char *)o + ofs);
539}
540
541LJFOLD(ADD KPTR KINT)
542LJFOLD(ADD KPTR KINT64)
543LJFOLD(ADD KKPTR KINT)
544LJFOLD(ADD KKPTR KINT64)
545LJFOLDF(kfold_add_kptr)
546{
547 void *p = ir_kptr(fleft);
548#if LJ_64
549 ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
550#else
551 ptrdiff_t ofs = fright->i;
552#endif
553 return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
554}
555
556/* -- Constant folding of conversions ------------------------------------- */
557
558LJFOLD(TOBIT KNUM KNUM)
559LJFOLDF(kfold_tobit)
560{
561 return INTFOLD(lj_num2bit(knumleft));
562}
563
564LJFOLD(CONV KINT IRCONV_NUM_INT)
565LJFOLDF(kfold_conv_kint_num)
566{
567 return lj_ir_knum(J, (lua_Number)fleft->i);
568}
569
570LJFOLD(CONV KINT IRCONV_NUM_U32)
571LJFOLDF(kfold_conv_kintu32_num)
572{
573 return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
574}
575
576LJFOLD(CONV KINT IRCONV_I64_INT)
577LJFOLD(CONV KINT IRCONV_U64_INT)
578LJFOLDF(kfold_conv_kint_i64)
579{
580 if ((fins->op2 & IRCONV_SEXT))
581 return INT64FOLD((uint64_t)(int64_t)fleft->i);
582 else
583 return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
584}
585
586LJFOLD(CONV KINT64 IRCONV_NUM_I64)
587LJFOLDF(kfold_conv_kint64_num_i64)
588{
589 return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
590}
591
592LJFOLD(CONV KINT64 IRCONV_NUM_U64)
593LJFOLDF(kfold_conv_kint64_num_u64)
594{
595 return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
596}
597
598LJFOLD(CONV KINT64 IRCONV_INT_I64)
599LJFOLD(CONV KINT64 IRCONV_U32_I64)
600LJFOLDF(kfold_conv_kint64_int_i64)
601{
602 return INTFOLD((int32_t)ir_kint64(fleft)->u64);
603}
604
605LJFOLD(CONV KNUM IRCONV_INT_NUM)
606LJFOLDF(kfold_conv_knum_int_num)
607{
608 lua_Number n = knumleft;
609 if (!(fins->op2 & IRCONV_TRUNC)) {
610 int32_t k = lj_num2int(n);
611 if (irt_isguard(fins->t) && n != (lua_Number)k) {
612 /* We're about to create a guard which always fails, like CONV +1.5.
613 ** Some pathological loops cause this during LICM, e.g.:
614 ** local x,k,t = 0,1.5,{1,[1.5]=2}
615 ** for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
616 ** assert(x == 300)
617 */
618 return FAILFOLD;
619 }
620 return INTFOLD(k);
621 } else {
622 return INTFOLD((int32_t)n);
623 }
624}
625
626LJFOLD(CONV KNUM IRCONV_U32_NUM)
627LJFOLDF(kfold_conv_knum_u32_num)
628{
629 lua_assert((fins->op2 & IRCONV_TRUNC));
630 return INTFOLD((int32_t)(uint32_t)knumleft);
631}
632
633LJFOLD(CONV KNUM IRCONV_I64_NUM)
634LJFOLDF(kfold_conv_knum_i64_num)
635{
636 lua_assert((fins->op2 & IRCONV_TRUNC));
637 return INT64FOLD((uint64_t)(int64_t)knumleft);
638}
639
640LJFOLD(CONV KNUM IRCONV_U64_NUM)
641LJFOLDF(kfold_conv_knum_u64_num)
642{
643 lua_assert((fins->op2 & IRCONV_TRUNC));
644 return INT64FOLD(lj_num2u64(knumleft));
645}
646
647LJFOLD(TOSTR KNUM)
648LJFOLDF(kfold_tostr_knum)
649{
650 return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
651}
652
653LJFOLD(TOSTR KINT)
654LJFOLDF(kfold_tostr_kint)
655{
656 return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
657}
658
659LJFOLD(STRTO KGC)
660LJFOLDF(kfold_strto)
661{
662 TValue n;
663 if (lj_str_tonum(ir_kstr(fleft), &n))
664 return lj_ir_knum(J, numV(&n));
665 return FAILFOLD;
666}
667
668/* -- Constant folding of equality checks --------------------------------- */
669
670/* Don't constant-fold away FLOAD checks against KNULL. */
671LJFOLD(EQ FLOAD KNULL)
672LJFOLD(NE FLOAD KNULL)
673LJFOLDX(lj_opt_cse)
674
675/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
676LJFOLD(EQ any KNULL)
677LJFOLD(NE any KNULL)
678LJFOLD(EQ KNULL any)
679LJFOLD(NE KNULL any)
680LJFOLD(EQ KINT KINT) /* Constants are unique, so same refs <==> same value. */
681LJFOLD(NE KINT KINT)
682LJFOLD(EQ KINT64 KINT64)
683LJFOLD(NE KINT64 KINT64)
684LJFOLD(EQ KGC KGC)
685LJFOLD(NE KGC KGC)
686LJFOLDF(kfold_kref)
687{
688 return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
689}
690
691/* -- Algebraic shortcuts ------------------------------------------------- */
692
693LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
694LJFOLD(FPMATH FPMATH IRFPM_CEIL)
695LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
696LJFOLDF(shortcut_round)
697{
698 IRFPMathOp op = (IRFPMathOp)fleft->op2;
699 if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
700 return LEFTFOLD; /* round(round_left(x)) = round_left(x) */
701 return NEXTFOLD;
702}
703
704LJFOLD(ABS ABS KNUM)
705LJFOLDF(shortcut_left)
706{
707 return LEFTFOLD; /* f(g(x)) ==> g(x) */
708}
709
710LJFOLD(ABS NEG KNUM)
711LJFOLDF(shortcut_dropleft)
712{
713 PHIBARRIER(fleft);
714 fins->op1 = fleft->op1; /* abs(neg(x)) ==> abs(x) */
715 return RETRYFOLD;
716}
717
718/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
719LJFOLD(NEG NEG any)
720LJFOLD(BNOT BNOT)
721LJFOLD(BSWAP BSWAP)
722LJFOLDF(shortcut_leftleft)
723{
724 PHIBARRIER(fleft); /* See above. Fold would be ok, but not beneficial. */
725 return fleft->op1; /* f(g(x)) ==> x */
726}
727
728/* -- FP algebraic simplifications ---------------------------------------- */
729
730/* FP arithmetic is tricky -- there's not much to simplify.
731** Please note the following common pitfalls before sending "improvements":
732** x+0 ==> x is INVALID for x=-0
733** 0-x ==> -x is INVALID for x=+0
734** x*0 ==> 0 is INVALID for x=-0, x=+-Inf or x=NaN
735*/
736
737LJFOLD(ADD NEG any)
738LJFOLDF(simplify_numadd_negx)
739{
740 PHIBARRIER(fleft);
741 fins->o = IR_SUB; /* (-a) + b ==> b - a */
742 fins->op1 = fins->op2;
743 fins->op2 = fleft->op1;
744 return RETRYFOLD;
745}
746
747LJFOLD(ADD any NEG)
748LJFOLDF(simplify_numadd_xneg)
749{
750 PHIBARRIER(fright);
751 fins->o = IR_SUB; /* a + (-b) ==> a - b */
752 fins->op2 = fright->op1;
753 return RETRYFOLD;
754}
755
756LJFOLD(SUB any KNUM)
757LJFOLDF(simplify_numsub_k)
758{
759 lua_Number n = knumright;
760 if (n == 0.0) /* x - (+-0) ==> x */
761 return LEFTFOLD;
762 return NEXTFOLD;
763}
764
765LJFOLD(SUB NEG KNUM)
766LJFOLDF(simplify_numsub_negk)
767{
768 PHIBARRIER(fleft);
769 fins->op2 = fleft->op1; /* (-x) - k ==> (-k) - x */
770 fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
771 return RETRYFOLD;
772}
773
774LJFOLD(SUB any NEG)
775LJFOLDF(simplify_numsub_xneg)
776{
777 PHIBARRIER(fright);
778 fins->o = IR_ADD; /* a - (-b) ==> a + b */
779 fins->op2 = fright->op1;
780 return RETRYFOLD;
781}
782
783LJFOLD(MUL any KNUM)
784LJFOLD(DIV any KNUM)
785LJFOLDF(simplify_nummuldiv_k)
786{
787 lua_Number n = knumright;
788 if (n == 1.0) { /* x o 1 ==> x */
789 return LEFTFOLD;
790 } else if (n == -1.0) { /* x o -1 ==> -x */
791 fins->o = IR_NEG;
792 fins->op2 = (IRRef1)lj_ir_knum_neg(J);
793 return RETRYFOLD;
794 } else if (fins->o == IR_MUL && n == 2.0) { /* x * 2 ==> x + x */
795 fins->o = IR_ADD;
796 fins->op2 = fins->op1;
797 return RETRYFOLD;
798 }
799 return NEXTFOLD;
800}
801
802LJFOLD(MUL NEG KNUM)
803LJFOLD(DIV NEG KNUM)
804LJFOLDF(simplify_nummuldiv_negk)
805{
806 PHIBARRIER(fleft);
807 fins->op1 = fleft->op1; /* (-a) o k ==> a o (-k) */
808 fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
809 return RETRYFOLD;
810}
811
812LJFOLD(MUL NEG NEG)
813LJFOLD(DIV NEG NEG)
814LJFOLDF(simplify_nummuldiv_negneg)
815{
816 PHIBARRIER(fleft);
817 PHIBARRIER(fright);
818 fins->op1 = fleft->op1; /* (-a) o (-b) ==> a o b */
819 fins->op2 = fright->op1;
820 return RETRYFOLD;
821}
822
823LJFOLD(POW any KINT)
824LJFOLDF(simplify_numpow_xk)
825{
826 int32_t k = fright->i;
827 TRef ref = fins->op1;
828 if (k == 0) /* x ^ 0 ==> 1 */
829 return lj_ir_knum_one(J); /* Result must be a number, not an int. */
830 if (k == 1) /* x ^ 1 ==> x */
831 return LEFTFOLD;
832 if ((uint32_t)(k+65536) > 2*65536u) /* Limit code explosion. */
833 return NEXTFOLD;
834 if (k < 0) { /* x ^ (-k) ==> (1/x) ^ k. */
835 ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
836 k = -k;
837 }
838 /* Unroll x^k for 1 <= k <= 65536. */
839 for (; (k & 1) == 0; k >>= 1) /* Handle leading zeros. */
840 ref = emitir(IRTN(IR_MUL), ref, ref);
841 if ((k >>= 1) != 0) { /* Handle trailing bits. */
842 TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
843 for (; k != 1; k >>= 1) {
844 if (k & 1)
845 ref = emitir(IRTN(IR_MUL), ref, tmp);
846 tmp = emitir(IRTN(IR_MUL), tmp, tmp);
847 }
848 ref = emitir(IRTN(IR_MUL), ref, tmp);
849 }
850 return ref;
851}
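/* Editor's illustration -- a hedged sketch, not part of the original file:
** the same square-and-multiply unrolling applied to plain doubles, assuming
** k >= 1. For k == 5 it performs three multiplies (x^2, x^4, x*x^4), which
** mirrors the IR_MUL sequence emitted by the fold above.
*/
#if 0
static double example_unrolled_pow(double x, int k)
{
  double ref = x, tmp;
  for (; (k & 1) == 0; k >>= 1)  /* Square away the low zero bits of k. */
    ref *= ref;
  if ((k >>= 1) != 0) {  /* Combine the remaining set bits of k. */
    tmp = ref * ref;
    for (; k != 1; k >>= 1) {
      if (k & 1) ref *= tmp;
      tmp *= tmp;
    }
    ref *= tmp;
  }
  return ref;
}
#endif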
852
853LJFOLD(POW KNUM any)
854LJFOLDF(simplify_numpow_kx)
855{
856 lua_Number n = knumleft;
857 if (n == 2.0) { /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
858 fins->o = IR_CONV;
859#if LJ_TARGET_X86ORX64
860 fins->op1 = fins->op2;
861 fins->op2 = IRCONV_NUM_INT;
862 fins->op2 = (IRRef1)lj_opt_fold(J);
863#endif
864 fins->op1 = (IRRef1)lj_ir_knum_one(J);
865 fins->o = IR_LDEXP;
866 return RETRYFOLD;
867 }
868 return NEXTFOLD;
869}
870
871/* -- Simplify conversions ------------------------------------------------ */
872
873LJFOLD(CONV CONV IRCONV_NUM_INT) /* _NUM */
874LJFOLDF(shortcut_conv_num_int)
875{
876 PHIBARRIER(fleft);
877 /* Only safe with a guarded conversion to int. */
878 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
879 return fleft->op1; /* f(g(x)) ==> x */
880 return NEXTFOLD;
881}
882
883LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */
884LJFOLDF(simplify_conv_int_num)
885{
886 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
887 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT)
888 return fleft->op1;
889 return NEXTFOLD;
890}
891
892LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32 */
893LJFOLDF(simplify_conv_u32_num)
894{
895 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
896 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
897 return fleft->op1;
898 return NEXTFOLD;
899}
900
901LJFOLD(CONV CONV IRCONV_I64_NUM) /* _INT or _U32 */
902LJFOLD(CONV CONV IRCONV_U64_NUM) /* _INT or _U32 */
903LJFOLDF(simplify_conv_i64_num)
904{
905 PHIBARRIER(fleft);
906 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
907 /* Reduce to a sign-extension. */
908 fins->op1 = fleft->op1;
909 fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
910 return RETRYFOLD;
911 } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
912#if LJ_TARGET_X64
913 return fleft->op1;
914#else
915 /* Reduce to a zero-extension. */
916 fins->op1 = fleft->op1;
917 fins->op2 = (IRT_I64<<5)|IRT_U32;
918 return RETRYFOLD;
919#endif
920 }
921 return NEXTFOLD;
922}
923
924LJFOLD(CONV CONV IRCONV_INT_I64) /* _INT */
925LJFOLD(CONV CONV IRCONV_INT_U64) /* _INT */
926LJFOLDF(simplify_conv_int_i64)
927{
928 PHIBARRIER(fleft);
929 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT)
930 return fleft->op1;
931 return NEXTFOLD;
932}
933
934LJFOLD(CONV CONV IRCONV_FLOAT_NUM) /* _FLOAT */
935LJFOLDF(simplify_conv_flt_num)
936{
937 PHIBARRIER(fleft);
938 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
939 return fleft->op1;
940 return NEXTFOLD;
941}
942
943/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
944LJFOLD(TOBIT CONV KNUM)
945LJFOLDF(simplify_tobit_conv)
946{
947 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
948 (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
949 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
950 lua_assert(irt_isnum(fleft->t));
951 return fleft->op1;
952 }
953 return NEXTFOLD;
954}
955
956/* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
957LJFOLD(FPMATH CONV IRFPM_FLOOR)
958LJFOLD(FPMATH CONV IRFPM_CEIL)
959LJFOLD(FPMATH CONV IRFPM_TRUNC)
960LJFOLDF(simplify_floor_conv)
961{
962 if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
963 (fleft->op2 & IRCONV_SRCMASK) == IRT_U32)
964 return LEFTFOLD;
965 return NEXTFOLD;
966}
967
968/* Strength reduction of widening. */
969LJFOLD(CONV any IRCONV_I64_INT)
970LJFOLD(CONV any IRCONV_U64_INT)
971LJFOLDF(simplify_conv_sext)
972{
973 IRRef ref = fins->op1;
974 int64_t ofs = 0;
975 if (!(fins->op2 & IRCONV_SEXT))
976 return NEXTFOLD;
977 PHIBARRIER(fleft);
978 if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
979 goto ok_reduce;
980 if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
981 ofs = (int64_t)IR(fleft->op2)->i;
982 ref = fleft->op1;
983 }
984 /* Use scalar evolution analysis results to strength-reduce sign-extension. */
985 if (ref == J->scev.idx) {
986 IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
987 lua_assert(irt_isint(J->scev.t));
988 if (lo && IR(lo)->i + ofs >= 0) {
989 ok_reduce:
990#if LJ_TARGET_X64
991 /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
992 return LEFTFOLD;
993#else
994 /* Reduce to a (cheaper) zero-extension. */
995 fins->op2 &= ~IRCONV_SEXT;
996 return RETRYFOLD;
997#endif
998 }
999 }
1000 return NEXTFOLD;
1001}
1002
1003/* Strength reduction of narrowing. */
1004LJFOLD(CONV ADD IRCONV_INT_I64)
1005LJFOLD(CONV SUB IRCONV_INT_I64)
1006LJFOLD(CONV MUL IRCONV_INT_I64)
1007LJFOLD(CONV ADD IRCONV_INT_U64)
1008LJFOLD(CONV SUB IRCONV_INT_U64)
1009LJFOLD(CONV MUL IRCONV_INT_U64)
1010LJFOLDF(simplify_conv_narrow)
1011{
1012 IROp op = (IROp)fleft->o;
1013 IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
1014 PHIBARRIER(fleft);
1015 op1 = emitir(IRTI(IR_CONV), op1, mode);
1016 op2 = emitir(IRTI(IR_CONV), op2, mode);
1017 fins->ot = IRTI(op);
1018 fins->op1 = op1;
1019 fins->op2 = op2;
1020 return RETRYFOLD;
1021}
1022
1023/* Special CSE rule for CONV. */
1024LJFOLD(CONV any any)
1025LJFOLDF(cse_conv)
1026{
1027 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
1028 IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
1029 uint8_t guard = irt_isguard(fins->t);
1030 IRRef ref = J->chain[IR_CONV];
1031 while (ref > op1) {
1032 IRIns *ir = IR(ref);
1033 /* Commoning with stronger checks is ok. */
1034 if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
1035 irt_isguard(ir->t) >= guard)
1036 return ref;
1037 ref = ir->prev;
1038 }
1039 }
1040 return EMITFOLD; /* No fallthrough to regular CSE. */
1041}
1042
1043/* FP conversion narrowing. */
1044LJFOLD(TOBIT ADD KNUM)
1045LJFOLD(TOBIT SUB KNUM)
1046LJFOLD(CONV ADD IRCONV_INT_NUM)
1047LJFOLD(CONV SUB IRCONV_INT_NUM)
1048LJFOLD(CONV ADD IRCONV_I64_NUM)
1049LJFOLD(CONV SUB IRCONV_I64_NUM)
1050LJFOLDF(narrow_convert)
1051{
1052 PHIBARRIER(fleft);
1053 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
1054 if (J->chain[IR_LOOP])
1055 return NEXTFOLD;
1056 lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
1057 return lj_opt_narrow_convert(J);
1058}
1059
1060/* -- Integer algebraic simplifications ----------------------------------- */
1061
1062LJFOLD(ADD any KINT)
1063LJFOLD(ADDOV any KINT)
1064LJFOLD(SUBOV any KINT)
1065LJFOLDF(simplify_intadd_k)
1066{
1067 if (fright->i == 0) /* i o 0 ==> i */
1068 return LEFTFOLD;
1069 return NEXTFOLD;
1070}
1071
1072LJFOLD(MULOV any KINT)
1073LJFOLDF(simplify_intmul_k)
1074{
1075 if (fright->i == 0) /* i * 0 ==> 0 */
1076 return RIGHTFOLD;
1077 if (fright->i == 1) /* i * 1 ==> i */
1078 return LEFTFOLD;
1079 if (fright->i == 2) { /* i * 2 ==> i + i */
1080 fins->o = IR_ADDOV;
1081 fins->op2 = fins->op1;
1082 return RETRYFOLD;
1083 }
1084 return NEXTFOLD;
1085}
1086
1087LJFOLD(SUB any KINT)
1088LJFOLDF(simplify_intsub_k)
1089{
1090 if (fright->i == 0) /* i - 0 ==> i */
1091 return LEFTFOLD;
1092 fins->o = IR_ADD; /* i - k ==> i + (-k) */
1093 fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i); /* Overflow for -2^31 ok. */
1094 return RETRYFOLD;
1095}
1096
1097LJFOLD(SUB KINT any)
1098LJFOLD(SUB KINT64 any)
1099LJFOLDF(simplify_intsub_kleft)
1100{
1101 if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
1102 fins->o = IR_NEG; /* 0 - i ==> -i */
1103 fins->op1 = fins->op2;
1104 return RETRYFOLD;
1105 }
1106 return NEXTFOLD;
1107}
1108
1109LJFOLD(ADD any KINT64)
1110LJFOLDF(simplify_intadd_k64)
1111{
1112 if (ir_kint64(fright)->u64 == 0) /* i + 0 ==> i */
1113 return LEFTFOLD;
1114 return NEXTFOLD;
1115}
1116
1117LJFOLD(SUB any KINT64)
1118LJFOLDF(simplify_intsub_k64)
1119{
1120 uint64_t k = ir_kint64(fright)->u64;
1121 if (k == 0) /* i - 0 ==> i */
1122 return LEFTFOLD;
1123 fins->o = IR_ADD; /* i - k ==> i + (-k) */
1124 fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
1125 return RETRYFOLD;
1126}
1127
1128static TRef simplify_intmul_k(jit_State *J, int32_t k)
1129{
1130 /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
1131 ** But this is mainly intended for simple address arithmetic.
1132 ** Also it's easier for the backend to optimize the original multiplies.
1133 */
1134 if (k == 1) { /* i * 1 ==> i */
1135 return LEFTFOLD;
1136 } else if ((k & (k-1)) == 0) { /* i * 2^k ==> i << k */
1137 fins->o = IR_BSHL;
1138 fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
1139 return RETRYFOLD;
1140 }
1141 return NEXTFOLD;
1142}
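/* Editor's note (illustrative, not part of the original source): e.g. i * 8
** is rewritten to i << 3 by the helper above, since 8 is a power of two
** ((k & (k-1)) == 0) and lj_fls(8) == 3 gives the shift amount.
*/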
1143
1144LJFOLD(MUL any KINT)
1145LJFOLDF(simplify_intmul_k32)
1146{
1147 if (fright->i == 0) /* i * 0 ==> 0 */
1148 return INTFOLD(0);
1149 else if (fright->i > 0)
1150 return simplify_intmul_k(J, fright->i);
1151 return NEXTFOLD;
1152}
1153
1154LJFOLD(MUL any KINT64)
1155LJFOLDF(simplify_intmul_k64)
1156{
1157 if (ir_kint64(fright)->u64 == 0) /* i * 0 ==> 0 */
1158 return INT64FOLD(0);
1159#if LJ_64
1160 /* NYI: SPLIT for BSHL and 32 bit backend support. */
1161 else if (ir_kint64(fright)->u64 < 0x80000000u)
1162 return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
1163#endif
1164 return NEXTFOLD;
1165}
1166
1167LJFOLD(MOD any KINT)
1168LJFOLDF(simplify_intmod_k)
1169{
1170 int32_t k = fright->i;
1171 lua_assert(k != 0);
1172 if (k > 0 && (k & (k-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */
1173 fins->o = IR_BAND;
1174 fins->op2 = lj_ir_kint(J, k-1);
1175 return RETRYFOLD;
1176 }
1177 return NEXTFOLD;
1178}
1179
1180LJFOLD(MOD KINT any)
1181LJFOLDF(simplify_intmod_kleft)
1182{
1183 if (fleft->i == 0)
1184 return INTFOLD(0);
1185 return NEXTFOLD;
1186}
1187
1188LJFOLD(SUB any any)
1189LJFOLD(SUBOV any any)
1190LJFOLDF(simplify_intsub)
1191{
1192 if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) /* i - i ==> 0 */
1193 return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
1194 return NEXTFOLD;
1195}
1196
1197LJFOLD(SUB ADD any)
1198LJFOLDF(simplify_intsubadd_leftcancel)
1199{
1200 if (!irt_isnum(fins->t)) {
1201 PHIBARRIER(fleft);
1202 if (fins->op2 == fleft->op1) /* (i + j) - i ==> j */
1203 return fleft->op2;
1204 if (fins->op2 == fleft->op2) /* (i + j) - j ==> i */
1205 return fleft->op1;
1206 }
1207 return NEXTFOLD;
1208}
1209
1210LJFOLD(SUB SUB any)
1211LJFOLDF(simplify_intsubsub_leftcancel)
1212{
1213 if (!irt_isnum(fins->t)) {
1214 PHIBARRIER(fleft);
1215 if (fins->op1 == fleft->op1) { /* (i - j) - i ==> 0 - j */
1216 fins->op1 = (IRRef1)lj_ir_kint(J, 0);
1217 fins->op2 = fleft->op2;
1218 return RETRYFOLD;
1219 }
1220 }
1221 return NEXTFOLD;
1222}
1223
1224LJFOLD(SUB any SUB)
1225LJFOLDF(simplify_intsubsub_rightcancel)
1226{
1227 if (!irt_isnum(fins->t)) {
1228 PHIBARRIER(fright);
1229 if (fins->op1 == fright->op1) /* i - (i - j) ==> j */
1230 return fright->op2;
1231 }
1232 return NEXTFOLD;
1233}
1234
1235LJFOLD(SUB any ADD)
1236LJFOLDF(simplify_intsubadd_rightcancel)
1237{
1238 if (!irt_isnum(fins->t)) {
1239 PHIBARRIER(fright);
1240 if (fins->op1 == fright->op1) { /* i - (i + j) ==> 0 - j */
1241 fins->op2 = fright->op2;
1242 fins->op1 = (IRRef1)lj_ir_kint(J, 0);
1243 return RETRYFOLD;
1244 }
1245 if (fins->op1 == fright->op2) { /* i - (j + i) ==> 0 - j */
1246 fins->op2 = fright->op1;
1247 fins->op1 = (IRRef1)lj_ir_kint(J, 0);
1248 return RETRYFOLD;
1249 }
1250 }
1251 return NEXTFOLD;
1252}
1253
1254LJFOLD(SUB ADD ADD)
1255LJFOLDF(simplify_intsubaddadd_cancel)
1256{
1257 if (!irt_isnum(fins->t)) {
1258 PHIBARRIER(fleft);
1259 PHIBARRIER(fright);
1260 if (fleft->op1 == fright->op1) { /* (i + j1) - (i + j2) ==> j1 - j2 */
1261 fins->op1 = fleft->op2;
1262 fins->op2 = fright->op2;
1263 return RETRYFOLD;
1264 }
1265 if (fleft->op1 == fright->op2) { /* (i + j1) - (j2 + i) ==> j1 - j2 */
1266 fins->op1 = fleft->op2;
1267 fins->op2 = fright->op1;
1268 return RETRYFOLD;
1269 }
1270 if (fleft->op2 == fright->op1) { /* (j1 + i) - (i + j2) ==> j1 - j2 */
1271 fins->op1 = fleft->op1;
1272 fins->op2 = fright->op2;
1273 return RETRYFOLD;
1274 }
1275 if (fleft->op2 == fright->op2) { /* (j1 + i) - (j2 + i) ==> j1 - j2 */
1276 fins->op1 = fleft->op1;
1277 fins->op2 = fright->op1;
1278 return RETRYFOLD;
1279 }
1280 }
1281 return NEXTFOLD;
1282}
1283
1284LJFOLD(BAND any KINT)
1285LJFOLD(BAND any KINT64)
1286LJFOLDF(simplify_band_k)
1287{
1288 int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
1289 (int64_t)ir_k64(fright)->u64;
1290 if (k == 0) /* i & 0 ==> 0 */
1291 return RIGHTFOLD;
1292 if (k == -1) /* i & -1 ==> i */
1293 return LEFTFOLD;
1294 return NEXTFOLD;
1295}
1296
1297LJFOLD(BOR any KINT)
1298LJFOLD(BOR any KINT64)
1299LJFOLDF(simplify_bor_k)
1300{
1301 int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
1302 (int64_t)ir_k64(fright)->u64;
1303 if (k == 0) /* i | 0 ==> i */
1304 return LEFTFOLD;
1305 if (k == -1) /* i | -1 ==> -1 */
1306 return RIGHTFOLD;
1307 return NEXTFOLD;
1308}
1309
1310LJFOLD(BXOR any KINT)
1311LJFOLD(BXOR any KINT64)
1312LJFOLDF(simplify_bxor_k)
1313{
1314 int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
1315 (int64_t)ir_k64(fright)->u64;
1316 if (k == 0) /* i xor 0 ==> i */
1317 return LEFTFOLD;
1318 if (k == -1) { /* i xor -1 ==> ~i */
1319 fins->o = IR_BNOT;
1320 fins->op2 = 0;
1321 return RETRYFOLD;
1322 }
1323 return NEXTFOLD;
1324}
1325
1326LJFOLD(BSHL any KINT)
1327LJFOLD(BSHR any KINT)
1328LJFOLD(BSAR any KINT)
1329LJFOLD(BROL any KINT)
1330LJFOLD(BROR any KINT)
1331LJFOLDF(simplify_shift_ik)
1332{
1333 int32_t mask = irt_is64(fins->t) ? 63 : 31;
1334 int32_t k = (fright->i & mask);
1335 if (k == 0) /* i o 0 ==> i */
1336 return LEFTFOLD;
1337 if (k == 1 && fins->o == IR_BSHL) { /* i << 1 ==> i + i */
1338 fins->o = IR_ADD;
1339 fins->op2 = fins->op1;
1340 return RETRYFOLD;
1341 }
1342 if (k != fright->i) { /* i o k ==> i o (k & mask) */
1343 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1344 return RETRYFOLD;
1345 }
1346#ifndef LJ_TARGET_UNIFYROT
1347 if (fins->o == IR_BROR) { /* bror(i, k) ==> brol(i, (-k)&mask) */
1348 fins->o = IR_BROL;
1349 fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
1350 return RETRYFOLD;
1351 }
1352#endif
1353 return NEXTFOLD;
1354}
1355
1356LJFOLD(BSHL any BAND)
1357LJFOLD(BSHR any BAND)
1358LJFOLD(BSAR any BAND)
1359LJFOLD(BROL any BAND)
1360LJFOLD(BROR any BAND)
1361LJFOLDF(simplify_shift_andk)
1362{
1363 IRIns *irk = IR(fright->op2);
1364 PHIBARRIER(fright);
1365 if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
1366 irk->o == IR_KINT) { /* i o (j & mask) ==> i o j */
1367 int32_t mask = irt_is64(fins->t) ? 63 : 31;
1368 int32_t k = irk->i & mask;
1369 if (k == mask) {
1370 fins->op2 = fright->op1;
1371 return RETRYFOLD;
1372 }
1373 }
1374 return NEXTFOLD;
1375}
1376
1377LJFOLD(BSHL KINT any)
1378LJFOLD(BSHR KINT any)
1379LJFOLD(BSHL KINT64 any)
1380LJFOLD(BSHR KINT64 any)
1381LJFOLDF(simplify_shift1_ki)
1382{
1383 int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
1384 (int64_t)ir_k64(fleft)->u64;
1385 if (k == 0) /* 0 o i ==> 0 */
1386 return LEFTFOLD;
1387 return NEXTFOLD;
1388}
1389
1390LJFOLD(BSAR KINT any)
1391LJFOLD(BROL KINT any)
1392LJFOLD(BROR KINT any)
1393LJFOLD(BSAR KINT64 any)
1394LJFOLD(BROL KINT64 any)
1395LJFOLD(BROR KINT64 any)
1396LJFOLDF(simplify_shift2_ki)
1397{
1398 int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
1399 (int64_t)ir_k64(fleft)->u64;
1400 if (k == 0 || k == -1) /* 0 o i ==> 0; -1 o i ==> -1 */
1401 return LEFTFOLD;
1402 return NEXTFOLD;
1403}
1404
1405LJFOLD(BSHL BAND KINT)
1406LJFOLD(BSHR BAND KINT)
1407LJFOLD(BROL BAND KINT)
1408LJFOLD(BROR BAND KINT)
1409LJFOLDF(simplify_shiftk_andk)
1410{
1411 IRIns *irk = IR(fleft->op2);
1412 PHIBARRIER(fleft);
1413 if (irk->o == IR_KINT) { /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
1414 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1415 fins->op1 = fleft->op1;
1416 fins->op1 = (IRRef1)lj_opt_fold(J);
1417 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1418 fins->ot = IRTI(IR_BAND);
1419 return RETRYFOLD;
1420 }
1421 return NEXTFOLD;
1422}
1423
1424LJFOLD(BAND BSHL KINT)
1425LJFOLD(BAND BSHR KINT)
1426LJFOLDF(simplify_andk_shiftk)
1427{
1428 IRIns *irk = IR(fleft->op2);
1429 if (irk->o == IR_KINT &&
1430 kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
1431 return LEFTFOLD; /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
1432 return NEXTFOLD;
1433}
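/* Editor's note (illustrative, not part of the original source): e.g.
** (i << 8) & 0xffffff00 ==> i << 8, because (-1 << 8) == 0xffffff00, i.e.
** the mask cannot clear any bit the shift has not already cleared. Likewise
** (i >> 8) & 0x00ffffff ==> i >> 8 for the logical right shift.
*/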
1434
1435/* -- Reassociation ------------------------------------------------------- */
1436
1437LJFOLD(ADD ADD KINT)
1438LJFOLD(MUL MUL KINT)
1439LJFOLD(BAND BAND KINT)
1440LJFOLD(BOR BOR KINT)
1441LJFOLD(BXOR BXOR KINT)
1442LJFOLDF(reassoc_intarith_k)
1443{
1444 IRIns *irk = IR(fleft->op2);
1445 if (irk->o == IR_KINT) {
1446 int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
1447 if (k == irk->i) /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
1448 return LEFTFOLD;
1449 PHIBARRIER(fleft);
1450 fins->op1 = fleft->op1;
1451 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1452 return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */
1453 }
1454 return NEXTFOLD;
1455}
1456
1457LJFOLD(ADD ADD KINT64)
1458LJFOLD(MUL MUL KINT64)
1459LJFOLD(BAND BAND KINT64)
1460LJFOLD(BOR BOR KINT64)
1461LJFOLD(BXOR BXOR KINT64)
1462LJFOLDF(reassoc_intarith_k64)
1463{
1464#if LJ_HASFFI || LJ_64
1465 IRIns *irk = IR(fleft->op2);
1466 if (irk->o == IR_KINT64) {
1467 uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
1468 ir_k64(fright)->u64, (IROp)fins->o);
1469 PHIBARRIER(fleft);
1470 fins->op1 = fleft->op1;
1471 fins->op2 = (IRRef1)lj_ir_kint64(J, k);
1472 return RETRYFOLD; /* (i o k1) o k2 ==> i o (k1 o k2) */
1473 }
1474 return NEXTFOLD;
1475#else
1476 UNUSED(J); lua_assert(0); return FAILFOLD;
1477#endif
1478}
1479
1480LJFOLD(MIN MIN any)
1481LJFOLD(MAX MAX any)
1482LJFOLD(BAND BAND any)
1483LJFOLD(BOR BOR any)
1484LJFOLDF(reassoc_dup)
1485{
1486 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
1487 return LEFTFOLD; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
1488 return NEXTFOLD;
1489}
1490
1491LJFOLD(BXOR BXOR any)
1492LJFOLDF(reassoc_bxor)
1493{
1494 PHIBARRIER(fleft);
1495 if (fins->op2 == fleft->op1) /* (a xor b) xor a ==> b */
1496 return fleft->op2;
1497 if (fins->op2 == fleft->op2) /* (a xor b) xor b ==> a */
1498 return fleft->op1;
1499 return NEXTFOLD;
1500}
1501
1502LJFOLD(BSHL BSHL KINT)
1503LJFOLD(BSHR BSHR KINT)
1504LJFOLD(BSAR BSAR KINT)
1505LJFOLD(BROL BROL KINT)
1506LJFOLD(BROR BROR KINT)
1507LJFOLDF(reassoc_shift)
1508{
1509 IRIns *irk = IR(fleft->op2);
1510 PHIBARRIER(fleft); /* The (shift any KINT) rule covers k2 == 0 and more. */
1511 if (irk->o == IR_KINT) { /* (i o k1) o k2 ==> i o (k1 + k2) */
1512 int32_t mask = irt_is64(fins->t) ? 63 : 31;
1513 int32_t k = (irk->i & mask) + (fright->i & mask);
1514 if (k > mask) { /* Combined shift too wide? */
1515 if (fins->o == IR_BSHL || fins->o == IR_BSHR)
1516 return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
1517 else if (fins->o == IR_BSAR)
1518 k = mask;
1519 else
1520 k &= mask;
1521 }
1522 fins->op1 = fleft->op1;
1523 fins->op2 = (IRRef1)lj_ir_kint(J, k);
1524 return RETRYFOLD;
1525 }
1526 return NEXTFOLD;
1527}
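/* Editor's note (illustrative, not part of the original source): for 32 bit
** operands, (i << 20) << 20 folds to 0 because the combined shift of 40
** exceeds the width, whereas (i BSAR 20) BSAR 20 is clamped to i BSAR 31 and
** a double rotate by 20 becomes a single rotate by 40 & 31 == 8.
*/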
1528
1529LJFOLD(MIN MIN KNUM)
1530LJFOLD(MAX MAX KNUM)
1531LJFOLD(MIN MIN KINT)
1532LJFOLD(MAX MAX KINT)
1533LJFOLDF(reassoc_minmax_k)
1534{
1535 IRIns *irk = IR(fleft->op2);
1536 if (irk->o == IR_KNUM) {
1537 lua_Number a = ir_knum(irk)->n;
1538 lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
1539 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1540 return LEFTFOLD;
1541 PHIBARRIER(fleft);
1542 fins->op1 = fleft->op1;
1543 fins->op2 = (IRRef1)lj_ir_knum(J, y);
1544 return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
1545 } else if (irk->o == IR_KINT) {
1546 int32_t a = irk->i;
1547 int32_t y = kfold_intop(a, fright->i, fins->o);
1548 if (a == y) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1549 return LEFTFOLD;
1550 PHIBARRIER(fleft);
1551 fins->op1 = fleft->op1;
1552 fins->op2 = (IRRef1)lj_ir_kint(J, y);
1553 return RETRYFOLD; /* (x o k1) o k2 ==> x o (k1 o k2) */
1554 }
1555 return NEXTFOLD;
1556}
1557
1558LJFOLD(MIN MAX any)
1559LJFOLD(MAX MIN any)
1560LJFOLDF(reassoc_minmax_left)
1561{
1562 if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
1563 return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */
1564 return NEXTFOLD;
1565}
1566
1567LJFOLD(MIN any MAX)
1568LJFOLD(MAX any MIN)
1569LJFOLDF(reassoc_minmax_right)
1570{
1571 if (fins->op1 == fright->op1 || fins->op1 == fright->op2)
1572 return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */
1573 return NEXTFOLD;
1574}
1575
1576/* -- Array bounds check elimination -------------------------------------- */
1577
1578/* Eliminate ABC across PHIs to handle the t[i-1] forwarding case.
1579** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
1580** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
1581*/
1582LJFOLD(ABC any ADD)
1583LJFOLDF(abc_fwd)
1584{
1585 if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
1586 if (irref_isk(fright->op2)) {
1587 IRIns *add2 = IR(fright->op1);
1588 if (add2->o == IR_ADD && irref_isk(add2->op2) &&
1589 IR(fright->op2)->i == -IR(add2->op2)->i) {
1590 IRRef ref = J->chain[IR_ABC];
1591 IRRef lim = add2->op1;
1592 if (fins->op1 > lim) lim = fins->op1;
1593 while (ref > lim) {
1594 IRIns *ir = IR(ref);
1595 if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
1596 return DROPFOLD;
1597 ref = ir->prev;
1598 }
1599 }
1600 }
1601 }
1602 return NEXTFOLD;
1603}
1604
1605/* Eliminate ABC for constants.
1606** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
1607** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
1608*/
1609LJFOLD(ABC any KINT)
1610LJFOLDF(abc_k)
1611{
1612 if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
1613 IRRef ref = J->chain[IR_ABC];
1614 IRRef asize = fins->op1;
1615 while (ref > asize) {
1616 IRIns *ir = IR(ref);
1617 if (ir->op1 == asize && irref_isk(ir->op2)) {
1618 int32_t k = IR(ir->op2)->i;
1619 if (fright->i > k)
1620 ir->op2 = fins->op2;
1621 return DROPFOLD;
1622 }
1623 ref = ir->prev;
1624 }
1625 return EMITFOLD; /* Already performed CSE. */
1626 }
1627 return NEXTFOLD;
1628}
1629
1630/* Eliminate invariant ABC inside loop. */
1631LJFOLD(ABC any any)
1632LJFOLDF(abc_invar)
1633{
1634 if (!irt_isint(fins->t) && J->chain[IR_LOOP]) /* Currently marked as PTR. */
1635 return DROPFOLD;
1636 return NEXTFOLD;
1637}
1638
1639/* -- Commutativity ------------------------------------------------------- */
1640
1641/* The refs of commutative ops are canonicalized. Lower refs go to the right.
1642** Rationale behind this:
1643** - It (also) moves constants to the right.
1644** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
1645** - It helps CSE to find more matches.
1646** - The assembler generates better code with constants at the right.
1647*/
1648
1649LJFOLD(ADD any any)
1650LJFOLD(MUL any any)
1651LJFOLD(ADDOV any any)
1652LJFOLD(MULOV any any)
1653LJFOLDF(comm_swap)
1654{
1655 if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
1656 IRRef1 tmp = fins->op1;
1657 fins->op1 = fins->op2;
1658 fins->op2 = tmp;
1659 return RETRYFOLD;
1660 }
1661 return NEXTFOLD;
1662}
1663
1664LJFOLD(EQ any any)
1665LJFOLD(NE any any)
1666LJFOLDF(comm_equal)
1667{
1668 /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
1669 if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
1670 return CONDFOLD(fins->o == IR_EQ);
1671 return fold_comm_swap(J);
1672}
1673
1674LJFOLD(LT any any)
1675LJFOLD(GE any any)
1676LJFOLD(LE any any)
1677LJFOLD(GT any any)
1678LJFOLD(ULT any any)
1679LJFOLD(UGE any any)
1680LJFOLD(ULE any any)
1681LJFOLD(UGT any any)
1682LJFOLDF(comm_comp)
1683{
1684 /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
1685 if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
1686 return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
1687 if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
1688 IRRef1 tmp = fins->op1;
1689 fins->op1 = fins->op2;
1690 fins->op2 = tmp;
1691 fins->o ^= 3; /* GT <-> LT, GE <-> LE, does not affect U */
1692 return RETRYFOLD;
1693 }
1694 return NEXTFOLD;
1695}
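/* Editor's note (illustrative, not part of the original source): both bit
** tricks above rely on the pairwise numbering of the comparison opcodes
** (LT/GE, LE/GT, ULT/UGE, ULE/UGT). XOR-ing the opcode with 3 swaps
** LT <-> GT and GE <-> LE (same for the unsigned variants), which is exactly
** the change needed when the operands are exchanged. (o ^ (o >> 1)) & 1
** evaluates the reflexive case: 0 for LT/GT/ULT/UGT (x < x is false) and
** 1 for GE/LE/UGE/ULE (x >= x is true).
*/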
1696
1697LJFOLD(BAND any any)
1698LJFOLD(BOR any any)
1699LJFOLD(MIN any any)
1700LJFOLD(MAX any any)
1701LJFOLDF(comm_dup)
1702{
1703 if (fins->op1 == fins->op2) /* x o x ==> x */
1704 return LEFTFOLD;
1705 return fold_comm_swap(J);
1706}
1707
1708LJFOLD(BXOR any any)
1709LJFOLDF(comm_bxor)
1710{
1711 if (fins->op1 == fins->op2) /* i xor i ==> 0 */
1712 return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
1713 return fold_comm_swap(J);
1714}
1715
1716/* -- Simplification of compound expressions ------------------------------ */
1717
1718static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
1719{
1720 int32_t k;
1721 switch (irt_type(ir->t)) {
1722 case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
1723 case IRT_I8: k = (int32_t)*(int8_t *)p; break;
1724 case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
1725 case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
1726 case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
1727 case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
1728 case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
1729 default: return 0;
1730 }
1731 return lj_ir_kint(J, k);
1732}
1733
1734/* Turn: string.sub(str, a, b) == kstr
1735** into: string.byte(str, a) == string.byte(kstr, 1) etc.
1736** Note: this creates unaligned XLOADs on x86/x64.
1737*/
1738LJFOLD(EQ SNEW KGC)
1739LJFOLD(NE SNEW KGC)
1740LJFOLDF(merge_eqne_snew_kgc)
1741{
1742 GCstr *kstr = ir_kstr(fright);
1743 int32_t len = (int32_t)kstr->len;
1744 lua_assert(irt_isstr(fins->t));
1745
1746#if LJ_TARGET_X86ORX64
1747#define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */
1748#define FOLD_SNEW_TYPE8 IRT_I8 /* Creates shorter immediates. */
1749#else
1750#define FOLD_SNEW_MAX_LEN 1 /* Handle string lengths 0 or 1. */
1751#define FOLD_SNEW_TYPE8 IRT_U8 /* Prefer unsigned loads. */
1752#endif
1753
1754 if (len <= FOLD_SNEW_MAX_LEN) {
1755 IROp op = (IROp)fins->o;
1756 IRRef strref = fleft->op1;
1757 lua_assert(IR(strref)->o == IR_STRREF);
1758 if (op == IR_EQ) {
1759 emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
1760 /* Caveat: fins/fleft/fright is no longer valid after emitir. */
1761 } else {
1762 /* NE is not expanded since this would need an OR of two conds. */
1763 if (!irref_isk(fleft->op2)) /* Only handle the constant length case. */
1764 return NEXTFOLD;
1765 if (IR(fleft->op2)->i != len)
1766 return DROPFOLD;
1767 }
1768 if (len > 0) {
1769 /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
1770 uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
1771 len == 2 ? IRT(IR_XLOAD, IRT_U16) :
1772 IRTI(IR_XLOAD));
1773 TRef tmp = emitir(ot, strref,
1774 IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
1775 TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
1776 if (len == 3)
1777 tmp = emitir(IRTI(IR_BAND), tmp,
1778 lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
1779 fins->op1 = (IRRef1)tmp;
1780 fins->op2 = (IRRef1)val;
1781 fins->ot = (IROpT)IRTGI(op);
1782 return RETRYFOLD;
1783 } else {
1784 return DROPFOLD;
1785 }
1786 }
1787 return NEXTFOLD;
1788}
1789
1790/* -- Loads --------------------------------------------------------------- */
1791
1792/* Loads cannot be folded or passed on to CSE in general.
1793** Alias analysis is needed to check for forwarding opportunities.
1794**
1795** Caveat: *all* loads must be listed here or they end up at CSE!
1796*/
1797
1798LJFOLD(ALOAD any)
1799LJFOLDX(lj_opt_fwd_aload)
1800
1801/* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
1802LJFOLD(HLOAD KKPTR)
1803LJFOLDF(kfold_hload_kkptr)
1804{
1805 UNUSED(J);
1806 lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
1807 return TREF_NIL;
1808}
1809
1810LJFOLD(HLOAD any)
1811LJFOLDX(lj_opt_fwd_hload)
1812
1813LJFOLD(ULOAD any)
1814LJFOLDX(lj_opt_fwd_uload)
1815
1816LJFOLD(CALLL any IRCALL_lj_tab_len)
1817LJFOLDX(lj_opt_fwd_tab_len)
1818
1819/* Upvalue refs are really loads, but there are no corresponding stores.
1820** So CSE is ok for them, except for UREFO across a GC step (see below).
1821** If the referenced function is const, its upvalue addresses are const, too.
1822** This can be used to improve CSE by looking for the same address,
1823** even if the upvalues originate from a different function.
1824*/
1825LJFOLD(UREFO KGC any)
1826LJFOLD(UREFC KGC any)
1827LJFOLDF(cse_uref)
1828{
1829 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
1830 IRRef ref = J->chain[fins->o];
1831 GCfunc *fn = ir_kfunc(fleft);
1832 GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
1833 while (ref > 0) {
1834 IRIns *ir = IR(ref);
1835 if (irref_isk(ir->op1)) {
1836 GCfunc *fn2 = ir_kfunc(IR(ir->op1));
1837 if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
1838 if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
1839 break;
1840 return ref;
1841 }
1842 }
1843 ref = ir->prev;
1844 }
1845 }
1846 return EMITFOLD;
1847}
1848
1849LJFOLD(HREF TNEW any)
1850LJFOLDF(fwd_href_tnew)
1851{
1852 if (lj_opt_fwd_href_nokey(J))
1853 return lj_ir_kkptr(J, niltvg(J2G(J)));
1854 return NEXTFOLD;
1855}
1856
1857LJFOLD(HREF TDUP KPRI)
1858LJFOLD(HREF TDUP KGC)
1859LJFOLD(HREF TDUP KNUM)
1860LJFOLDF(fwd_href_tdup)
1861{
1862 TValue keyv;
1863 lj_ir_kvalue(J->L, &keyv, fright);
1864 if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
1865 lj_opt_fwd_href_nokey(J))
1866 return lj_ir_kkptr(J, niltvg(J2G(J)));
1867 return NEXTFOLD;
1868}
1869
1870/* We can safely FOLD/CSE array/hash refs and field loads, since there
1871** are no corresponding stores. But we need to check for any NEWREF with
1872** an aliased table, as it may invalidate all of the pointers and fields.
1873** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
1874** FLOADs. And NEWREF itself is treated like a store (see below).
1875*/
1876LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
1877LJFOLDF(fload_tab_tnew_asize)
1878{
1879 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1880 return INTFOLD(fleft->op1);
1881 return NEXTFOLD;
1882}
1883
1884LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
1885LJFOLDF(fload_tab_tnew_hmask)
1886{
1887 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1888 return INTFOLD((1 << fleft->op2)-1);
1889 return NEXTFOLD;
1890}
1891
1892LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
1893LJFOLDF(fload_tab_tdup_asize)
1894{
1895 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1896 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
1897 return NEXTFOLD;
1898}
1899
1900LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
1901LJFOLDF(fload_tab_tdup_hmask)
1902{
1903 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
1904 return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
1905 return NEXTFOLD;
1906}
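/* Illustrative only (standalone sketch, hypothetical helper, not part of
** this file): TNEW carries the array size in op1 and the hash size as a
** log2 value in op2, which is why the ASIZE fold above returns op1 directly
** and the HMASK fold returns (1 << op2) - 1.
*/
#include <stdint.h>

static int32_t sketch_tnew_hmask(uint32_t hbits)
{
  return (int32_t)((1u << hbits) - 1u);  /* e.g. hbits = 4 -> hmask = 15 */
}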
1907
1908LJFOLD(HREF any any)
1909LJFOLD(FLOAD any IRFL_TAB_ARRAY)
1910LJFOLD(FLOAD any IRFL_TAB_NODE)
1911LJFOLD(FLOAD any IRFL_TAB_ASIZE)
1912LJFOLD(FLOAD any IRFL_TAB_HMASK)
1913LJFOLDF(fload_tab_ah)
1914{
1915 TRef tr = lj_opt_cse(J);
1916 return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
1917}
1918
1919/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
1920LJFOLD(FLOAD KGC IRFL_STR_LEN)
1921LJFOLDF(fload_str_len_kgc)
1922{
1923 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1924 return INTFOLD((int32_t)ir_kstr(fleft)->len);
1925 return NEXTFOLD;
1926}
1927
1928LJFOLD(FLOAD SNEW IRFL_STR_LEN)
1929LJFOLDF(fload_str_len_snew)
1930{
1931 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
1932 PHIBARRIER(fleft);
1933 return fleft->op2;
1934 }
1935 return NEXTFOLD;
1936}
1937
1938/* The C type ID of cdata objects is immutable. */
1939LJFOLD(FLOAD KGC IRFL_CDATA_TYPEID)
1940LJFOLDF(fload_cdata_typeid_kgc)
1941{
1942 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1943 return INTFOLD((int32_t)ir_kcdata(fleft)->typeid);
1944 return NEXTFOLD;
1945}
1946
1947/* Get the contents of immutable cdata objects. */
1948LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
1949LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
1950LJFOLDF(fload_cdata_int64_kgc)
1951{
1952 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
1953 void *p = cdataptr(ir_kcdata(fleft));
1954 if (irt_is64(fins->t))
1955 return INT64FOLD(*(uint64_t *)p);
1956 else
1957 return INTFOLD(*(int32_t *)p);
1958 }
1959 return NEXTFOLD;
1960}
1961
1962LJFOLD(FLOAD CNEW IRFL_CDATA_TYPEID)
1963LJFOLD(FLOAD CNEWI IRFL_CDATA_TYPEID)
1964LJFOLDF(fload_cdata_typeid_cnew)
1965{
1966 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1967 return fleft->op1; /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
1968 return NEXTFOLD;
1969}
1970
1971/* Pointer and int64 cdata objects are immutable. */
1972LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
1973LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
1974LJFOLDF(fload_cdata_ptr_int64_cnew)
1975{
1976 if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
1977 return fleft->op2; /* Fold even across PHI to avoid allocations. */
1978 return NEXTFOLD;
1979}
1980
1981LJFOLD(FLOAD any IRFL_STR_LEN)
1982LJFOLD(FLOAD any IRFL_CDATA_TYPEID)
1983LJFOLD(FLOAD any IRFL_CDATA_PTR)
1984LJFOLD(FLOAD any IRFL_CDATA_INT64)
1985LJFOLD(VLOAD any any) /* Vararg loads have no corresponding stores. */
1986LJFOLDX(lj_opt_cse)
1987
1988/* All other field loads need alias analysis. */
1989LJFOLD(FLOAD any any)
1990LJFOLDX(lj_opt_fwd_fload)
1991
1992/* This is for LOOP only. Recording handles SLOADs internally. */
1993LJFOLD(SLOAD any any)
1994LJFOLDF(fwd_sload)
1995{
1996 if ((fins->op2 & IRSLOAD_FRAME)) {
1997 TRef tr = lj_opt_cse(J);
1998 return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
1999 } else {
2000 lua_assert(J->slot[fins->op1] != 0);
2001 return J->slot[fins->op1];
2002 }
2003}
2004
2005/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
2006LJFOLD(XLOAD KKPTR any)
2007LJFOLDF(xload_kptr)
2008{
2009 TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
2010 return tr ? tr : NEXTFOLD;
2011}
2012
2013LJFOLD(XLOAD any any)
2014LJFOLDX(lj_opt_fwd_xload)
2015
2016/* -- Write barriers ------------------------------------------------------ */
2017
2018/* Write barriers are amenable to CSE, but not across any incremental
2019** GC steps.
2020**
2021** The same logic applies to open upvalue references, because a stack
2022** may be resized during a GC step (not the current stack, but maybe that
2023** of a coroutine).
2024*/
2025LJFOLD(TBAR any)
2026LJFOLD(OBAR any any)
2027LJFOLD(UREFO any any)
2028LJFOLDF(barrier_tab)
2029{
2030 TRef tr = lj_opt_cse(J);
2031 if (gcstep_barrier(J, tref_ref(tr))) /* CSE across GC step? */
2032 return EMITFOLD; /* Raw emit. Assumes fins is left intact by CSE. */
2033 return tr;
2034}
2035
2036LJFOLD(TBAR TNEW)
2037LJFOLD(TBAR TDUP)
2038LJFOLDF(barrier_tnew_tdup)
2039{
2040 /* New tables are always white and never need a barrier. */
2041 if (fins->op1 < J->chain[IR_LOOP]) /* Except across a GC step. */
2042 return NEXTFOLD;
2043 return DROPFOLD;
2044}
2045
2046/* -- Stores and allocations ---------------------------------------------- */
2047
2048/* Stores and allocations cannot be folded or passed on to CSE in general.
2049** But some stores can be eliminated with dead-store elimination (DSE).
2050**
2051** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
2052*/
2053
2054LJFOLD(ASTORE any any)
2055LJFOLD(HSTORE any any)
2056LJFOLDX(lj_opt_dse_ahstore)
2057
2058LJFOLD(USTORE any any)
2059LJFOLDX(lj_opt_dse_ustore)
2060
2061LJFOLD(FSTORE any any)
2062LJFOLDX(lj_opt_dse_fstore)
2063
2064LJFOLD(XSTORE any any)
2065LJFOLDX(lj_opt_dse_xstore)
2066
2067LJFOLD(NEWREF any any) /* Treated like a store. */
2068LJFOLD(CALLS any any)
2069LJFOLD(CALLL any any) /* Safeguard fallback. */
2070LJFOLD(CALLXS any any)
2071LJFOLD(XBAR)
2072LJFOLD(RETF any any) /* Modifies BASE. */
2073LJFOLD(TNEW any any)
2074LJFOLD(TDUP any)
2075LJFOLD(CNEW any any)
2076LJFOLD(XSNEW any any)
2077LJFOLDX(lj_ir_emit)
2078
2079/* ------------------------------------------------------------------------ */
2080
2081/* Every entry in the generated hash table is a 32 bit pattern:
2082**
2083**   xxxxxxxx iiiiiii lllllll rrrrrrrrrr
2084**
2085**   xxxxxxxx = 8 bit index into fold function table
2086**    iiiiiii = 7 bit folded instruction opcode
2087**    lllllll = 7 bit left instruction opcode
2088** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
2089*/
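/* Illustrative only (standalone sketch, hypothetical helper, not part of the
** generated table): packing one such entry from an 8 bit fold function index
** and a 24 bit 'ins left right' key. The lookup below uses fh >> 24 to select
** the fold function and fh & 0xffffff to compare against the masked key.
*/
#include <stdint.h>

static uint32_t sketch_fold_entry(uint32_t funcidx, uint32_t key24)
{
  return (funcidx << 24) | (key24 & 0xffffffu);
}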
2090
2091#include "lj_folddef.h"
2092
2093/* ------------------------------------------------------------------------ */
2094
2095/* Fold IR instruction. */
2096TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
2097{
2098 uint32_t key, any;
2099 IRRef ref;
2100
2101 if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
2102 lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
2103 JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
2104 /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
2105 if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
2106 return lj_opt_cse(J);
2107
2108 /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
2109 if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
2110 (JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
2111 irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
2112 return lj_ir_emit(J);
2113
2114 /* DSE disabled? Emit raw IR for stores. */
2115 if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S)
2116 return lj_ir_emit(J);
2117 }
2118
2119 /* Fold engine start/retry point. */
2120retry:
2121 /* Construct key from opcode and operand opcodes (unless literal/none). */
2122 key = ((uint32_t)fins->o << 17);
2123 if (fins->op1 >= J->cur.nk) {
2124 key += (uint32_t)IR(fins->op1)->o << 10;
2125 *fleft = *IR(fins->op1);
2126 }
2127 if (fins->op2 >= J->cur.nk) {
2128 key += (uint32_t)IR(fins->op2)->o;
2129 *fright = *IR(fins->op2);
2130 } else {
2131 key += (fins->op2 & 0x3ffu); /* Literal mask. Must include IRCONV_*MASK. */
2132 }
2133
2134 /* Check for a match in order from most specific to least specific. */
2135 any = 0;
2136 for (;;) {
2137 uint32_t k = key | (any & 0x1ffff);
2138 uint32_t h = fold_hashkey(k);
2139 uint32_t fh = fold_hash[h]; /* Lookup key in semi-perfect hash table. */
2140 if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
2141 ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
2142 if (ref != NEXTFOLD)
2143 break;
2144 }
2145 if (any == 0xfffff) /* Exhausted folding. Pass on to CSE. */
2146 return lj_opt_cse(J);
2147 any = (any | (any >> 10)) ^ 0xffc00;  /* Widen the wildcard: left, then right, then both. */
2148 }
2149
2150 /* Return value processing, ordered by frequency. */
2151 if (LJ_LIKELY(ref >= MAX_FOLD))
2152 return TREF(ref, irt_t(IR(ref)->t));
2153 if (ref == RETRYFOLD)
2154 goto retry;
2155 if (ref == KINTFOLD)
2156 return lj_ir_kint(J, fins->i);
2157 if (ref == FAILFOLD)
2158 lj_trace_err(J, LJ_TRERR_GFAIL);
2159 lua_assert(ref == DROPFOLD);
2160 return REF_DROP;
2161}
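/* Standalone sketch (illustrative only, hypothetical helper, not part of this
** file): the wildcard progression computed by 'any = (any | (any >> 10)) ^
** 0xffc00' above. Successive steps yield the operand masks OR'ed into the
** 17 bit 'left right' part of the key:
**   step 0 -> 0x00000 (ins left right)
**   step 1 -> 0x1fc00 (ins any  right)
**   step 2 -> 0x003ff (ins left any)
**   step 3 -> 0x1ffff (ins any  any)
*/
#include <stdint.h>

static uint32_t sketch_fold_wildcard(int step)
{
  uint32_t any = 0;
  while (step-- > 0)
    any = (any | (any >> 10)) ^ 0xffc00;  /* Same update as in the loop above. */
  return any & 0x1ffff;  /* Only the operand part of the key is masked. */
}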
2162
2163/* -- Common-Subexpression Elimination ------------------------------------ */
2164
2165/* CSE an IR instruction. This is very fast due to the skip-list chains. */
2166TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
2167{
2168 /* Avoid a narrow-to-wide store-to-load forwarding stall. */
2169 IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
2170 IROp op = fins->o;
2171 if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
2172 /* Limited search for same operands in per-opcode chain. */
2173 IRRef ref = J->chain[op];
2174 IRRef lim = fins->op1;
2175 if (fins->op2 > lim) lim = fins->op2; /* Relies on lit < REF_BIAS. */
2176 while (ref > lim) {
2177 if (IR(ref)->op12 == op12)
2178 return TREF(ref, irt_t(IR(ref)->t)); /* Common subexpression found. */
2179 ref = IR(ref)->prev;
2180 }
2181 }
2182 /* Otherwise emit IR (inlined for speed). */
2183 {
2184 IRRef ref = lj_ir_nextins(J);
2185 IRIns *ir = IR(ref);
2186 ir->prev = J->chain[op];
2187 ir->op12 = op12;
2188 J->chain[op] = (IRRef1)ref;
2189 ir->o = fins->o;
2190 J->guardemit.irt |= fins->t.irt;
2191 return TREF(ref, irt_t((ir->t = fins->t)));
2192 }
2193}
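/* Standalone sketch (illustrative only, simplified types and hypothetical
** names, not part of this file): the search structure used above.
** J->chain[op] holds the reference of the most recent instruction with
** opcode op, and each instruction's prev field links to the previous one
** with the same opcode, so CSE only visits candidates that can match.
*/
#include <stdint.h>

typedef struct SketchIns {
  uint32_t op12;   /* Combined operands, as compared by lj_opt_cse. */
  uint16_t prev;   /* Previous instruction with the same opcode (0 = none). */
} SketchIns;

/* Return a matching earlier instruction above the search limit, or 0. */
static uint16_t sketch_cse(const SketchIns *ir, const uint16_t *chain,
                           uint8_t op, uint32_t op12, uint16_t lim)
{
  uint16_t ref = chain[op];
  while (ref > lim) {
    if (ir[ref].op12 == op12)
      return ref;
    ref = ir[ref].prev;
  }
  return 0;  /* Caller would emit a new instruction and update chain[op]. */
}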
2194
2195/* CSE with explicit search limit. */
2196TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
2197{
2198 IRRef ref = J->chain[fins->o];
2199 IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
2200 while (ref > lim) {
2201 if (IR(ref)->op12 == op12)
2202 return ref;
2203 ref = IR(ref)->prev;
2204 }
2205 return lj_ir_emit(J);
2206}
2207
2208/* ------------------------------------------------------------------------ */
2209
2210#undef IR
2211#undef fins
2212#undef fleft
2213#undef fright
2214#undef knumleft
2215#undef knumright
2216#undef emitir
2217
2218#endif