diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/luajit-2.0/src/lj_opt_fold.c | 2218 |
1 files changed, 0 insertions, 2218 deletions
diff --git a/libraries/luajit-2.0/src/lj_opt_fold.c b/libraries/luajit-2.0/src/lj_opt_fold.c deleted file mode 100644 index e7d4f9c..0000000 --- a/libraries/luajit-2.0/src/lj_opt_fold.c +++ /dev/null | |||
@@ -1,2218 +0,0 @@ | |||
1 | /* | ||
2 | ** FOLD: Constant Folding, Algebraic Simplifications and Reassociation. | ||
3 | ** ABCelim: Array Bounds Check Elimination. | ||
4 | ** CSE: Common-Subexpression Elimination. | ||
5 | ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h | ||
6 | */ | ||
7 | |||
8 | #define lj_opt_fold_c | ||
9 | #define LUA_CORE | ||
10 | |||
11 | #include <math.h> | ||
12 | |||
13 | #include "lj_obj.h" | ||
14 | |||
15 | #if LJ_HASJIT | ||
16 | |||
17 | #include "lj_str.h" | ||
18 | #include "lj_tab.h" | ||
19 | #include "lj_ir.h" | ||
20 | #include "lj_jit.h" | ||
21 | #include "lj_iropt.h" | ||
22 | #include "lj_trace.h" | ||
23 | #include "lj_carith.h" | ||
24 | #include "lj_vm.h" | ||
25 | |||
26 | /* Here's a short description how the FOLD engine processes instructions: | ||
27 | ** | ||
28 | ** The FOLD engine receives a single instruction stored in fins (J->fold.ins). | ||
29 | ** The instruction and its operands are used to select matching fold rules. | ||
30 | ** These are applied iteratively until a fixed point is reached. | ||
31 | ** | ||
32 | ** The 8 bit opcode of the instruction itself plus the opcodes of the | ||
33 | ** two instructions referenced by its operands form a 24 bit key | ||
34 | ** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits). | ||
35 | ** | ||
36 | ** This key is used for partial matching against the fold rules. The | ||
37 | ** left/right operand fields of the key are successively masked with | ||
38 | ** the 'any' wildcard, from most specific to least specific: | ||
39 | ** | ||
40 | ** ins left right | ||
41 | ** ins any right | ||
42 | ** ins left any | ||
43 | ** ins any any | ||
44 | ** | ||
45 | ** The masked key is used to lookup a matching fold rule in a semi-perfect | ||
46 | ** hash table. If a matching rule is found, the related fold function is run. | ||
47 | ** Multiple rules can share the same fold function. A fold rule may return | ||
48 | ** one of several special values: | ||
49 | ** | ||
50 | ** - NEXTFOLD means no folding was applied, because an additional test | ||
51 | ** inside the fold function failed. Matching continues against less | ||
52 | ** specific fold rules. Finally the instruction is passed on to CSE. | ||
53 | ** | ||
54 | ** - RETRYFOLD means the instruction was modified in-place. Folding is | ||
55 | ** retried as if this instruction had just been received. | ||
56 | ** | ||
57 | ** All other return values are terminal actions -- no further folding is | ||
58 | ** applied: | ||
59 | ** | ||
60 | ** - INTFOLD(i) returns a reference to the integer constant i. | ||
61 | ** | ||
62 | ** - LEFTFOLD and RIGHTFOLD return the left/right operand reference | ||
63 | ** without emitting an instruction. | ||
64 | ** | ||
65 | ** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit | ||
66 | ** it without passing through any further optimizations. | ||
67 | ** | ||
68 | ** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have | ||
69 | ** no result (e.g. guarded assertions): FAILFOLD means the guard would | ||
70 | ** always fail, i.e. the current trace is pointless. DROPFOLD means | ||
71 | ** the guard is always true and has been eliminated. CONDFOLD is a | ||
72 | ** shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail). | ||
73 | ** | ||
74 | ** - Any other return value is interpreted as an IRRef or TRef. This | ||
75 | ** can be a reference to an existing or a newly created instruction. | ||
76 | ** Only the least-significant 16 bits (IRRef1) are used to form a TRef | ||
77 | ** which is finally returned to the caller. | ||
78 | ** | ||
79 | ** The FOLD engine receives instructions both from the trace recorder and | ||
80 | ** substituted instructions from LOOP unrolling. This means all types | ||
81 | ** of instructions may end up here, even though the recorder bypasses | ||
82 | ** FOLD in some cases. Thus all loads, stores and allocations must have | ||
83 | ** an any/any rule to avoid being passed on to CSE. | ||
84 | ** | ||
85 | ** Carefully read the following requirements before adding or modifying | ||
86 | ** any fold rules: | ||
87 | ** | ||
88 | ** Requirement #1: All fold rules must preserve their destination type. | ||
89 | ** | ||
90 | ** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result). | ||
91 | ** Never use lj_ir_knumint() which can have either a KINT or KNUM result. | ||
92 | ** | ||
93 | ** Requirement #2: Fold rules should not create *new* instructions which | ||
94 | ** reference operands *across* PHIs. | ||
95 | ** | ||
96 | ** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the | ||
97 | ** left operand is a PHI. Then fleft->op1 would point across the PHI | ||
98 | ** frontier to an invariant instruction. Adding a PHI for this instruction | ||
99 | ** would be counterproductive. The solution is to add a barrier which | ||
100 | ** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case. | ||
101 | ** The only exception is for recurrences with high latencies like | ||
102 | ** repeated int->num->int conversions. | ||
103 | ** | ||
104 | ** One could relax this condition a bit if the referenced instruction is | ||
105 | ** a PHI, too. But this often leads to worse code due to excessive | ||
106 | ** register shuffling. | ||
107 | ** | ||
108 | ** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though. | ||
109 | ** Even returning fleft->op1 would be ok, because a new PHI will added, | ||
110 | ** if needed. But again, this leads to excessive register shuffling and | ||
111 | ** should be avoided. | ||
112 | ** | ||
113 | ** Requirement #3: The set of all fold rules must be monotonic to guarantee | ||
114 | ** termination. | ||
115 | ** | ||
116 | ** The goal is optimization, so one primarily wants to add strength-reducing | ||
117 | ** rules. This means eliminating an instruction or replacing an instruction | ||
118 | ** with one or more simpler instructions. Don't add fold rules which point | ||
119 | ** into the other direction. | ||
120 | ** | ||
121 | ** Some rules (like commutativity) do not directly reduce the strength of | ||
122 | ** an instruction, but enable other fold rules (e.g. by moving constants | ||
123 | ** to the right operand). These rules must be made unidirectional to avoid | ||
124 | ** cycles. | ||
125 | ** | ||
126 | ** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it. | ||
127 | */ | ||
128 | |||
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref) (&J->cur.ir[(ref)])
/* The instruction being folded and copies of its two operand instructions. */
#define fins (&J->fold.ins)
#define fleft (&J->fold.left)
#define fright (&J->fold.right)
/* Numeric payloads of constant operands -- only valid for KNUM operands. */
#define knumleft (ir_knum(fleft)->n)
#define knumright (ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them.
** LJFOLD declares a rule key for the following handler; LJFOLDX binds
** rules to an existing function (e.g. lj_opt_cse). Both expand to nothing
** for the C compiler -- buildvm extracts them textually.
*/
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name) static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs (see requirement #2). */
#define PHIBARRIER(ir) if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there is at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || J->chain[IR_TOSTR]))
161 | |||
162 | /* -- Constant folding for FP numbers ------------------------------------- */ | ||
163 | |||
/* Fold FP arithmetic on two number constants. One shared handler:
** lj_vm_foldarith() dispatches on the opcode offset relative to IR_ADD.
*/
LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(NEG KNUM KNUM)
LJFOLD(ABS KNUM KNUM)
LJFOLD(ATAN2 KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);  /* Always a KNUM result (requirement #1). */
}

/* Fold ldexp() with an integer exponent constant.
** Not folded on x86/x64 (falls through to further rules/CSE there).
*/
LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

/* Fold an FP math function on a number constant.
** op2 is the IRFPMathOp literal selecting the function.
*/
LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

/* Fold x^k for constant number base and constant integer exponent. */
LJFOLD(POW KNUM KINT)
LJFOLDF(kfold_numpow)
{
  lua_Number a = knumleft;
  lua_Number b = (lua_Number)fright->i;
  lua_Number y = lj_vm_foldarith(a, b, IR_POW - IR_ADD);
  return lj_ir_knum(J, y);
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  /* Comparisons are guards: drop if the comparison holds, else fail. */
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}
225 | |||
226 | /* -- Constant folding for 32 bit integers -------------------------------- */ | ||
227 | |||
228 | static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op) | ||
229 | { | ||
230 | switch (op) { | ||
231 | case IR_ADD: k1 += k2; break; | ||
232 | case IR_SUB: k1 -= k2; break; | ||
233 | case IR_MUL: k1 *= k2; break; | ||
234 | case IR_MOD: k1 = lj_vm_modi(k1, k2); break; | ||
235 | case IR_NEG: k1 = -k1; break; | ||
236 | case IR_BAND: k1 &= k2; break; | ||
237 | case IR_BOR: k1 |= k2; break; | ||
238 | case IR_BXOR: k1 ^= k2; break; | ||
239 | case IR_BSHL: k1 <<= (k2 & 31); break; | ||
240 | case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break; | ||
241 | case IR_BSAR: k1 >>= (k2 & 31); break; | ||
242 | case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break; | ||
243 | case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break; | ||
244 | case IR_MIN: k1 = k1 < k2 ? k1 : k2; break; | ||
245 | case IR_MAX: k1 = k1 > k2 ? k1 : k2; break; | ||
246 | default: lua_assert(0); break; | ||
247 | } | ||
248 | return k1; | ||
249 | } | ||
250 | |||
/* Fold any 32 bit integer operation on two integer constants. */
LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

/* Fold overflow-checked integer arithmetic on two constants.
** Computed in FP to detect overflow: if the exact result does not
** round-trip through int32_t, the guard would always fail -> FAILFOLD.
*/
LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
				 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;  /* Overflowed: trace would always exit here. */
  return INTFOLD(k);
}

/* Fold bitwise NOT of an integer constant. */
LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

/* Fold byte-swap of an integer constant. */
LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

/* Fold comparisons of two integer constants. Signed ops compare as
** int32_t, unsigned ops (and ABC bounds checks) as uint32_t.
*/
LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:  /* ABC folds like an unsigned greater-than check. */
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lua_assert(0); return FAILFOLD;
  }
}
321 | |||
322 | LJFOLD(UGE any KINT) | ||
323 | LJFOLDF(kfold_intcomp0) | ||
324 | { | ||
325 | if (fright->i == 0) | ||
326 | return DROPFOLD; | ||
327 | return NEXTFOLD; | ||
328 | } | ||
329 | |||
330 | /* -- Constant folding for 64 bit integers -------------------------------- */ | ||
331 | |||
/* Constant-fold a 64 bit integer operation.
** Only the operations actually reachable for the current build are
** compiled in: ADD/SUB are also needed for 64 bit pointer arithmetic
** (LJ_64), the rest only with the FFI enabled. Unsigned arithmetic
** gives well-defined wraparound.
*/
static uint64_t kfold_int64arith(uint64_t k1, uint64_t k2, IROp op)
{
  switch (op) {
#if LJ_64 || LJ_HASFFI
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
#endif
#if LJ_HASFFI
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
#endif
  default: UNUSED(k2); lua_assert(0); break;  /* Unreachable op for build. */
  }
  return k1;
}
349 | |||
/* Fold 64 bit arithmetic/bitwise ops on two 64 bit integer constants. */
LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(ir_k64(fleft)->u64,
				    ir_k64(fright)->u64, (IROp)fins->o));
}

/* Fold 64 bit DIV/MOD/POW via the FFI arithmetic helpers, which also
** handle the corner cases (division by zero etc.). Signedness is taken
** from the result type of the instruction.
*/
LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
	 fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
			     lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
	 fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
			     lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;  /* Rules unreachable w/o FFI. */
#endif
}

/* Fold 64 bit shifts/rotates by a constant count (masked to 0-63).
** BSHL is also needed without the FFI (64 bit pointer arithmetic).
*/
LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI || LJ_64
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  switch ((IROp)fins->o) {
  case IR_BSHL: k <<= sh; break;
#if LJ_HASFFI
  case IR_BSHR: k >>= sh; break;
  case IR_BSAR: k = (uint64_t)((int64_t)k >> sh); break;
  case IR_BROL: k = lj_rol(k, sh); break;
  case IR_BROR: k = lj_ror(k, sh); break;
#endif
  default: lua_assert(0); break;
  }
  return INT64FOLD(k);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

/* Fold bitwise NOT of a 64 bit constant (FFI only). */
LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}

/* Fold byte-swap of a 64 bit constant (FFI only). */
LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}
429 | |||
430 | LJFOLD(LT KINT64 KINT) | ||
431 | LJFOLD(GE KINT64 KINT) | ||
432 | LJFOLD(LE KINT64 KINT) | ||
433 | LJFOLD(GT KINT64 KINT) | ||
434 | LJFOLD(ULT KINT64 KINT) | ||
435 | LJFOLD(UGE KINT64 KINT) | ||
436 | LJFOLD(ULE KINT64 KINT) | ||
437 | LJFOLD(UGT KINT64 KINT) | ||
438 | LJFOLDF(kfold_int64comp) | ||
439 | { | ||
440 | #if LJ_HASFFI | ||
441 | uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64; | ||
442 | switch ((IROp)fins->o) { | ||
443 | case IR_LT: return CONDFOLD(a < b); | ||
444 | case IR_GE: return CONDFOLD(a >= b); | ||
445 | case IR_LE: return CONDFOLD(a <= b); | ||
446 | case IR_GT: return CONDFOLD(a > b); | ||
447 | case IR_ULT: return CONDFOLD((uint64_t)a < (uint64_t)b); | ||
448 | case IR_UGE: return CONDFOLD((uint64_t)a >= (uint64_t)b); | ||
449 | case IR_ULE: return CONDFOLD((uint64_t)a <= (uint64_t)b); | ||
450 | case IR_UGT: return CONDFOLD((uint64_t)a > (uint64_t)b); | ||
451 | default: lua_assert(0); return FAILFOLD; | ||
452 | } | ||
453 | #else | ||
454 | UNUSED(J); lua_assert(0); return FAILFOLD; | ||
455 | #endif | ||
456 | } | ||
457 | |||
458 | LJFOLD(UGE any KINT64) | ||
459 | LJFOLDF(kfold_int64comp0) | ||
460 | { | ||
461 | #if LJ_HASFFI | ||
462 | if (ir_k64(fright)->u64 == 0) | ||
463 | return DROPFOLD; | ||
464 | return NEXTFOLD; | ||
465 | #else | ||
466 | UNUSED(J); lua_assert(0); return FAILFOLD; | ||
467 | #endif | ||
468 | } | ||
469 | |||
470 | /* -- Constant folding for strings ---------------------------------------- */ | ||
471 | |||
/* Fold SNEW on a constant data pointer + constant length: intern the
** string now and return it as a string constant.
*/
LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

/* SNEW with constant length 0 is the empty string, whatever the data is. */
LJFOLD(SNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

/* Fold STRREF on a string constant to a constant pointer into its data. */
LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lua_assert((MSize)fright->i < str->len);
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

/* Simplify STRREF of a freshly created string. */
LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
    lua_assert(ir->o == IR_STRREF);
    PHIBARRIER(ir);
    fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2); /* Clobbers fins! */
    fins->op1 = str;
    fins->ot = IRT(IR_STRREF, IRT_P32);
    return RETRYFOLD;
  }
  return NEXTFOLD;  /* Unreachable: both branches above return. */
}

/* Fold a call to lj_str_cmp() when both string arguments are constants. */
LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}
525 | |||
526 | /* -- Constant folding of pointer arithmetic ------------------------------ */ | ||
527 | |||
/* Fold GC-object pointer + constant offset to a constant raw pointer.
** The offset constant is a KINT64 on 64 bit targets, a KINT otherwise.
*/
LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kkptr(J, (char *)o + ofs);
}

/* Fold constant pointer + constant offset, preserving the pointer kind
** (KPTR vs KKPTR) of the left operand via fleft->o.
*/
LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}
555 | |||
556 | /* -- Constant folding of conversions ------------------------------------- */ | ||
557 | |||
/* Fold TOBIT on a number constant (Lua BitOp number->int32 conversion). */
LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

/* Fold int constant -> number constant conversion. */
LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

/* Fold int constant (reinterpreted as unsigned) -> number constant. */
LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

/* Fold int constant -> 64 bit integer constant, sign- or zero-extending
** depending on the IRCONV_SEXT flag in op2.
*/
LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

/* Fold signed 64 bit constant -> number constant. */
LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_k64(fleft)->u64);
}

/* Fold unsigned 64 bit constant -> number constant. */
LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_k64(fleft)->u64);
}

/* Fold 64 bit constant -> 32 bit integer constant (truncation). */
LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_k64(fleft)->u64);
}

/* Fold number constant -> int constant. Without IRCONV_TRUNC this is a
** checked conversion: a value that doesn't round-trip makes the guard
** always fail (FAILFOLD), which can legitimately happen during LICM.
*/
LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  if (!(fins->op2 & IRCONV_TRUNC)) {
    int32_t k = lj_num2int(n);
    if (irt_isguard(fins->t) && n != (lua_Number)k) {
      /* We're about to create a guard which always fails, like CONV +1.5.
      ** Some pathological loops cause this during LICM, e.g.:
      **   local x,k,t = 0,1.5,{1,[1.5]=2}
      **   for i=1,200 do x = x+ t[k]; k = k == 1 and 1.5 or 1 end
      **   assert(x == 300)
      */
      return FAILFOLD;
    }
    return INTFOLD(k);
  } else {
    return INTFOLD((int32_t)n);
  }
}

/* Fold number constant -> u32 constant (truncating only). */
LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INTFOLD((int32_t)(uint32_t)knumleft);
}

/* Fold number constant -> i64 constant (truncating only). */
LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

/* Fold number constant -> u64 constant (truncating only). */
LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  lua_assert((fins->op2 & IRCONV_TRUNC));
  return INT64FOLD(lj_num2u64(knumleft));
}

/* Fold number constant -> string constant. */
LJFOLD(TOSTR KNUM)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_str_fromnum(J->L, &knumleft));
}

/* Fold integer constant -> string constant. */
LJFOLD(TOSTR KINT)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, lj_str_fromint(J->L, fleft->i));
}

/* Fold string constant -> number constant; an unparsable string means the
** conversion (a guard) would always fail.
*/
LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_str_tonum(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}
667 | |||
668 | /* -- Constant folding of equality checks --------------------------------- */ | ||
669 | |||
/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  /* Interned constants: reference equality is value equality.
  ** XOR with (o == IR_NE) inverts the result for the NE rules.
  */
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}
690 | |||
691 | /* -- Algebraic shortcuts ------------------------------------------------- */ | ||
692 | |||
/* Rounding an already-rounded value is a no-op. */
LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

/* abs(abs(x)) ==> abs(x): the inner result is already non-negative. */
LJFOLD(ABS ABS KNUM)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

/* Drop the inner negation under abs (modifies fins, so PHI barrier). */
LJFOLD(ABS NEG KNUM)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x -- involutions cancel out. */
}
727 | |||
728 | /* -- FP algebraic simplifications ---------------------------------------- */ | ||
729 | |||
730 | /* FP arithmetic is tricky -- there's not much to simplify. | ||
731 | ** Please note the following common pitfalls before sending "improvements": | ||
732 | ** x+0 ==> x is INVALID for x=-0 | ||
733 | ** 0-x ==> -x is INVALID for x=+0 | ||
734 | ** x*0 ==> 0 is INVALID for x=-0, x=+-Inf or x=NaN | ||
735 | */ | ||
736 | |||
/* Rewrite addition of a negation into a subtraction. */
LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

/* Same for a negated right operand. */
LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

/* Subtracting zero is safe (unlike adding zero, see pitfalls above). */
LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  lua_Number n = knumright;
  if (n == 0.0)  /* x - (+-0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

/* Move the negation onto the constant operand. */
LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

/* Rewrite subtraction of a negation into an addition. */
LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

/* Strength-reduce MUL/DIV by the constants 1, -1 and (MUL only) 2. */
LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    fins->o = IR_NEG;
    fins->op2 = (IRRef1)lj_ir_knum_neg(J);
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* Fold the negation into the constant operand of MUL/DIV. */
LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

/* Two negations under MUL/DIV cancel out. */
LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

/* Unroll x^k into a multiplication chain (square-and-multiply). */
LJFOLD(POW any KINT)
LJFOLDF(simplify_numpow_xk)
{
  int32_t k = fright->i;
  TRef ref = fins->op1;
  if (k == 0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  if (k == 1)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  if ((uint32_t)(k+65536) > 2*65536u)  /* Limit code explosion. */
    return NEXTFOLD;
  if (k < 0) {  /* x ^ (-k) ==> (1/x) ^ k. */
    ref = emitir(IRTN(IR_DIV), lj_ir_knum_one(J), ref);
    k = -k;
  }
  /* Unroll x^k for 1 <= k <= 65536. */
  for (; (k & 1) == 0; k >>= 1)  /* Handle leading zeros. */
    ref = emitir(IRTN(IR_MUL), ref, ref);
  if ((k >>= 1) != 0) {  /* Handle trailing bits. */
    TRef tmp = emitir(IRTN(IR_MUL), ref, ref);
    for (; k != 1; k >>= 1) {
      if (k & 1)
	ref = emitir(IRTN(IR_MUL), ref, tmp);
      tmp = emitir(IRTN(IR_MUL), tmp, tmp);
    }
    ref = emitir(IRTN(IR_MUL), ref, tmp);
  }
  return ref;
}

/* Strength-reduce a power of two base to LDEXP. */
LJFOLD(POW KNUM any)
LJFOLDF(simplify_numpow_kx)
{
  lua_Number n = knumleft;
  if (n == 2.0) {  /* 2.0 ^ i ==> ldexp(1.0, tonum(i)) */
    fins->o = IR_CONV;
#if LJ_TARGET_X86ORX64
    /* On x86/x64 LDEXP takes a number exponent: fold in an int->num
    ** conversion of the exponent first.
    */
    fins->op1 = fins->op2;
    fins->op2 = IRCONV_NUM_INT;
    fins->op2 = (IRRef1)lj_opt_fold(J);
#endif
    fins->op1 = (IRRef1)lj_ir_knum_one(J);
    fins->o = IR_LDEXP;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
870 | |||
871 | /* -- Simplify conversions ------------------------------------------------ */ | ||
872 | |||
LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
/* Shortcut int->num of a guarded num->int back to the original number. */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}
882 | |||
883 | LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ | ||
884 | LJFOLDF(simplify_conv_int_num) | ||
885 | { | ||
886 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ | ||
887 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) | ||
888 | return fleft->op1; | ||
889 | return NEXTFOLD; | ||
890 | } | ||
891 | |||
892 | LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32*/ | ||
893 | LJFOLDF(simplify_conv_u32_num) | ||
894 | { | ||
895 | /* Fold even across PHI to avoid expensive num->int conversions in loop. */ | ||
896 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) | ||
897 | return fleft->op1; | ||
898 | return NEXTFOLD; | ||
899 | } | ||
900 | |||
LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32*/
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32*/
/* Narrow an int/u32 -> num -> 64 bit int chain to a direct extension. */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;  /* 32 bit ops do an implicit zero-extension on x64. */
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}
923 | |||
924 | LJFOLD(CONV CONV IRCONV_INT_I64) /* _INT */ | ||
925 | LJFOLD(CONV CONV IRCONV_INT_U64) /* _INT */ | ||
926 | LJFOLDF(simplify_conv_int_i64) | ||
927 | { | ||
928 | PHIBARRIER(fleft); | ||
929 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) | ||
930 | return fleft->op1; | ||
931 | return NEXTFOLD; | ||
932 | } | ||
933 | |||
934 | LJFOLD(CONV CONV IRCONV_FLOAT_NUM) /* _FLOAT */ | ||
935 | LJFOLDF(simplify_conv_flt_num) | ||
936 | { | ||
937 | PHIBARRIER(fleft); | ||
938 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT) | ||
939 | return fleft->op1; | ||
940 | return NEXTFOLD; | ||
941 | } | ||
942 | |||
/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT ||
      (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    /* Fold even across PHI to avoid expensive num->int conversions in loop. */
    lua_assert(irt_isnum(fleft->t));
    return fleft->op1;  /* TOBIT of an int-sourced number is that int. */
  }
  return NEXTFOLD;
}
955 | |||
956 | /* Shortcut floor/ceil/round + IRT_NUM <- IRT_INT/IRT_U32 conversion. */ | ||
957 | LJFOLD(FPMATH CONV IRFPM_FLOOR) | ||
958 | LJFOLD(FPMATH CONV IRFPM_CEIL) | ||
959 | LJFOLD(FPMATH CONV IRFPM_TRUNC) | ||
960 | LJFOLDF(simplify_floor_conv) | ||
961 | { | ||
962 | if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT || | ||
963 | (fleft->op2 & IRCONV_SRCMASK) == IRT_U32) | ||
964 | return LEFTFOLD; | ||
965 | return NEXTFOLD; | ||
966 | } | ||
967 | |||
/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLD(CONV any IRCONV_U64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  /* An unsigned 8/16 bit XLOAD cannot be negative, so SEXT == ZEXT. */
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;  /* Peel off a constant offset. */
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lua_assert(irt_isint(J->scev.t));
    if (lo && IR(lo)->i + ofs >= 0) {  /* Value provably non-negative? */
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}
1002 | |||
/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
/* (int)(a o b) ==> (int)a o (int)b -- redo the arithmetic in 32 bits. */
LJFOLDF(simplify_conv_narrow)
{
  /* Operands are copied out first: emitir() may invalidate fleft. */
  IROp op = (IROp)fleft->o;
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRTI(IR_CONV), op1, mode);
  op2 = emitir(IRTI(IR_CONV), op2, mode);
  fins->ot = IRTI(op);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
}
1022 | |||
/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
/* Common CONVs ignoring irrelevant mode bits; stronger guards may subsume. */
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];  /* Walk the chain of prior CONVs. */
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
	  irt_isguard(ir->t) >= guard)
	return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}
1042 | |||
/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
/* Delegate narrowing of FP arithmetic to the narrowing optimizer. */
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lua_assert(fins->o != IR_CONV || (fins->op2&IRCONV_CONVMASK) != IRCONV_TOBIT);
  return lj_opt_narrow_convert(J);
}
1059 | |||
1060 | /* -- Integer algebraic simplifications ----------------------------------- */ | ||
1061 | |||
1062 | LJFOLD(ADD any KINT) | ||
1063 | LJFOLD(ADDOV any KINT) | ||
1064 | LJFOLD(SUBOV any KINT) | ||
1065 | LJFOLDF(simplify_intadd_k) | ||
1066 | { | ||
1067 | if (fright->i == 0) /* i o 0 ==> i */ | ||
1068 | return LEFTFOLD; | ||
1069 | return NEXTFOLD; | ||
1070 | } | ||
1071 | |||
LJFOLD(MULOV any KINT)
/* Simplify overflow-checked multiply by small constants. */
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;  /* Keep the overflow check. */
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
1086 | |||
LJFOLD(SUB any KINT)
/* Canonicalize constant subtraction to constant addition. */
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, -fright->i);  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}
1096 | |||
LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
/* Turn subtraction from zero into negation. */
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
1108 | |||
1109 | LJFOLD(ADD any KINT64) | ||
1110 | LJFOLDF(simplify_intadd_k64) | ||
1111 | { | ||
1112 | if (ir_kint64(fright)->u64 == 0) /* i + 0 ==> i */ | ||
1113 | return LEFTFOLD; | ||
1114 | return NEXTFOLD; | ||
1115 | } | ||
1116 | |||
LJFOLD(SUB any KINT64)
/* Canonicalize 64 bit constant subtraction to constant addition. */
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, (uint64_t)-(int64_t)k);
  return RETRYFOLD;
}
1127 | |||
/* Shared helper for the 32/64 bit multiply rules below: rewrite a multiply
** by a positive power-of-two constant k into a shift.
*/
static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
1143 | |||
LJFOLD(MUL any KINT)
/* Simplify multiply by a 32 bit constant: zero or a positive power of two. */
LJFOLDF(simplify_intmul_k32)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return INTFOLD(0);
  else if (fright->i > 0)  /* Negative constants are left to the backend. */
    return simplify_intmul_k(J, fright->i);
  return NEXTFOLD;
}
1153 | |||
LJFOLD(MUL any KINT64)
/* Simplify multiply by a 64 bit constant: zero or a small power of two. */
LJFOLDF(simplify_intmul_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i * 0 ==> 0 */
    return INT64FOLD(0);
#if LJ_64
  /* NYI: SPLIT for BSHL and 32 bit backend support. */
  else if (ir_kint64(fright)->u64 < 0x80000000u)
    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
#endif
  return NEXTFOLD;
}
1166 | |||
LJFOLD(MOD any KINT)
/* Replace modulo by a power of two with a bitwise AND. */
LJFOLDF(simplify_intmod_k)
{
  int32_t k = fright->i;
  lua_assert(k != 0);  /* Caller guarantees a non-zero divisor. */
  if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
    fins->o = IR_BAND;
    fins->op2 = lj_ir_kint(J, k-1);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
1179 | |||
1180 | LJFOLD(MOD KINT any) | ||
1181 | LJFOLDF(simplify_intmod_kleft) | ||
1182 | { | ||
1183 | if (fleft->i == 0) | ||
1184 | return INTFOLD(0); | ||
1185 | return NEXTFOLD; | ||
1186 | } | ||
1187 | |||
1188 | LJFOLD(SUB any any) | ||
1189 | LJFOLD(SUBOV any any) | ||
1190 | LJFOLDF(simplify_intsub) | ||
1191 | { | ||
1192 | if (fins->op1 == fins->op2 && !irt_isnum(fins->t)) /* i - i ==> 0 */ | ||
1193 | return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0); | ||
1194 | return NEXTFOLD; | ||
1195 | } | ||
1196 | |||
1197 | LJFOLD(SUB ADD any) | ||
1198 | LJFOLDF(simplify_intsubadd_leftcancel) | ||
1199 | { | ||
1200 | if (!irt_isnum(fins->t)) { | ||
1201 | PHIBARRIER(fleft); | ||
1202 | if (fins->op2 == fleft->op1) /* (i + j) - i ==> j */ | ||
1203 | return fleft->op2; | ||
1204 | if (fins->op2 == fleft->op2) /* (i + j) - j ==> i */ | ||
1205 | return fleft->op1; | ||
1206 | } | ||
1207 | return NEXTFOLD; | ||
1208 | } | ||
1209 | |||
1210 | LJFOLD(SUB SUB any) | ||
1211 | LJFOLDF(simplify_intsubsub_leftcancel) | ||
1212 | { | ||
1213 | if (!irt_isnum(fins->t)) { | ||
1214 | PHIBARRIER(fleft); | ||
1215 | if (fins->op1 == fleft->op1) { /* (i - j) - i ==> 0 - j */ | ||
1216 | fins->op1 = (IRRef1)lj_ir_kint(J, 0); | ||
1217 | fins->op2 = fleft->op2; | ||
1218 | return RETRYFOLD; | ||
1219 | } | ||
1220 | } | ||
1221 | return NEXTFOLD; | ||
1222 | } | ||
1223 | |||
1224 | LJFOLD(SUB any SUB) | ||
1225 | LJFOLDF(simplify_intsubsub_rightcancel) | ||
1226 | { | ||
1227 | if (!irt_isnum(fins->t)) { | ||
1228 | PHIBARRIER(fright); | ||
1229 | if (fins->op1 == fright->op1) /* i - (i - j) ==> j */ | ||
1230 | return fright->op2; | ||
1231 | } | ||
1232 | return NEXTFOLD; | ||
1233 | } | ||
1234 | |||
LJFOLD(SUB any ADD)
/* Cancel a common term on the right: i - (i + j) ==> 0 - j. */
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {  /* Integer-only: not valid for FP. */
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
1253 | |||
LJFOLD(SUB ADD ADD)
/* Cancel a term common to both additions, keeping the distinct addends. */
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {  /* Integer-only: not valid for FP. */
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
1283 | |||
1284 | LJFOLD(BAND any KINT) | ||
1285 | LJFOLD(BAND any KINT64) | ||
1286 | LJFOLDF(simplify_band_k) | ||
1287 | { | ||
1288 | int64_t k = fright->o == IR_KINT ? (int64_t)fright->i : | ||
1289 | (int64_t)ir_k64(fright)->u64; | ||
1290 | if (k == 0) /* i & 0 ==> 0 */ | ||
1291 | return RIGHTFOLD; | ||
1292 | if (k == -1) /* i & -1 ==> i */ | ||
1293 | return LEFTFOLD; | ||
1294 | return NEXTFOLD; | ||
1295 | } | ||
1296 | |||
1297 | LJFOLD(BOR any KINT) | ||
1298 | LJFOLD(BOR any KINT64) | ||
1299 | LJFOLDF(simplify_bor_k) | ||
1300 | { | ||
1301 | int64_t k = fright->o == IR_KINT ? (int64_t)fright->i : | ||
1302 | (int64_t)ir_k64(fright)->u64; | ||
1303 | if (k == 0) /* i | 0 ==> i */ | ||
1304 | return LEFTFOLD; | ||
1305 | if (k == -1) /* i | -1 ==> -1 */ | ||
1306 | return RIGHTFOLD; | ||
1307 | return NEXTFOLD; | ||
1308 | } | ||
1309 | |||
LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)
/* Simplify XOR with all-zeros (identity) or all-ones (bitwise NOT). */
LJFOLDF(simplify_bxor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (k == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;  /* BNOT is unary. */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
1325 | |||
LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
/* Simplify shifts/rotates by a constant amount. */
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);  /* Reduce the shift count modulo width. */
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}
1355 | |||
LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
/* Drop a redundant mask of the shift count on targets that mask anyway. */
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {  /* Only a full-width mask is redundant. */
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
1376 | |||
1377 | LJFOLD(BSHL KINT any) | ||
1378 | LJFOLD(BSHR KINT any) | ||
1379 | LJFOLD(BSHL KINT64 any) | ||
1380 | LJFOLD(BSHR KINT64 any) | ||
1381 | LJFOLDF(simplify_shift1_ki) | ||
1382 | { | ||
1383 | int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i : | ||
1384 | (int64_t)ir_k64(fleft)->u64; | ||
1385 | if (k == 0) /* 0 o i ==> 0 */ | ||
1386 | return LEFTFOLD; | ||
1387 | return NEXTFOLD; | ||
1388 | } | ||
1389 | |||
1390 | LJFOLD(BSAR KINT any) | ||
1391 | LJFOLD(BROL KINT any) | ||
1392 | LJFOLD(BROR KINT any) | ||
1393 | LJFOLD(BSAR KINT64 any) | ||
1394 | LJFOLD(BROL KINT64 any) | ||
1395 | LJFOLD(BROR KINT64 any) | ||
1396 | LJFOLDF(simplify_shift2_ki) | ||
1397 | { | ||
1398 | int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i : | ||
1399 | (int64_t)ir_k64(fleft)->u64; | ||
1400 | if (k == 0 || k == -1) /* 0 o i ==> 0; -1 o i ==> -1 */ | ||
1401 | return LEFTFOLD; | ||
1402 | return NEXTFOLD; | ||
1403 | } | ||
1404 | |||
LJFOLD(BSHL BAND KINT)
LJFOLD(BSHR BAND KINT)
LJFOLD(BROL BAND KINT)
LJFOLD(BROR BAND KINT)
/* Distribute a constant shift over a constant mask. */
LJFOLDF(simplify_shiftk_andk)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {  /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);  /* Fold the inner shift first. */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    fins->ot = IRTI(IR_BAND);  /* Then mask with the shifted constant. */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
1423 | |||
LJFOLD(BAND BSHL KINT)
LJFOLD(BAND BSHR KINT)
/* Drop a mask that cannot change the shifted value. */
LJFOLDF(simplify_andk_shiftk)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT &&
      kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
    return LEFTFOLD;  /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
  return NEXTFOLD;
}
1434 | |||
1435 | /* -- Reassociation ------------------------------------------------------- */ | ||
1436 | |||
LJFOLD(ADD ADD KINT)
LJFOLD(MUL MUL KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
/* Reassociate to combine two 32 bit constants. */
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}
1456 | |||
LJFOLD(ADD ADD KINT64)
LJFOLD(MUL MUL KINT64)
LJFOLD(BAND BAND KINT64)
LJFOLD(BOR BOR KINT64)
LJFOLD(BXOR BXOR KINT64)
/* Reassociate to combine two 64 bit constants. */
LJFOLDF(reassoc_intarith_k64)
{
#if LJ_HASFFI || LJ_64
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(ir_k64(irk)->u64,
				  ir_k64(fright)->u64, (IROp)fins->o);
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
#else
  /* 64 bit ops cannot occur without FFI or a 64 bit target. */
  UNUSED(J); lua_assert(0); return FAILFOLD;
#endif
}
1479 | |||
1480 | LJFOLD(MIN MIN any) | ||
1481 | LJFOLD(MAX MAX any) | ||
1482 | LJFOLD(BAND BAND any) | ||
1483 | LJFOLD(BOR BOR any) | ||
1484 | LJFOLDF(reassoc_dup) | ||
1485 | { | ||
1486 | if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) | ||
1487 | return LEFTFOLD; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */ | ||
1488 | return NEXTFOLD; | ||
1489 | } | ||
1490 | |||
LJFOLD(BXOR BXOR any)
/* XOR is its own inverse: cancel a repeated operand. */
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}
1501 | |||
LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
/* Combine two nested constant shifts of the same kind. */
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = (irk->i & mask) + (fright->i & mask);
    if (k > mask) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
	return mask == 31 ? INTFOLD(0) : INT64FOLD(0);  /* Shifted out. */
      else if (fins->o == IR_BSAR)
	k = mask;  /* Arithmetic shift saturates at the full width. */
      else
	k &= mask;  /* Rotates wrap around. */
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
1528 | |||
LJFOLD(MIN MIN KNUM)
LJFOLD(MAX MAX KNUM)
LJFOLD(MIN MIN KINT)
LJFOLD(MAX MAX KINT)
/* Reassociate nested min/max to combine two constants. */
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KNUM) {
    lua_Number a = ir_knum(irk)->n;
    lua_Number y = lj_vm_foldarith(a, knumright, fins->o - IR_ADD);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_knum(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  } else if (irk->o == IR_KINT) {
    int32_t a = irk->i;
    int32_t y = kfold_intop(a, fright->i, fins->o);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}
1557 | |||
1558 | LJFOLD(MIN MAX any) | ||
1559 | LJFOLD(MAX MIN any) | ||
1560 | LJFOLDF(reassoc_minmax_left) | ||
1561 | { | ||
1562 | if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2) | ||
1563 | return RIGHTFOLD; /* (b o1 a) o2 b ==> b; (a o1 b) o2 b ==> b */ | ||
1564 | return NEXTFOLD; | ||
1565 | } | ||
1566 | |||
1567 | LJFOLD(MIN any MAX) | ||
1568 | LJFOLD(MAX any MIN) | ||
1569 | LJFOLDF(reassoc_minmax_right) | ||
1570 | { | ||
1571 | if (fins->op1 == fright->op1 || fins->op1 == fright->op2) | ||
1572 | return LEFTFOLD; /* a o2 (a o1 b) ==> a; a o2 (b o1 a) ==> a */ | ||
1573 | return NEXTFOLD; | ||
1574 | } | ||
1575 | |||
1576 | /* -- Array bounds check elimination -------------------------------------- */ | ||
1577 | |||
/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
LJFOLD(ABC any ADD)
LJFOLDF(abc_fwd)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    if (irref_isk(fright->op2)) {
      IRIns *add2 = IR(fright->op1);
      if (add2->o == IR_ADD && irref_isk(add2->op2) &&
	  IR(fright->op2)->i == -IR(add2->op2)->i) {  /* Offsets cancel? */
	IRRef ref = J->chain[IR_ABC];
	IRRef lim = add2->op1;
	if (fins->op1 > lim) lim = fins->op1;
	while (ref > lim) {  /* Search for an existing identical ABC. */
	  IRIns *ir = IR(ref);
	  if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
	    return DROPFOLD;  /* Covered by the earlier check. */
	  ref = ir->prev;
	}
      }
    }
  }
  return NEXTFOLD;
}
1604 | |||
/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize k2) ==> ABC(asize, max(k1, k2))
** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
*/
LJFOLD(ABC any KINT)
LJFOLDF(abc_k)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    IRRef ref = J->chain[IR_ABC];
    IRRef asize = fins->op1;
    while (ref > asize) {  /* Search for an ABC on the same array size. */
      IRIns *ir = IR(ref);
      if (ir->op1 == asize && irref_isk(ir->op2)) {
	int32_t k = IR(ir->op2)->i;
	if (fright->i > k)  /* Widen the existing check in place. */
	  ir->op2 = fins->op2;
	return DROPFOLD;
      }
      ref = ir->prev;
    }
    return EMITFOLD;  /* Already performed CSE. */
  }
  return NEXTFOLD;
}
1629 | |||
1630 | /* Eliminate invariant ABC inside loop. */ | ||
1631 | LJFOLD(ABC any any) | ||
1632 | LJFOLDF(abc_invar) | ||
1633 | { | ||
1634 | if (!irt_isint(fins->t) && J->chain[IR_LOOP]) /* Currently marked as PTR. */ | ||
1635 | return DROPFOLD; | ||
1636 | return NEXTFOLD; | ||
1637 | } | ||
1638 | |||
1639 | /* -- Commutativity ------------------------------------------------------- */ | ||
1640 | |||
1641 | /* The refs of commutative ops are canonicalized. Lower refs go to the right. | ||
1642 | ** Rationale behind this: | ||
1643 | ** - It (also) moves constants to the right. | ||
1644 | ** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices). | ||
1645 | ** - It helps CSE to find more matches. | ||
1646 | ** - The assembler generates better code with constants at the right. | ||
1647 | */ | ||
1648 | |||
LJFOLD(ADD any any)
LJFOLD(MUL any any)
LJFOLD(ADDOV any any)
LJFOLD(MULOV any any)
/* Canonicalize commutative ops: lower refs (e.g. constants) to the right. */
LJFOLDF(comm_swap)
{
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
1663 | |||
LJFOLD(EQ any any)
LJFOLD(NE any any)
/* Decide trivial (in)equality of identical operands, then canonicalize. */
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);  /* Same operand canonicalization as comm_swap. */
}
1673 | |||
LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
/* Decide trivial comparisons of identical operands, then canonicalize. */
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
1696 | |||
1697 | LJFOLD(BAND any any) | ||
1698 | LJFOLD(BOR any any) | ||
1699 | LJFOLD(MIN any any) | ||
1700 | LJFOLD(MAX any any) | ||
1701 | LJFOLDF(comm_dup) | ||
1702 | { | ||
1703 | if (fins->op1 == fins->op2) /* x o x ==> x */ | ||
1704 | return LEFTFOLD; | ||
1705 | return fold_comm_swap(J); | ||
1706 | } | ||
1707 | |||
1708 | LJFOLD(BXOR any any) | ||
1709 | LJFOLDF(comm_bxor) | ||
1710 | { | ||
1711 | if (fins->op1 == fins->op2) /* i xor i ==> 0 */ | ||
1712 | return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0); | ||
1713 | return fold_comm_swap(J); | ||
1714 | } | ||
1715 | |||
1716 | /* -- Simplification of compound expressions ------------------------------ */ | ||
1717 | |||
/* Make an IR constant of the given IR type from the raw bytes at p.
** Returns 0 (an invalid TRef) for unsupported types.
*/
static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
{
  int32_t k;
  switch (irt_type(ir->t)) {
  case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
  case IRT_I8: k = (int32_t)*(int8_t *)p; break;
  case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
  case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
  case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
  case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
  case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
  default: return 0;  /* Signal failure to the caller. */
  }
  return lj_ir_kint(J, k);
}
1733 | |||
/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs on x86/x64.
*/
LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lua_assert(irt_isstr(fins->t));

#if LJ_TARGET_X86ORX64
#define FOLD_SNEW_MAX_LEN	4  /* Handle string lengths 0, 1, 2, 3, 4. */
#define FOLD_SNEW_TYPE8		IRT_I8	/* Creates shorter immediates. */
#else
#define FOLD_SNEW_MAX_LEN	1  /* Handle string lengths 0 or 1. */
#define FOLD_SNEW_TYPE8		IRT_U8  /* Prefer unsigned loads. */
#endif

  if (len <= FOLD_SNEW_MAX_LEN) {
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    lua_assert(IR(strref)->o == IR_STRREF);
    if (op == IR_EQ) {
      /* Equal strings must have equal lengths: emit a guarded length check. */
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright is no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
	return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
	return DROPFOLD;  /* Different lengths: NE is always true. */
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
			       len == 2 ? IRT(IR_XLOAD, IRT_U16) :
			       IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref,
			IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
      TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
      if (len == 3)
	tmp = emitir(IRTI(IR_BAND), tmp,
		     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;  /* Zero-length: decided by the length check alone. */
    }
  }
  return NEXTFOLD;
}
1789 | |||
1790 | /* -- Loads --------------------------------------------------------------- */ | ||
1791 | |||
1792 | /* Loads cannot be folded or passed on to CSE in general. | ||
1793 | ** Alias analysis is needed to check for forwarding opportunities. | ||
1794 | ** | ||
1795 | ** Caveat: *all* loads must be listed here or they end up at CSE! | ||
1796 | */ | ||
1797 | |||
/* Array loads go through store-to-load forwarding / alias analysis. */
LJFOLD(ALOAD any)
LJFOLDX(lj_opt_fwd_aload)
1800 | |||
1801 | /* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */ | ||
/* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
LJFOLD(HLOAD KKPTR)
LJFOLDF(kfold_hload_kkptr)
{
  UNUSED(J);
  /* The only KKPTR ever fed to HLOAD is the global nil sentinel,
  ** produced by fwd_href_tnew/fwd_href_tdup above.
  */
  lua_assert(ir_kptr(fleft) == niltvg(J2G(J)));
  return TREF_NIL;
}
1809 | |||
/* Hash part, upvalue loads and table-length calls need forwarding, too. */
LJFOLD(HLOAD any)
LJFOLDX(lj_opt_fwd_hload)

LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)

LJFOLD(CALLL any IRCALL_lj_tab_len)
LJFOLDX(lj_opt_fwd_tab_len)
1818 | |||
1819 | /* Upvalue refs are really loads, but there are no corresponding stores. | ||
1820 | ** So CSE is ok for them, except for UREFO across a GC step (see below). | ||
1821 | ** If the referenced function is const, its upvalue addresses are const, too. | ||
1822 | ** This can be used to improve CSE by looking for the same address, | ||
1823 | ** even if the upvalues originate from a different function. | ||
1824 | */ | ||
LJFOLD(UREFO KGC any)
LJFOLD(UREFC KGC any)
LJFOLDF(cse_uref)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[fins->o];
    GCfunc *fn = ir_kfunc(fleft);
    /* op2 >> 8 is the upvalue index; resolve to the actual upvalue object. */
    GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
    while (ref > 0) {
      IRIns *ir = IR(ref);
      if (irref_isk(ir->op1)) {
	GCfunc *fn2 = ir_kfunc(IR(ir->op1));
	/* Match on the upvalue object itself, even across functions. */
	if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
	  /* No CSE for UREFO across a GC step: a stack (and thus an open
	  ** upvalue address) may be resized during it (see comment above).
	  */
	  if (fins->o == IR_UREFO && gcstep_barrier(J, ref))
	    break;
	  return ref;
	}
      }
      ref = ir->prev;  /* Walk the per-opcode skip-list chain. */
    }
  }
  return EMITFOLD;
}
1848 | |||
1849 | LJFOLD(HREF TNEW any) | ||
1850 | LJFOLDF(fwd_href_tnew) | ||
1851 | { | ||
1852 | if (lj_opt_fwd_href_nokey(J)) | ||
1853 | return lj_ir_kkptr(J, niltvg(J2G(J))); | ||
1854 | return NEXTFOLD; | ||
1855 | } | ||
1856 | |||
LJFOLD(HREF TDUP KPRI)
LJFOLD(HREF TDUP KGC)
LJFOLD(HREF TDUP KNUM)
LJFOLDF(fwd_href_tdup)
{
  TValue keyv;
  /* Materialize the constant key and probe the template table of the TDUP. */
  lj_ir_kvalue(J->L, &keyv, fright);
  if (lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv) == niltvg(J2G(J)) &&
      lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));  /* Key absent: ref is nil slot. */
  return NEXTFOLD;
}
1869 | |||
1870 | /* We can safely FOLD/CSE array/hash refs and field loads, since there | ||
1871 | ** are no corresponding stores. But we need to check for any NEWREF with | ||
1872 | ** an aliased table, as it may invalidate all of the pointers and fields. | ||
1873 | ** Only HREF needs the NEWREF check -- AREF and HREFK already depend on | ||
1874 | ** FLOADs. And NEWREF itself is treated like a store (see below). | ||
1875 | */ | ||
1876 | LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE) | ||
1877 | LJFOLDF(fload_tab_tnew_asize) | ||
1878 | { | ||
1879 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1)) | ||
1880 | return INTFOLD(fleft->op1); | ||
1881 | return NEXTFOLD; | ||
1882 | } | ||
1883 | |||
1884 | LJFOLD(FLOAD TNEW IRFL_TAB_HMASK) | ||
1885 | LJFOLDF(fload_tab_tnew_hmask) | ||
1886 | { | ||
1887 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1)) | ||
1888 | return INTFOLD((1 << fleft->op2)-1); | ||
1889 | return NEXTFOLD; | ||
1890 | } | ||
1891 | |||
1892 | LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE) | ||
1893 | LJFOLDF(fload_tab_tdup_asize) | ||
1894 | { | ||
1895 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1)) | ||
1896 | return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize); | ||
1897 | return NEXTFOLD; | ||
1898 | } | ||
1899 | |||
1900 | LJFOLD(FLOAD TDUP IRFL_TAB_HMASK) | ||
1901 | LJFOLDF(fload_tab_tdup_hmask) | ||
1902 | { | ||
1903 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1)) | ||
1904 | return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask); | ||
1905 | return NEXTFOLD; | ||
1906 | } | ||
1907 | |||
1908 | LJFOLD(HREF any any) | ||
1909 | LJFOLD(FLOAD any IRFL_TAB_ARRAY) | ||
1910 | LJFOLD(FLOAD any IRFL_TAB_NODE) | ||
1911 | LJFOLD(FLOAD any IRFL_TAB_ASIZE) | ||
1912 | LJFOLD(FLOAD any IRFL_TAB_HMASK) | ||
1913 | LJFOLDF(fload_tab_ah) | ||
1914 | { | ||
1915 | TRef tr = lj_opt_cse(J); | ||
1916 | return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD; | ||
1917 | } | ||
1918 | |||
1919 | /* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */ | ||
1920 | LJFOLD(FLOAD KGC IRFL_STR_LEN) | ||
1921 | LJFOLDF(fload_str_len_kgc) | ||
1922 | { | ||
1923 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) | ||
1924 | return INTFOLD((int32_t)ir_kstr(fleft)->len); | ||
1925 | return NEXTFOLD; | ||
1926 | } | ||
1927 | |||
LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    /* NOTE: PHIBARRIER may bail out to NEXTFOLD for PHI-referenced SNEWs. */
    PHIBARRIER(fleft);
    return fleft->op2;  /* SNEW op2 is the length the string was made with. */
  }
  return NEXTFOLD;
}
1937 | |||
1938 | /* The C type ID of cdata objects is immutable. */ | ||
1939 | LJFOLD(FLOAD KGC IRFL_CDATA_TYPEID) | ||
1940 | LJFOLDF(fload_cdata_typeid_kgc) | ||
1941 | { | ||
1942 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) | ||
1943 | return INTFOLD((int32_t)ir_kcdata(fleft)->typeid); | ||
1944 | return NEXTFOLD; | ||
1945 | } | ||
1946 | |||
1947 | /* Get the contents of immutable cdata objects. */ | ||
1948 | LJFOLD(FLOAD KGC IRFL_CDATA_PTR) | ||
1949 | LJFOLD(FLOAD KGC IRFL_CDATA_INT64) | ||
1950 | LJFOLDF(fload_cdata_int64_kgc) | ||
1951 | { | ||
1952 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) { | ||
1953 | void *p = cdataptr(ir_kcdata(fleft)); | ||
1954 | if (irt_is64(fins->t)) | ||
1955 | return INT64FOLD(*(uint64_t *)p); | ||
1956 | else | ||
1957 | return INTFOLD(*(int32_t *)p); | ||
1958 | } | ||
1959 | return NEXTFOLD; | ||
1960 | } | ||
1961 | |||
1962 | LJFOLD(FLOAD CNEW IRFL_CDATA_TYPEID) | ||
1963 | LJFOLD(FLOAD CNEWI IRFL_CDATA_TYPEID) | ||
1964 | LJFOLDF(fload_cdata_typeid_cnew) | ||
1965 | { | ||
1966 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) | ||
1967 | return fleft->op1; /* No PHI barrier needed. CNEW/CNEWI op1 is const. */ | ||
1968 | return NEXTFOLD; | ||
1969 | } | ||
1970 | |||
1971 | /* Pointer and int64 cdata objects are immutable. */ | ||
1972 | LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR) | ||
1973 | LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64) | ||
1974 | LJFOLDF(fload_cdata_ptr_int64_cnew) | ||
1975 | { | ||
1976 | if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) | ||
1977 | return fleft->op2; /* Fold even across PHI to avoid allocations. */ | ||
1978 | return NEXTFOLD; | ||
1979 | } | ||
1980 | |||
/* Loads of immutable fields are safe for plain CSE (no matching stores). */
LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_CDATA_TYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any)  /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)
1991 | |||
/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    /* Frame slots: CSE, but never across a RETF (which modifies BASE). */
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    /* Regular slots forward the value already cached for this slot. */
    lua_assert(J->slot[fins->op1] != 0);
    return J->slot[fins->op1];
  }
}
2004 | |||
2005 | /* Only fold for KKPTR. The pointer _and_ the contents must be const. */ | ||
2006 | LJFOLD(XLOAD KKPTR any) | ||
2007 | LJFOLDF(xload_kptr) | ||
2008 | { | ||
2009 | TRef tr = kfold_xload(J, fins, ir_kptr(fleft)); | ||
2010 | return tr ? tr : NEXTFOLD; | ||
2011 | } | ||
2012 | |||
/* Any other XLOAD goes through store-to-load forwarding. */
LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)
2015 | |||
2016 | /* -- Write barriers ------------------------------------------------------ */ | ||
2017 | |||
2018 | /* Write barriers are amenable to CSE, but not across any incremental | ||
2019 | ** GC steps. | ||
2020 | ** | ||
2021 | ** The same logic applies to open upvalue references, because a stack | ||
2022 | ** may be resized during a GC step (not the current stack, but maybe that | ||
2023 | ** of a coroutine). | ||
2024 | */ | ||
/* CSE barriers and open upvalue refs, but never across a GC step. */
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLD(UREFO any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}
2035 | |||
2036 | LJFOLD(TBAR TNEW) | ||
2037 | LJFOLD(TBAR TDUP) | ||
2038 | LJFOLDF(barrier_tnew_tdup) | ||
2039 | { | ||
2040 | /* New tables are always white and never need a barrier. */ | ||
2041 | if (fins->op1 < J->chain[IR_LOOP]) /* Except across a GC step. */ | ||
2042 | return NEXTFOLD; | ||
2043 | return DROPFOLD; | ||
2044 | } | ||
2045 | |||
2046 | /* -- Stores and allocations ---------------------------------------------- */ | ||
2047 | |||
2048 | /* Stores and allocations cannot be folded or passed on to CSE in general. | ||
2049 | ** But some stores can be eliminated with dead-store elimination (DSE). | ||
2050 | ** | ||
2051 | ** Caveat: *all* stores and allocs must be listed here or they end up at CSE! | ||
2052 | */ | ||
2053 | |||
/* Array/hash, upvalue, field and raw-memory stores go through DSE. */
LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

/* Everything below is always emitted raw (side effects / allocations). */
LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(CALLS any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any)  /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLDX(lj_ir_emit)
2078 | |||
2079 | /* ------------------------------------------------------------------------ */ | ||
2080 | |||
2081 | /* Every entry in the generated hash table is a 32 bit pattern: | ||
2082 | ** | ||
2083 | ** xxxxxxxx iiiiiii lllllll rrrrrrrrrr | ||
2084 | ** | ||
2085 | ** xxxxxxxx = 8 bit index into fold function table | ||
2086 | ** iiiiiii = 7 bit folded instruction opcode | ||
2087 | ** lllllll = 7 bit left instruction opcode | ||
2088 | ** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field | ||
2089 | */ | ||
2090 | |||
2091 | #include "lj_folddef.h" | ||
2092 | |||
2093 | /* ------------------------------------------------------------------------ */ | ||
2094 | |||
2095 | /* Fold IR instruction. */ | ||
/* Fold IR instruction in fins (J->fold.ins). Applies the rule table
** generated in lj_folddef.h, iterating until a fixed point is reached.
** Returns a TRef for the (folded/CSEd/emitted) result, or REF_DROP.
*/
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  /* Slow path when some optimizations are disabled. */
  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lua_assert(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
		JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT);
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* Forwarding or CSE disabled? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
	(JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
	irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* DSE disabled? Emit raw IR for stores. */
    if (!(J->flags & JIT_F_OPT_DSE) && irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none).
  ** Key layout (see header comment): ins opcode << 17, left opcode << 10,
  ** right opcode or 10 literal bits in the low bits.
  */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);  /* Copy operand so rules can inspect it. */
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
  } else {
    key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
  }

  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);  /* Apply current wildcard mask. */
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));  /* Run the fold rule. */
      if (ref != NEXTFOLD)
	break;
    }
    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    /* Step the wildcard mask: exact -> any-left -> any-right -> any-both. */
    any = (any | (any >> 10)) ^ 0xffc00;
  }

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));  /* Plain ref to the result. */
  if (ref == RETRYFOLD)
    goto retry;  /* Rule rewrote fins: iterate to the fixed point. */
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);  /* Rule demands a trace abort. */
  lua_assert(ref == DROPFOLD);
  return REF_DROP;  /* Instruction eliminated entirely. */
}
2162 | |||
2163 | /* -- Common-Subexpression Elimination ------------------------------------ */ | ||
2164 | |||
2165 | /* CSE an IR instruction. This is very fast due to the skip-list chains. */ | ||
/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow to wide store-to-load forwarding stall */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    /* A match can only appear after both of its operands were defined. */
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
	return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];  /* Link into the per-opcode chain. */
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    /* Record emitted guard types so later passes know a guard appeared. */
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}
2194 | |||
2195 | /* CSE with explicit search limit. */ | ||
2196 | TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim) | ||
2197 | { | ||
2198 | IRRef ref = J->chain[fins->o]; | ||
2199 | IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16); | ||
2200 | while (ref > lim) { | ||
2201 | if (IR(ref)->op12 == op12) | ||
2202 | return ref; | ||
2203 | ref = IR(ref)->prev; | ||
2204 | } | ||
2205 | return lj_ir_emit(J); | ||
2206 | } | ||
2207 | |||
2208 | /* ------------------------------------------------------------------------ */ | ||
2209 | |||
2210 | #undef IR | ||
2211 | #undef fins | ||
2212 | #undef fleft | ||
2213 | #undef fright | ||
2214 | #undef knumleft | ||
2215 | #undef knumright | ||
2216 | #undef emitir | ||
2217 | |||
2218 | #endif | ||