aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/LuaJIT-1.1.7/src/ljit_x86.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/LuaJIT-1.1.7/src/ljit_x86.dasc')
-rw-r--r--libraries/LuaJIT-1.1.7/src/ljit_x86.dasc2457
1 files changed, 0 insertions, 2457 deletions
diff --git a/libraries/LuaJIT-1.1.7/src/ljit_x86.dasc b/libraries/LuaJIT-1.1.7/src/ljit_x86.dasc
deleted file mode 100644
index f7be91e..0000000
--- a/libraries/LuaJIT-1.1.7/src/ljit_x86.dasc
+++ /dev/null
@@ -1,2457 +0,0 @@
1/*
2** Bytecode to machine code translation for x86 CPUs.
3** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
4*/
5
6|// Include common definitions and macros.
7|.include ljit_x86.dash
8|
9|// Place actionlist and globals here at the top of the file.
10|.actionlist jit_actionlist
11|.globals JSUB_
12
13/* ------------------------------------------------------------------------ */
14
15/* Architecture name string for this backend; this file targets x86. */
16const char luaJIT_arch[] = "x86";
17
18/* Forward declarations for C functions called from jsubs.
** The assembled subroutines reference these by address, so they must be
** declared before the first jsub that calls them (e.g. jit_hookins() is
** called from the HOOKINS jsub).
*/
19static void jit_hookins(lua_State *L, const Instruction *newpc);
20static void jit_gettable_fb(lua_State *L, Table *t, StkId dest);
21static void jit_settable_fb(lua_State *L, Table *t, StkId val);
22
23/* ------------------------------------------------------------------------ */
24
25/* Detect CPU features and set JIT flags.
** Assembles a tiny probe routine, links it, calls it once and frees it again.
** The probe returns 0 if the ID bit in EFLAGS cannot be toggled (no CPUID),
** otherwise the EDX feature bits of CPUID function 1.
** Sets JIT_F_CPU_CMOV and/or JIT_F_CPU_SSE2 in J->flags accordingly.
** Returns JIT_S_OK, or the status from luaJIT_link() if linking fails.
*/
26static int jit_cpudetect(jit_State *J)
27{
28 void *mcode;
29 size_t sz;
30 int status;
31 /* Some of the jsubs need the flags. So compile this separately. */
32 unsigned int feature;
33 dasm_setup(Dst, jit_actionlist);
34 | // Check for CPUID support first.
35 | pushfd
36 | pop edx
37 | mov ecx, edx
38 | xor edx, 0x00200000 // Toggle ID bit in flags.
39 | push edx
40 | popfd
41 | pushfd
42 | pop edx
43 | xor eax, eax // Zero means no features supported.
44 | cmp ecx, edx
45 | jz >1 // No ID toggle means no CPUID support.
46 |
47 | inc eax // CPUID function 1.
48 | push ebx // Callee-save ebx modified by CPUID.
49 | cpuid
50 | pop ebx
51 | mov eax, edx // Return feature support bits.
52 |1:
53 | ret
54 (void)dasm_checkstep(Dst, DASM_SECTION_CODE);
55 status = luaJIT_link(J, &mcode, &sz);
56 if (status != JIT_S_OK)
57 return status;
58 /* Check feature bits (bit 15 -> CMOV flag, bit 26 -> SSE2 flag). */
/* See the Intel/AMD manuals for the bit definitions. */
59 feature = ((unsigned int (*)(void))mcode)();
60 if (feature & (1<<15)) J->flags |= JIT_F_CPU_CMOV;
61 if (feature & (1<<26)) J->flags |= JIT_F_CPU_SSE2;
62 luaJIT_freemcode(J, mcode, sz); /* We don't need this code anymore. */
63 return JIT_S_OK;
64}
65
66/* Check some assumptions. Should compile to nop.
** Verifies at compile time that TVALUE_SIZE equals sizeof(TValue) (either
** array below gets a zero/negative size otherwise), and at run time that
** LUA_TNIL == 0, LUA_TBOOLEAN == 1, PCRLUA == 0 and that the value field
** sits at the same offset in a Node's i_val and in a stack slot.
** Returns JIT_S_OK if everything holds. Otherwise stores the marker value
** 999999999 in J->dasmstatus and returns JIT_S_COMPILER_ERROR.
*/
67static int jit_consistency_check(jit_State *J)
68{
69 do {
70 /* Force a compiler error for inconsistent structure sizes. */
71 /* Check LUA_TVALUE_ALIGN in luaconf.h, too. */
72 ||int check_TVALUE_SIZE_in_ljit_x86_dash[1+TVALUE_SIZE-sizeof(TValue)];
73 ||int check_TVALUE_SIZE_in_ljit_x86_dash_[1+sizeof(TValue)-TVALUE_SIZE];
74 ((void)check_TVALUE_SIZE_in_ljit_x86_dash[0]);
75 ((void)check_TVALUE_SIZE_in_ljit_x86_dash_[0]);
76 if (LUA_TNIL != 0 || LUA_TBOOLEAN != 1 || PCRLUA != 0) break;
77 if ((int)&(((Node *)0)->i_val) != (int)&(((StkId)0)->value)) break;
78 return JIT_S_OK;
79 } while (0);
80 J->dasmstatus = 999999999; /* Recognizable error. */
81 return JIT_S_COMPILER_ERROR;
82}
83
84/* Compile JIT subroutines (once).
** Runs the consistency check and the CPU feature detection first, then
** assembles the global subroutines defined inline below (STACKPTR, the
** GATE_LJ/GATE_JL/GATE_JC call gates, GATE_JC_DEBUG, GROW_STACK, GROW_CI and
** the DEOPTIMIZE entry points) together with every .jsub captured elsewhere
** in this file (emitted by .dumpjsub). The result is linked into
** J->jsubmcode/J->szjsubmcode and the three call gate addresses are copied
** into the global state.
** Returns JIT_S_OK, or the first non-OK status of any step.
*/
85static int jit_compile_jsub(jit_State *J)
86{
87 int status = jit_consistency_check(J);
88 if (status != JIT_S_OK) return status;
89 status = jit_cpudetect(J);
90 if (status != JIT_S_OK) return status;
91 dasm_setup(Dst, jit_actionlist);
92 |// Macros to reorder and combine JIT subroutine definitions.
93 |.macro .jsub, name
94 |.capture JSUB // Add the entry point.
95 ||//-----------------------------------------------------------------------
96 ||//->name:
97 | .align 16
98 |->name:
99 |.endmacro
100 |.macro .endjsub; .endcapture; .endmacro
101 |.macro .dumpjsub; .dumpcapture JSUB; .endmacro
102 |
103 |.code
104 |//-----------------------------------------------------------------------
105 | .align 16
106 | // Must be the first JSUB defined or used.
107 |->STACKPTR: // Get stack pointer (for jit.util.*).
108 | lea eax, [esp+aword*1] // But adjust for the return address.
109 | ret
110 |
111 |//-----------------------------------------------------------------------
112 | .align 16
113 |->GATE_LJ: // Lua -> JIT gate. (L, func, nresults)
114 | push ebp
115 | mov ebp, esp
116 | sub esp, LJFRAME_OFFSET
117 | mov SAVER1, BASE
118 | mov BASE, CARG2 // func
119 | mov CARG2, L // Arg used as savereg. Avoids aword*8 stack frame.
120 | mov L, CARG1 // L
121 | mov SAVER2, TOP
122 | mov TOP, L->top
123 | mov LCL, BASE->value
124 | mov CI, L->ci
125 | // Prevent stackless yields. No limit check -- this is not a real C call.
126 | inc word L->nCcalls // short
127 |
128 | call aword LCL->jit_gate // Call the compiled code.
129 |
130 | mov CI, L->ci
131 | mov L->top, TOP // Only correct for LUA_MULTRET.
132 | mov edx, CI->savedpc
133 | mov eax, CARG3 // nresults
134 | mov L->savedpc, edx // L->savedpc = CI->savedpc
135 | mov edx, CI->base
136 | test eax, eax
137 | mov L->base, edx // L->base = CI->base
138 | js >2 // Skip for nresults == LUA_MULTRET.
139 |
140 | TValuemul eax
141 | add BASE, eax
142 | xor ecx, ecx
143 | mov L->top, BASE // L->top = &func[nresults]
144 |1: // No initial check. May use EXTRA_STACK (once).
145 | mov TOP->tt, ecx // Clear unset stack slots.
146 | add TOP, #TOP
147 | cmp TOP, BASE
148 | jb <1
149 |
150 |2:
151 | dec word L->nCcalls // short
152 | mov eax, PCRC
153 | mov TOP, SAVER2
154 | mov BASE, SAVER1
155 | mov L, CARG2
156 | mov esp, ebp
157 | pop ebp
158 | ret
159 |
160 |//-----------------------------------------------------------------------
161 | .align 16
162 |->GATE_JL: // JIT -> Lua callgate.
163 | mov PROTO:edx, LCL->p
164 | cmp dword PROTO:edx->jit_status, JIT_S_OK
165 | jne >1 // Already compiled?
166 |
167 | // Yes, copy callgate to closure (so GATE_JL is not called again).
168 | mov edx, PROTO:edx->jit_mcode
169 | mov LCL->jit_gate, edx
170 | jmp edx // Chain to compiled code.
171 |
172 |1: // Let luaD_precall do the hard work: compile & run or fallback.
173 | sub esp, FRAME_OFFSET
174 | mov eax, CI->savedpc
175 | mov L->ci, CI // May not be in sync for tailcalls.
176 | mov L->top, TOP
177 | mov ARG3, -1 // LUA_MULTRET
178 | mov L->savedpc, eax // luaD_precall expects it there.
179 | mov ARG2, BASE
180 | sub BASE, L->stack // Preserve old BASE (= func).
181 | mov ARG1, L
182 | call &luaD_precall // luaD_precall(L, func, nresults)
183 | test eax,eax // Assumes: PCRLUA == 0
184 | jnz >2 // PCRC? PCRYIELD cannot happen.
185 |
186 | // Returned PCRLUA: need to call the bytecode interpreter.
187 | call &luaV_execute, L, 1
188 | // Indirect yield (L->status == LUA_YIELD) cannot happen.
189 |
190 |2: // Returned PCRC: compile & run done. Frame is already unwound.
191 | add esp, FRAME_OFFSET
192 | add BASE, L->stack // Restore stack-relative pointers BASE and TOP.
193 | mov TOP, L->top
194 | ret
195 |
196 |//-----------------------------------------------------------------------
197 | .align 16
198 |->GATE_JC: // JIT -> C callgate.
199 | lea eax, TOP[LUA_MINSTACK]
200 | sub esp, FRAME_OFFSET
201 | cmp eax, L->stack_last
202 | jae ->GROW_STACK // Stack overflow?
203 | cmp CI, L->end_ci
204 | lea CI, CI[1]
205 | je ->GROW_CI // CI overflow?
206 | mov L->ci, CI
207 | mov CI->func, BASE
208 | mov CI->top, eax
209 | mov CCLOSURE:edx, BASE->value
210 | add BASE, #BASE
211 | mov L->top, TOP
212 | mov L->base, BASE
213 | mov CI->base, BASE
214 | // ci->nresults is not set because we don't use luaD_poscall().
215 |
216 |->GATE_JC_PATCH: // Patch mark for jmp to GATE_JC_DEBUG.
217 |
218 | call aword CCLOSURE:edx->f, L // Call the C function.
219 |
220 |2: // Label used below!
221 | add esp, FRAME_OFFSET
222 | mov CI, L->ci
223 | TValuemul eax // eax = nresults*sizeof(TValue)
224 | mov TOP, CI->func
225 | jz >4 // Skip loop if nresults == 0.
226 | // Yield (-1) cannot happen.
227 | mov BASE, L->top
228 | mov edx, BASE
229 | sub BASE, eax // BASE = &L->top[-nresults]
230 |3: // Relocate [L->top-nresults, L->top) -> [ci->func, ci->func+nresults)
231 | mov eax, [BASE]
232 | add BASE, aword*1
233 | mov [TOP], eax
234 | add TOP, aword*1
235 | cmp BASE, edx
236 | jb <3
237 |
238 |4:
239 | mov BASE, CI->func
240 | sub CI, #CI
241 | mov L->ci, CI
242 | ret
243 |
244 |//-----------------------------------------------------------------------
245 | nop; nop; nop; nop; nop; nop // Save area. See DEBUGPATCH_SIZE.
246 | .align 16
247 |->GATE_JC_DEBUG: // JIT -> C callgate for debugging.
248 | test byte L->hookmask, LUA_MASKCALL // Need to call hook?
249 | jnz >7
250 |6:
251 | call aword CCLOSURE:edx->f, L // Call the C function.
252 |
253 | test byte L->hookmask, LUA_MASKRET // Need to call hook?
254 | jz <2
255 |
256 | // Return hook. TODO: LUA_HOOKTAILRET is not called since tailcalls == 0.
257 | mov BASE, eax // BASE (ebx) is callee-save.
258 | call &luaD_callhook, L, LUA_HOOKRET, -1
259 | mov eax, BASE
260 | jmp <2
261 |
262 |7: // Call hook.
263 | mov BASE, CCLOSURE:edx // BASE (ebx) is callee-save.
264 | call &luaD_callhook, L, LUA_HOOKCALL, -1
265 | mov CCLOSURE:edx, BASE
266 | jmp <6
267 |
268 |//-----------------------------------------------------------------------
269 | .align 16
270 |->GROW_STACK: // Grow stack. Jump from/to prologue.
271 | sub eax, TOP
272 | TValuediv eax // eax = (eax-TOP)/sizeof(TValue).
273 | mov L->top, TOP
274 | sub BASE, L->stack
275 | mov ARG3, CI
276 | call &luaD_growstack, L, eax
277 | mov CI, ARG3 // CI may not be in sync with L->ci.
278 | add BASE, L->stack // Restore stack-relative pointers.
279 | mov TOP, L->top
280 | mov LCL, BASE->value
281 | add esp, FRAME_OFFSET // Undo esp adjust of prologue/GATE_JC.
282 | jmp aword LCL->jit_gate // Retry prologue.
283 |
284 |//-----------------------------------------------------------------------
285 | .align 16
286 |->GROW_CI: // Grow CI. Jump from/to prologue.
287 | mov L->top, TOP // May throw LUA_ERRMEM, so save TOP.
288 | call &luaD_growCI, L
289 | lea CI, CINFO:eax[-1] // Undo ci++ (L->ci reset in prologue).
290 | mov LCL, BASE->value
291 | mov L->ci, CI
292 | add esp, FRAME_OFFSET // Undo esp adjust of prologue/GATE_JC.
293 | jmp aword LCL->jit_gate // Retry prologue.
294 |
295 |//-----------------------------------------------------------------------
296 |.dumpjsub // Dump all captured .jsub's.
297 |
298 |// Uncritical jsubs follow. No need to align them.
299 |//-----------------------------------------------------------------------
300 |->DEOPTIMIZE_CALLER: // Deoptimize calling instruction.
301 | pop edx
302 | jmp ->DEOPTIMIZE
303 |
304 |->DEOPTIMIZE_OPEN: // Deoptimize open instruction.
305 | mov L->top, TOP // Save TOP.
306 |
307 |->DEOPTIMIZE: // Deoptimize instruction.
308 | mov L->savedpc, edx // &J->nextins expected in edx.
309 | call &luaJIT_deoptimize, L
310 | mov BASE, L->base
311 | mov TOP, L->top // Restore TOP for open ins.
312 | jmp eax // Continue with new mcode addr.
313 |
314 | .align 16
315 |//-----------------------------------------------------------------------
316
317 (void)dasm_checkstep(Dst, DASM_SECTION_CODE);
318 status = luaJIT_link(J, &J->jsubmcode, &J->szjsubmcode);
319 if (status != JIT_S_OK)
320 return status;
321
322 /* Copy the callgates from the globals to the global state. */
323 G(J->L)->jit_gateLJ = (luaJIT_GateLJ)J->jsub[JSUB_GATE_LJ];
324 G(J->L)->jit_gateJL = (lua_CFunction)J->jsub[JSUB_GATE_JL];
325 G(J->L)->jit_gateJC = (lua_CFunction)J->jsub[JSUB_GATE_JC];
326 return JIT_S_OK;
327}
328
329/* Number of bytes saved and overwritten at GATE_JC_PATCH by
** luaJIT_debugnotify(). Match with number of nops in the save area in front
** of GATE_JC_DEBUG. Avoid confusing the instruction decoder.
*/
330#define DEBUGPATCH_SIZE 6
331
332/* Notify backend that the debug mode may have changed. */
333void luaJIT_debugnotify(jit_State *J)
334{
335 unsigned char *patch = (unsigned char *)J->jsub[JSUB_GATE_JC_PATCH];
336 unsigned char *target = (unsigned char *)J->jsub[JSUB_GATE_JC_DEBUG];
337 /* Yep, this is self-modifying code -- don't tell anyone. */
338 if (patch[0] == 0xe9) { /* Debug patch is active. */
339 if (!(J->flags & JIT_F_DEBUG_CALL)) /* Deactivate it. */
340 memcpy(patch, target-DEBUGPATCH_SIZE, DEBUGPATCH_SIZE);
341 } else { /* Debug patch is inactive. */
342 if (J->flags & JIT_F_DEBUG_CALL) { /* Activate it. */
343 int rel = target-(patch+5);
344 memcpy(target-DEBUGPATCH_SIZE, patch, DEBUGPATCH_SIZE);
345 patch[0] = 0xe9; /* jmp */
346 memcpy(patch+1, &rel, 4); /* Relative address. */
347 memset(patch+5, 0x90, DEBUGPATCH_SIZE-5); /* nop */
348 }
349 }
350}
351
352/* Patch a jmp into existing mcode. */
353static void jit_patch_jmp(jit_State *J, void *mcode, void *to)
354{
355 unsigned char *patch = (unsigned char *)mcode;
356 int rel = ((unsigned char *)to)-(patch+5);
357 patch[0] = 0xe9; /* jmp */
358 memcpy((void *)(patch+1), &rel, 4); /* Relative addr. */
359}
360
361/* ------------------------------------------------------------------------ */
362
363/* Call line/count hook. */
364static void jit_hookins(lua_State *L, const Instruction *newpc)
365{
366 Proto *pt = ci_func(L->ci)->l.p;
367 int pc = luaJIT_findpc(pt, newpc); /* Sloooow with mcode addrs. */
368 const Instruction *savedpc = L->savedpc;
369 L->savedpc = pt->code + pc + 1;
370 if (L->hookmask > LUA_MASKLINE && L->hookcount == 0) {
371 resethookcount(L);
372 luaD_callhook(L, LUA_HOOKCOUNT, -1);
373 }
374 if (L->hookmask & LUA_MASKLINE) {
375 int newline = getline(pt, pc);
376 if (pc != 0) {
377 int oldpc = luaJIT_findpc(pt, savedpc);
378 if (!(pc <= oldpc || newline != getline(pt, oldpc))) return;
379 }
380 luaD_callhook(L, LUA_HOOKLINE, newline);
381 }
382}
383
384/* Insert hook check for each instruction in full debug mode.
** If openop is nonzero, TOP is first stored to L->top. Then a call to the
** HOOKINS jsub is emitted. HOOKINS returns immediately unless a line or count
** hook is enabled; it decrements L->hookcount and calls jit_hookins() with
** its own return address (the current machine code address) when the counter
** reaches zero or a line hook is set, then reloads BASE and TOP.
*/
385static void jit_ins_debug(jit_State *J, int openop)
386{
387 if (openop) {
388 | mov L->top, TOP
389 }
390 |// TODO: Passing bytecode addrs would speed this up (but use more space).
391 | call ->HOOKINS
392
393 |.jsub HOOKINS
394 | test byte L->hookmask, LUA_MASKLINE|LUA_MASKCOUNT
395 | jz >2
396 | dec dword L->hookcount
397 | jz >1
398 | test byte L->hookmask, LUA_MASKLINE
399 | jz >2
400 |1:
401 | mov eax, [esp] // Current machine code address.
402 | sub esp, FRAME_OFFSET
403 | call &jit_hookins, L, eax
404 | add esp, FRAME_OFFSET
405 | mov BASE, L->base // Restore stack-relative pointers.
406 | mov TOP, L->top
407 |2:
408 | ret
409 |.endjsub
410}
411
412/* Called before every instruction.
** Emits the PC label numbered J->nextpc at the current output position.
*/
413static void jit_ins_start(jit_State *J)
414{
415 |// Always emit PC labels, even for dead code (but not for combined JMP).
416 |=>J->nextpc:
417}
418
419/* Chain to another instruction.
** Emits an unconditional jmp to the PC label numbered pc.
*/
420static void jit_ins_chainto(jit_State *J, int pc)
421{
422 | jmp =>pc
423}
424
425/* Set PC label.
** Passes PC label number pc and the address target to the .label directive.
** NOTE(review): presumably this binds the label to the given address instead
** of the current output position -- confirm against the DynASM variant used.
*/
426static void jit_ins_setpc(jit_State *J, int pc, void *target)
427{
428 |.label =>pc, &target
429}
430
431/* Called after the last instruction has been encoded.
** - If any deopt target was emitted (JIT_TF_USED_DEOPT), appends the single
**   shared `jmp ->DEOPTIMIZE` to the .deopt section.
** - Emits PC label lastpc+1 at the end of .code and lastpc+2 in .tail.
** - Terminates with a word-aligned 0xffff marker, reserves sizemfm bytes in
**   .mfmap and appends two zeroed awords (next mcode block pointer and size).
** Leaves .code as the active section.
*/
432static void jit_ins_last(jit_State *J, int lastpc, int sizemfm)
433{
434 if (J->tflags & JIT_TF_USED_DEOPT) { /* Deopt section has been used? */
435 |.deopt
436 | jmp ->DEOPTIMIZE // Yes, need to add final jmp.
437 |.code
438 }
439 |=>lastpc+1: // Extra label at the end of .code.
440 |.tail
441 |=>lastpc+2: // And at the end of .deopt/.tail.
442 | .align word // Keep next section word aligned.
443 | .word 0xffff // Terminate mfm with JIT_MFM_STOP.
444 |.mfmap
445 | // <-- Deoptimization hints are inserted here.
446 | .space sizemfm // To be filled in with inverse mfm.
447 | .aword 0, 0 // Next mcode block pointer and size.
448 | // The previous two awords are only word, but not aword aligned.
449 | // Copying them is easier than aligning them and adjusting mfm handling.
450 |.code
451}
452
453/* Add a deoptimize target for the current instruction.
** Defines the local label shortcuts L_DEOPTLABEL (9), L_DEOPTIMIZE (<9) and
** L_DEOPTIMIZEF (>9) and emits the stub the label points to:
** - nargs != -1: a 6 byte entry in .deopt that loads &J->nextins into edx and
**   falls through into the following entries; sets JIT_TF_USED_DEOPT so that
**   jit_ins_last() appends the final jmp.
** - nargs == -1 (open instruction): a 10 byte entry in .tail that loads
**   &J->nextins into edx and jumps to DEOPTIMIZE_OPEN.
** Leaves .code as the active section.
*/
454static void jit_deopt_target(jit_State *J, int nargs)
455{
456 |.define L_DEOPTLABEL, 9 // Local deopt label.
457 |.define L_DEOPTIMIZE, <9 // Local deopt target. Use after call.
458 |.define L_DEOPTIMIZEF, >9 // Local deopt target. Use before call.
459 if (nargs != -1) {
460 |// Alas, x86 doesn't have conditional calls. So branch to the .deopt
461 |// section to load J->nextins and jump to JSUB_DEOPTIMIZE.
462 |// Only a single jump is added at the end (if needed) and any
463 |// intervening code sequences are shadowed (lea trick).
464 |.deopt // Occupies 6 bytes in .deopt section.
465 | .byte 0x8d // Shadow mov with lea edi, [edx+ofs].
466 |L_DEOPTLABEL:
467 | mov edx, &J->nextins // Current instruction + 1.
468 |.code
469 J->tflags |= JIT_TF_USED_DEOPT;
470 } else {
471 |.tail // Occupies 10 bytes in .tail section.
472 |L_DEOPTLABEL:
473 | mov edx, &J->nextins
474 | jmp ->DEOPTIMIZE_OPEN // Open ins need to save TOP, too.
475 | // And TOP (edi) would be overwritten by the lea trick.
476 | // So checking for open ops later on wouldn't suffice. Sigh.
477 |.code
478 }
479}
480
481/* luaC_checkGC() inlined. Destroys caller-saves + TOP (edi). Uses label 7:. */
482/* Use this only at the _end_ of an instruction. */
/* Emits a compare of the global state's totalbytes against GCthreshold. */
/* When the threshold is reached, control goes to the GCSTEP jsub, which */
/* calls luaC_step(L), reloads BASE from L->base and jumps back to label 7 */
/* through the address preloaded into TOP. */
483static void jit_checkGC(jit_State *J)
484{
485 | mov GL:ecx, L->l_G
486 | mov eax, GL:ecx->totalbytes // size_t
487 | mov TOP, >7
488 | cmp eax, GL:ecx->GCthreshold // size_t
489 | jae ->GCSTEP
490 |7:
491
492 |.jsub GCSTEP
493 | call &luaC_step, L
494 | mov BASE, L->base
495 | jmp TOP
496 |.endjsub
497}
498
499/* ------------------------------------------------------------------------ */
500
501|// JIT->JIT calling conventions:
502|//
503|// Register/Type | Call Setup | Prologue | Epilogue | Call Finish
504|// ===========================================================================
505|// eax | LCL | = BASE->value| | * | *
506|// ecx | CI | = L->ci | L->ci = ++CI | * | *
507|// edx | * | * | * | * | *
508|// ---------------------------------------------------------------------------
509|// esi | L | | | |
510|// ebx | BASE | += f | ++ | -- | -= f
511|// edi | TOP | += f+1+nargs | = BASE+maxst | = f+nresults | = BASE+maxst
512|// ---------------------------------------------------------------------------
513|// L->base | | = BASE | | = BASE
514|// L->top | | = TOP | | = TOP
515|// L->ci | | ++, -> = ... | -- |
516|// L->ci->savedpc| = &code[pc] | [ L-> = ] | |
517|// ---------------------------------------------------------------------------
518|// args + vars | | setnil | |
519|// results | | | move | setnil
520|// ---------------------------------------------------------------------------
521
522
523|// Include support for function inlining.
524|.include ljit_x86_inline.dash
525
526
#ifdef LUA_COMPAT_VARARG
/* Build the compatibility vararg table for the current frame.
** Called from the prologue of functions flagged VARARG_NEEDSARG. Collects the
** extra arguments (the slots between the fixed parameters and L->base) into a
** new table at indices 1..n, stores the count in field "n" and places the
** table in the slot right after the fixed parameters (base[numparams]).
** Runs a GC check first, so L->base is only read afterwards.
*/
static void jit_vararg_table(lua_State *L)
{
  StkId base, func, extra;
  Table *argtab;
  int nfixed, nextra, k;

  luaC_checkGC(L);
  base = L->base;
  func = L->ci->func;
  nfixed = clvalue(func)->l.p->numparams;
  nextra = base - func - nfixed - 1;
  argtab = luaH_new(L, nextra, 1);
  extra = base - nextra;  /* First extra argument. */
  for (k = 1; k <= nextra; k++)
    setobj2n(L, luaH_setnum(L, argtab, k), extra + (k - 1));
  setnvalue(luaH_setstr(L, argtab, luaS_newliteral(L, "n")), (lua_Number)nextra);
  sethvalue(L, base + nfixed, argtab);
}
#endif
545
546/* Encode JIT function prologue.
** Emitted code, in order:
** - stack overflow check (-> GROW_STACK) and CallInfo overflow check
**   (-> GROW_CI), then setup of the new CallInfo and BASE;
** - fixarg functions: cap TOP at BASE+numparams (cmov if JIT_F_CPU_CMOV is
**   set, else a short branch) and set ci->top/L->top/L->base/ci->base;
** - vararg functions: nil-fill missing fixargs (loop placed in .tail), make
**   the slot after the last argument the new base, move the fixargs there
**   and clear their old slots;
** - nil-fill the remaining slots (unrolled if stacksize <= EXTRA_STACK,
**   otherwise a two-slot loop);
** - with LUA_COMPAT_VARARG and VARARG_NEEDSARG: call jit_vararg_table();
** - with JIT_F_DEBUG_CALL: test for a call hook and call the HOOKCALL jsub.
*/
547static void jit_prologue(jit_State *J)
548{
549 Proto *pt = J->pt;
550 int numparams = pt->numparams;
551 int stacksize = pt->maxstacksize;
552
553 |// Note: the order of the following instructions has been carefully tuned.
554 | lea eax, TOP[stacksize]
555 | sub esp, FRAME_OFFSET
556 | cmp eax, L->stack_last
557 | jae ->GROW_STACK // Stack overflow?
558 | // This is a slight overallocation (BASE[1+stacksize] would be enough).
559 | // We duplicate luaD_precall() behaviour so we can use luaD_growstack().
560 | cmp CI, L->end_ci
561 | lea CI, CI[1]
562 | je ->GROW_CI // CI overflow?
563 | xor eax, eax // Assumes: LUA_TNIL == 0
564 | mov CI->func, BASE
565 | add BASE, #BASE
566 | mov L->ci, CI
567
568 if (numparams > 0) {
569 | lea edx, BASE[numparams]
570 | cmp TOP, edx // L->top >< L->base+numparams ?
571 }
572
573 if (!pt->is_vararg) { /* Fixarg function. */
574 /* Must cap L->top at L->base+numparams because 1st LOADNIL is omitted. */
575 if (numparams == 0) {
576 | mov TOP, BASE
577 } else if (J->flags & JIT_F_CPU_CMOV) {
578 | cmova TOP, edx
579 } else {
580 | jna >1
581 | mov TOP, edx
582 |1:
583 }
584 | lea edx, BASE[stacksize] // New ci->top.
585 | mov CI->tailcalls, eax // 0
586 | mov CI->top, edx
587 | mov L->top, edx
588 | mov L->base, BASE
589 | mov CI->base, BASE
590 } else { /* Vararg function. */
591 int i;
592 if (numparams > 0) {
593 |// If some fixargs are missing we need to clear them and
594 |// bump TOP to get a consistent frame layout for OP_VARARG.
595 | jb >5
596 |4:
597 |.tail
598 |5: // This is uncommon. So move it to .tail and use a loop.
599 | mov TOP->tt, eax
600 | add TOP, #TOP
601 | cmp TOP, edx
602 | jb <5
603 | jmp <4
604 |.code
605 }
606 | mov L->base, TOP // New base is after last arg.
607 | mov CI->base, TOP
608 | mov CI->tailcalls, eax // 0
609 for (i = 0; i < numparams; i++) { /* Move/clear fixargs. */
610 |// Inline this. Vararg funcs usually have very few fixargs.
611 | copyslot TOP[i], BASE[i], ecx, edx
612 | mov BASE[i].tt, eax // Clear old fixarg slot (help the GC).
613 }
614 if (numparams > 0) {
615 | mov CI, L->ci // Reload CI = ecx (used by move).
616 }
617 | mov BASE, TOP
618 | lea edx, BASE[stacksize] // New ci->top.
619 | lea TOP, BASE[numparams] // Start of vars to clear.
620 | mov CI->top, edx
621 | mov L->top, edx
622 stacksize -= numparams; /* Fixargs are already cleared. */
623 }
624
625 /* Clear undefined args and all vars. Still assumes eax = LUA_TNIL = 0. */
626 /* Note: cannot clear only args because L->top has grown. */
627 if (stacksize <= EXTRA_STACK) { /* Loopless clear. May use EXTRA_STACK. */
628 int i;
629 for (i = 0; i < stacksize; i++) {
630 | mov TOP[i].tt, eax
631 }
632 } else { /* Standard loop. */
633 |2: // Unrolled for 2 stack slots. No initial check. May use EXTRA_STACK.
634 | mov TOP[0].tt, eax
635 | mov TOP[1].tt, eax
636 | add TOP, 2*#TOP
637 | cmp TOP, edx
638 | jb <2
639 |// Note: TOP is undefined now. TOP is only valid across calls/open ins.
640 }
641
642#ifdef LUA_COMPAT_VARARG
643 if (pt->is_vararg & VARARG_NEEDSARG) {
644 | call &jit_vararg_table, L
645 }
646#endif
647
648 /* Call hook check. */
649 if (J->flags & JIT_F_DEBUG_CALL) {
650 | test byte L->hookmask, LUA_MASKCALL
651 | jz >9
652 | call ->HOOKCALL
653 |9:
654
655 |.jsub HOOKCALL
656 | mov CI, L->ci
657 | mov TOP, CI->func
658 | mov LCL, TOP->value
659 | mov PROTO:edi, LCL->p // clvalue(L->ci->func)->l.p
660 | mov eax, PROTO:edi->code
661 | add eax, 4 // Hooks expect incremented PC.
662 | mov L->savedpc, eax
663 | sub esp, FRAME_OFFSET
664 | call &luaD_callhook, L, LUA_HOOKCALL, -1
665 | add esp, FRAME_OFFSET
666 | mov eax, PROTO:edi->code // PROTO:edi is callee-save.
667 | mov L->savedpc, eax // jit_hookins needs previous PC.
668 | mov BASE, L->base
669 | ret
670 |.endjsub
671 }
672}
673
674/* Check if we can combine 'return const'.
** Returns 0 without emitting anything when J->combine is not set.
** Otherwise emits the function epilogue up front -- optional luaF_close()
** (unless the NOCLOSE hint is set), popping the CallInfo, pointing BASE at the
** function slot and TOP one slot above it, releasing the frame -- and
** returns 1. The caller then stores its value to BASE[0] and emits the ret.
*/
675static int jit_return_k(jit_State *J)
676{
677 if (!J->combine) return 0; /* COMBINE hint set? */
678 /* May need to close open upvalues. */
679 if (!fhint_isset(J, NOCLOSE)) {
680 | call &luaF_close, L, BASE
681 }
682 if (!J->pt->is_vararg) { /* Fixarg function. */
683 | sub aword L->ci, #CI
684 | mov TOP, BASE
685 | sub BASE, #BASE
686 | add esp, FRAME_OFFSET
687 } else { /* Vararg function. */
688 | mov CI, L->ci
689 | mov BASE, CI->func
690 | sub CI, #CI
691 | mov L->ci, CI
692 | lea TOP, BASE[1]
693 | add esp, FRAME_OFFSET
694 }
695 jit_assert(J->combine == 1); /* Required to skip next RETURN instruction. */
696 return 1;
697}
698
/* Encode a RETURN instruction.
** rbase is the first result slot relative to BASE. nresults is the number of
** results, or negative if the previous op was open and the results extend up
** to TOP.
** Emits, in order: the return hook check (only with JIT_F_DEBUG_CALL, via
** the HOOKRET jsub), an optional luaF_close() (unless the NOCLOSE hint is
** set), and then one of three epilogues that move the results down to the
** function slot, pop the CallInfo, release the frame and return:
** a copy loop for open results, or unrolled slot copies for fixarg and
** vararg functions.
*/
699static void jit_op_return(jit_State *J, int rbase, int nresults)
700{
701 /* Return hook check. */
702 if (J->flags & JIT_F_DEBUG_CALL) {
703 if (nresults < 0 && !(J->flags & JIT_F_DEBUG_INS)) {
704 | mov L->top, TOP
705 }
706 |// TODO: LUA_HOOKTAILRET (+ ci->tailcalls counting) or changed debug API.
707 | test byte L->hookmask, LUA_MASKRET
708 | jz >7
709 | call ->HOOKRET
710 |7:
711 if (J->flags & JIT_F_DEBUG_INS) {
712 | mov eax, FRAME_RETADDR
713 | mov L->savedpc, eax
714 }
715
716 |.jsub HOOKRET
717 | mov eax, [esp] // Current machine code address.
718 | mov L->savedpc, eax
719 | sub esp, FRAME_OFFSET
720 | call &luaD_callhook, L, LUA_HOOKRET, -1
721 | add esp, FRAME_OFFSET
722 | mov BASE, L->base // Restore stack-relative pointers.
723 | mov TOP, L->top
724 | ret
725 |.endjsub
726 }
727
728 /* May need to close open upvalues. */
729 if (!fhint_isset(J, NOCLOSE)) {
730 | call &luaF_close, L, BASE
731 }
732
733 /* Previous op was open: 'return f()' or 'return ...' */
734 if (nresults < 0) {
735 |// Relocate [BASE+rbase, TOP) -> [ci->func, *).
736 | mov CI, L->ci
737 | addidx BASE, rbase
738 | mov edx, CI->func
739 | cmp BASE, TOP
740 | jnb >2
741 |1:
742 | mov eax, [BASE]
743 | add BASE, aword*1
744 | mov [edx], eax
745 | add edx, aword*1
746 | cmp BASE, TOP
747 | jb <1
748 |2:
749 | add esp, FRAME_OFFSET
750 | mov BASE, CI->func
751 | sub CI, #CI
752 | mov TOP, edx // Relocated TOP.
753 | mov L->ci, CI
754 | ret
755 return;
756 }
757
758 if (!J->pt->is_vararg) { /* Fixarg function, nresults >= 0. */
759 int i;
760 | sub aword L->ci, #CI
761 |// Relocate [BASE+rbase,BASE+rbase+nresults) -> [BASE-1, *).
762 |// TODO: loop for large nresults?
763 | sub BASE, #BASE
764 for (i = 0; i < nresults; i++) {
765 | copyslot BASE[i], BASE[rbase+i+1]
766 }
767 | add esp, FRAME_OFFSET
768 | lea TOP, BASE[nresults]
769 | ret
770 } else { /* Vararg function, nresults >= 0. */
771 int i;
772 |// Relocate [BASE+rbase,BASE+rbase+nresults) -> [ci->func, *).
773 | mov CI, L->ci
774 | mov TOP, CI->func
775 | sub CI, #CI
776 | mov L->ci, CI // CI = ecx is used by copyslot.
777 for (i = 0; i < nresults; i++) {
778 | copyslot TOP[i], BASE[rbase+i]
779 }
780 | add esp, FRAME_OFFSET
781 | mov BASE, TOP
782 | addidx TOP, nresults
783 | ret
784 }
785}
786
/* Encode a CALL instruction.
** func is the slot of the callee relative to BASE. nargs is the argument
** count, or negative if the previous op was open and already set TOP.
** nresults is the expected result count, or negative for an open call.
** Nothing is emitted here if jit_inline_call() reports that the call was
** inlined (negative return). Otherwise the emitted code:
** - points BASE at the callee and stores &J->nextins in ci->savedpc;
** - if the type hint says LUA_TFUNCTION, deoptimizes on a type mismatch,
**   else routes non-functions through the METACALL jsub (luaD_tryfuncTM);
** - calls through the closure's jit_gate, then restores BASE and L->base;
** - nil-fills nresults slots starting at TOP (these loops have no initial
**   check and may run into EXTRA_STACK);
** - for non-open calls resets TOP and L->top to BASE+maxstacksize.
*/
787static void jit_op_call(jit_State *J, int func, int nargs, int nresults)
788{
789 int cltype = jit_inline_call(J, func, nargs, nresults);
790 if (cltype < 0) return; /* Inlined? */
791
792 |// Note: the order of the following instructions has been carefully tuned.
793 | addidx BASE, func
794 | mov CI, L->ci
795 | isfunction 0 // BASE[0] is L->base[func].
796 if (nargs >= 0) { /* Previous op was not open and did not set TOP. */
797 | lea TOP, BASE[1+nargs]
798 }
799 | mov LCL, BASE->value
800 | mov edx, &J->nextins
801 | mov CI->savedpc, edx
802 if (cltype == LUA_TFUNCTION) {
803 if (nargs == -1) {
804 | jne ->DEOPTIMIZE_OPEN // TYPE hint was wrong (open op)?
805 } else {
806 | jne ->DEOPTIMIZE // TYPE hint was wrong?
807 }
808 } else {
809 | je >1 // Skip __call handling for functions.
810 | call ->METACALL
811 |1:
812
813 |.jsub METACALL // CALL to __call metamethod.
814 | sub esp, FRAME_OFFSET
815 | mov L->savedpc, edx // May throw errors. Save PC and TOP.
816 | mov L->top, TOP
817 | call &luaD_tryfuncTM, L, BASE // Resolve __call metamethod.
818 | add esp, FRAME_OFFSET
819 | mov BASE, eax // Restore stack-relative pointers.
820 | mov TOP, L->top
821 | mov LCL, BASE->value
822 | mov CI, L->ci
823 | ret
824 |.endjsub
825 }
826 | call aword LCL->jit_gate // Call JIT func or GATE_JL/GATE_JC.
827 | subidx BASE, func
828 | mov L->base, BASE
829
830 /* Clear undefined results TOP <= o < func+nresults. */
831 if (nresults > 0) {
832 | xor eax, eax
833 if (nresults <= EXTRA_STACK) { /* Loopless clear. May use EXTRA_STACK. */
834 int i;
835 for (i = 0; i < nresults; i++) {
836 | mov TOP[i].tt, eax
837 }
838 } else { /* Standard loop. TODO: move to .tail? */
839 | lea edx, BASE[func+nresults]
840 |1: // Unrolled for 2 stack slots. No initial check. May use EXTRA_STACK.
841 | mov TOP[0].tt, eax // LUA_TNIL
842 | mov TOP[1].tt, eax // LUA_TNIL
843 | add TOP, 2*#TOP
844 | cmp TOP, edx
845 | jb <1
846 }
847 }
848
849 if (nresults >= 0) { /* Not an open ins. Restore L->top. */
850 | lea TOP, BASE[J->pt->maxstacksize] // Faster than getting L->ci->top.
851 | mov L->top, TOP
852 } /* Otherwise keep TOP for next instruction. */
853}
854
/* Encode a TAILCALL instruction.
** func is the callee slot relative to BASE. nargs is the argument count, or
** negative if the previous op was open and already set TOP.
** Emits an optional luaF_close() (unless the NOCLOSE hint is set) and then
** tries to inline the call (jit_inline_call() with nresults == -2). If not
** inlined, the emitted code:
** - checks that the callee is a function: deoptimizes on mismatch when the
**   type hint is LUA_TFUNCTION, otherwise falls back to the METATAILCALL
**   jsub (luaD_tryfuncTM), reached through a stub in .tail;
** - moves the callee and its arguments down to ci->func;
** - pops the CallInfo, releases the frame and jumps (not calls) through the
**   closure's jit_gate.
** In all cases J->combine is incremented so that the following RETURN
** instruction is combined with this one.
*/
855static void jit_op_tailcall(jit_State *J, int func, int nargs)
856{
857 int cltype;
858
859 if (!fhint_isset(J, NOCLOSE)) { /* May need to close open upvalues. */
860 | call &luaF_close, L, BASE
861 }
862
863 cltype = jit_inline_call(J, func, nargs, -2);
864 if (cltype < 0) goto finish; /* Inlined? */
865
866 if (cltype == LUA_TFUNCTION) {
867 jit_deopt_target(J, nargs);
868 | isfunction func
869 | jne L_DEOPTIMIZE // TYPE hint was wrong?
870 } else {
871 | isfunction func; jne >5 // Handle generic callables first.
872 |.tail
873 |5: // Fallback for generic callables.
874 | addidx BASE, func
875 if (nargs >= 0) {
876 | lea TOP, BASE[1+nargs]
877 }
878 | mov edx, &J->nextins
879 | jmp ->METATAILCALL
880 |.code
881
882 |.jsub METATAILCALL // TAILCALL to __call metamethod.
883 | mov L->savedpc, edx
884 | mov L->top, TOP
885 | call &luaD_tryfuncTM, L, BASE // Resolve __call metamethod.
886 |
887 |// Relocate [eax, L->top) -> [L->ci->func, *).
888 | mov CI, L->ci
889 | mov edx, L->top
890 | mov TOP, CI->func
891 |1:
892 | mov BASE, [eax]
893 | add eax, aword*1
894 | mov [TOP], BASE
895 | add TOP, aword*1
896 | cmp eax, edx
897 | jb <1
898 |
899 | mov BASE, CI->func
900 | mov LCL, BASE->value
901 | sub CI, #CI
902 | add esp, FRAME_OFFSET
903 | jmp aword LCL->jit_gate // Chain to callgate.
904 |.endjsub
905 }
906
907 if (nargs >= 0) { /* Previous op was not open and did not set TOP. */
908 int i;
909 /* Relocate [BASE+func, BASE+func+nargs] -> [ci->func, ci->func+nargs]. */
910 /* TODO: loop for large nargs? */
911 if (!J->pt->is_vararg) { /* Fixarg function. */
912 | mov LCL, BASE[func].value
913 for (i = 0; i < nargs; i++) {
914 | copyslot BASE[i], BASE[func+1+i], ecx, edx
915 }
916 | lea TOP, BASE[nargs]
917 | sub BASE, #BASE
918 | mov CI, L->ci
919 | mov BASE->value, LCL // Sufficient to copy func->value.
920 } else { /* Vararg function. */
921 | mov CI, L->ci
922 | lea TOP, BASE[func]
923 | mov BASE, CI->func
924 | mov LCL, TOP->value
925 | mov BASE->value, LCL // Sufficient to copy func->value.
926 for (i = 0; i < nargs; i++) {
927 | copyslot BASE[i+1], TOP[i+1], eax, edx
928 }
929 | lea TOP, BASE[1+nargs]
930 | mov LCL, BASE->value // Need to reload LCL = eax.
931 }
932 } else { /* Previous op was open and set TOP. */
933 |// Relocate [BASE+func, TOP) -> [ci->func, *).
934 | mov CI, L->ci
935 | addidx BASE, func
936 | mov edx, CI->func
937 |1:
938 | mov eax, [BASE]
939 | add BASE, aword*1
940 | mov [edx], eax
941 | add edx, aword*1
942 | cmp BASE, TOP
943 | jb <1
944 | mov BASE, CI->func
945 | mov TOP, edx // Relocated TOP.
946 | mov LCL, BASE->value
947 }
948 | sub CI, #CI
949 | add esp, FRAME_OFFSET
950 | jmp aword LCL->jit_gate // Chain to JIT function.
951
952finish:
953 J->combine++; /* Combine with following return instruction. */
954}
955
956/* ------------------------------------------------------------------------ */
957
/* Encode a MOVE instruction: copy stack slot BASE[src] to BASE[dest]. */
958static void jit_op_move(jit_State *J, int dest, int src)
959{
960 | copyslot BASE[dest], BASE[src]
961}
962
/* Encode a LOADK instruction: copy constant kidx of the current prototype
** into BASE[dest].
** If jit_return_k() reports that this can be combined with a following
** return, the epilogue has already been emitted by it; the constant is then
** stored to BASE[0] and a ret is emitted right after.
*/
963static void jit_op_loadk(jit_State *J, int dest, int kidx)
964{
965 const TValue *kk = &J->pt->k[kidx];
966 int rk = jit_return_k(J);
967 if (rk) dest = 0;
968 | copyconst BASE[dest], kk
969 if (rk) {
970 | ret
971 }
972}
973
974static void jit_op_loadnil(jit_State *J, int first, int last)
975{
976 int idx, num = last - first + 1;
977 int rk = jit_return_k(J);
978 | xor eax, eax // Assumes: LUA_TNIL == 0
979 if (rk) {
980 | settt BASE[0], eax
981 | ret
982 } else if (num <= 8) {
983 for (idx = first; idx <= last; idx++) {
984 | settt BASE[idx], eax // 3/6 bytes
985 }
986 } else {
987 | lea ecx, BASE[first].tt // 15-21 bytes
988 | lea edx, BASE[last].tt
989 |1:
990 | mov [ecx], eax
991 | cmp ecx, edx
992 | lea ecx, [ecx+#BASE] // Preserves CC.
993 | jbe <1
994 }
995}
996
/* Encode a LOADBOOL instruction: store boolean b into BASE[dest].
** If jit_return_k() reports a combined return, the value goes to BASE[0]
** and a ret is emitted. Otherwise, if dojump is set, a jmp to the PC label
** J->nextpc+1 is emitted, except when the COMBINE hint for J->nextpc is the
** boolean false (no jmp is emitted in that case).
*/
997static void jit_op_loadbool(jit_State *J, int dest, int b, int dojump)
998{
999 int rk = jit_return_k(J);
1000 if (rk) dest = 0;
1001 | setbvalue BASE[dest], b
1002 if (rk) {
1003 | ret
1004 } else if (dojump) {
1005 const TValue *h = hint_getpc(J, COMBINE, J->nextpc);
1006 if (!(ttisboolean(h) && bvalue(h) == 0)) { /* Avoid jmp around dead ins. */
1007 | jmp =>J->nextpc+1
1008 }
1009 }
1010}
1011
1012/* ------------------------------------------------------------------------ */
1013
/*
** Emit code to read upvalue uvidx of the current closure into BASE[dest].
** The upvalue object is fetched from LCL->upvals[uvidx]; its 'v' pointer
** is loaded into TOP and the slot it points to is copied.
*/
1014static void jit_op_getupval(jit_State *J, int dest, int uvidx)
1015{
1016 | getLCL
1017 | mov UPVAL:ecx, LCL->upvals[uvidx]
1018 | mov TOP, UPVAL:ecx->v
1019 | copyslot BASE[dest], TOP[0]
1020}
1021
/*
** Emit code to store BASE[src] into upvalue uvidx of the current closure,
** including the GC write barrier check.
** The slot copy is interleaved with a type compare: values whose type tag
** is >= LUA_TSTRING branch to label 5, which calls the BARRIERF jsub only
** if the value is white and the upvalue object is black. Label 5 is placed
** in the .tail section (NOTE(review): presumably out-of-line code, the
** section is declared outside this chunk) and always returns to label 4.
** The function also defines the BARRIERF jsub, which loads (L, upvalue,
** value) into the argument slots and chains to luaC_barrierf().
*/
1022static void jit_op_setupval(jit_State *J, int src, int uvidx)
1023{
1024 | getLCL
1025 | mov UPVAL:ecx, LCL->upvals[uvidx]
1026 | mov TOP, UPVAL:ecx->v
1027 | // This is really copyslot TOP[0], BASE[src] with compare mixed in.
1028 | mov eax, BASE[src].tt
1029 | mov GCOBJECT:edx, BASE[src].value
1030 | mov TOP->tt, eax
1031 | cmp eax, LUA_TSTRING // iscollectable(val)?
1032 | mov eax, BASE[src].value.na[1]
1033 | mov TOP->value, GCOBJECT:edx
1034 | mov TOP->value.na[1], eax
1035 | jae >5
1036 |4:
1037 |.tail
1038 |5:
1039 | test byte GCOBJECT:edx->gch.marked, WHITEBITS // && iswhite(val)
1040 | jz <4
1041 | test byte UPVAL:ecx->marked, bitmask(BLACKBIT) // && isblack(uv)
1042 | jz <4
1043 | call ->BARRIERF // Yes, need barrier.
1044 | jmp <4
1045 |.code
1046
1047 |.jsub BARRIERF // luaC_barrierf() with regparms.
1048 | mov ARG4, GCOBJECT:edx
1049 | mov ARG3, UPVAL:ecx
1050 | mov ARG2, L
1051 | jmp &luaC_barrierf // Chain to C code.
1052 |.endjsub
1053}
1054
1055/* ------------------------------------------------------------------------ */
1056
1057/* Optimized table lookup routines. Enter via jsub, fallback to C. */
1058
1059/* Fallback for GETTABLE_*. Temporary key is in L->env. */
/*
** C fallback for the string-key table lookup jsubs below.
** t:    table whose lookup did not yield a non-nil value.
** dest: stack slot receiving the result.
** The key is not passed explicitly; it is read from L->env.
** Behavior, depending on the __index entry of t's metatable:
** - nil:      remember this in the metatable flags and store nil in dest.
** - function: call it with (t, key), expecting one result, and copy the
**             result to dest. dest is saved/restored as a stack offset
**             around the call.
** - other:    let luaV_gettable() continue with the __index object.
** t->metatable is dereferenced without a NULL check: the jsub code only
** chains here (label 5) after testing the metatable pointer for non-NULL.
**
** The body also defines three jsubs which share the lookup code at label 9:
** GETGLOBAL (table is the closure's env), GETTABLE_KSTR (constant string
** key) and GETTABLE_STR (string key in a stack slot). Wrong operand types
** jump to DEOPTIMIZE_CALLER.
*/
1060static void jit_gettable_fb(lua_State *L, Table *t, StkId dest)
1061{
1062 Table *mt = t->metatable;
1063 const TValue *tm = luaH_getstr(mt, G(L)->tmname[TM_INDEX]);
1064 if (ttisnil(tm)) { /* No __index method? */
1065 mt->flags |= 1<<TM_INDEX; /* Cache this fact. */
1066 setnilvalue(dest);
1067 } else if (ttisfunction(tm)) { /* __index function? */
1068 ptrdiff_t destr = savestack(L, dest);
1069 setobj2s(L, L->top, tm);
1070 sethvalue(L, L->top+1, t);
1071 setobj2s(L, L->top+2, &L->env);
1072 luaD_checkstack(L, 3);
1073 L->top += 3;
1074 luaD_call(L, L->top - 3, 1);
1075 dest = restorestack(L, destr);
1076 L->top--;
1077 setobjs2s(L, dest, L->top);
1078 } else { /* Let luaV_gettable() continue with the __index object. */
1079 luaV_gettable(L, tm, &L->env, dest);
1080 }
1081
1082 |//-----------------------------------------------------------------------
1083 |.jsub GETGLOBAL // Lookup global variable.
1084 |// Call with: TSTRING:edx (key), BASE (dest)
1085 | mov CI, L->ci
1086 | mov TOP, CI->func
1087 | mov LCL, TOP->value
1088 | mov TABLE:edi, LCL->env
1089 | jmp >9
1090 |.endjsub
1091 |
1092 |//-----------------------------------------------------------------------
1093 |.jsub GETTABLE_KSTR // Lookup constant string in table.
1094 |// Call with: TOP (tab), TSTRING:edx (key), BASE (dest)
1095 | cmp dword TOP->tt, LUA_TTABLE
1096 | mov TABLE:edi, TOP->value
1097 | jne ->DEOPTIMIZE_CALLER // Not a table? Deoptimize.
1098 |
1099 |// Common entry: TABLE:edi (tab), TSTRING:edx (key), BASE (dest)
1100 |// Restores BASE, destroys eax, ecx, edx, edi (TOP).
1101 |9:
/* Node index = key hash & ((1 << lsizenode) - 1); then scale by the node */
/* size and add the node array base to get the main position. */
1102 | movzx ecx, byte TABLE:edi->lsizenode // hashstr(t, key).
1103 | mov eax, 1
1104 | shl eax, cl
1105 | dec eax
1106 | and eax, TSTRING:edx->tsv.hash
1107 | Nodemul NODE:eax
1108 | add NODE:eax, TABLE:edi->node
1109 |
1110 |1: // Start of inner loop. Check node key.
1111 | cmp dword NODE:eax->i_key.nk.tt, LUA_TSTRING
1112 | jne >2
1113 | cmp aword NODE:eax->i_key.nk.value, TSTRING:edx
1114 | jne >2
1115 | // Note: swapping the two checks is faster, but valgrind complains.
1116 |// Assumes: (int)&(((Node *)0)->i_val) == (int)&(((StkId)0)->value)
1117 |
1118 |// Ok, key found. Copy node value to destination (stack) slot.
1119 | mov ecx, NODE:eax->i_val.tt
1120 | test ecx, ecx; je >3 // Node has nil value?
1121 ||if (J->flags & JIT_F_CPU_SSE2) {
1122 | movq xmm0, qword NODE:eax->i_val.value
1123 | movq qword BASE->value, xmm0
1124 ||} else {
1125 | mov edx, NODE:eax->i_val.value
1126 | mov edi, NODE:eax->i_val.value.na[1]
1127 | mov BASE->value, edx
1128 | mov BASE->value.na[1], edi
1129 ||}
1130 | mov BASE->tt, ecx
1131 | mov BASE, L->base
1132 | ret
1133 |2:
1134 | mov NODE:eax, NODE:eax->i_key.nk.next // Get next key in chain.
1135 | test NODE:eax, NODE:eax
1136 | jnz <1 // Loop if non-NULL.
1137 |
/* Key not found: ecx is cleared so label 4 stores a nil type tag. When */
/* label 3 is reached from the 'nil value' check above, ecx is already 0. */
1138 | xor ecx, ecx
1139 |3:
1140 | mov TABLE:eax, TABLE:edi->metatable
1141 | test TABLE:eax, TABLE:eax
1142 | jz >4 // No metatable?
1143 | test byte TABLE:eax->flags, 1<<TM_INDEX
1144 | jz >5 // Or 'no __index' flag set?
1145 |4:
1146 | settt BASE[0], ecx // Yes, set to nil.
1147 | mov BASE, L->base
1148 | ret
1149 |
1150 |5: // Otherwise chain to C code which eventually calls luaV_gettable.
1151 | setsvalue L->env, TSTRING:edx // Use L->env as temp key.
/* The value at [esp] (the jsub's return address) is stored as L->savedpc. */
1152 | mov ecx, [esp]
1153 | sub esp, FRAME_OFFSET
1154 | mov L->savedpc, ecx
1155 | call &jit_gettable_fb, L, TABLE:edi, BASE
1156 | add esp, FRAME_OFFSET
1157 | mov BASE, L->base
1158 | ret
1159 |.endjsub
1160 |
1161 |//-----------------------------------------------------------------------
1162 |.jsub GETTABLE_STR // Lookup string in table.
1163 |// Call with: TOP (tab), TVALUE:ecx (key), BASE (dest)
/* Both type tags are combined into one value (tab.tt<<4 | key.tt) so a */
/* single compare checks the table and the key type at once. */
1164 | mov eax, TOP->tt; shl eax, 4; or eax, TVALUE:ecx->tt
1165 | cmp eax, LUA_TTABLE_STR
1166 | mov TABLE:edi, TOP->value
1167 | mov TSTRING:edx, TVALUE:ecx->value
1168 | je <9 // Types ok? Continue above.
1169 | jmp ->DEOPTIMIZE_CALLER // Otherwise deoptimize.
1170 |.endjsub
1171}
1172
1173/* Fallback for SETTABLE_*STR. Temporary (string) key is in L->env. */
/*
** C fallback for the string-key table store jsubs below.
** t:   table being assigned to.
** val: value to store.
** The (string) key is not passed explicitly; it is read from L->env.
** Behavior, depending on the __newindex entry of t's metatable:
** - nil:      remember this in the metatable flags, clear t's own flag
**             cache, do a raw set via luaH_setstr() and run the table
**             write barrier.
** - function: call it with (t, key, val), expecting no results.
** - other:    let luaV_settable() continue with the __newindex object.
** t->metatable is dereferenced without a NULL check: the jsub code only
** chains here (label 7) on paths where the metatable pointer was tested
** for non-NULL.
**
** The body also defines the jsubs BARRIERBACK (unmark a black table and
** link it into the grayagain list), SETGLOBAL, SETTABLE_KSTR and
** SETTABLE_STR. The latter three share the store code at label 9.
*/
1174static void jit_settable_fb(lua_State *L, Table *t, StkId val)
1175{
1176 Table *mt = t->metatable;
1177 const TValue *tm = luaH_getstr(mt, G(L)->tmname[TM_NEWINDEX]);
1178 if (ttisnil(tm)) { /* No __newindex method? */
1179 mt->flags |= 1<<TM_NEWINDEX; /* Cache this fact. */
1180 t->flags = 0; /* But need to clear the cache for the table itself. */
1181 setobj2t(L, luaH_setstr(L, t, rawtsvalue(&L->env)), val);
1182 luaC_barriert(L, t, val);
1183 } else if (ttisfunction(tm)) { /* __newindex function? */
1184 setobj2s(L, L->top, tm);
1185 sethvalue(L, L->top+1, t);
1186 setobj2s(L, L->top+2, &L->env);
1187 setobj2s(L, L->top+3, val);
1188 luaD_checkstack(L, 4);
1189 L->top += 4;
1190 luaD_call(L, L->top - 4, 0);
1191 } else { /* Let luaV_settable() continue with the __newindex object. */
1192 luaV_settable(L, tm, &L->env, val);
1193 }
1194
1195 |//-----------------------------------------------------------------------
1196 |.jsub BARRIERBACK // luaC_barrierback() with regparms.
1197 |// Call with: TABLE:edi (table). Destroys ecx, edx.
1198 | mov GL:ecx, L->l_G
1199 | and byte TABLE:edi->marked, (~bitmask(BLACKBIT))&0xff
1200 | mov edx, GL:ecx->grayagain
1201 | mov GL:ecx->grayagain, TABLE:edi
1202 | mov TABLE:edi->gclist, edx
1203 | ret
1204 |.endjsub
1205 |
1206 |//-----------------------------------------------------------------------
1207 |.jsub SETGLOBAL // Set global variable.
1208 |// Call with: TSTRING:edx (key), BASE (val)
1209 | mov CI, L->ci
1210 | mov TOP, CI->func
1211 | mov LCL, TOP->value
1212 | mov TABLE:edi, LCL->env
1213 | jmp >9
1214 |.endjsub
1215 |
1216 |//-----------------------------------------------------------------------
1217 |.jsub SETTABLE_KSTR // Set constant string entry in table.
1218 |// Call with: TOP (tab), TSTRING:edx (key), BASE (val)
1219 | cmp dword TOP->tt, LUA_TTABLE
1220 | mov TABLE:edi, TOP->value
1221 | jne ->DEOPTIMIZE_CALLER // Not a table? Deoptimize.
1222 |
1223 |// Common entry: TABLE:edi (tab), TSTRING:edx (key), BASE (val)
1224 |// Restores BASE, destroys eax, ecx, edx, edi (TOP).
1225 |9:
/* Node index = key hash & ((1 << lsizenode) - 1); then scale by the node */
/* size and add the node array base to get the main position. */
1226 | movzx ecx, byte TABLE:edi->lsizenode // hashstr(t, key).
1227 | mov eax, 1
1228 | shl eax, cl
1229 | dec eax
1230 | and eax, TSTRING:edx->tsv.hash
1231 | Nodemul NODE:eax
1232 | add NODE:eax, TABLE:edi->node
1233 |
1234 |1: // Start of inner loop. Check node key.
1235 | cmp dword NODE:eax->i_key.nk.tt, LUA_TSTRING
1236 | jne >4
1237 | cmp aword NODE:eax->i_key.nk.value, TSTRING:edx
1238 | jne >4
1239 | // Note: swapping the two checks is faster, but valgrind complains.
1240 |
1241 |// Ok, key found. Copy new value to node value.
1242 | cmp dword NODE:eax->i_val.tt, LUA_TNIL // Previous value is nil?
1243 | je >6
1244 | // Assumes: (int)&(((Node *)0)->i_val) == (int)&(((StkId)0)->value)
1245 |2:
1246 | mov byte TABLE:edi->flags, 0 // Clear metamethod cache.
1247 |3: // Target for SETTABLE_NUM below.
1248 | test byte TABLE:edi->marked, bitmask(BLACKBIT) // isblack(table)
1249 | jnz >8 // Unlikely, but set barrier back.
1250 |7: // Caveat: recycled label.
1251 | copyslot TVALUE:eax[0], BASE[0], ecx, edx, TOP
1252 | mov BASE, L->base
1253 | ret
1254 |
1255 |8: // Avoid valiswhite() check -- black2gray(table) is ok.
1256 | call ->BARRIERBACK
1257 | jmp <7
1258 |
1259 |4:
1260 | mov NODE:eax, NODE:eax->i_key.nk.next // Get next key in chain.
1261 | test NODE:eax, NODE:eax
1262 | jnz <1 // Loop if non-NULL.
1263 |
1264 |// Key not found. Add a new one, but check metatable first.
1265 | mov TABLE:ecx, TABLE:edi->metatable
1266 | test TABLE:ecx, TABLE:ecx
1267 | jz >5 // No metatable?
1268 | test byte TABLE:ecx->flags, 1<<TM_NEWINDEX
/* '>7' is a forward reference, so it resolves to the second label 7 */
/* (the C fallback below), not to the recycled label 7 above. */
1269 | jz >7 // Or 'no __newindex' flag set?
1270 |
1271 |5: // Add new key.
1272 | // No need for setting L->savedpc since only LUA_ERRMEM may be thrown.
1273 | lea TVALUE:eax, L->env
1274 | setsvalue TVALUE:eax[0], TSTRING:edx
1275 | sub esp, FRAME_OFFSET
1276 | call &luaH_newkey, L, TABLE:edi, TVALUE:eax
1277 | add esp, FRAME_OFFSET
1278 | jmp <2 // Copy to the returned value. See Node/TValue assumption above.
1279 |
1280 |6: // Key found, but previous value is nil.
1281 | mov TABLE:ecx, TABLE:edi->metatable
1282 | test TABLE:ecx, TABLE:ecx
1283 | jz <2 // No metatable?
1284 | test byte TABLE:ecx->flags, 1<<TM_NEWINDEX
1285 | jnz <2 // Or 'no __newindex' flag set?
1286 |
1287 |7: // Otherwise chain to C code which eventually calls luaV_settable.
1288 | setsvalue L->env, TSTRING:edx // Use L->env as temp key.
/* The value at [esp] (the jsub's return address) is stored as L->savedpc. */
1289 | mov ecx, [esp]
1290 | sub esp, FRAME_OFFSET
1291 | mov L->savedpc, ecx
1292 | call &jit_settable_fb, L, TABLE:edi, BASE
1293 | add esp, FRAME_OFFSET
1294 | mov BASE, L->base
1295 | ret
1296 |.endjsub
1297 |
1298 |//-----------------------------------------------------------------------
1299 |.jsub SETTABLE_STR // Set string entry in table.
1300 |// Call with: TOP (tab), TVALUE:ecx (key), BASE (val)
/* Both type tags are combined into one value (tab.tt<<4 | key.tt) so a */
/* single compare checks the table and the key type at once. */
1301 | mov eax, TOP->tt; shl eax, 4; or eax, TVALUE:ecx->tt
1302 | cmp eax, LUA_TTABLE_STR
1303 | mov TABLE:edi, TOP->value
1304 | mov TSTRING:edx, TVALUE:ecx->value
1305 | je <9 // Types ok? Continue above.
1306 | jmp ->DEOPTIMIZE_CALLER // Otherwise deoptimize.
1307 |.endjsub
1308}
1309
1310/* ------------------------------------------------------------------------ */
1311
/*
** Emit code to create a new table and store it in BASE[dest].
** lnarray/lnhash are the encoded size operands; they are decoded with
** luaO_fb2int() at compile time and passed to luaH_new() as constants.
** A GC check is emitted afterwards via jit_checkGC().
*/
1312static void jit_op_newtable(jit_State *J, int dest, int lnarray, int lnhash)
1313{
1314 | call &luaH_new, L, luaO_fb2int(lnarray), luaO_fb2int(lnhash)
1315 | sethvalue BASE[dest], eax
1316 jit_checkGC(J);
1317}
1318
/*
** Emit code to read a global variable into BASE[dest].
** The name is constant k[kidx], which must be a string (asserted). Its
** string object address goes to edx, BASE is advanced to the destination
** slot and the GETGLOBAL jsub does the lookup (it reloads BASE on return).
*/
1319static void jit_op_getglobal(jit_State *J, int dest, int kidx)
1320{
1321 const TValue *kk = &J->pt->k[kidx];
1322 jit_assert(ttisstring(kk));
1323 | mov TSTRING:edx, &&kk->value.gc->ts
1324 | addidx BASE, dest
1325 | call ->GETGLOBAL
1326}
1327
/*
** Emit code to assign BASE[rval] to a global variable.
** The name is constant k[kidx], which must be a string (asserted). Its
** string object address goes to edx, BASE is advanced to the value slot
** and the SETGLOBAL jsub does the store (it reloads BASE on return).
*/
1328static void jit_op_setglobal(jit_State *J, int rval, int kidx)
1329{
1330 const TValue *kk = &J->pt->k[kidx];
1331 jit_assert(ttisstring(kk));
1332 | mov TSTRING:edx, &&kk->value.gc->ts
1333 | addidx BASE, rval
1334 | call ->SETGLOBAL
1335}
1336
/* Non-positive results of jit_keylookup(): constant string key, variable */
/* string key, or generic fallback. Positive results denote array keys. */
1337enum { TKEY_KSTR = -2, TKEY_STR = -1, TKEY_ANY = 0 };
1338
1339/* Optimize key lookup depending on consts or hints type. */
/*
** Select a key lookup strategy for a table access and emit its prologue.
** tab:  stack index of the table operand.
** rkey: RK operand for the key (constant if ISK(rkey)).
** Returns:
** - TKEY_ANY:  nothing usable emitted for the caller; use the generic
**              fallback (TYPE hint is not a table, or the key is neither
**              a string nor an integer in the range 1 .. 2^26-1).
** - TKEY_KSTR: constant string key. TOP = &BASE[tab], edx = string object.
** - TKEY_STR:  variable string key. TOP = &BASE[tab], ecx = &BASE[rkey].
** - k >= 1:    constant integer key k. edi = table, ecx = k, eax = array
**              base, so the slot is TVALUE:eax[k-1].
** - 1:         variable integer key. edi = table and eax already points
**              at the array slot, so the slot is TVALUE:eax[1-1].
** The array variants reference forward labels 5 (not in the array part)
** and 9 (type hint was wrong) which the caller must define.
*/
1340static int jit_keylookup(jit_State *J, int tab, int rkey)
1341{
1342 const TValue *tabt = hint_get(J, TYPE);
1343 const TValue *key;
1344 if (!ttistable(tabt)) return TKEY_ANY; /* Not a table? Use fallback. */
1345 key = ISK(rkey) ? &J->pt->k[INDEXK(rkey)] : hint_get(J, TYPEKEY);
1346 if (ttisstring(key)) { /* String key? */
1347 if (ISK(rkey)) {
1348 | lea TOP, BASE[tab]
1349 | mov TSTRING:edx, &&key->value.gc->ts
1350 return TKEY_KSTR; /* Const string key. */
1351 } else {
1352 | lea TOP, BASE[tab]
1353 | lea TVALUE:ecx, BASE[rkey]
1354 return TKEY_STR; /* Var string key. */
1355 }
1356 } else if (ttisnumber(key)) { /* Number key? */
1357 lua_Number n = nvalue(key);
1358 int k;
1359 lua_number2int(k, n);
1360 if (!(k >= 1 && k < (1 << 26) && (lua_Number)k == n))
1361 return TKEY_ANY; /* Not a proper array key? Use fallback. */
1362 if (ISK(rkey)) {
1363 | istable tab
1364 | mov TABLE:edi, BASE[tab].value
1365 | jne >9 // TYPE hint was wrong?
1366 | mov ecx, k // Needed for hash fallback.
1367 | mov TVALUE:eax, TABLE:edi->array
/* Here ecx holds the 1-based key, hence 'ja' (k > sizearray). */
1368 | cmp ecx, TABLE:edi->sizearray; ja >5 // Not in array part?
1369 return k; /* Const array key (>= 1). */
1370 } else {
1371 | mov eax, BASE[tab].tt; shl eax, 4; or eax, BASE[rkey].tt
1372 | cmp eax, LUA_TTABLE_NUM; jne >9 // TYPE/TYPEKEY hint was wrong?
1373 if (J->flags & JIT_F_CPU_SSE2) {
/* Convert to int and back; a mismatch (or unordered) means non-integer. */
/* The 'dec eax' in between produces the 0-based index; the flags tested */
/* by jne/jp are those set afterwards by ucomisd. */
1374 | movsd xmm0, qword BASE[rkey]
1375 | cvttsd2si eax, xmm0
1376 | cvtsi2sd xmm1, eax
1377 | dec eax
1378 | ucomisd xmm1, xmm0
1379 | mov TABLE:edi, BASE[tab].value
1380 | jne >9; jp >9 // Not an integer? Deoptimize.
1381 } else {
1382 |// Annoying x87 stuff: check whether a number is an integer.
1383 |// The latency of fist/fild is the real problem here.
1384 | fld qword BASE[rkey].value
1385 | fist dword TMP1
1386 | fild dword TMP1
1387 | fcomparepp // eax may be modified.
1388 | jne >9; jp >9 // Not an integer? Deoptimize.
1389 | mov eax, TMP1
1390 | mov TABLE:edi, BASE[tab].value
1391 | dec eax
1392 }
/* Here eax holds the 0-based index, hence 'jae' (unsigned, so a key of */
/* 0 or below also fails the check after the decrement). */
1393 | cmp eax, TABLE:edi->sizearray; jae >5 // Not in array part?
1394 | TValuemul eax
1395 | add eax, TABLE:edi->array
1396 return 1; /* Variable array key. */
1397 }
1398 }
1399 return TKEY_ANY; /* Use fallback. */
1400}
1401
/*
** Emit code for a table read: BASE[dest] = BASE[tab][RK(rkey)].
** The access strategy comes from jit_keylookup():
** - string keys call the GETTABLE_KSTR / GETTABLE_STR jsubs with BASE
**   pointing at the destination slot,
** - TKEY_ANY calls luaV_gettable() directly,
** - array keys copy TVALUE:eax[k-1] inline. A nil array value goes to
**   label 6, which still stores nil if the table has no metatable or the
**   'no __index' flag is set, and deoptimizes otherwise. Label 5 handles
**   keys outside the array part by calling luaH_getnum() through the
**   GETTABLE_KNUM / GETTABLE_NUM jsubs and retrying at label 1.
** The function also defines those two jsubs.
*/
1402static void jit_op_gettable(jit_State *J, int dest, int tab, int rkey)
1403{
1404 int k = jit_keylookup(J, tab, rkey);
1405 switch (k) {
1406 case TKEY_KSTR: /* Const string key. */
1407 | addidx BASE, dest
1408 | call ->GETTABLE_KSTR
1409 break;
1410 case TKEY_STR: /* Variable string key. */
1411 | addidx BASE, dest
1412 | call ->GETTABLE_STR
1413 break;
1414 case TKEY_ANY: /* Generic gettable fallback. */
1415 if (ISK(rkey)) {
1416 | mov ecx, &&J->pt->k[INDEXK(rkey)]
1417 } else {
1418 | lea ecx, BASE[rkey]
1419 }
1420 | lea edx, BASE[tab]
1421 | addidx BASE, dest
1422 | mov L->savedpc, &J->nextins
1423 | call &luaV_gettable, L, edx, ecx, BASE
1424 | mov BASE, L->base
1425 break;
1426 default: /* Array key. */
1427 |// This is really copyslot BASE[dest], TVALUE:eax[k-1] mixed with compare.
1428 |1:
1429 | mov edx, TVALUE:eax[k-1].tt
1430 | test edx, edx; je >6 // Array has nil value?
1431 if (J->flags & JIT_F_CPU_SSE2) {
1432 | movq xmm0, qword TVALUE:eax[k-1].value
1433 | movq qword BASE[dest].value, xmm0
1434 } else {
1435 | mov ecx, TVALUE:eax[k-1].value
1436 | mov eax, TVALUE:eax[k-1].value.na[1]
1437 | mov BASE[dest].value, ecx
1438 | mov BASE[dest].value.na[1], eax
1439 }
1440 |2:
1441 | mov BASE[dest].tt, edx
1442 |.tail
1443 |5: // Fallback to hash part. TABLE:edi is callee-saved.
1444 if (ISK(rkey)) {
1445 | call ->GETTABLE_KNUM
1446 } else {
1447 | call ->GETTABLE_NUM
1448 }
1449 | jmp <1 // Slot is at TVALUE:eax[k-1].
1450 |
1451 |6: // Shortcut for tables without an __index metamethod.
1452 | mov TABLE:ecx, TABLE:edi->metatable
1453 | test TABLE:ecx, TABLE:ecx
1454 | jz <2 // No metatable?
1455 | test byte TABLE:ecx->flags, 1<<TM_INDEX
1456 | jnz <2 // Or 'no __index' flag set?
1457 |
1458 |9: // Otherwise deoptimize.
1459 | mov edx, &J->nextins
1460 | jmp ->DEOPTIMIZE
1461 |.code
1462 break;
1463 }
1464
1465 |.jsub GETTABLE_KNUM // Gettable fallback for const numeric keys.
1466 | mov TMP2, ecx // Save k.
1467 | sub esp, FRAME_OFFSET
1468 | call &luaH_getnum, TABLE:edi, ecx
1469 | add esp, FRAME_OFFSET
1470 | mov ecx, TMP2 // Restore k.
/* Bias the returned pointer by -(k-1) slots, because the inline code */
/* addresses the result as TVALUE:eax[k-1]. */
1471 | TValuemul ecx
1472 | sub TVALUE:eax, ecx // Compensate for TVALUE:eax[k-1].
1473 | add TVALUE:eax, #TVALUE
1474 | ret
1475 |.endjsub
1476 |
1477 |.jsub GETTABLE_NUM // Gettable fallback for variable numeric keys.
/* eax holds the 0-based index here; turn it back into the 1-based key. */
1478 | inc eax
1479 | mov ARG2, TABLE:edi // Really ARG1 and ARG2.
1480 | mov ARG3, eax
1481 | jmp &luaH_getnum // Chain to C code.
1482 |.endjsub
1483}
1484
/*
** Emit code for a table write: BASE[tab][RK(rkey)] = RK(rval).
** The access strategy comes from jit_keylookup():
** - string keys call the SETTABLE_KSTR / SETTABLE_STR jsubs with BASE
**   pointing at the value (a constant's address or a stack slot),
** - TKEY_ANY calls luaV_settable() directly,
** - array keys store to TVALUE:eax[k-1] inline. If the previous value is
**   nil, label 6 lets the store proceed only if the table has no metatable
**   or the 'no __newindex' flag is set, and deoptimizes otherwise. Label 5
**   handles keys outside the array part by calling luaH_setnum() through
**   the SETTABLE_KNUM / SETTABLE_NUM jsubs and retrying at label 1.
**   The back barrier check on the table is only emitted when the value is
**   a stack slot or a collectable constant.
** The function also defines those two jsubs.
*/
1485static void jit_op_settable(jit_State *J, int tab, int rkey, int rval)
1486{
1487 const TValue *val = ISK(rval) ? &J->pt->k[INDEXK(rval)] : NULL;
1488 int k = jit_keylookup(J, tab, rkey);
1489 switch (k) {
1490 case TKEY_KSTR: /* Const string key. */
1491 case TKEY_STR: /* Variable string key. */
1492 if (ISK(rval)) {
1493 | mov BASE, &val
1494 } else {
1495 | addidx BASE, rval
1496 }
1497 if (k == TKEY_KSTR) {
1498 | call ->SETTABLE_KSTR
1499 } else {
1500 | call ->SETTABLE_STR
1501 }
1502 break;
1503 case TKEY_ANY: /* Generic settable fallback. */
1504 if (ISK(rkey)) {
1505 | mov ecx, &&J->pt->k[INDEXK(rkey)]
1506 } else {
1507 | lea ecx, BASE[rkey]
1508 }
1509 if (ISK(rval)) {
1510 | mov edx, &val
1511 } else {
1512 | lea edx, BASE[rval]
1513 }
1514 | addidx BASE, tab
1515 | mov L->savedpc, &J->nextins
1516 | call &luaV_settable, L, BASE, ecx, edx
1517 | mov BASE, L->base
1518 break;
1519 default: /* Array key. */
1520 |1:
1521 | tvisnil TVALUE:eax[k-1]; je >6 // Previous value is nil?
1522 |2:
1523 |.tail
1524 |5: // Fallback to hash part. TABLE:edi is callee-saved.
1525 if (ISK(rkey)) {
1526 | call ->SETTABLE_KNUM
1527 } else {
1528 | call ->SETTABLE_NUM
1529 }
1530 | jmp <1 // Slot is at TVALUE:eax[k-1].
1531 |
1532 |6: // Shortcut for tables without a __newindex metamethod.
1533 | mov TABLE:ecx, TABLE:edi->metatable
1534 | test TABLE:ecx, TABLE:ecx
1535 | jz <2 // No metatable?
1536 | test byte TABLE:ecx->flags, 1<<TM_NEWINDEX
1537 | jnz <2 // Or 'no __newindex' flag set?
1538 |
1539 |9: // Otherwise deoptimize.
1540 | mov edx, &J->nextins
1541 | jmp ->DEOPTIMIZE
1542 |.code
/* val is NULL when rval is a stack slot; the '!ISK(rval) ||' short-circuit */
/* keeps iscollectable() from being applied to it in that case. */
1543 if (!ISK(rval) || iscollectable(val)) {
1544 | test byte TABLE:edi->marked, bitmask(BLACKBIT) // isblack(table)
1545 | jnz >7 // Unlikely, but set barrier back.
1546 |3:
1547 |.tail
1548 |7: // Avoid valiswhite() check -- black2gray(table) is ok.
1549 | call ->BARRIERBACK
1550 | jmp <3
1551 |.code
1552 }
1553 if (ISK(rval)) {
1554 | copyconst TVALUE:eax[k-1], val
1555 } else {
1556 | copyslot TVALUE:eax[k-1], BASE[rval], ecx, edx, TOP
1557 }
1558 break;
1559 }
1560
1561 |.jsub SETTABLE_KNUM // Settable fallback for const numeric keys.
1562 | mov TMP2, ecx // Save k.
1563 | sub esp, FRAME_OFFSET
1564 | call &luaH_setnum, L, TABLE:edi, ecx
1565 | add esp, FRAME_OFFSET
1566 | mov ecx, TMP2 // Restore k.
/* Bias the returned pointer by -(k-1) slots, because the inline code */
/* addresses the result as TVALUE:eax[k-1]. */
1567 | TValuemul ecx
1568 | sub TVALUE:eax, ecx // Compensate for TVALUE:eax[k-1].
1569 | add TVALUE:eax, #TVALUE
1570 | ret
1571 |.endjsub
1572 |
1573 |.jsub SETTABLE_NUM // Settable fallback for variable numeric keys.
/* eax holds the 0-based index here; turn it back into the 1-based key. */
1574 | inc eax
1575 | mov ARG2, L // Really ARG1, ARG2 and ARG3.
1576 | mov ARG3, TABLE:edi
1577 | mov ARG4, eax
1578 | jmp &luaH_setnum // Chain to C code.
1579 |.endjsub
1580}
1581
/*
** Emit code for a method lookup: copy the object BASE[tab] to BASE[dest+1]
** and then fetch BASE[tab][RK(rkey)] into BASE[dest], reusing the code
** generator for table reads.
*/
1582static void jit_op_self(jit_State *J, int dest, int tab, int rkey)
1583{
1584 | copyslot BASE[dest+1], BASE[tab]
1585 jit_op_gettable(J, dest, tab, rkey);
1586}
1587
1588/* ------------------------------------------------------------------------ */
1589
/*
** Emit code to copy a run of stack slots into the array part of a table.
** ra:    stack index of the table; the values start at BASE[ra+1].
** num:   number of values, or 0 if the previous op left an open result
**        and the count has to be derived from TOP at runtime.
** batch: 1-based batch number; 0 means the real batch number is stored in
**        the next instruction word, which is then consumed (J->combine++).
** The target offset in the array is (batch-1)*LFIELDS_PER_FLUSH. The array
** is resized through luaH_resizearray() if it is too small, and a black
** table is turned gray again via the BARRIERBACK jsub before copying.
*/
1590static void jit_op_setlist(jit_State *J, int ra, int num, int batch)
1591{
1592 if (batch == 0) { batch = (int)(*J->nextins); J->combine++; }
/* From here on 'batch' is the 0-based element offset, not a batch number. */
1593 batch = (batch-1)*LFIELDS_PER_FLUSH;
1594 if (num == 0) { /* Previous op was open and set TOP: {f()} or {...}. */
1595 | mov L->env.value, TOP // Need to save TOP (edi).
1596 | lea eax, BASE[ra+1]
1597 | sub eax, TOP
1598 | neg eax
1599 | TValuediv eax // num = (TOP-ra-1)/sizeof(TValue).
1600 | mov TABLE:edi, BASE[ra].value
1601 | jz >4 // Nothing to set?
1602 if (batch > 0) {
1603 | add eax, batch
1604 }
1605 | cmp dword TABLE:edi->sizearray, eax
1606 | jae >1 // Skip resize if not needed.
1607 | // A resize is likely, so inline it.
1608 | call &luaH_resizearray, L, TABLE:edi, eax
1609 |1:
1610 | test byte TABLE:edi->marked, bitmask(BLACKBIT) // isblack(table)
1611 | mov edx, TABLE:edi->array
1612 | jnz >6 // Unlikely, but set barrier back.
1613 | mov TOP, L->env.value
1614 |
1615 |.tail
1616 |6: // Avoid lots of valiswhite() checks -- black2gray(table) is ok.
1617 | call ->BARRIERBACK
1618 | jmp <1 // Need to reload edx.
1619 |.code
1620 } else { /* Set fixed number of args. */
1621 | mov TABLE:edi, BASE[ra].value // edi is callee-save.
1622 | cmp dword TABLE:edi->sizearray, batch+num
1623 | jb >5 // Need to resize array?
1624 |1:
1625 | test byte TABLE:edi->marked, bitmask(BLACKBIT) // isblack(table)
1626 | mov edx, TABLE:edi->array
1627 | jnz >6 // Unlikely, but set barrier back.
1628 | lea TOP, BASE[ra+1+num] // Careful: TOP is edi.
1629 |
1630 |.tail
1631 |5: // A resize is unlikely (impossible?). NEWTABLE should've done it.
1632 | call &luaH_resizearray, L, TABLE:edi, batch+num
1633 | jmp <1
1634 |6: // Avoid lots of valiswhite() checks -- black2gray(table) is ok.
1635 | call ->BARRIERBACK
1636 | jmp <1 // Need to reload edx.
1637 |.code
1638 }
1639 if (batch > 0) {
1640 | add edx, batch*#TVALUE // edx = &t->array[(batch+1)-1]
1641 }
1642 | lea ecx, BASE[ra+1]
/* The copy proceeds one aword at a time from ecx to edx until ecx */
/* reaches TOP (the end of the source slots). */
1643 |3: // Copy stack slots to array.
1644 | mov eax, [ecx]
1645 | add ecx, aword*1
1646 | mov [edx], eax
1647 | add edx, aword*1
1648 | cmp ecx, TOP
1649 | jb <3
1650 |
1651 |4:
1652 if (num == 0) { /* Previous op was open. Restore L->top. */
1653 | lea TOP, BASE[J->pt->maxstacksize] // Faster than getting L->ci->top.
1654 | mov L->top, TOP
1655 }
1656}
1657
1658/* ------------------------------------------------------------------------ */
1659
/*
** Emit code for the arithmetic operators and the LT/LE comparisons.
** dest: result slot. For TM_LT/TM_LE it is the expected condition instead
**       (asserted to be 0 or 1) and the branch target comes from
**       jit_jmp_target().
** rkb:  first RK operand (constant if ISK(rkb)).
** rkc:  second RK operand (constant if ISK(rkc)).
** ev:   metamethod event (TM_ADD ... TM_LE) selecting the operation.
**
** Code selection:
** - both operands constant, or a non-number constant: generic fallback
**   (luaV_arith / luaV_lessthan / luaV_lessequal) only.
** - TYPE hint is nil (no hint): inline x87 code plus the generic fallback
**   placed in the .tail section (hastail).
** - TYPE hint is number: inline x87 code; failing type checks deoptimize.
** - any other hint: generic fallback only.
** If one operand is a constant, 'kval' points to its value, 'idx' is the
** variable operand and 'rev' tells whether the constant is the second
** operand. TM_MOD and some TM_POW forms get special inline sequences.
** The function also defines the LOG2_TWORD jsub used for k^x.
*/
1660static void jit_op_arith(jit_State *J, int dest, int rkb, int rkc, int ev)
1661{
1662 const TValue *kkb = ISK(rkb) ? &J->pt->k[INDEXK(rkb)] : NULL;
1663 const TValue *kkc = ISK(rkc) ? &J->pt->k[INDEXK(rkc)] : NULL;
1664 const Value *kval;
1665 int idx, rev;
1666 int target = (ev == TM_LT || ev == TM_LE) ? jit_jmp_target(J) : 0;
1667 int hastail = 0;
1668
1669 /* The bytecode compiler already folds constants except for: k/0, k%0, */
1670 /* NaN results, k1<k2, k1<=k2. No point in optimizing these cases. */
1671 if (ISK(rkb&rkc)) goto fallback;
1672
1673 /* Avoid optimization when non-numeric constants are present. */
1674 if (kkb ? !ttisnumber(kkb) : (kkc && !ttisnumber(kkc))) goto fallback;
1675
1676 /* The TYPE hint selects numeric inlining and/or fallback encoding. */
1677 switch (ttype(hint_get(J, TYPE))) {
1678 case LUA_TNIL: hastail = 1; break; /* No hint: numeric + fallback. */
1679 case LUA_TNUMBER: break; /* Numbers: numeric + deoptimization. */
1680 default: goto fallback; /* Mixed/other types: fallback only. */
1681 }
1682
1683 /* The checks above ensure: at most one of the operands is a constant. */
1684 /* Reverse operation and swap operands so the 2nd operand is a variable. */
1685 if (kkc) { kval = &kkc->value; idx = rkb; rev = 1; }
1686 else { kval = kkb ? &kkb->value : NULL; idx = rkc; rev = 0; }
1687
1688 /* Special handling for some operators. */
1689 switch (ev) {
1690 case TM_MOD:
1691 /* Check for modulo with positive numbers, so we can use fprem. */
1692 if (kval) {
/* The sign is tested on the second 32 bit word (na[1]) of the number. */
1693 if (kval->na[1] < 0) { hastail = 0; goto fallback; } /* x%-k, -k%x */
1694 | isnumber idx
1695 | mov eax, BASE[idx].value.na[1]
1696 | jne L_DEOPTIMIZEF
1697 | test eax, eax; js L_DEOPTIMIZEF
1698 |// This will trigger deoptimization in some benchmarks (pidigits).
1699 |// But it's still a win.
1700 if (kkb) {
1701 | fld qword BASE[rkc].value
1702 | fld qword [kval]
1703 } else {
1704 | fld qword [kval]
1705 | fld qword BASE[rkb].value
1706 }
1707 } else {
1708 | isnumber2 rkb, rkc
1709 | mov eax, BASE[rkb].value.na[1]
1710 | jne L_DEOPTIMIZEF
1711 | or eax, BASE[rkc].value.na[1]; js L_DEOPTIMIZEF
1712 | fld qword BASE[rkc].value
1713 | fld qword BASE[rkb].value
1714 }
/* fprem is repeated while the status word copied to the flags has the */
/* parity bit set; the divisor left in st1 is dropped afterwards. */
1715 |1: ; fprem; fnstsw ax; sahf; jp <1
1716 | fstp st1
1717 goto fpstore;
1718 case TM_POW:
1719 if (hastail || !kval) break; /* Avoid this if not optimizing. */
1720 if (rev) { /* x^k for k > 0, k integer. */
1721 lua_Number n = kval->n;
1722 int k;
1723 lua_number2int(k, n);
1724 /* All positive integers would work. But need to limit code explosion. */
1725 if (k > 0 && k <= 65536 && (lua_Number)k == n) {
1726 | isnumber idx; jne L_DEOPTIMIZEF
1727 | fld qword BASE[idx]
/* k > 0 is guaranteed here, so this loop terminates at the lowest set bit. */
1728 for (; (k & 1) == 0; k >>= 1) { /* Handle leading zeroes (2^k). */
1729 | fmul st0
1730 }
1731 if ((k >>= 1) != 0) { /* Handle trailing bits. */
1732 | fld st0
1733 | fmul st0
1734 for (; k != 1; k >>= 1) {
1735 if (k & 1) {
1736 | fmul st1, st0
1737 }
1738 | fmul st0
1739 }
1740 | fmulp st1
1741 }
1742 goto fpstore;
1743 }
1744 } else if (kval->n > (lua_Number)0) { /* k^x for k > 0. */
1745 int log2kval[3]; /* Enough storage for a tword (80 bits). */
1746 log2kval[2] = 0; /* Avoid leaking garbage. */
1747 /* Double precision log2(k) doesn't cut it (3^x != 3 for x = 1). */
/* The already generated LOG2_TWORD jsub is called at compile time here. */
1748 ((void (*)(int *, double))J->jsub[JSUB_LOG2_TWORD])(log2kval, kval->n);
1749 | mov ARG1, log2kval[0] // Abuse stack for tword const.
1750 | mov ARG2, log2kval[1]
1751 | mov ARG3, log2kval[2] // TODO: store2load fwd stall.
1752 | isnumber idx; jne L_DEOPTIMIZEF
1753 | fld tword [esp]
1754 | fmul qword BASE[idx].value // log2(k)*x
1755 | fld st0; frndint; fsub st1, st0; fxch // Split into fract/int part.
1756 | f2xm1; fld1; faddp st1; fscale // (2^fract-1 +1) << int.
1757 | fstp st1
1758
1759 |.jsub LOG2_TWORD // Calculate log2(k) with max. precision.
1760 |// Called with (int *ptr, double k).
1761 | fld1; fld FPARG2 // Offset ok due to retaddr.
1762 | fyl2x
1763 | mov eax, ARG2 // Really ARG1.
1764 | fstp tword [eax]
1765 | ret
1766 |.endjsub
1767 goto fpstore;
1768 }
1769 break;
1770 }
1771
1772 /* Check number type and load 1st operand. */
1773 if (kval) {
1774 | isnumber idx; jne L_DEOPTIMIZEF
1775 | loadnvaluek kval
1776 } else {
1777 if (rkb == rkc) {
1778 | isnumber rkb
1779 } else {
1780 | isnumber2 rkb, rkc
1781 }
1782 | jne L_DEOPTIMIZEF
1783 | fld qword BASE[rkb].value
1784 }
1785
1786 /* Encode arithmetic operation with 2nd operand. */
/* The switch key is the event shifted left by one with 'rev' in bit 0, */
/* so the non-commutative ops get a separate reversed case. */
1787 switch ((ev<<1)+rev) {
1788 case TM_ADD<<1: case (TM_ADD<<1)+1:
1789 if (rkb == rkc) {
1790 | fadd st0
1791 } else {
1792 | fadd qword BASE[idx].value
1793 }
1794 break;
1795 case TM_SUB<<1:
1796 | fsub qword BASE[idx].value
1797 break;
1798 case (TM_SUB<<1)+1:
1799 | fsubr qword BASE[idx].value
1800 break;
1801 case TM_MUL<<1: case (TM_MUL<<1)+1:
1802 if (rkb == rkc) {
1803 | fmul st0
1804 } else {
1805 | fmul qword BASE[idx].value
1806 }
1807 break;
1808 case TM_DIV<<1:
1809 | fdiv qword BASE[idx].value
1810 break;
1811 case (TM_DIV<<1)+1:
1812 | fdivr qword BASE[idx].value
1813 break;
1814 case TM_POW<<1:
1815 | sub esp, S2LFRAME_OFFSET
1816 | fstp FPARG1
1817 | fld qword BASE[idx].value
1818 | fstp FPARG2
1819 | call &pow
1820 | add esp, S2LFRAME_OFFSET
1821 break;
1822 case (TM_POW<<1)+1:
1823 | sub esp, S2LFRAME_OFFSET
1824 | fstp FPARG2
1825 | fld qword BASE[idx].value
1826 | fstp FPARG1
1827 | call &pow
1828 | add esp, S2LFRAME_OFFSET
1829 break;
1830 case TM_UNM<<1: case (TM_UNM<<1)+1:
1831 | fchs // No 2nd operand.
1832 break;
1833 default: /* TM_LT or TM_LE. */
1834 | fld qword BASE[idx].value
1835 | fcomparepp
1836 | jp =>dest?(J->nextpc+1):target // Unordered means false.
1837 jit_assert(dest == 0 || dest == 1); /* Really cond. */
/* Pick the conditional jump from operand order (rev), the expected */
/* condition (dest) and whether the comparison is strict (TM_LT). */
1838 switch (((rev^dest)<<1)+(dest^(ev == TM_LT))) {
1839 case 0:
1840 | jb =>target
1841 break;
1842 case 1:
1843 | jbe =>target
1844 break;
1845 case 2:
1846 | ja =>target
1847 break;
1848 case 3:
1849 | jae =>target
1850 break;
1851 }
1852 goto skipstore;
1853 }
1854fpstore:
1855 /* Store result and set result type (if necessary). */
/* The type tag is only written if dest is not one of the operand slots; */
/* an operand slot was already checked to hold a number. */
1856 | fstp qword BASE[dest].value
1857 if (dest != rkb && dest != rkc) {
1858 | settt BASE[dest], LUA_TNUMBER
1859 }
1860
1861skipstore:
1862 if (!hastail) {
1863 jit_deopt_target(J, 0);
1864 return;
1865 }
1866
1867 |4:
1868 |.tail
1869 |L_DEOPTLABEL: // Recycle as fallback label.
1870
1871fallback:
1872 /* Generic fallback for arithmetic ops. */
1873 if (kkb) {
1874 | mov ecx, &kkb
1875 } else {
1876 | lea ecx, BASE[rkb]
1877 }
1878 if (kkc) {
1879 | mov edx, &kkc
1880 } else {
1881 | lea edx, BASE[rkc]
1882 }
1883 if (target) { /* TM_LT or TM_LE. */
1884 | mov L->savedpc, &(J->nextins+1)
1885 | call &ev==TM_LT?luaV_lessthan:luaV_lessequal, L, ecx, edx
1886 | test eax, eax
1887 | mov BASE, L->base
1888 if (dest) { /* cond */
1889 | jnz =>target
1890 } else {
1891 | jz =>target
1892 }
1893 } else {
1894 | addidx BASE, dest
1895 | mov L->savedpc, &J->nextins
1896 | call &luaV_arith, L, BASE, ecx, edx, ev
1897 | mov BASE, L->base
1898 }
1899
/* With a tail, the fallback code above was emitted into the .tail */
/* section: jump back to label 4 and switch back to the .code section. */
1900 if (hastail) {
1901 | jmp <4
1902 |.code
1903 }
1904}
1905
1906/* ------------------------------------------------------------------------ */
1907
/*
** C fallback for the length operator: store the length of rb into ra.
** - table:  result of luaH_getn(), converted to a number.
** - string: the string's len field, converted to a number.
** - other:  call the TM_LEN metamethod with rb as argument, expecting one
**           result, and copy the result to ra. ra is saved/restored as a
**           stack offset around the call. If the metamethod is not a
**           function, luaG_typeerror() is called.
*/
1908static void jit_fallback_len(lua_State *L, StkId ra, const TValue *rb)
1909{
1910 switch (ttype(rb)) {
1911 case LUA_TTABLE:
1912 setnvalue(ra, cast_num(luaH_getn(hvalue(rb))));
1913 break;
1914 case LUA_TSTRING:
1915 setnvalue(ra, cast_num(tsvalue(rb)->len));
1916 break;
1917 default: {
1918 const TValue *tm = luaT_gettmbyobj(L, rb, TM_LEN);
1919 if (ttisfunction(tm)) {
1920 ptrdiff_t rasave = savestack(L, ra);
1921 setobj2s(L, L->top, tm);
1922 setobj2s(L, L->top+1, rb);
1923 luaD_checkstack(L, 2);
1924 L->top += 2;
1925 luaD_call(L, L->top - 2, 1);
1926 ra = restorestack(L, rasave);
1927 L->top--;
1928 setobjs2s(L, ra, L->top);
1929 } else {
1930 luaG_typeerror(L, rb, "get length of");
1931 }
1932 break;
1933 }
1934 }
1935}
1936
/*
** Emit code for the length operator: BASE[dest] = #BASE[rb].
** The TYPE hint selects the variant:
** - table:  type check, then an inline call to luaH_getn(); the integer
**           result is converted to a number through TMP1 with fild.
** - string: type check, then the string's len field is loaded with fild.
** - other:  call jit_fallback_len() with BASE pointing at the destination.
** The two inline variants deoptimize if the type check fails.
*/
1937static void jit_op_len(jit_State *J, int dest, int rb)
1938{
1939 switch (ttype(hint_get(J, TYPE))) {
1940 case LUA_TTABLE:
1941 jit_deopt_target(J, 0);
1942 | istable rb
1943 | mov TABLE:ecx, BASE[rb].value
1944 | jne L_DEOPTIMIZE // TYPE hint was wrong?
1945 | call &luaH_getn, TABLE:ecx
1946 | mov TMP1, eax
1947 | fild dword TMP1
1948 | fstp qword BASE[dest].value
1949 | settt BASE[dest], LUA_TNUMBER
1950 break;
1951 case LUA_TSTRING:
1952 jit_deopt_target(J, 0);
1953 | isstring rb
1954 | mov TSTRING:ecx, BASE[rb].value
1955 | jne L_DEOPTIMIZE // TYPE hint was wrong?
1956 | fild aword TSTRING:ecx->tsv.len // size_t
1957 | fstp qword BASE[dest].value
1958 | settt BASE[dest], LUA_TNUMBER
1959 break;
1960 default:
1961 | lea TVALUE:ecx, BASE[rb]
1962 | addidx BASE, dest
1963 | mov L->savedpc, &J->nextins
1964 | call &jit_fallback_len, L, BASE, TVALUE:ecx
1965 | mov BASE, L->base
1966 break;
1967 }
1968}
1969
/*
** Emit branch-free code for logical not: BASE[dest] = not BASE[rb].
** ecx is computed as ((value & tt) | (tt >> 1)). 'cmp ecx, 1' sets the
** carry flag exactly when ecx is zero, and 'adc eax, eax' on a zeroed eax
** turns that carry into the result value 0 or 1. The constant 1 held in
** edx is also stored as the result's type tag (see the assumptions below).
*/
1970static void jit_op_not(jit_State *J, int dest, int rb)
1971{
1972 /* l_isfalse() without a branch -- truly devious. */
1973 /* ((value & tt) | (tt>>1)) is only zero for nil/false. */
1974 /* Assumes: LUA_TNIL == 0, LUA_TBOOLEAN == 1, bvalue() == 0/1 */
1975 | mov eax, BASE[rb].tt
1976 | mov ecx, BASE[rb].value
1977 | mov edx, 1
1978 | and ecx, eax
1979 | shr eax, 1
1980 | or ecx, eax
1981 | xor eax, eax
1982 | cmp ecx, edx
1983 | adc eax, eax
1984 | mov BASE[dest].tt, edx
1985 | mov BASE[dest].value, eax
1986}
1987
1988/* ------------------------------------------------------------------------ */
1989
/*
** Emit code for concatenation of the stack slots first..last into
** BASE[dest].
** - Exactly two operands with a string TYPE hint: call the CONCAT_STR2
**   jsub with BASE pointing at the first operand and store the returned
**   string in BASE[dest].
** - Otherwise: call luaV_concat(L, num, last) and copy BASE[first] to
**   BASE[dest] if they differ.
** A GC check is emitted in both cases.
**
** The CONCAT_STR2 jsub (defined here) returns one of the operands directly
** if the other one is empty. Otherwise it copies both strings into the
** global temp buffer (growing it via luaZ_openspace() if needed) and
** chains to luaS_newlstr(). Non-string operands or a length overflow jump
** to DEOPTIMIZE_CALLER. L lives in esi, which is needed for the string
** copy, so it is parked in ARG2 and reloaded on every exit path.
*/
1990static void jit_op_concat(jit_State *J, int dest, int first, int last)
1991{
1992 int num = last-first+1;
1993 if (num == 2 && ttisstring(hint_get(J, TYPE))) { /* Optimize common case. */
1994 | addidx BASE, first
1995 | call ->CONCAT_STR2
1996 | setsvalue BASE[dest], eax
1997 } else { /* Generic fallback. */
1998 | mov L->savedpc, &J->nextins
1999 | call &luaV_concat, L, num, last
2000 | mov BASE, L->base
2001 if (dest != first) {
2002 | copyslot BASE[dest], BASE[first]
2003 }
2004 }
2005 jit_checkGC(J); /* Always do this, even for the optimized variant. */
2006
2007 |.jsub CONCAT_STR2 // Concatenate two strings.
2008 |// Call with: BASE (first). Destroys all regs. L and BASE restored.
2009 | mov ARG2, L // Save L (esi).
2010 | mov eax, BASE[0].tt; shl eax, 4; or eax, BASE[1].tt
2011 | sub eax, LUA_TSTR_STR // eax = 0 on success.
2012 | jne ->DEOPTIMIZE_CALLER // Wrong types? Deoptimize.
2013 |
2014 |1:
2015 | mov GL:edi, L->l_G
2016 | mov TSTRING:esi, BASE[0].value // Caveat: L (esi) is gone now!
2017 | mov TSTRING:edx, BASE[1].value
2018 | mov ecx, TSTRING:esi->tsv.len // size_t
2019 | test ecx, ecx
2020 | jz >2 // 1st string is empty?
2021 | or eax, TSTRING:edx->tsv.len // eax is known to be zero.
2022 | jz >4 // 2nd string is empty?
2023 | add eax, ecx
2024 | jc >9 // Length overflow?
2025 | cmp eax, GL:edi->buff.buffsize // size_t
2026 | ja >5 // Temp buffer overflow?
2027 | mov edi, GL:edi->buff.buffer
2028 | add esi, #TSTRING
2029 | rep; movsb // Copy first string.
2030 | mov ecx, TSTRING:edx->tsv.len
2031 | lea esi, TSTRING:edx[1]
2032 | rep; movsb // Copy second string.
2033 |
2034 | sub edi, eax // start = end - total.
2035 | mov L, ARG2 // Restore L (esi). Reuse as 1st arg.
2036 | mov ARG3, edi
2037 | mov ARG4, eax
2038 | mov BASE, L->base // Restore BASE.
2039 | jmp &luaS_newlstr
2040 |
2041 |2: // 1st string is empty.
2042 | mov eax, TSTRING:edx // Return 2nd string.
2043 |3:
2044 | mov L, ARG2 // Restore L (esi) and BASE.
2045 | mov BASE, L->base
2046 | ret
2047 |
2048 |4: // 2nd string is empty.
2049 | mov eax, TSTRING:esi // Return 1st string.
2050 | jmp <3
2051 |
2052 |5: // Resize temp buffer.
2053 | // No need for setting L->savedpc since only LUA_ERRMEM may be thrown.
2054 | mov L, ARG2 // Restore L.
2055 | lea ecx, GL:edi->buff
2056 | sub esp, FRAME_OFFSET
2057 | call &luaZ_openspace, L, ecx, eax
2058 | add esp, FRAME_OFFSET
2059 | xor eax, eax // BASE (first) and L saved. eax = 0.
2060 | jmp <1 // Just restart.
2061 |
2062 |9: // Length overflow errors are rare (> 2 GB string required).
2063 | mov L, ARG2 // Need L for deoptimization.
2064 | jmp ->DEOPTIMIZE_CALLER
2065 |.endjsub
2066}
2067
2068/* ------------------------------------------------------------------------ */
2069
/*
** Emit code for an equality test followed by a conditional jump.
** The jump to the target obtained from jit_jmp_target() is taken when
** (operand B == operand C) matches cond, i.e. on equality for cond == 1
** and on inequality for cond == 0.
** Three cases are distinguished at compile time:
**   - Both operands are constants: the result is known, so either an
**     unconditional jump or nothing at all is emitted.
**   - One constant, one variable: specialized code per constant type.
**   - Two variables: type tags are compared first, then code specialized
**     by the TYPE hint (number or string) or a call to luaV_equalval.
** Every non-constant path leaves the zero flag set on equality; the
** final je/jne at the bottom consumes it.
**   J    - JIT compiler state.
**   cond - 0 or 1 (asserted).
**   rkb  - first operand (stack slot or constant index).
**   rkc  - second operand (stack slot or constant index).
*/
2070static void jit_op_eq(jit_State *J, int cond, int rkb, int rkc)
2071{
2072 int target = jit_jmp_target(J);
 /* Destination once the operands are known to differ: the jump target */
 /* for cond == 0, otherwise the label J->nextpc+1 (jump not taken). */
2073 int condtarget = cond ? (J->nextpc+1) : target;
2074 jit_assert(cond == 0 || cond == 1);
2075
2076 /* Comparison of two constants. Evaluate at compile time. */
2077 if (ISK(rkb&rkc)) {
2078 if ((rkb == rkc) == cond) { /* Constants are already unique. */
2079 | jmp =>target
2080 }
2081 return;
2082 }
2083
2084 if (ISK(rkb|rkc)) { /* Compare a variable and a constant. */
2085 const TValue *kk;
2086 if (ISK(rkb)) { int t = rkc; rkc = rkb; rkb = t; } /* rkc holds const. */
2087 kk = &J->pt->k[INDEXK(rkc)];
2088 switch (ttype(kk)) {
2089 case LUA_TNIL:
2090 | isnil rkb // Sets the flags used by the final je/jne.
2091 break;
2092 case LUA_TBOOLEAN:
2093 if (bvalue(kk)) {
 /* Constant true: (tt-1) | (value-1) is zero only for tt == 1, value == 1. */
2094 | mov eax, BASE[rkb].tt
2095 | mov ecx, BASE[rkb].value
2096 | dec eax
2097 | dec ecx
2098 | or eax, ecx
2099 } else {
 /* Constant false: (tt-1) | value is zero only for tt == 1, value == 0. */
2100 | mov eax, BASE[rkb].tt
2101 | dec eax
2102 | or eax, BASE[rkb].value
2103 }
2104 break;
2105 case LUA_TNUMBER:
2106 |// Note: bitwise comparison is not faster (and needs to handle -0 == 0).
2107 | isnumber rkb
2108 | jne =>condtarget // Not a number: cannot be equal.
2109 | fld qword BASE[rkb].value
2110 | fld qword [&kk->value] // Load the constant from its fixed address.
2111 | fcomparepp
2112 | jp =>condtarget // Unordered means not equal.
2113 break;
2114 case LUA_TSTRING:
2115 | isstring rkb
2116 | jne =>condtarget // Not a string: cannot be equal.
2117 | cmp aword BASE[rkb].value, &rawtsvalue(kk) // Compare string object pointers.
2118 break;
2119 default: jit_assert(0); break;
2120 }
2121 } else { /* Compare two variables. */
2122 | mov eax, BASE[rkb].tt
2123 | cmp eax, BASE[rkc].tt
2124 | jne =>condtarget // Different type tags: not equal.
2125 switch (ttype(hint_get(J, TYPE))) {
2126 case LUA_TNUMBER:
2127 jit_deopt_target(J, 0);
2128 |// Note: bitwise comparison is not an option (-0 == 0, NaN ~= NaN).
2129 | cmp eax, LUA_TNUMBER; jne L_DEOPTIMIZE
2130 | fld qword BASE[rkb].value
2131 | fld qword BASE[rkc].value
2132 | fcomparepp
2133 | jp =>condtarget // Unordered means not equal.
2134 break;
2135 case LUA_TSTRING:
2136 jit_deopt_target(J, 0);
2137 | cmp eax, LUA_TSTRING; jne L_DEOPTIMIZE
2138 | mov ecx, BASE[rkb].value
2139 | cmp ecx, BASE[rkc].value // Compare string object pointers.
2140 break;
2141 default:
2142 |// Generic equality comparison fallback.
2143 | lea edx, BASE[rkc]
2144 | lea ecx, BASE[rkb]
2145 | mov L->savedpc, &J->nextins
2146 | call &luaV_equalval, L, ecx, edx
2147 | dec eax // Zero flag set iff the call returned 1.
2148 | mov BASE, L->base // mov leaves the flags from dec intact.
2149 break;
2150 }
2151 }
 /* Consume the zero flag produced by whichever path ran above. */
2152 if (cond) {
2153 | je =>target
2154 } else {
2155 | jne =>target
2156 }
2157}
2158
2159/* ------------------------------------------------------------------------ */
2160
/*
** Emit code for a truth test on BASE[src] with a conditional jump and an
** optional copy to BASE[dest].
** The jump to the target obtained from jit_jmp_target() is taken when the
** value is neither nil nor false (cond != 0), or when it is nil or false
** (cond == 0). With dest != src the slot is copied to BASE[dest] only on
** the path that takes the jump; the fall-through path leaves BASE[dest]
** untouched.
**   J    - JIT compiler state.
**   cond - selects which truth value triggers the jump.
**   dest - destination slot for the copy.
**   src  - slot being tested.
*/
2161static void jit_op_test(jit_State *J, int cond, int dest, int src)
2162{
2163 int target = jit_jmp_target(J);
2164
2165 /* l_isfalse() without a branch. But this time preserve tt/value. */
2166 /* (((value & tt) * 2 + tt) >> 1) is only zero for nil/false. */
2167 /* Assumes: 3*tt < 2^32, LUA_TNIL == 0, LUA_TBOOLEAN == 1, bvalue() == 0/1 */
2168 | mov eax, BASE[src].tt // eax = tt (kept for the copy below).
2169 | mov ecx, BASE[src].value // ecx = value (kept for the copy below).
2170 | mov edx, eax
2171 | and edx, ecx // edx = value & tt
2172 | lea edx, [eax+edx*2] // edx = (value & tt) * 2 + tt
2173 | shr edx, 1 // Zero flag set iff the slot is nil/false.
2174
2175 /* Check if we can omit the stack copy. */
2176 if (dest == src) { /* Yes, invert branch condition. */
2177 if (cond) {
2178 | jnz =>target
2179 } else {
2180 | jz =>target
2181 }
2182 } else { /* No, jump around copy code. */
2183 if (cond) {
2184 | jz >1
2185 } else {
2186 | jnz >1
2187 }
2188 | mov edx, BASE[src].value.na[1] // edx = remaining dword of the value.
2189 | mov BASE[dest].tt, eax
2190 | mov BASE[dest].value, ecx
2191 | mov BASE[dest].value.na[1], edx
2192 | jmp =>target
2193 |1:
2194 }
2195}
2196
/*
** Emit an unconditional jump to the machine code label for target.
**   J      - JIT compiler state.
**   target - label index of the jump destination.
*/
2197static void jit_op_jmp(jit_State *J, int target)
2198{
2199 | jmp =>target
2200}
2201
2202/* ------------------------------------------------------------------------ */
2203
/*
** Offsets of the numeric for loop slots relative to the loop base slot ra:
** internal index, limit, step and the externally visible copy of the index.
*/
2204enum { FOR_IDX, FOR_LIM, FOR_STP, FOR_EXT };
2205
/*
** Error messages for for loop control values that cannot be coerced to a
** number. Indexed by FOR_IDX, FOR_LIM and FOR_STP (see jit_for_coerce).
*/
2206static const char *const jit_for_coerce_error[] = {
2207 LUA_QL("for") " initial value must be a number",
2208 LUA_QL("for") " limit must be a number",
2209 LUA_QL("for") " step must be a number",
2210};
2211
2212/* Try to coerce for slots with strings to numbers in place or complain. */
2213static void jit_for_coerce(lua_State *L, TValue *o)
2214{
2215 int i;
2216 for (i = FOR_IDX; i <= FOR_STP; i++, o++) {
2217 lua_Number num;
2218 if (ttisnumber(o)) continue;
2219 if (ttisstring(o) && luaO_str2d(svalue(o), &num)) {
2220 setnvalue(o, num);
2221 } else {
2222 luaG_runerror(L, jit_for_coerce_error[i]);
2223 }
2224 }
2225}
2226
/*
** Emit the loop entry code for a numeric for loop.
** The emitted code checks that the control slots hold numbers, copies the
** index into the external index slot (FOR_EXT) and jumps to label
** target+1 when the loop body must not run at all.
** Two variants exist:
**   - The FOR_STEP_K hint holds a number: the sign of the step is known at
**     compile time, so only index and limit are type checked and the
**     branch condition is picked here.
**   - Otherwise: all three slots are type checked and the sign of the step
**     is read at runtime from the high dword of the step value.
** What happens when the type check fails depends on the TYPE hint: with a
** number hint a deoptimization target is registered, otherwise an out of
** line fallback is emitted that calls jit_for_coerce and re-enters at
** label 4.
**   J      - JIT compiler state.
**   ra     - base slot of the loop control variables.
**   target - label index; target+1 is used as the loop exit here.
*/
2227static void jit_op_forprep(jit_State *J, int ra, int target)
2228{
2229 const TValue *step = hint_get(J, FOR_STEP_K);
2230 if (ttisnumber(step)) {
2231 | isnumber2 ra+FOR_IDX, ra+FOR_LIM; jne L_DEOPTIMIZEF
2232 |4:
2233 | fld qword BASE[ra+FOR_LIM].value // [lim]
2234 | fld qword BASE[ra+FOR_IDX].value // [idx lim]
2235 | fst qword BASE[ra+FOR_EXT].value // extidx = idx
2236 | fcomparepp // idx >< lim ?
2237 | settt BASE[ra+FOR_EXT], LUA_TNUMBER // NOTE(review): must not clobber the compare flags; confirm settt is a plain store.
2238 if (nvalue(step) < (lua_Number)0) {
2239 | jb =>target+1 // step < 0 && idx < lim: skip loop.
2240 } else {
2241 | ja =>target+1 // step >= 0 && idx > lim: skip loop.
2242 }
2243 } else {
2244 |4:
2245 | isnumber3 ra+FOR_IDX, ra+FOR_LIM, ra+FOR_STP
2246 | mov eax, BASE[ra+FOR_STP].value.na[1] // Sign bit is in hi dword.
2247 | jne L_DEOPTIMIZEF // Flags still come from isnumber3 (mov leaves them intact).
2248 | fld qword BASE[ra+FOR_LIM].value // [lim] (FP stack notation)
2249 | fld qword BASE[ra+FOR_IDX].value // [idx lim]
2250 | test eax, eax // step >< 0 ?
2251 | fst qword BASE[ra+FOR_EXT].value // extidx = idx
2252 | js >1
2253 | fxch // if (step > 0) [lim idx]
2254 |1:
2255 | fcomparepp // step > 0 ? lim < idx : idx < lim
2256 | settt BASE[ra+FOR_EXT], LUA_TNUMBER
2257 | jb =>target+1 // Skip loop.
2258 }
2259 if (ttisnumber(hint_get(J, TYPE))) {
2260 jit_deopt_target(J, 0);
2261 } else {
2262 |.tail
2263 |L_DEOPTLABEL: // Recycle as fallback label.
2264 | // Fallback for strings as loop vars. No need to make this fast.
2265 | lea eax, BASE[ra]
2266 | mov L->savedpc, &J->nextins
2267 | call &jit_for_coerce, L, eax // Coerce strings or throw error.
2268 | jmp <4 // Easier than reloading eax.
2269 |.code
2270 }
2271}
2272
/*
** Emit the loop back edge code for a numeric for loop.
** The emitted code adds the step to the index, stores the new index into
** both the internal (FOR_IDX) and external (FOR_EXT) slots, compares it
** against the limit and jumps to target while the loop should continue.
** The FOR_STEP_K hint is read for the bytecode position target-1
** (presumably the matching loop entry instruction; confirm against the
** hint producer). When it holds a number the sign of the step is resolved
** at compile time, otherwise at runtime from the high dword of the step.
**   J      - JIT compiler state.
**   ra     - base slot of the loop control variables.
**   target - label index of the loop body start.
*/
2273static void jit_op_forloop(jit_State *J, int ra, int target)
2274{
2275 const TValue *step = hint_getpc(J, FOR_STEP_K, target-1);
2276 if (ttisnumber(step)) {
2277 | fld qword BASE[ra+FOR_LIM].value // [lim] (FP stack notation)
2278 | fld qword BASE[ra+FOR_IDX].value // [idx lim]
2279 | fadd qword BASE[ra+FOR_STP].value // [nidx lim]
2280 | fst qword BASE[ra+FOR_EXT].value // extidx = nidx
2281 | fst qword BASE[ra+FOR_IDX].value // idx = nidx
2282 | settt BASE[ra+FOR_EXT], LUA_TNUMBER
2283 | fcomparepp // nidx >< lim ?
2284 if (nvalue(step) < (lua_Number)0) {
2285 | jae =>target // step < 0 && nidx >= lim: loop again.
2286 } else {
2287 | jbe =>target // step >= 0 && nidx <= lim: loop again.
2288 }
2289 } else {
2290 | mov eax, BASE[ra+FOR_STP].value.na[1] // Sign bit is in hi dword.
2291 | fld qword BASE[ra+FOR_LIM].value // [lim] (FP stack notation)
2292 | fld qword BASE[ra+FOR_IDX].value // [idx lim]
2293 | fld qword BASE[ra+FOR_STP].value // [stp idx lim]
2294 | faddp st1 // [nidx lim]
2295 | fst qword BASE[ra+FOR_IDX].value // idx = nidx
2296 | fst qword BASE[ra+FOR_EXT].value // extidx = nidx
2297 | settt BASE[ra+FOR_EXT], LUA_TNUMBER
2298 | test eax, eax // step >< 0 ?
2299 | js >1
2300 | fxch // if (step > 0) [lim nidx]
2301 |1:
2302 | fcomparepp // step > 0 ? lim >= nidx : nidx >= lim
2303 | jae =>target // Loop again.
2304 }
2305}
2306
2307/* ------------------------------------------------------------------------ */
2308
/*
** Emit code for one step of a generic for loop.
** First gives jit_inline_tforloop a chance to inline the iteration; when
** it reports success nothing else is emitted. Otherwise the emitted code
** copies the three slots ra..ra+2 to ra+3..ra+5, performs a call through
** jit_op_call on slot ra+3 and then inspects BASE[ra+3]: a nil falls
** through (loop ends), anything else is copied to ra+2 as the new control
** variable before jumping to the target from jit_jmp_target().
**   J        - JIT compiler state.
**   ra       - base slot of the iterator triple.
**   nresults - passed through to jit_op_call and jit_inline_tforloop.
*/
2309static void jit_op_tforloop(jit_State *J, int ra, int nresults)
2310{
2311 int target = jit_jmp_target(J);
2312 int i;
2313 if (jit_inline_tforloop(J, ra, nresults, target)) return; /* Inlined? */
 /* Copy in descending order: ra+2 -> ra+5, ra+1 -> ra+4, ra -> ra+3. */
2314 for (i = 2; i >= 0; i--) {
2315 | copyslot BASE[ra+i+3], BASE[ra+i] // Copy ctlvar/state/callable.
2316 }
 /* NOTE(review): arguments are presumably (func slot, 2 args, nresults). */
2317 jit_op_call(J, ra+3, 2, nresults);
2318 | isnil ra+3; je >1 // First result is nil: leave the loop.
2319 | copyslot BASE[ra+2], BASE[ra+3] // Save control variable.
2320 | jmp =>target
2321 |1:
2322}
2323
2324/* ------------------------------------------------------------------------ */
2325
/*
** Emit a call to luaF_close(L, level) with level = address of BASE[ra].
** The level argument is stored into ARG2 by hand before the call; for
** ra == 0 the BASE register is stored directly, which saves the lea.
**   J  - JIT compiler state.
**   ra - stack slot passed as the level.
*/
2326static void jit_op_close(jit_State *J, int ra)
2327{
2328 if (ra) {
2329 | lea eax, BASE[ra]
2330 | mov ARG2, eax
2331 } else {
2332 | mov ARG2, BASE
2333 }
2334 | call &luaF_close, L // , StkId level (ARG2)
2335}
2336
/*
** Emit code that creates a new Lua closure for the child prototype
** J->pt->p[ptidx] and stores it into BASE[dest].
** The emitted code calls luaF_newLclosure with the upvalue count and the
** environment of the running closure, stores the prototype pointer into
** the result and then fills in the upvalues. The upvalue sources are
** described by nup pseudo-instructions that follow in the bytecode
** (read from J->nextins):
**   - OP_GETUPVAL: copy the upvalue with index B from the running closure.
**   - OP_MOVE: find or create an upvalue for stack slot B through
**     luaF_findupval.
** These pseudo-instructions are skipped afterwards by bumping J->combine.
** A GC check is emitted at the end.
**   J     - JIT compiler state.
**   dest  - destination stack slot.
**   ptidx - index of the child prototype.
*/
2337static void jit_op_closure(jit_State *J, int dest, int ptidx)
2338{
2339 Proto *npt = J->pt->p[ptidx];
2340 int nup = npt->nups;
2341 | getLCL edi // LCL:edi is callee-saved.
2342 | mov edx, LCL:edi->env // edx = environment of the running closure.
2343 | call &luaF_newLclosure, L, nup, edx
2344 | mov LCL->p, &npt // Store new proto in returned closure.
2345 | mov aword BASE[dest].value, LCL // setclvalue()
2346 | settt BASE[dest], LUA_TFUNCTION
2347 /* Process pseudo-instructions for upvalues. */
2348 if (nup > 0) {
2349 const Instruction *uvcode = J->nextins;
2350 int i, uvuv;
2351 /* Check which of the two types we need. */
2352 for (i = 0, uvuv = 0; i < nup; i++)
2353 if (GET_OPCODE(uvcode[i]) == OP_GETUPVAL) uvuv++;
2354 /* Copy upvalues from parent first. */
2355 if (uvuv) {
2356 /* LCL:eax->upvals (new closure) <-- LCL:edi->upvals (own closure). */
2357 for (i = 0; i < nup; i++)
2358 if (GET_OPCODE(uvcode[i]) == OP_GETUPVAL) {
2359 | mov UPVAL:edx, LCL:edi->upvals[GETARG_B(uvcode[i])]
2360 | mov LCL->upvals[i], UPVAL:edx
2361 }
2362 }
2363 /* Next find or create upvalues for our own stack slots. */
2364 if (nup > uvuv) {
2365 | mov LCL:edi, LCL // Move new closure to callee-save register.
2366 /* LCL:edi->upvals (new closure) <-- upvalue for stack slot. */
2367 for (i = 0; i < nup; i++)
2368 if (GET_OPCODE(uvcode[i]) == OP_MOVE) {
2369 int rb = GETARG_B(uvcode[i]);
2370 if (rb) {
2371 | lea eax, BASE[rb]
2372 | mov ARG2, eax
2373 } else {
2374 | mov ARG2, BASE
2375 }
2376 | call &luaF_findupval, L // , StkId level (ARG2)
2377 | mov LCL:edi->upvals[i], UPVAL:eax
2378 }
2379 }
2380 J->combine += nup; /* Skip pseudo-instructions. */
2381 }
2382 jit_checkGC(J);
2383}
2384
2385/* ------------------------------------------------------------------------ */
2386
/*
** Emit code that copies the variable arguments of the running function
** to the stack slots starting at BASE[dest].
** The varargs occupy the range from ci->func + 1 + numparams up to (but
** excluding) BASE.
**   - num < 0: copy all of them. The stack is grown first when needed
**     (out of line path at label 5, which restarts the operation). TOP is
**     left pointing just past the last copied slot for the next
**     instruction.
**   - num > 0: copy at most num of them and fill the remaining destination
**     slots with nil.
**   - num == 0: nothing is emitted.
**   J    - JIT compiler state.
**   dest - first destination stack slot.
**   num  - number of values wanted, or negative for all.
*/
2387static void jit_op_vararg(jit_State *J, int dest, int num)
2388{
2389 if (num < 0) { /* Copy all varargs. */
2390 |// Copy [ci->func+1+pt->numparams, BASE) -> [BASE+dest, *).
2391 |1:
2392 | mov CI, L->ci
2393 | mov edx, CI->func
2394 | add edx, (1+J->pt->numparams)*#TVALUE // Start of varargs.
2395 |
2396 | // luaD_checkstack(L, nvararg) with nvararg = L->base - vastart.
2397 | // This is a slight overallocation (BASE[dest+nvararg] would be enough).
2398 | // We duplicate OP_VARARG behaviour so we can use luaD_growstack().
2399 | lea eax, [BASE+BASE+J->pt->maxstacksize*#TVALUE] // L->base + L->top
2400 | sub eax, edx // L->top + (L->base - vastart)
2401 | cmp eax, L->stack_last
2402 | jae >5 // Need to grow stack?
2403 |
2404 | lea TOP, BASE[dest] // TOP = copy destination.
2405 | cmp edx, BASE
2406 | jnb >3 // No varargs present: nothing to copy.
2407 |2: // Copy loop.
2408 | mov eax, [edx] // Copy one aword per iteration.
2409 | add edx, aword*1
2410 | mov [TOP], eax
2411 | add TOP, aword*1
2412 | cmp edx, BASE
2413 | jb <2
2414 |3:
2415 |// This is an open ins. Must keep TOP for next instruction.
2416 |
2417 |.tail
2418 |5: // Grow stack for varargs.
2419 | sub eax, L->top // eax = byte size of the varargs.
2420 | TValuediv eax // NOTE(review): presumably converts bytes to a slot count; confirm in ljit_x86.dash.
2421 | call &luaD_growstack, L, eax
2422 | mov BASE, L->base // Reload BASE after the C call.
2423 | jmp <1 // Just restart op to avoid saving/restoring regs.
2424 |.code
2425 } else if (num > 0) { /* Copy limited number of varargs. */
2426 |// Copy [ci->func+1+pt->numparams, BASE) -> [BASE+dest, BASE+dest+num).
2427 | mov CI, L->ci
2428 | mov edx, CI->func
2429 | add edx, (1+J->pt->numparams)*#TVALUE // Start of varargs.
2430 | lea TOP, BASE[dest] // TOP = copy destination.
2431 | lea ecx, BASE[dest+num] // ecx = end of the destination range.
2432 | cmp edx, BASE // No varargs present: only fill.
2433 | jnb >2
2434 |
2435 |1: // Copy loop.
2436 | mov eax, [edx] // Copy one aword per iteration.
2437 | add edx, aword*1
2438 | mov [TOP], eax
2439 | add TOP, aword*1
2440 | cmp TOP, ecx // Stop if all dest slots got a vararg.
2441 | jnb >4
2442 | cmp edx, BASE // Continue if more varargs present.
2443 | jb <1
2444 |
2445 |2: // Fill remaining slots with nils.
2446 | xor eax, eax // Assumes: LUA_TNIL == 0
2447 |3: // Fill loop.
2448 | settt TOP[0], eax // Only the type tag is written.
2449 | add TOP, #TVALUE
2450 | cmp TOP, ecx
2451 | jb <3
2452 |4:
2453 }
2454}
2455
2456/* ------------------------------------------------------------------------ */
2457