diff options
Diffstat (limited to 'libraries/LuaJIT-1.1.7/src/ljit_x86.dasc')
-rw-r--r-- | libraries/LuaJIT-1.1.7/src/ljit_x86.dasc | 2457 |
1 files changed, 0 insertions, 2457 deletions
diff --git a/libraries/LuaJIT-1.1.7/src/ljit_x86.dasc b/libraries/LuaJIT-1.1.7/src/ljit_x86.dasc deleted file mode 100644 index f7be91e..0000000 --- a/libraries/LuaJIT-1.1.7/src/ljit_x86.dasc +++ /dev/null | |||
@@ -1,2457 +0,0 @@ | |||
1 | /* | ||
2 | ** Bytecode to machine code translation for x86 CPUs. | ||
3 | ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h | ||
4 | */ | ||
5 | |||
6 | |// Include common definitions and macros. | ||
7 | |.include ljit_x86.dash | ||
8 | | | ||
9 | |// Place actionlist and globals here at the top of the file. | ||
10 | |.actionlist jit_actionlist | ||
11 | |.globals JSUB_ | ||
12 | |||
13 | /* ------------------------------------------------------------------------ */ | ||
14 | |||
15 | /* Arch string: identifies the CPU architecture targeted by this backend. */ | ||
16 | const char luaJIT_arch[] = "x86"; | ||
17 | |||
18 | /* Forward declarations for C functions called from jsubs. */ | ||
/* jit_hookins() is defined further down in this file; the two *_fb helpers */
/* are not defined in this part of the file. */
19 | static void jit_hookins(lua_State *L, const Instruction *newpc); | ||
20 | static void jit_gettable_fb(lua_State *L, Table *t, StkId dest); | ||
21 | static void jit_settable_fb(lua_State *L, Table *t, StkId val); | ||
22 | |||
23 | /* ------------------------------------------------------------------------ */ | ||
24 | |||
25 | /* Detect CPU features and set JIT flags. */ | ||
/*
** Assembles a tiny probe routine, links it, calls it once and frees it again.
** The probe returns 0 in eax when the ID bit in the flags register cannot be
** toggled (no CPUID support); otherwise it returns the edx word delivered by
** CPUID function 1. Bit 15 of that word sets JIT_F_CPU_CMOV and bit 26 sets
** JIT_F_CPU_SSE2 in J->flags.
** Returns JIT_S_OK, or the status from luaJIT_link() if linking failed.
*/
26 | static int jit_cpudetect(jit_State *J) | ||
27 | { | ||
28 | void *mcode; | ||
29 | size_t sz; | ||
30 | int status; | ||
31 | /* Some of the jsubs need the flags. So compile this separately. */ | ||
32 | unsigned int feature; | ||
33 | dasm_setup(Dst, jit_actionlist); | ||
34 | | // Check for CPUID support first. | ||
35 | | pushfd | ||
36 | | pop edx | ||
37 | | mov ecx, edx | ||
38 | | xor edx, 0x00200000 // Toggle ID bit in flags. | ||
39 | | push edx | ||
40 | | popfd | ||
41 | | pushfd | ||
42 | | pop edx | ||
43 | | xor eax, eax // Zero means no features supported. | ||
44 | | cmp ecx, edx | ||
45 | | jz >1 // No ID toggle means no CPUID support. | ||
46 | | | ||
47 | | inc eax // CPUID function 1. | ||
48 | | push ebx // Callee-save ebx modified by CPUID. | ||
49 | | cpuid | ||
50 | | pop ebx | ||
51 | | mov eax, edx // Return feature support bits. | ||
52 | |1: | ||
53 | | ret | ||
/* The result of dasm_checkstep() is deliberately discarded (cast to void). */
54 | (void)dasm_checkstep(Dst, DASM_SECTION_CODE); | ||
55 | status = luaJIT_link(J, &mcode, &sz); | ||
56 | if (status != JIT_S_OK) | ||
57 | return status; | ||
58 | /* Check feature bits. See the Intel/AMD manuals for the bit definitions. */ | ||
/* Run the freshly linked probe as a plain C function returning unsigned int. */
59 | feature = ((unsigned int (*)(void))mcode)(); | ||
60 | if (feature & (1<<15)) J->flags |= JIT_F_CPU_CMOV; | ||
61 | if (feature & (1<<26)) J->flags |= JIT_F_CPU_SSE2; | ||
62 | luaJIT_freemcode(J, mcode, sz); /* We don't need this code anymore. */ | ||
63 | return JIT_S_OK; | ||
64 | } | ||
65 | |||
66 | /* Check some assumptions. Should compile to nop. */ | ||
/*
** Compile-time part: two dummy arrays are declared with the sizes
** 1+TVALUE_SIZE-sizeof(TValue) and 1+sizeof(TValue)-TVALUE_SIZE, which are
** both 1 only when TVALUE_SIZE equals sizeof(TValue); per the comment below
** a mismatch is meant to break the build.
** Runtime part: verifies LUA_TNIL == 0, LUA_TBOOLEAN == 1, PCRLUA == 0 and
** that the offset of i_val inside a Node equals the offset of value inside
** the object a StkId points to.
** Returns JIT_S_OK if everything holds. Otherwise sets J->dasmstatus to
** 999999999 and returns JIT_S_COMPILER_ERROR.
*/
67 | static int jit_consistency_check(jit_State *J) | ||
68 | { | ||
69 | do { | ||
70 | /* Force a compiler error for inconsistent structure sizes. */ | ||
71 | /* Check LUA_TVALUE_ALIGN in luaconf.h, too. */ | ||
72 | ||int check_TVALUE_SIZE_in_ljit_x86_dash[1+TVALUE_SIZE-sizeof(TValue)]; | ||
73 | ||int check_TVALUE_SIZE_in_ljit_x86_dash_[1+sizeof(TValue)-TVALUE_SIZE]; | ||
/* Reference both arrays so that they do not count as unused variables. */
74 | ((void)check_TVALUE_SIZE_in_ljit_x86_dash[0]); | ||
75 | ((void)check_TVALUE_SIZE_in_ljit_x86_dash_[0]); | ||
/* Any failed check breaks out of the do-while and lands on the error path. */
76 | if (LUA_TNIL != 0 || LUA_TBOOLEAN != 1 || PCRLUA != 0) break; | ||
77 | if ((int)&(((Node *)0)->i_val) != (int)&(((StkId)0)->value)) break; | ||
78 | return JIT_S_OK; | ||
79 | } while (0); | ||
80 | J->dasmstatus = 999999999; /* Recognizable error. */ | ||
81 | return JIT_S_COMPILER_ERROR; | ||
82 | } | ||
83 | |||
84 | /* Compile JIT subroutines (once). */ | ||
/*
** Runs the consistency check and the CPU detection first and returns their
** status on failure. Then assembles the fixed subroutines STACKPTR, GATE_LJ,
** GATE_JL, GATE_JC (with the GATE_JC_PATCH mark), GATE_JC_DEBUG, GROW_STACK
** and GROW_CI, dumps every body captured between .jsub and .endjsub
** elsewhere in the file (.dumpjsub) and finally adds the DEOPTIMIZE_CALLER,
** DEOPTIMIZE_OPEN and DEOPTIMIZE entry points.
** The result is linked into J->jsubmcode/J->szjsubmcode and the three call
** gates are copied from J->jsub[] into the global state.
** Returns JIT_S_OK, or the first non-OK status encountered.
*/
85 | static int jit_compile_jsub(jit_State *J) | ||
86 | { | ||
87 | int status = jit_consistency_check(J); | ||
88 | if (status != JIT_S_OK) return status; | ||
89 | status = jit_cpudetect(J); | ||
90 | if (status != JIT_S_OK) return status; | ||
91 | dasm_setup(Dst, jit_actionlist); | ||
92 | |// Macros to reorder and combine JIT subroutine definitions. | ||
93 | |.macro .jsub, name | ||
94 | |.capture JSUB // Add the entry point. | ||
95 | ||//----------------------------------------------------------------------- | ||
96 | ||//->name: | ||
97 | | .align 16 | ||
98 | |->name: | ||
99 | |.endmacro | ||
100 | |.macro .endjsub; .endcapture; .endmacro | ||
101 | |.macro .dumpjsub; .dumpcapture JSUB; .endmacro | ||
102 | | | ||
103 | |.code | ||
104 | |//----------------------------------------------------------------------- | ||
105 | | .align 16 | ||
106 | | // Must be the first JSUB defined or used. | ||
107 | |->STACKPTR: // Get stack pointer (for jit.util.*). | ||
108 | | lea eax, [esp+aword*1] // But adjust for the return address. | ||
109 | | ret | ||
110 | | | ||
111 | |//----------------------------------------------------------------------- | ||
112 | | .align 16 | ||
113 | |->GATE_LJ: // Lua -> JIT gate. (L, func, nresults) | ||
114 | | push ebp | ||
115 | | mov ebp, esp | ||
116 | | sub esp, LJFRAME_OFFSET | ||
117 | | mov SAVER1, BASE | ||
118 | | mov BASE, CARG2 // func | ||
119 | | mov CARG2, L // Arg used as savereg. Avoids aword*8 stack frame. | ||
120 | | mov L, CARG1 // L | ||
121 | | mov SAVER2, TOP | ||
122 | | mov TOP, L->top | ||
123 | | mov LCL, BASE->value | ||
124 | | mov CI, L->ci | ||
125 | | // Prevent stackless yields. No limit check -- this is not a real C call. | ||
126 | | inc word L->nCcalls // short | ||
127 | | | ||
128 | | call aword LCL->jit_gate // Call the compiled code. | ||
129 | | | ||
130 | | mov CI, L->ci | ||
131 | | mov L->top, TOP // Only correct for LUA_MULTRET. | ||
132 | | mov edx, CI->savedpc | ||
133 | | mov eax, CARG3 // nresults | ||
134 | | mov L->savedpc, edx // L->savedpc = CI->savedpc | ||
135 | | mov edx, CI->base | ||
136 | | test eax, eax | ||
137 | | mov L->base, edx // L->base = CI->base | ||
138 | | js >2 // Skip for nresults == LUA_MULTRET. | ||
139 | | | ||
140 | | TValuemul eax | ||
141 | | add BASE, eax | ||
142 | | xor ecx, ecx | ||
143 | | mov L->top, BASE // L->top = &func[nresults] | ||
144 | |1: // No initial check. May use EXTRA_STACK (once). | ||
145 | | mov TOP->tt, ecx // Clear unset stack slots. | ||
146 | | add TOP, #TOP | ||
147 | | cmp TOP, BASE | ||
148 | | jb <1 | ||
149 | | | ||
150 | |2: | ||
151 | | dec word L->nCcalls // short | ||
152 | | mov eax, PCRC | ||
153 | | mov TOP, SAVER2 | ||
154 | | mov BASE, SAVER1 | ||
155 | | mov L, CARG2 | ||
156 | | mov esp, ebp | ||
157 | | pop ebp | ||
158 | | ret | ||
159 | | | ||
160 | |//----------------------------------------------------------------------- | ||
161 | | .align 16 | ||
162 | |->GATE_JL: // JIT -> Lua callgate. | ||
163 | | mov PROTO:edx, LCL->p | ||
164 | | cmp dword PROTO:edx->jit_status, JIT_S_OK | ||
165 | | jne >1 // Already compiled? | ||
166 | | | ||
167 | | // Yes, copy callgate to closure (so GATE_JL is not called again). | ||
168 | | mov edx, PROTO:edx->jit_mcode | ||
169 | | mov LCL->jit_gate, edx | ||
170 | | jmp edx // Chain to compiled code. | ||
171 | | | ||
172 | |1: // Let luaD_precall do the hard work: compile & run or fallback. | ||
173 | | sub esp, FRAME_OFFSET | ||
174 | | mov eax, CI->savedpc | ||
175 | | mov L->ci, CI // May not be in sync for tailcalls. | ||
176 | | mov L->top, TOP | ||
177 | | mov ARG3, -1 // LUA_MULTRET | ||
178 | | mov L->savedpc, eax // luaD_precall expects it there. | ||
179 | | mov ARG2, BASE | ||
180 | | sub BASE, L->stack // Preserve old BASE (= func). | ||
181 | | mov ARG1, L | ||
182 | | call &luaD_precall // luaD_precall(L, func, nresults) | ||
183 | | test eax,eax // Assumes: PCRLUA == 0 | ||
184 | | jnz >2 // PCRC? PCRYIELD cannot happen. | ||
185 | | | ||
186 | | // Returned PCRLUA: need to call the bytecode interpreter. | ||
187 | | call &luaV_execute, L, 1 | ||
188 | | // Indirect yield (L->status == LUA_YIELD) cannot happen. | ||
189 | | | ||
190 | |2: // Returned PCRC: compile & run done. Frame is already unwound. | ||
191 | | add esp, FRAME_OFFSET | ||
192 | | add BASE, L->stack // Restore stack-relative pointers BASE and TOP. | ||
193 | | mov TOP, L->top | ||
194 | | ret | ||
195 | | | ||
196 | |//----------------------------------------------------------------------- | ||
197 | | .align 16 | ||
198 | |->GATE_JC: // JIT -> C callgate. | ||
199 | | lea eax, TOP[LUA_MINSTACK] | ||
200 | | sub esp, FRAME_OFFSET | ||
201 | | cmp eax, L->stack_last | ||
202 | | jae ->GROW_STACK // Stack overflow? | ||
203 | | cmp CI, L->end_ci | ||
204 | | lea CI, CI[1] | ||
205 | | je ->GROW_CI // CI overflow? | ||
206 | | mov L->ci, CI | ||
207 | | mov CI->func, BASE | ||
208 | | mov CI->top, eax | ||
209 | | mov CCLOSURE:edx, BASE->value | ||
210 | | add BASE, #BASE | ||
211 | | mov L->top, TOP | ||
212 | | mov L->base, BASE | ||
213 | | mov CI->base, BASE | ||
214 | | // ci->nresults is not set because we don't use luaD_poscall(). | ||
215 | | | ||
216 | |->GATE_JC_PATCH: // Patch mark for jmp to GATE_JC_DEBUG. | ||
217 | | | ||
218 | | call aword CCLOSURE:edx->f, L // Call the C function. | ||
219 | | | ||
220 | |2: // Label used below (GATE_JC_DEBUG jumps back here)! | ||
221 | | add esp, FRAME_OFFSET | ||
222 | | mov CI, L->ci | ||
223 | | TValuemul eax // eax = nresults*sizeof(TValue) | ||
224 | | mov TOP, CI->func | ||
225 | | jz >4 // Skip loop if nresults == 0. | ||
226 | | // Yield (-1) cannot happen. | ||
227 | | mov BASE, L->top | ||
228 | | mov edx, BASE | ||
229 | | sub BASE, eax // BASE = &L->top[-nresults] | ||
230 | |3: // Relocate [L->top-nresults, L->top) -> [ci->func, ci->func+nresults) | ||
231 | | mov eax, [BASE] | ||
232 | | add BASE, aword*1 | ||
233 | | mov [TOP], eax | ||
234 | | add TOP, aword*1 | ||
235 | | cmp BASE, edx | ||
236 | | jb <3 | ||
237 | | | ||
238 | |4: | ||
239 | | mov BASE, CI->func | ||
240 | | sub CI, #CI | ||
241 | | mov L->ci, CI | ||
242 | | ret | ||
243 | | | ||
244 | |//----------------------------------------------------------------------- | ||
245 | | nop; nop; nop; nop; nop; nop // Save area. See DEBUGPATCH_SIZE. | ||
246 | | .align 16 | ||
247 | |->GATE_JC_DEBUG: // JIT -> C callgate for debugging. | ||
248 | | test byte L->hookmask, LUA_MASKCALL // Need to call hook? | ||
249 | | jnz >7 | ||
250 | |6: | ||
251 | | call aword CCLOSURE:edx->f, L // Call the C function. | ||
252 | | | ||
253 | | test byte L->hookmask, LUA_MASKRET // Need to call hook? | ||
254 | | jz <2 | ||
255 | | | ||
256 | | // Return hook. TODO: LUA_HOOKTAILRET is not called since tailcalls == 0. | ||
257 | | mov BASE, eax // BASE (ebx) is callee-save. | ||
258 | | call &luaD_callhook, L, LUA_HOOKRET, -1 | ||
259 | | mov eax, BASE | ||
260 | | jmp <2 | ||
261 | | | ||
262 | |7: // Call hook. | ||
263 | | mov BASE, CCLOSURE:edx // BASE (ebx) is callee-save. | ||
264 | | call &luaD_callhook, L, LUA_HOOKCALL, -1 | ||
265 | | mov CCLOSURE:edx, BASE | ||
266 | | jmp <6 | ||
267 | | | ||
268 | |//----------------------------------------------------------------------- | ||
269 | | .align 16 | ||
270 | |->GROW_STACK: // Grow stack. Jump from/to prologue. | ||
271 | | sub eax, TOP | ||
272 | | TValuediv eax // eax = (eax-TOP)/sizeof(TValue). | ||
273 | | mov L->top, TOP | ||
274 | | sub BASE, L->stack | ||
275 | | mov ARG3, CI | ||
276 | | call &luaD_growstack, L, eax | ||
277 | | mov CI, ARG3 // CI may not be in sync with L->ci. | ||
278 | | add BASE, L->stack // Restore stack-relative pointers. | ||
279 | | mov TOP, L->top | ||
280 | | mov LCL, BASE->value | ||
281 | | add esp, FRAME_OFFSET // Undo esp adjust of prologue/GATE_JC. | ||
282 | | jmp aword LCL->jit_gate // Retry prologue. | ||
283 | | | ||
284 | |//----------------------------------------------------------------------- | ||
285 | | .align 16 | ||
286 | |->GROW_CI: // Grow CI. Jump from/to prologue. | ||
287 | | mov L->top, TOP // May throw LUA_ERRMEM, so save TOP. | ||
288 | | call &luaD_growCI, L | ||
289 | | lea CI, CINFO:eax[-1] // Undo ci++ (L->ci reset in prologue). | ||
290 | | mov LCL, BASE->value | ||
291 | | mov L->ci, CI | ||
292 | | add esp, FRAME_OFFSET // Undo esp adjust of prologue/GATE_JC. | ||
293 | | jmp aword LCL->jit_gate // Retry prologue. | ||
294 | | | ||
295 | |//----------------------------------------------------------------------- | ||
296 | |.dumpjsub // Dump all captured .jsub's. | ||
297 | | | ||
298 | |// Uncritical jsubs follow. No need to align them. | ||
299 | |//----------------------------------------------------------------------- | ||
300 | |->DEOPTIMIZE_CALLER: // Deoptimize calling instruction. | ||
301 | | pop edx | ||
302 | | jmp ->DEOPTIMIZE | ||
303 | | | ||
304 | |->DEOPTIMIZE_OPEN: // Deoptimize open instruction. | ||
305 | | mov L->top, TOP // Save TOP. | ||
306 | | | ||
307 | |->DEOPTIMIZE: // Deoptimize instruction. | ||
308 | | mov L->savedpc, edx // &J->nextins expected in edx. | ||
309 | | call &luaJIT_deoptimize, L | ||
310 | | mov BASE, L->base | ||
311 | | mov TOP, L->top // Restore TOP for open ins. | ||
312 | | jmp eax // Continue with new mcode addr. | ||
313 | | | ||
314 | | .align 16 | ||
315 | |//----------------------------------------------------------------------- | ||
316 | |||
/* The result of dasm_checkstep() is deliberately discarded (cast to void). */
317 | (void)dasm_checkstep(Dst, DASM_SECTION_CODE); | ||
318 | status = luaJIT_link(J, &J->jsubmcode, &J->szjsubmcode); | ||
319 | if (status != JIT_S_OK) | ||
320 | return status; | ||
321 | |||
322 | /* Copy the callgates from the globals to the global state. */ | ||
323 | G(J->L)->jit_gateLJ = (luaJIT_GateLJ)J->jsub[JSUB_GATE_LJ]; | ||
324 | G(J->L)->jit_gateJL = (lua_CFunction)J->jsub[JSUB_GATE_JL]; | ||
325 | G(J->L)->jit_gateJC = (lua_CFunction)J->jsub[JSUB_GATE_JC]; | ||
326 | return JIT_S_OK; | ||
327 | } | ||
328 | |||
329 | /* Match with number of nops above. Avoid confusing the instruction decoder. */ | ||
330 | #define DEBUGPATCH_SIZE 6 | ||
331 | |||
332 | /* Notify backend that the debug mode may have changed. */ | ||
/*
** Switches the C call gate between its normal and its debugging variant by
** patching the code at the GATE_JC_PATCH mark:
**  - Active patch: the first byte at the mark is 0xe9 (jmp rel32) and the
**    jump leads to GATE_JC_DEBUG. The DEBUGPATCH_SIZE bytes that originally
**    stood at the mark are kept directly in front of GATE_JC_DEBUG.
**  - Inactive patch: the original bytes are in place at the mark.
** The patch is activated when JIT_F_DEBUG_CALL is set in J->flags and is
** removed when the flag is clear; nothing is done if the code already
** matches the flag.
*/
333 | void luaJIT_debugnotify(jit_State *J) | ||
334 | { | ||
335 | unsigned char *patch = (unsigned char *)J->jsub[JSUB_GATE_JC_PATCH]; | ||
336 | unsigned char *target = (unsigned char *)J->jsub[JSUB_GATE_JC_DEBUG]; | ||
337 | /* Yep, this is self-modifying code -- don't tell anyone. */ | ||
338 | if (patch[0] == 0xe9) { /* Debug patch is active. */ | ||
339 | if (!(J->flags & JIT_F_DEBUG_CALL)) /* Deactivate it. */ | ||
/* Restore the saved original bytes over the jmp. */
340 | memcpy(patch, target-DEBUGPATCH_SIZE, DEBUGPATCH_SIZE); | ||
341 | } else { /* Debug patch is inactive. */ | ||
342 | if (J->flags & JIT_F_DEBUG_CALL) { /* Activate it. */ | ||
/* The displacement is relative to the end of the 5 byte jmp instruction. */
343 | int rel = target-(patch+5); | ||
/* Save the original bytes first; the following stores overwrite them. */
344 | memcpy(target-DEBUGPATCH_SIZE, patch, DEBUGPATCH_SIZE); | ||
345 | patch[0] = 0xe9; /* jmp */ | ||
346 | memcpy(patch+1, &rel, 4); /* Relative address. */ | ||
/* Pad the rest of the patch area behind the jmp with nops. */
347 | memset(patch+5, 0x90, DEBUGPATCH_SIZE-5); /* nop */ | ||
348 | } | ||
349 | } | ||
350 | } | ||
351 | |||
352 | /* Patch a jmp into existing mcode. */ | ||
/*
** Overwrites the 5 bytes at mcode with the opcode byte 0xe9 followed by a
** 4 byte displacement, computed relative to the end of those 5 bytes, so
** that the jump leads to the address given in to.
** J is not used by this function.
*/
353 | static void jit_patch_jmp(jit_State *J, void *mcode, void *to) | ||
354 | { | ||
355 | unsigned char *patch = (unsigned char *)mcode; | ||
356 | int rel = ((unsigned char *)to)-(patch+5); | ||
357 | patch[0] = 0xe9; /* jmp */ | ||
358 | memcpy((void *)(patch+1), &rel, 4); /* Relative addr. */ | ||
359 | } | ||
360 | |||
361 | /* ------------------------------------------------------------------------ */ | ||
362 | |||
363 | /* Call line/count hook. */ | ||
/*
** L:     Lua state whose hooks are to be run.
** newpc: address handed over by the HOOKINS jsub (the machine code return
**        address on the stack); it is mapped to a bytecode index with
**        luaJIT_findpc().
** Sets L->savedpc to the instruction following the one found and then calls
** the count hook and/or the line hook as described at each step below.
*/
364 | static void jit_hookins(lua_State *L, const Instruction *newpc) | ||
365 | { | ||
366 | Proto *pt = ci_func(L->ci)->l.p; | ||
367 | int pc = luaJIT_findpc(pt, newpc); /* Sloooow with mcode addrs. */ | ||
/* Remember the previous PC; it is needed for the line comparison below. */
368 | const Instruction *savedpc = L->savedpc; | ||
369 | L->savedpc = pt->code + pc + 1; | ||
/* Count hook: only once the counter has reached zero; the counter is reset. */
/* NOTE(review): hookmask > LUA_MASKLINE presumably tests for the count bit */
/* -- confirm against the mask values in lua.h. */
370 | if (L->hookmask > LUA_MASKLINE && L->hookcount == 0) { | ||
371 | resethookcount(L); | ||
372 | luaD_callhook(L, LUA_HOOKCOUNT, -1); | ||
373 | } | ||
374 | if (L->hookmask & LUA_MASKLINE) { | ||
375 | int newline = getline(pt, pc); | ||
/* The line hook is called for the first instruction (pc == 0), when */
/* control did not move forward (pc <= oldpc) or when the line changed. */
376 | if (pc != 0) { | ||
377 | int oldpc = luaJIT_findpc(pt, savedpc); | ||
378 | if (!(pc <= oldpc || newline != getline(pt, oldpc))) return; | ||
379 | } | ||
380 | luaD_callhook(L, LUA_HOOKLINE, newline); | ||
381 | } | ||
382 | } | ||
383 | |||
384 | /* Insert hook check for each instruction in full debug mode. */ | ||
/*
** Emits a call to the HOOKINS jsub. With a non-zero openop the current TOP
** is stored to L->top before the call.
** The body between .jsub and .endjsub is captured and dumped together with
** the other jsubs by .dumpjsub. HOOKINS returns immediately if neither the
** line nor the count mask is set. Otherwise it decrements L->hookcount and
** calls jit_hookins(L, return address) when the counter has reached zero or
** when the line mask is set; afterwards BASE and TOP are reloaded from L.
*/
385 | static void jit_ins_debug(jit_State *J, int openop) | ||
386 | { | ||
387 | if (openop) { | ||
388 | | mov L->top, TOP | ||
389 | } | ||
390 | |// TODO: Passing bytecode addrs would speed this up (but use more space). | ||
391 | | call ->HOOKINS | ||
392 | |||
393 | |.jsub HOOKINS | ||
394 | | test byte L->hookmask, LUA_MASKLINE|LUA_MASKCOUNT | ||
395 | | jz >2 | ||
396 | | dec dword L->hookcount | ||
397 | | jz >1 | ||
398 | | test byte L->hookmask, LUA_MASKLINE | ||
399 | | jz >2 | ||
400 | |1: | ||
401 | | mov eax, [esp] // Current machine code address (return address of the call). | ||
402 | | sub esp, FRAME_OFFSET | ||
403 | | call &jit_hookins, L, eax | ||
404 | | add esp, FRAME_OFFSET | ||
405 | | mov BASE, L->base // Restore stack-relative pointers. | ||
406 | | mov TOP, L->top | ||
407 | |2: | ||
408 | | ret | ||
409 | |.endjsub | ||
410 | } | ||
411 | |||
412 | /* Called before every instruction. */ | ||
/* Defines the PC label numbered J->nextpc at the current code position. */
413 | static void jit_ins_start(jit_State *J) | ||
414 | { | ||
415 | |// Always emit PC labels, even for dead code (but not for combined JMP). | ||
416 | |=>J->nextpc: | ||
417 | } | ||
418 | |||
419 | /* Chain to another instruction. */ | ||
/* Emits an unconditional jmp to the PC label of bytecode instruction pc. */
420 | static void jit_ins_chainto(jit_State *J, int pc) | ||
421 | { | ||
422 | | jmp =>pc | ||
423 | } | ||
424 | |||
425 | /* Set PC label. */ | ||
/* NOTE(review): the .label directive presumably binds PC label pc to the */
/* address in target instead of the current position -- confirm in the */
/* DynASM sources shipped with this tree. */
426 | static void jit_ins_setpc(jit_State *J, int pc, void *target) | ||
427 | { | ||
428 | |.label =>pc, &target | ||
429 | } | ||
430 | |||
431 | /* Called after the last instruction has been encoded. */ | ||
/*
** lastpc:  pc of the last instruction. PC label lastpc+1 is placed at the
**          end of the .code section and lastpc+2 in the .tail section.
** sizemfm: number of bytes reserved in the .mfmap section.
** If a deopt target has been added to the .deopt section (JIT_TF_USED_DEOPT
** set in J->tflags), that section is terminated with a jmp to DEOPTIMIZE.
** The .tail section is terminated with the word 0xffff and the .mfmap
** section receives sizemfm bytes of space plus two awords set to zero.
*/
432 | static void jit_ins_last(jit_State *J, int lastpc, int sizemfm) | ||
433 | { | ||
434 | if (J->tflags & JIT_TF_USED_DEOPT) { /* Deopt section has been used? */ | ||
435 | |.deopt | ||
436 | | jmp ->DEOPTIMIZE // Yes, need to add final jmp. | ||
437 | |.code | ||
438 | } | ||
439 | |=>lastpc+1: // Extra label at the end of .code. | ||
440 | |.tail | ||
441 | |=>lastpc+2: // And at the end of .deopt/.tail. | ||
442 | | .align word // Keep next section word aligned. | ||
443 | | .word 0xffff // Terminate mfm with JIT_MFM_STOP. | ||
444 | |.mfmap | ||
445 | | // <-- Deoptimization hints are inserted here. | ||
446 | | .space sizemfm // To be filled in with inverse mfm. | ||
447 | | .aword 0, 0 // Next mcode block pointer and size. | ||
448 | | // The previous two awords are only word, but not aword aligned. | ||
449 | | // Copying them is easier than aligning them and adjusting mfm handling. | ||
450 | |.code | ||
451 | } | ||
452 | |||
453 | /* Add a deoptimize target for the current instruction. */ | ||
/*
** nargs: -1 selects the variant for open instructions, any other value the
**        regular variant.
** Regular variant: adds an entry to the .deopt section consisting of the
** byte 0x8d followed by local label 9 and a mov that loads &J->nextins into
** edx, and sets JIT_TF_USED_DEOPT in J->tflags. The entries run into each
** other; the single jmp to DEOPTIMIZE at their end is added by
** jit_ins_last(). According to the comments below, the 0x8d byte makes the
** mov of every following entry decode as a lea into edi, so that edx keeps
** the value loaded by the entry that was actually entered.
** Open variant: adds the mov plus a jmp to DEOPTIMIZE_OPEN to the .tail
** section instead, because the lea would overwrite TOP (edi).
** L_DEOPTIMIZE and L_DEOPTIMIZEF are the backward and forward references to
** local label 9 defined here.
*/
454 | static void jit_deopt_target(jit_State *J, int nargs) | ||
455 | { | ||
456 | |.define L_DEOPTLABEL, 9 // Local deopt label. | ||
457 | |.define L_DEOPTIMIZE, <9 // Local deopt target. Use after call. | ||
458 | |.define L_DEOPTIMIZEF, >9 // Local deopt target. Use before call. | ||
459 | if (nargs != -1) { | ||
460 | |// Alas, x86 doesn't have conditional calls. So branch to the .deopt | ||
461 | |// section to load J->nextins and jump to JSUB_DEOPTIMIZE. | ||
462 | |// Only a single jump is added at the end (if needed) and any | ||
463 | |// intervening code sequences are shadowed (lea trick). | ||
464 | |.deopt // Occupies 6 bytes in .deopt section. | ||
465 | | .byte 0x8d // Shadow mov with lea edi, [edx+ofs]. | ||
466 | |L_DEOPTLABEL: | ||
467 | | mov edx, &J->nextins // Current instruction + 1. | ||
468 | |.code | ||
/* Tell jit_ins_last() that the .deopt section needs its final jmp. */
469 | J->tflags |= JIT_TF_USED_DEOPT; | ||
470 | } else { | ||
471 | |.tail // Occupies 10 bytes in .tail section. | ||
472 | |L_DEOPTLABEL: | ||
473 | | mov edx, &J->nextins | ||
474 | | jmp ->DEOPTIMIZE_OPEN // Open ins need to save TOP, too. | ||
475 | | // And TOP (edi) would be overwritten by the lea trick. | ||
476 | | // So checking for open ops later on wouldn't suffice. Sigh. | ||
477 | |.code | ||
478 | } | ||
479 | } | ||
480 | |||
481 | /* luaC_checkGC() inlined. Destroys caller-saves + TOP (edi). Uses label 7:. */ | ||
482 | /* Use this only at the _end_ of an instruction. */ | ||
/*
** The emitted code compares totalbytes of the global state against its
** GCthreshold and jumps to the GCSTEP jsub if totalbytes is not below the
** threshold. TOP is loaded with the address of local label 7 beforehand and
** serves as the continuation address: GCSTEP calls luaC_step(L), reloads
** BASE from L->base and jumps back through TOP.
** The GCSTEP body is captured by .jsub/.endjsub and dumped by .dumpjsub.
*/
483 | static void jit_checkGC(jit_State *J) | ||
484 | { | ||
485 | | mov GL:ecx, L->l_G | ||
486 | | mov eax, GL:ecx->totalbytes // size_t | ||
487 | | mov TOP, >7 | ||
488 | | cmp eax, GL:ecx->GCthreshold // size_t | ||
489 | | jae ->GCSTEP | ||
490 | |7: | ||
491 | |||
492 | |.jsub GCSTEP | ||
493 | | call &luaC_step, L | ||
494 | | mov BASE, L->base | ||
495 | | jmp TOP | ||
496 | |.endjsub | ||
497 | } | ||
498 | |||
499 | /* ------------------------------------------------------------------------ */ | ||
500 | |||
501 | |// JIT->JIT calling conventions: | ||
502 | |// | ||
503 | |// Register/Type | Call Setup | Prologue | Epilogue | Call Finish | ||
504 | |// =========================================================================== | ||
505 | |// eax | LCL | = BASE->value| | * | * | ||
506 | |// ecx | CI | = L->ci | L->ci = ++CI | * | * | ||
507 | |// edx | * | * | * | * | * | ||
508 | |// --------------------------------------------------------------------------- | ||
509 | |// esi | L | | | | | ||
510 | |// ebx | BASE | += f | ++ | -- | -= f | ||
511 | |// edi | TOP | += f+1+nargs | = BASE+maxst | = f+nresults | = BASE+maxst | ||
512 | |// --------------------------------------------------------------------------- | ||
513 | |// L->base | | = BASE | | = BASE | ||
514 | |// L->top | | = TOP | | = TOP | ||
515 | |// L->ci | | ++, -> = ... | -- | | ||
516 | |// L->ci->savedpc| = &code[pc] | [ L-> = ] | | | ||
517 | |// --------------------------------------------------------------------------- | ||
518 | |// args + vars | | setnil | | | ||
519 | |// results | | | move | setnil | ||
520 | |// --------------------------------------------------------------------------- | ||
521 | |||
522 | |||
523 | |// Include support for function inlining. | ||
524 | |.include ljit_x86_inline.dash | ||
525 | |||
526 | |||
527 | #ifdef LUA_COMPAT_VARARG | ||
/*
** Only compiled with LUA_COMPAT_VARARG; jit_prologue() emits a call to it
** for functions with VARARG_NEEDSARG set in is_vararg.
** Creates a table holding the extra arguments of the current function at
** the indices 1..num plus their count in the field "n", and stores the
** table in the stack slot at L->base + numparams.
*/
528 | static void jit_vararg_table(lua_State *L) | ||
529 | { | ||
530 | Table *tab; | ||
531 | StkId base, func; | ||
532 | int i, num, numparams; | ||
533 | luaC_checkGC(L); | ||
534 | base = L->base; | ||
535 | func = L->ci->func; | ||
536 | numparams = clvalue(func)->l.p->numparams; | ||
/* Slots between func and base, minus the function slot and the fixed args. */
537 | num = base - func - numparams - 1; | ||
538 | tab = luaH_new(L, num, 1); | ||
/* The extra arguments occupy the num slots directly below base. */
539 | for (i = 0; i < num; i++) | ||
540 | setobj2n(L, luaH_setnum(L, tab, i+1), base - num + i); | ||
541 | setnvalue(luaH_setstr(L, tab, luaS_newliteral(L, "n")), (lua_Number)num); | ||
542 | sethvalue(L, base + numparams, tab); | ||
543 | } | ||
544 | #endif | ||
545 | |||
546 | /* Encode JIT function prologue. */ | ||
/*
** Emits the entry sequence of the compiled function for J->pt:
**  - stack and CallInfo overflow checks, branching to GROW_STACK/GROW_CI,
**  - setup of the new CallInfo (func, base, top, tailcalls = 0) and of
**    L->ci, L->base and L->top,
**  - fixarg functions: TOP is capped at BASE+numparams,
**    vararg functions: missing fixed args are cleared, the fixed args are
**    copied to the slots starting at TOP and that position becomes the new
**    base,
**  - clearing of the remaining stack slots (tt = 0, i.e. nil),
**  - with LUA_COMPAT_VARARG: a call to jit_vararg_table() if the function
**    has VARARG_NEEDSARG set,
**  - with JIT_F_DEBUG_CALL: a call hook check using the HOOKCALL jsub.
** The HOOKCALL body is captured by .jsub/.endjsub and dumped by .dumpjsub.
*/
547 | static void jit_prologue(jit_State *J) | ||
548 | { | ||
549 | Proto *pt = J->pt; | ||
550 | int numparams = pt->numparams; | ||
551 | int stacksize = pt->maxstacksize; | ||
552 | |||
553 | |// Note: the order of the following instructions has been carefully tuned. | ||
554 | | lea eax, TOP[stacksize] | ||
555 | | sub esp, FRAME_OFFSET | ||
556 | | cmp eax, L->stack_last | ||
557 | | jae ->GROW_STACK // Stack overflow? | ||
558 | | // This is a slight overallocation (BASE[1+stacksize] would be enough). | ||
559 | | // We duplicate luaD_precall() behaviour so we can use luaD_growstack(). | ||
560 | | cmp CI, L->end_ci | ||
561 | | lea CI, CI[1] | ||
562 | | je ->GROW_CI // CI overflow? | ||
563 | | xor eax, eax // Assumes: LUA_TNIL == 0 | ||
564 | | mov CI->func, BASE | ||
565 | | add BASE, #BASE | ||
566 | | mov L->ci, CI | ||
567 | |||
/* The flags of this compare are consumed by the cmova/jna/jb emitted below. */
568 | if (numparams > 0) { | ||
569 | | lea edx, BASE[numparams] | ||
570 | | cmp TOP, edx // L->top >< L->base+numparams ? | ||
571 | } | ||
572 | |||
573 | if (!pt->is_vararg) { /* Fixarg function. */ | ||
574 | /* Must cap L->top at L->base+numparams because 1st LOADNIL is omitted. */ | ||
575 | if (numparams == 0) { | ||
576 | | mov TOP, BASE | ||
577 | } else if (J->flags & JIT_F_CPU_CMOV) { | ||
578 | | cmova TOP, edx | ||
579 | } else { | ||
580 | | jna >1 | ||
581 | | mov TOP, edx | ||
582 | |1: | ||
583 | } | ||
584 | | lea edx, BASE[stacksize] // New ci->top. | ||
585 | | mov CI->tailcalls, eax // 0 | ||
586 | | mov CI->top, edx | ||
587 | | mov L->top, edx | ||
588 | | mov L->base, BASE | ||
589 | | mov CI->base, BASE | ||
590 | } else { /* Vararg function. */ | ||
591 | int i; | ||
592 | if (numparams > 0) { | ||
593 | |// If some fixargs are missing we need to clear them and | ||
594 | |// bump TOP to get a consistent frame layout for OP_VARARG. | ||
595 | | jb >5 | ||
596 | |4: | ||
597 | |.tail | ||
598 | |5: // This is uncommon. So move it to .tail and use a loop. | ||
599 | | mov TOP->tt, eax | ||
600 | | add TOP, #TOP | ||
601 | | cmp TOP, edx | ||
602 | | jb <5 | ||
603 | | jmp <4 | ||
604 | |.code | ||
605 | } | ||
606 | | mov L->base, TOP // New base is after last arg. | ||
607 | | mov CI->base, TOP | ||
608 | | mov CI->tailcalls, eax // 0 | ||
609 | for (i = 0; i < numparams; i++) { /* Move/clear fixargs. */ | ||
610 | |// Inline this. Vararg funcs usually have very few fixargs. | ||
611 | | copyslot TOP[i], BASE[i], ecx, edx | ||
612 | | mov BASE[i].tt, eax // Clear old fixarg slot (help the GC). | ||
613 | } | ||
614 | if (numparams > 0) { | ||
615 | | mov CI, L->ci // Reload CI = ecx (used by move). | ||
616 | } | ||
617 | | mov BASE, TOP | ||
618 | | lea edx, BASE[stacksize] // New ci->top. | ||
619 | | lea TOP, BASE[numparams] // Start of vars to clear. | ||
620 | | mov CI->top, edx | ||
621 | | mov L->top, edx | ||
622 | stacksize -= numparams; /* Fixargs are already cleared. */ | ||
623 | } | ||
624 | |||
625 | /* Clear undefined args and all vars. Still assumes eax = LUA_TNIL = 0. */ | ||
626 | /* Note: cannot clear only args because L->top has grown. */ | ||
627 | if (stacksize <= EXTRA_STACK) { /* Loopless clear. May use EXTRA_STACK. */ | ||
628 | int i; | ||
629 | for (i = 0; i < stacksize; i++) { | ||
630 | | mov TOP[i].tt, eax | ||
631 | } | ||
632 | } else { /* Standard loop. */ | ||
633 | |2: // Unrolled for 2 stack slots. No initial check. May use EXTRA_STACK. | ||
634 | | mov TOP[0].tt, eax | ||
635 | | mov TOP[1].tt, eax | ||
636 | | add TOP, 2*#TOP | ||
637 | | cmp TOP, edx | ||
638 | | jb <2 | ||
639 | |// Note: TOP is undefined now. TOP is only valid across calls/open ins. | ||
640 | } | ||
641 | |||
642 | #ifdef LUA_COMPAT_VARARG | ||
643 | if (pt->is_vararg & VARARG_NEEDSARG) { | ||
644 | | call &jit_vararg_table, L | ||
645 | } | ||
646 | #endif | ||
647 | |||
648 | /* Call hook check. */ | ||
649 | if (J->flags & JIT_F_DEBUG_CALL) { | ||
650 | | test byte L->hookmask, LUA_MASKCALL | ||
651 | | jz >9 | ||
652 | | call ->HOOKCALL | ||
653 | |9: | ||
654 | |||
655 | |.jsub HOOKCALL | ||
656 | | mov CI, L->ci | ||
657 | | mov TOP, CI->func | ||
658 | | mov LCL, TOP->value | ||
659 | | mov PROTO:edi, LCL->p // clvalue(L->ci->func)->l.p | ||
660 | | mov eax, PROTO:edi->code | ||
661 | | add eax, 4 // Hooks expect incremented PC. | ||
662 | | mov L->savedpc, eax | ||
663 | | sub esp, FRAME_OFFSET | ||
664 | | call &luaD_callhook, L, LUA_HOOKCALL, -1 | ||
665 | | add esp, FRAME_OFFSET | ||
666 | | mov eax, PROTO:edi->code // PROTO:edi is callee-save. | ||
667 | | mov L->savedpc, eax // jit_hookins needs previous PC. | ||
668 | | mov BASE, L->base | ||
669 | | ret | ||
670 | |.endjsub | ||
671 | } | ||
672 | } | ||
673 | |||
674 | /* Check if we can combine 'return const'. */ | ||
/*
** Returns 0 without emitting anything when J->combine is not set.
** Otherwise emits the frame teardown of a combined return and returns 1:
** luaF_close(L, BASE) unless the NOCLOSE hint is set, the pop of L->ci and
** the esp adjustment by FRAME_OFFSET. The emitted code leaves BASE one slot
** below the old base (fixarg) or at ci->func (vararg) and TOP one slot
** above BASE. Neither a result store nor a ret is emitted here.
** NOTE(review): presumably the caller stores the constant into that slot
** and emits the ret -- confirm at the call sites.
*/
675 | static int jit_return_k(jit_State *J) | ||
676 | { | ||
677 | if (!J->combine) return 0; /* COMBINE hint set? */ | ||
678 | /* May need to close open upvalues. */ | ||
679 | if (!fhint_isset(J, NOCLOSE)) { | ||
680 | | call &luaF_close, L, BASE | ||
681 | } | ||
682 | if (!J->pt->is_vararg) { /* Fixarg function. */ | ||
683 | | sub aword L->ci, #CI | ||
684 | | mov TOP, BASE | ||
685 | | sub BASE, #BASE | ||
686 | | add esp, FRAME_OFFSET | ||
687 | } else { /* Vararg function. */ | ||
688 | | mov CI, L->ci | ||
689 | | mov BASE, CI->func | ||
690 | | sub CI, #CI | ||
691 | | mov L->ci, CI | ||
692 | | lea TOP, BASE[1] | ||
693 | | add esp, FRAME_OFFSET | ||
694 | } | ||
695 | jit_assert(J->combine == 1); /* Required to skip next RETURN instruction. */ | ||
696 | return 1; | ||
697 | } | ||
698 | |||
/*
** Emit code for a return instruction.
** rbase:    index (relative to BASE) of the first result slot.
** nresults: number of results, or negative if the previous op was open and
**           the results extend up to TOP.
** With JIT_F_DEBUG_CALL a return hook check using the HOOKRET jsub comes
** first. Open upvalues are closed with luaF_close() unless the NOCLOSE hint
** is set. Then the results are relocated downwards to the function slot,
** L->ci is popped, esp is readjusted by FRAME_OFFSET and a ret is emitted.
** The emitted code leaves BASE at the first relocated result and TOP just
** past the last one.
** The HOOKRET body is captured by .jsub/.endjsub and dumped by .dumpjsub.
*/
699 | static void jit_op_return(jit_State *J, int rbase, int nresults) | ||
700 | { | ||
701 | /* Return hook check. */ | ||
702 | if (J->flags & JIT_F_DEBUG_CALL) { | ||
703 | if (nresults < 0 && !(J->flags & JIT_F_DEBUG_INS)) { | ||
704 | | mov L->top, TOP | ||
705 | } | ||
706 | |// TODO: LUA_HOOKTAILRET (+ ci->tailcalls counting) or changed debug API. | ||
707 | | test byte L->hookmask, LUA_MASKRET | ||
708 | | jz >7 | ||
709 | | call ->HOOKRET | ||
710 | |7: | ||
711 | if (J->flags & JIT_F_DEBUG_INS) { | ||
712 | | mov eax, FRAME_RETADDR | ||
713 | | mov L->savedpc, eax | ||
714 | } | ||
715 | |||
716 | |.jsub HOOKRET | ||
717 | | mov eax, [esp] // Current machine code address. | ||
718 | | mov L->savedpc, eax | ||
719 | | sub esp, FRAME_OFFSET | ||
720 | | call &luaD_callhook, L, LUA_HOOKRET, -1 | ||
721 | | add esp, FRAME_OFFSET | ||
722 | | mov BASE, L->base // Restore stack-relative pointers. | ||
723 | | mov TOP, L->top | ||
724 | | ret | ||
725 | |.endjsub | ||
726 | } | ||
727 | |||
728 | /* May need to close open upvalues. */ | ||
729 | if (!fhint_isset(J, NOCLOSE)) { | ||
730 | | call &luaF_close, L, BASE | ||
731 | } | ||
732 | |||
733 | /* Previous op was open: 'return f()' or 'return ...' */ | ||
734 | if (nresults < 0) { | ||
735 | |// Relocate [BASE+rbase, TOP) -> [ci->func, *). | ||
736 | | mov CI, L->ci | ||
737 | | addidx BASE, rbase | ||
738 | | mov edx, CI->func | ||
739 | | cmp BASE, TOP | ||
740 | | jnb >2 | ||
741 | |1: | ||
742 | | mov eax, [BASE] | ||
743 | | add BASE, aword*1 | ||
744 | | mov [edx], eax | ||
745 | | add edx, aword*1 | ||
746 | | cmp BASE, TOP | ||
747 | | jb <1 | ||
748 | |2: | ||
749 | | add esp, FRAME_OFFSET | ||
750 | | mov BASE, CI->func | ||
751 | | sub CI, #CI | ||
752 | | mov TOP, edx // Relocated TOP. | ||
753 | | mov L->ci, CI | ||
754 | | ret | ||
/* The open case is complete; the fixed-count variants below are skipped. */
755 | return; | ||
756 | } | ||
757 | |||
758 | if (!J->pt->is_vararg) { /* Fixarg function, nresults >= 0. */ | ||
759 | int i; | ||
760 | | sub aword L->ci, #CI | ||
761 | |// Relocate [BASE+rbase,BASE+rbase+nresults) -> [BASE-1, *). | ||
762 | |// TODO: loop for large nresults? | ||
763 | | sub BASE, #BASE | ||
/* BASE has already been decremented, hence the +1 in the source index. */
764 | for (i = 0; i < nresults; i++) { | ||
765 | | copyslot BASE[i], BASE[rbase+i+1] | ||
766 | } | ||
767 | | add esp, FRAME_OFFSET | ||
768 | | lea TOP, BASE[nresults] | ||
769 | | ret | ||
770 | } else { /* Vararg function, nresults >= 0. */ | ||
771 | int i; | ||
772 | |// Relocate [BASE+rbase,BASE+rbase+nresults) -> [ci->func, *). | ||
773 | | mov CI, L->ci | ||
774 | | mov TOP, CI->func | ||
775 | | sub CI, #CI | ||
776 | | mov L->ci, CI // CI = ecx is used by copyslot. | ||
777 | for (i = 0; i < nresults; i++) { | ||
778 | | copyslot TOP[i], BASE[rbase+i] | ||
779 | } | ||
780 | | add esp, FRAME_OFFSET | ||
781 | | mov BASE, TOP | ||
782 | | addidx TOP, nresults | ||
783 | | ret | ||
784 | } | ||
785 | } | ||
786 | |||
787 | static void jit_op_call(jit_State *J, int func, int nargs, int nresults) | ||
788 | { /* Emit code for a call of the object in slot func. nargs < 0 means TOP was set by a previous open op; nresults < 0 means TOP is kept for the next instruction. */ | ||
789 | int cltype = jit_inline_call(J, func, nargs, nresults); /* Negative: the call was inlined. Otherwise compared against LUA_TFUNCTION below. */ | ||
790 | if (cltype < 0) return; /* Inlined? Then nothing more to emit here. */ | ||
791 | |||
792 | |// Note: the order of the following instructions has been carefully tuned. | ||
793 | | addidx BASE, func | ||
794 | | mov CI, L->ci | ||
795 | | isfunction 0 // BASE[0] is L->base[func]. The result of this check is consumed by the jne/je emitted below. | ||
796 | if (nargs >= 0) { /* Previous op was not open and did not set TOP. */ | ||
797 | | lea TOP, BASE[1+nargs] | ||
798 | } | ||
799 | | mov LCL, BASE->value | ||
800 | | mov edx, &J->nextins | ||
801 | | mov CI->savedpc, edx | ||
802 | if (cltype == LUA_TFUNCTION) { /* Hinted as a function: a failed check deoptimizes. */ | ||
803 | if (nargs == -1) { | ||
804 | | jne ->DEOPTIMIZE_OPEN // TYPE hint was wrong (open op)? | ||
805 | } else { | ||
806 | | jne ->DEOPTIMIZE // TYPE hint was wrong? | ||
807 | } | ||
808 | } else { /* No function hint: non-functions go through the METACALL jsub. */ | ||
809 | | je >1 // Skip __call handling for functions. | ||
810 | | call ->METACALL | ||
811 | |1: | ||
812 | |||
813 | |.jsub METACALL // CALL to __call metamethod. | ||
814 | | sub esp, FRAME_OFFSET | ||
815 | | mov L->savedpc, edx // May throw errors. Save PC and TOP. | ||
816 | | mov L->top, TOP | ||
817 | | call &luaD_tryfuncTM, L, BASE // Resolve __call metamethod. | ||
818 | | add esp, FRAME_OFFSET | ||
819 | | mov BASE, eax // Restore stack-relative pointers. | ||
820 | | mov TOP, L->top | ||
821 | | mov LCL, BASE->value | ||
822 | | mov CI, L->ci | ||
823 | | ret | ||
824 | |.endjsub | ||
825 | } | ||
826 | | call aword LCL->jit_gate // Call JIT func or GATE_JL/GATE_JC. | ||
827 | | subidx BASE, func | ||
828 | | mov L->base, BASE | ||
829 | |||
830 | /* Clear undefined results TOP <= o < func+nresults. */ | ||
831 | if (nresults > 0) { | ||
832 | | xor eax, eax | ||
833 | if (nresults <= EXTRA_STACK) { /* Loopless clear. May use EXTRA_STACK. */ | ||
834 | int i; | ||
835 | for (i = 0; i < nresults; i++) { /* One type-tag store per possible result slot, starting at TOP. */ | ||
836 | | mov TOP[i].tt, eax | ||
837 | } | ||
838 | } else { /* Standard loop. TODO: move to .tail? */ | ||
839 | | lea edx, BASE[func+nresults] | ||
840 | |1: // Unrolled for 2 stack slots. No initial check. May use EXTRA_STACK. | ||
841 | | mov TOP[0].tt, eax // LUA_TNIL | ||
842 | | mov TOP[1].tt, eax // LUA_TNIL | ||
843 | | add TOP, 2*#TOP | ||
844 | | cmp TOP, edx | ||
845 | | jb <1 | ||
846 | } | ||
847 | } | ||
848 | |||
849 | if (nresults >= 0) { /* Not an open ins. Restore L->top. */ | ||
850 | | lea TOP, BASE[J->pt->maxstacksize] // Faster than getting L->ci->top. | ||
851 | | mov L->top, TOP | ||
852 | } /* Otherwise keep TOP for next instruction. */ | ||
853 | } | ||
854 | |||
855 | static void jit_op_tailcall(jit_State *J, int func, int nargs) | ||
856 | { /* Emit code for a tail call: the callee and its arguments are relocated down to ci->func, then control jumps to the callee's jit_gate. */ | ||
857 | int cltype; /* Result of jit_inline_call(): negative if inlined, else compared against LUA_TFUNCTION. */ | ||
858 | |||
859 | if (!fhint_isset(J, NOCLOSE)) { /* May need to close open upvalues. */ | ||
860 | | call &luaF_close, L, BASE | ||
861 | } | ||
862 | |||
863 | cltype = jit_inline_call(J, func, nargs, -2); | ||
864 | if (cltype < 0) goto finish; /* Inlined? Only the combine step below remains. */ | ||
865 | |||
866 | if (cltype == LUA_TFUNCTION) { /* Hinted as a function: a failed check deoptimizes. */ | ||
867 | jit_deopt_target(J, nargs); | ||
868 | | isfunction func | ||
869 | | jne L_DEOPTIMIZE // TYPE hint was wrong? | ||
870 | } else { /* No function hint: non-functions branch to the out-of-line fallback at label 5. */ | ||
871 | | isfunction func; jne >5 // Handle generic callables first. | ||
872 | |.tail | ||
873 | |5: // Fallback for generic callables. | ||
874 | | addidx BASE, func | ||
875 | if (nargs >= 0) { /* TOP was not set by a previous open op, so compute it. */ | ||
876 | | lea TOP, BASE[1+nargs] | ||
877 | } | ||
878 | | mov edx, &J->nextins | ||
879 | | jmp ->METATAILCALL | ||
880 | |.code | ||
881 | |||
882 | |.jsub METATAILCALL // TAILCALL to __call metamethod. | ||
883 | | mov L->savedpc, edx | ||
884 | | mov L->top, TOP | ||
885 | | call &luaD_tryfuncTM, L, BASE // Resolve __call metamethod. | ||
886 | | | ||
887 | |// Relocate [eax, L->top) -> [L->ci->func, *). | ||
888 | | mov CI, L->ci | ||
889 | | mov edx, L->top | ||
890 | | mov TOP, CI->func | ||
891 | |1: | ||
892 | | mov BASE, [eax] | ||
893 | | add eax, aword*1 | ||
894 | | mov [TOP], BASE | ||
895 | | add TOP, aword*1 | ||
896 | | cmp eax, edx | ||
897 | | jb <1 | ||
898 | | | ||
899 | | mov BASE, CI->func | ||
900 | | mov LCL, BASE->value | ||
901 | | sub CI, #CI | ||
902 | | add esp, FRAME_OFFSET | ||
903 | | jmp aword LCL->jit_gate // Chain to callgate. | ||
904 | |.endjsub | ||
905 | } | ||
906 | |||
907 | if (nargs >= 0) { /* Previous op was not open and did not set TOP. */ | ||
908 | int i; | ||
909 | /* Relocate [BASE+func, BASE+func+nargs] -> [ci->func, ci->func+nargs]. */ | ||
910 | /* TODO: loop for large nargs? */ | ||
911 | if (!J->pt->is_vararg) { /* Fixarg function. The copy target is addressed relative to BASE. */ | ||
912 | | mov LCL, BASE[func].value | ||
913 | for (i = 0; i < nargs; i++) { /* One unrolled slot copy per argument. */ | ||
914 | | copyslot BASE[i], BASE[func+1+i], ecx, edx | ||
915 | } | ||
916 | | lea TOP, BASE[nargs] | ||
917 | | sub BASE, #BASE | ||
918 | | mov CI, L->ci | ||
919 | | mov BASE->value, LCL // Sufficient to copy func->value. | ||
920 | } else { /* Vararg function. The copy target is loaded from CI->func. */ | ||
921 | | mov CI, L->ci | ||
922 | | lea TOP, BASE[func] | ||
923 | | mov BASE, CI->func | ||
924 | | mov LCL, TOP->value | ||
925 | | mov BASE->value, LCL // Sufficient to copy func->value. | ||
926 | for (i = 0; i < nargs; i++) { /* One unrolled slot copy per argument. */ | ||
927 | | copyslot BASE[i+1], TOP[i+1], eax, edx | ||
928 | } | ||
929 | | lea TOP, BASE[1+nargs] | ||
930 | | mov LCL, BASE->value // Need to reload LCL = eax. | ||
931 | } | ||
932 | } else { /* Previous op was open and set TOP. */ | ||
933 | |// Relocate [BASE+func, TOP) -> [ci->func, *). | ||
934 | | mov CI, L->ci | ||
935 | | addidx BASE, func | ||
936 | | mov edx, CI->func | ||
937 | |1: | ||
938 | | mov eax, [BASE] | ||
939 | | add BASE, aword*1 | ||
940 | | mov [edx], eax | ||
941 | | add edx, aword*1 | ||
942 | | cmp BASE, TOP | ||
943 | | jb <1 | ||
944 | | mov BASE, CI->func | ||
945 | | mov TOP, edx // Relocated TOP. | ||
946 | | mov LCL, BASE->value | ||
947 | } | ||
948 | | sub CI, #CI | ||
949 | | add esp, FRAME_OFFSET | ||
950 | | jmp aword LCL->jit_gate // Chain to JIT function. | ||
951 | |||
952 | finish: | ||
953 | J->combine++; /* Combine with following return instruction. */ | ||
954 | } | ||
955 | |||
956 | /* ------------------------------------------------------------------------ */ | ||
957 | |||
958 | static void jit_op_move(jit_State *J, int dest, int src) | ||
959 | { /* Emit code to copy stack slot src to stack slot dest. */ | ||
960 | | copyslot BASE[dest], BASE[src] | ||
961 | } | ||
962 | |||
963 | static void jit_op_loadk(jit_State *J, int dest, int kidx) | ||
964 | { /* Emit code to load constant kidx of the current prototype into slot dest. */ | ||
965 | const TValue *kk = &J->pt->k[kidx]; | ||
966 | int rk = jit_return_k(J); /* Nonzero: the constant goes to BASE[0] and a ret is emitted right after it. */ | ||
967 | if (rk) dest = 0; | ||
968 | | copyconst BASE[dest], kk | ||
969 | if (rk) { | ||
970 | | ret | ||
971 | } | ||
972 | } | ||
973 | |||
974 | static void jit_op_loadnil(jit_State *J, int first, int last) | ||
975 | { /* Emit code to set the stack slots first..last (both inclusive) to nil. */ | ||
976 | int idx, num = last - first + 1; | ||
977 | int rk = jit_return_k(J); /* Nonzero: a single nil goes to BASE[0] and a ret is emitted right after it. */ | ||
978 | | xor eax, eax // Assumes: LUA_TNIL == 0 | ||
979 | if (rk) { | ||
980 | | settt BASE[0], eax | ||
981 | | ret | ||
982 | } else if (num <= 8) { /* Few slots: one unrolled type-tag store per slot. */ | ||
983 | for (idx = first; idx <= last; idx++) { | ||
984 | | settt BASE[idx], eax // 3/6 bytes | ||
985 | } | ||
986 | } else { /* Many slots: loop over the type tags of [first, last]. */ | ||
987 | | lea ecx, BASE[first].tt // 15-21 bytes | ||
988 | | lea edx, BASE[last].tt | ||
989 | |1: | ||
990 | | mov [ecx], eax | ||
991 | | cmp ecx, edx | ||
992 | | lea ecx, [ecx+#BASE] // Preserves CC. | ||
993 | | jb <1 // Flags are from the cmp before the increment: jbe would also loop after clearing last and overwrite BASE[last+1].tt. | ||
994 | } | ||
995 | } | ||
996 | |||
997 | static void jit_op_loadbool(jit_State *J, int dest, int b, int dojump) | ||
998 | { /* Emit code to store the boolean b into slot dest; if dojump is set, also emit a jump over the next instruction. */ | ||
999 | int rk = jit_return_k(J); /* Nonzero: the value goes to BASE[0] and a ret is emitted right after it. */ | ||
1000 | if (rk) dest = 0; | ||
1001 | | setbvalue BASE[dest], b | ||
1002 | if (rk) { | ||
1003 | | ret | ||
1004 | } else if (dojump) { | ||
1005 | const TValue *h = hint_getpc(J, COMBINE, J->nextpc); /* COMBINE hint recorded for the next instruction. */ | ||
1006 | if (!(ttisboolean(h) && bvalue(h) == 0)) { /* Avoid jmp around dead ins. */ | ||
1007 | | jmp =>J->nextpc+1 | ||
1008 | } | ||
1009 | } | ||
1010 | } | ||
1011 | |||
1012 | /* ------------------------------------------------------------------------ */ | ||
1013 | |||
1014 | static void jit_op_getupval(jit_State *J, int dest, int uvidx) | ||
1015 | { /* Emit code to copy the value referenced by LCL->upvals[uvidx]->v into slot dest. */ | ||
1016 | | getLCL | ||
1017 | | mov UPVAL:ecx, LCL->upvals[uvidx] | ||
1018 | | mov TOP, UPVAL:ecx->v | ||
1019 | | copyslot BASE[dest], TOP[0] | ||
1020 | } | ||
1021 | |||
1022 | static void jit_op_setupval(jit_State *J, int src, int uvidx) | ||
1023 | { /* Emit code to copy slot src to the location LCL->upvals[uvidx]->v, with an out-of-line GC barrier check. Also defines the BARRIERF jsub. */ | ||
1024 | | getLCL | ||
1025 | | mov UPVAL:ecx, LCL->upvals[uvidx] | ||
1026 | | mov TOP, UPVAL:ecx->v | ||
1027 | | // This is really copyslot TOP[0], BASE[src] with compare mixed in. | ||
1028 | | mov eax, BASE[src].tt | ||
1029 | | mov GCOBJECT:edx, BASE[src].value | ||
1030 | | mov TOP->tt, eax | ||
1031 | | cmp eax, LUA_TSTRING // iscollectable(val)? The flags are consumed by the jae below; only movs follow. | ||
1032 | | mov eax, BASE[src].value.na[1] | ||
1033 | | mov TOP->value, GCOBJECT:edx | ||
1034 | | mov TOP->value.na[1], eax | ||
1035 | | jae >5 | ||
1036 | |4: | ||
1037 | |.tail | ||
1038 | |5: | ||
1039 | | test byte GCOBJECT:edx->gch.marked, WHITEBITS // && iswhite(val) | ||
1040 | | jz <4 | ||
1041 | | test byte UPVAL:ecx->marked, bitmask(BLACKBIT) // && isblack(uv) | ||
1042 | | jz <4 | ||
1043 | | call ->BARRIERF // Yes, need barrier. | ||
1044 | | jmp <4 | ||
1045 | |.code | ||
1046 | |||
1047 | |.jsub BARRIERF // luaC_barrierf() with regparms. Expects GCOBJECT:edx (value) and UPVAL:ecx (upvalue). | ||
1048 | | mov ARG4, GCOBJECT:edx | ||
1049 | | mov ARG3, UPVAL:ecx | ||
1050 | | mov ARG2, L | ||
1051 | | jmp &luaC_barrierf // Chain to C code. | ||
1052 | |.endjsub | ||
1053 | } | ||
1054 | |||
1055 | /* ------------------------------------------------------------------------ */ | ||
1056 | |||
1057 | /* Optimized table lookup routines. Enter via jsub, fallback to C. */ | ||
1058 | |||
1059 | /* Fallback for GETTABLE_*. Temporary key is in L->env. Looks up __index in the metatable of t and either caches its absence, calls it, or hands over to luaV_gettable(). The GETGLOBAL, GETTABLE_KSTR and GETTABLE_STR jsubs are defined in this function body. */ | ||
1060 | static void jit_gettable_fb(lua_State *L, Table *t, StkId dest) | ||
1061 | { | ||
1062 | Table *mt = t->metatable; /* Used without a NULL check: the jsub below only chains here after testing the metatable for non-NULL. */ | ||
1063 | const TValue *tm = luaH_getstr(mt, G(L)->tmname[TM_INDEX]); | ||
1064 | if (ttisnil(tm)) { /* No __index method? */ | ||
1065 | mt->flags |= 1<<TM_INDEX; /* Cache this fact. */ | ||
1066 | setnilvalue(dest); | ||
1067 | } else if (ttisfunction(tm)) { /* __index function? */ | ||
1068 | ptrdiff_t destr = savestack(L, dest); /* Keep dest as an offset; it is restored after the call. */ | ||
1069 | setobj2s(L, L->top, tm); | ||
1070 | sethvalue(L, L->top+1, t); | ||
1071 | setobj2s(L, L->top+2, &L->env); | ||
1072 | luaD_checkstack(L, 3); /* Runs after the three stores above. NOTE(review): presumably relies on spare slots above L->top -- confirm. */ | ||
1073 | L->top += 3; | ||
1074 | luaD_call(L, L->top - 3, 1); | ||
1075 | dest = restorestack(L, destr); | ||
1076 | L->top--; | ||
1077 | setobjs2s(L, dest, L->top); /* Move the single result into dest. */ | ||
1078 | } else { /* Let luaV_gettable() continue with the __index object. */ | ||
1079 | luaV_gettable(L, tm, &L->env, dest); | ||
1080 | } | ||
1081 | |||
1082 | |//----------------------------------------------------------------------- | ||
1083 | |.jsub GETGLOBAL // Lookup global variable. | ||
1084 | |// Call with: TSTRING:edx (key), BASE (dest) | ||
1085 | | mov CI, L->ci | ||
1086 | | mov TOP, CI->func | ||
1087 | | mov LCL, TOP->value | ||
1088 | | mov TABLE:edi, LCL->env | ||
1089 | | jmp >9 | ||
1090 | |.endjsub | ||
1091 | | | ||
1092 | |//----------------------------------------------------------------------- | ||
1093 | |.jsub GETTABLE_KSTR // Lookup constant string in table. | ||
1094 | |// Call with: TOP (tab), TSTRING:edx (key), BASE (dest) | ||
1095 | | cmp dword TOP->tt, LUA_TTABLE | ||
1096 | | mov TABLE:edi, TOP->value | ||
1097 | | jne ->DEOPTIMIZE_CALLER // Not a table? Deoptimize. | ||
1098 | | | ||
1099 | |// Common entry: TABLE:edi (tab), TSTRING:edx (key), BASE (dest) | ||
1100 | |// Restores BASE, destroys eax, ecx, edx, edi (TOP). | ||
1101 | |9: | ||
1102 | | movzx ecx, byte TABLE:edi->lsizenode // hashstr(t, key). | ||
1103 | | mov eax, 1 | ||
1104 | | shl eax, cl | ||
1105 | | dec eax | ||
1106 | | and eax, TSTRING:edx->tsv.hash | ||
1107 | | Nodemul NODE:eax | ||
1108 | | add NODE:eax, TABLE:edi->node | ||
1109 | | | ||
1110 | |1: // Start of inner loop. Check node key. | ||
1111 | | cmp dword NODE:eax->i_key.nk.tt, LUA_TSTRING | ||
1112 | | jne >2 | ||
1113 | | cmp aword NODE:eax->i_key.nk.value, TSTRING:edx | ||
1114 | | jne >2 | ||
1115 | | // Note: swapping the two checks is faster, but valgrind complains. | ||
1116 | |// Assumes: (int)&(((Node *)0)->i_val) == (int)&(((StkId)0)->value) | ||
1117 | | | ||
1118 | |// Ok, key found. Copy node value to destination (stack) slot. | ||
1119 | | mov ecx, NODE:eax->i_val.tt | ||
1120 | | test ecx, ecx; je >3 // Node has nil value? Then ecx is already zero for the nil store at label 4. | ||
1121 | ||if (J->flags & JIT_F_CPU_SSE2) { | ||
1122 | | movq xmm0, qword NODE:eax->i_val.value | ||
1123 | | movq qword BASE->value, xmm0 | ||
1124 | ||} else { | ||
1125 | | mov edx, NODE:eax->i_val.value | ||
1126 | | mov edi, NODE:eax->i_val.value.na[1] | ||
1127 | | mov BASE->value, edx | ||
1128 | | mov BASE->value.na[1], edi | ||
1129 | ||} | ||
1130 | | mov BASE->tt, ecx | ||
1131 | | mov BASE, L->base | ||
1132 | | ret | ||
1133 | |2: | ||
1134 | | mov NODE:eax, NODE:eax->i_key.nk.next // Get next key in chain. | ||
1135 | | test NODE:eax, NODE:eax | ||
1136 | | jnz <1 // Loop if non-NULL. | ||
1137 | | | ||
1138 | | xor ecx, ecx | ||
1139 | |3: | ||
1140 | | mov TABLE:eax, TABLE:edi->metatable | ||
1141 | | test TABLE:eax, TABLE:eax | ||
1142 | | jz >4 // No metatable? | ||
1143 | | test byte TABLE:eax->flags, 1<<TM_INDEX | ||
1144 | | jz >5 // Or 'no __index' flag set? If the flag is clear, take the C fallback at label 5. | ||
1145 | |4: | ||
1146 | | settt BASE[0], ecx // Yes, set to nil. | ||
1147 | | mov BASE, L->base | ||
1148 | | ret | ||
1149 | | | ||
1150 | |5: // Otherwise chain to C code which eventually calls luaV_gettable. | ||
1151 | | setsvalue L->env, TSTRING:edx // Use L->env as temp key. | ||
1152 | | mov ecx, [esp] | ||
1153 | | sub esp, FRAME_OFFSET | ||
1154 | | mov L->savedpc, ecx | ||
1155 | | call &jit_gettable_fb, L, TABLE:edi, BASE | ||
1156 | | add esp, FRAME_OFFSET | ||
1157 | | mov BASE, L->base | ||
1158 | | ret | ||
1159 | |.endjsub | ||
1160 | | | ||
1161 | |//----------------------------------------------------------------------- | ||
1162 | |.jsub GETTABLE_STR // Lookup string in table. | ||
1163 | |// Call with: TOP (tab), TVALUE:ecx (key), BASE (dest) | ||
1164 | | mov eax, TOP->tt; shl eax, 4; or eax, TVALUE:ecx->tt | ||
1165 | | cmp eax, LUA_TTABLE_STR | ||
1166 | | mov TABLE:edi, TOP->value | ||
1167 | | mov TSTRING:edx, TVALUE:ecx->value | ||
1168 | | je <9 // Types ok? Continue above. | ||
1169 | | jmp ->DEOPTIMIZE_CALLER // Otherwise deoptimize. | ||
1170 | |.endjsub | ||
1171 | } | ||
1172 | |||
1173 | /* Fallback for SETTABLE_*STR. Temporary (string) key is in L->env. Looks up __newindex in the metatable of t and either stores directly, calls it, or hands over to luaV_settable(). The BARRIERBACK, SETGLOBAL, SETTABLE_KSTR and SETTABLE_STR jsubs are defined in this function body. */ | ||
1174 | static void jit_settable_fb(lua_State *L, Table *t, StkId val) | ||
1175 | { | ||
1176 | Table *mt = t->metatable; /* Used without a NULL check: the jsub below only chains here after testing the metatable for non-NULL. */ | ||
1177 | const TValue *tm = luaH_getstr(mt, G(L)->tmname[TM_NEWINDEX]); | ||
1178 | if (ttisnil(tm)) { /* No __newindex method? */ | ||
1179 | mt->flags |= 1<<TM_NEWINDEX; /* Cache this fact. */ | ||
1180 | t->flags = 0; /* But need to clear the cache for the table itself. */ | ||
1181 | setobj2t(L, luaH_setstr(L, t, rawtsvalue(&L->env)), val); /* Plain store under the string key held in L->env. */ | ||
1182 | luaC_barriert(L, t, val); | ||
1183 | } else if (ttisfunction(tm)) { /* __newindex function? */ | ||
1184 | setobj2s(L, L->top, tm); | ||
1185 | sethvalue(L, L->top+1, t); | ||
1186 | setobj2s(L, L->top+2, &L->env); | ||
1187 | setobj2s(L, L->top+3, val); | ||
1188 | luaD_checkstack(L, 4); /* Runs after the four stores above. NOTE(review): presumably relies on spare slots above L->top -- confirm. */ | ||
1189 | L->top += 4; | ||
1190 | luaD_call(L, L->top - 4, 0); /* Called with zero expected results. */ | ||
1191 | } else { /* Let luaV_settable() continue with the __newindex object. */ | ||
1192 | luaV_settable(L, tm, &L->env, val); | ||
1193 | } | ||
1194 | |||
1195 | |//----------------------------------------------------------------------- | ||
1196 | |.jsub BARRIERBACK // luaC_barrierback() with regparms. | ||
1197 | |// Call with: TABLE:edi (table). Destroys ecx, edx. | ||
1198 | | mov GL:ecx, L->l_G | ||
1199 | | and byte TABLE:edi->marked, (~bitmask(BLACKBIT))&0xff | ||
1200 | | mov edx, GL:ecx->grayagain | ||
1201 | | mov GL:ecx->grayagain, TABLE:edi | ||
1202 | | mov TABLE:edi->gclist, edx | ||
1203 | | ret | ||
1204 | |.endjsub | ||
1205 | | | ||
1206 | |//----------------------------------------------------------------------- | ||
1207 | |.jsub SETGLOBAL // Set global variable. | ||
1208 | |// Call with: TSTRING:edx (key), BASE (val) | ||
1209 | | mov CI, L->ci | ||
1210 | | mov TOP, CI->func | ||
1211 | | mov LCL, TOP->value | ||
1212 | | mov TABLE:edi, LCL->env | ||
1213 | | jmp >9 | ||
1214 | |.endjsub | ||
1215 | | | ||
1216 | |//----------------------------------------------------------------------- | ||
1217 | |.jsub SETTABLE_KSTR // Set constant string entry in table. | ||
1218 | |// Call with: TOP (tab), TSTRING:edx (key), BASE (val) | ||
1219 | | cmp dword TOP->tt, LUA_TTABLE | ||
1220 | | mov TABLE:edi, TOP->value | ||
1221 | | jne ->DEOPTIMIZE_CALLER // Not a table? Deoptimize. | ||
1222 | | | ||
1223 | |// Common entry: TABLE:edi (tab), TSTRING:edx (key), BASE (val) | ||
1224 | |// Restores BASE, destroys eax, ecx, edx, edi (TOP). | ||
1225 | |9: | ||
1226 | | movzx ecx, byte TABLE:edi->lsizenode // hashstr(t, key). | ||
1227 | | mov eax, 1 | ||
1228 | | shl eax, cl | ||
1229 | | dec eax | ||
1230 | | and eax, TSTRING:edx->tsv.hash | ||
1231 | | Nodemul NODE:eax | ||
1232 | | add NODE:eax, TABLE:edi->node | ||
1233 | | | ||
1234 | |1: // Start of inner loop. Check node key. | ||
1235 | | cmp dword NODE:eax->i_key.nk.tt, LUA_TSTRING | ||
1236 | | jne >4 | ||
1237 | | cmp aword NODE:eax->i_key.nk.value, TSTRING:edx | ||
1238 | | jne >4 | ||
1239 | | // Note: swapping the two checks is faster, but valgrind complains. | ||
1240 | | | ||
1241 | |// Ok, key found. Copy new value to node value. | ||
1242 | | cmp dword NODE:eax->i_val.tt, LUA_TNIL // Previous value is nil? | ||
1243 | | je >6 | ||
1244 | | // Assumes: (int)&(((Node *)0)->i_val) == (int)&(((StkId)0)->value) | ||
1245 | |2: | ||
1246 | | mov byte TABLE:edi->flags, 0 // Clear metamethod cache. | ||
1247 | |3: // Target for SETTABLE_NUM below. | ||
1248 | | test byte TABLE:edi->marked, bitmask(BLACKBIT) // isblack(table) | ||
1249 | | jnz >8 // Unlikely, but set barrier back. | ||
1250 | |7: // Caveat: recycled label. A second label 7 further down is the C fallback; this one is the store. | ||
1251 | | copyslot TVALUE:eax[0], BASE[0], ecx, edx, TOP | ||
1252 | | mov BASE, L->base | ||
1253 | | ret | ||
1254 | | | ||
1255 | |8: // Avoid valiswhite() check -- black2gray(table) is ok. | ||
1256 | | call ->BARRIERBACK | ||
1257 | | jmp <7 | ||
1258 | | | ||
1259 | |4: | ||
1260 | | mov NODE:eax, NODE:eax->i_key.nk.next // Get next key in chain. | ||
1261 | | test NODE:eax, NODE:eax | ||
1262 | | jnz <1 // Loop if non-NULL. | ||
1263 | | | ||
1264 | |// Key not found. Add a new one, but check metatable first. | ||
1265 | | mov TABLE:ecx, TABLE:edi->metatable | ||
1266 | | test TABLE:ecx, TABLE:ecx | ||
1267 | | jz >5 // No metatable? | ||
1268 | | test byte TABLE:ecx->flags, 1<<TM_NEWINDEX | ||
1269 | | jz >7 // Or 'no __newindex' flag set? If the flag is clear, take the C fallback at the later label 7. | ||
1270 | | | ||
1271 | |5: // Add new key. | ||
1272 | | // No need for setting L->savedpc since only LUA_ERRMEM may be thrown. | ||
1273 | | lea TVALUE:eax, L->env | ||
1274 | | setsvalue TVALUE:eax[0], TSTRING:edx | ||
1275 | | sub esp, FRAME_OFFSET | ||
1276 | | call &luaH_newkey, L, TABLE:edi, TVALUE:eax | ||
1277 | | add esp, FRAME_OFFSET | ||
1278 | | jmp <2 // Copy to the returned value. See Node/TValue assumption above. | ||
1279 | | | ||
1280 | |6: // Key found, but previous value is nil. | ||
1281 | | mov TABLE:ecx, TABLE:edi->metatable | ||
1282 | | test TABLE:ecx, TABLE:ecx | ||
1283 | | jz <2 // No metatable? | ||
1284 | | test byte TABLE:ecx->flags, 1<<TM_NEWINDEX | ||
1285 | | jnz <2 // Or 'no __newindex' flag set? | ||
1286 | | | ||
1287 | |7: // Otherwise chain to C code which eventually calls luaV_settable. | ||
1288 | | setsvalue L->env, TSTRING:edx // Use L->env as temp key. | ||
1289 | | mov ecx, [esp] | ||
1290 | | sub esp, FRAME_OFFSET | ||
1291 | | mov L->savedpc, ecx | ||
1292 | | call &jit_settable_fb, L, TABLE:edi, BASE | ||
1293 | | add esp, FRAME_OFFSET | ||
1294 | | mov BASE, L->base | ||
1295 | | ret | ||
1296 | |.endjsub | ||
1297 | | | ||
1298 | |//----------------------------------------------------------------------- | ||
1299 | |.jsub SETTABLE_STR // Set string entry in table. | ||
1300 | |// Call with: TOP (tab), TVALUE:ecx (key), BASE (val) | ||
1301 | | mov eax, TOP->tt; shl eax, 4; or eax, TVALUE:ecx->tt | ||
1302 | | cmp eax, LUA_TTABLE_STR | ||
1303 | | mov TABLE:edi, TOP->value | ||
1304 | | mov TSTRING:edx, TVALUE:ecx->value | ||
1305 | | je <9 // Types ok? Continue above. | ||
1306 | | jmp ->DEOPTIMIZE_CALLER // Otherwise deoptimize. | ||
1307 | |.endjsub | ||
1308 | } | ||
1309 | |||
1310 | /* ------------------------------------------------------------------------ */ | ||
1311 | |||
1312 | static void jit_op_newtable(jit_State *J, int dest, int lnarray, int lnhash) | ||
1313 | { /* Emit code to create a table via luaH_new() and store it in slot dest. Both size arguments are decoded with luaO_fb2int(). */ | ||
1314 | | call &luaH_new, L, luaO_fb2int(lnarray), luaO_fb2int(lnhash) | ||
1315 | | sethvalue BASE[dest], eax | ||
1316 | jit_checkGC(J); /* Called after the new table has been stored in its slot. */ | ||
1317 | } | ||
1318 | |||
1319 | static void jit_op_getglobal(jit_State *J, int dest, int kidx) | ||
1320 | { /* Emit code to look up the global named by string constant kidx and store the result in slot dest. */ | ||
1321 | const TValue *kk = &J->pt->k[kidx]; | ||
1322 | jit_assert(ttisstring(kk)); /* The key constant must be a string. */ | ||
1323 | | mov TSTRING:edx, &&kk->value.gc->ts | ||
1324 | | addidx BASE, dest | ||
1325 | | call ->GETGLOBAL | ||
1326 | } | ||
1327 | |||
1328 | static void jit_op_setglobal(jit_State *J, int rval, int kidx) | ||
1329 | { /* Emit code to store slot rval into the global named by string constant kidx. */ | ||
1330 | const TValue *kk = &J->pt->k[kidx]; | ||
1331 | jit_assert(ttisstring(kk)); /* The key constant must be a string. */ | ||
1332 | | mov TSTRING:edx, &&kk->value.gc->ts | ||
1333 | | addidx BASE, rval | ||
1334 | | call ->SETGLOBAL | ||
1335 | } | ||
1336 | |||
1337 | enum { TKEY_KSTR = -2, TKEY_STR = -1, TKEY_ANY = 0 }; /* Non-positive results of jit_keylookup(). Positive results denote array keys. */ | ||
1338 | |||
1339 | /* Optimize key lookup depending on consts or hints type. Returns TKEY_KSTR or TKEY_STR for string keys, TKEY_ANY when the caller must use the generic fallback, the constant k (>= 1) for a constant array key, or 1 for a variable array key. Code is only emitted for the non-TKEY_ANY results. */ | ||
1340 | static int jit_keylookup(jit_State *J, int tab, int rkey) | ||
1341 | { | ||
1342 | const TValue *tabt = hint_get(J, TYPE); | ||
1343 | const TValue *key; /* The constant itself for a constant key, else the TYPEKEY hint. */ | ||
1344 | if (!ttistable(tabt)) return TKEY_ANY; /* Not a table? Use fallback. */ | ||
1345 | key = ISK(rkey) ? &J->pt->k[INDEXK(rkey)] : hint_get(J, TYPEKEY); | ||
1346 | if (ttisstring(key)) { /* String key? */ | ||
1347 | if (ISK(rkey)) { | ||
1348 | | lea TOP, BASE[tab] | ||
1349 | | mov TSTRING:edx, &&key->value.gc->ts | ||
1350 | return TKEY_KSTR; /* Const string key. */ | ||
1351 | } else { | ||
1352 | | lea TOP, BASE[tab] | ||
1353 | | lea TVALUE:ecx, BASE[rkey] | ||
1354 | return TKEY_STR; /* Var string key. */ | ||
1355 | } | ||
1356 | } else if (ttisnumber(key)) { /* Number key? */ | ||
1357 | lua_Number n = nvalue(key); | ||
1358 | int k; | ||
1359 | lua_number2int(k, n); | ||
1360 | if (!(k >= 1 && k < (1 << 26) && (lua_Number)k == n)) /* Must be an exact integer in [1, 2^26). */ | ||
1361 | return TKEY_ANY; /* Not a proper array key? Use fallback. */ | ||
1362 | if (ISK(rkey)) { | ||
1363 | | istable tab | ||
1364 | | mov TABLE:edi, BASE[tab].value | ||
1365 | | jne >9 // TYPE hint was wrong? | ||
1366 | | mov ecx, k // Needed for hash fallback. | ||
1367 | | mov TVALUE:eax, TABLE:edi->array | ||
1368 | | cmp ecx, TABLE:edi->sizearray; ja >5 // Not in array part? k is 1-based here, so k == sizearray is still in range. | ||
1369 | return k; /* Const array key (>= 1). */ | ||
1370 | } else { | ||
1371 | | mov eax, BASE[tab].tt; shl eax, 4; or eax, BASE[rkey].tt | ||
1372 | | cmp eax, LUA_TTABLE_NUM; jne >9 // TYPE/TYPEKEY hint was wrong? | ||
1373 | if (J->flags & JIT_F_CPU_SSE2) { | ||
1374 | | movsd xmm0, qword BASE[rkey] | ||
1375 | | cvttsd2si eax, xmm0 | ||
1376 | | cvtsi2sd xmm1, eax | ||
1377 | | dec eax | ||
1378 | | ucomisd xmm1, xmm0 | ||
1379 | | mov TABLE:edi, BASE[tab].value | ||
1380 | | jne >9; jp >9 // Not an integer? Deoptimize. | ||
1381 | } else { | ||
1382 | |// Annoying x87 stuff: check whether a number is an integer. | ||
1383 | |// The latency of fist/fild is the real problem here. | ||
1384 | | fld qword BASE[rkey].value | ||
1385 | | fist dword TMP1 | ||
1386 | | fild dword TMP1 | ||
1387 | | fcomparepp // eax may be modified. | ||
1388 | | jne >9; jp >9 // Not an integer? Deoptimize. | ||
1389 | | mov eax, TMP1 | ||
1390 | | mov TABLE:edi, BASE[tab].value | ||
1391 | | dec eax | ||
1392 | } | ||
1393 | | cmp eax, TABLE:edi->sizearray; jae >5 // Not in array part? eax holds key-1 here, hence jae rather than ja. | ||
1394 | | TValuemul eax | ||
1395 | | add eax, TABLE:edi->array | ||
1396 | return 1; /* Variable array key. eax already addresses the slot, so the caller's TVALUE:eax[k-1] is eax[0]. */ | ||
1397 | } | ||
1398 | } | ||
1399 | return TKEY_ANY; /* Use fallback. */ | ||
1400 | } | ||
1401 | |||
1402 | static void jit_op_gettable(jit_State *J, int dest, int tab, int rkey) | ||
1403 | { /* Emit code for dest = tab[rkey], specialized by the result of jit_keylookup(). Also defines the GETTABLE_KNUM and GETTABLE_NUM jsubs. */ | ||
1404 | int k = jit_keylookup(J, tab, rkey); /* TKEY_* selector, or a positive value for array keys. */ | ||
1405 | switch (k) { | ||
1406 | case TKEY_KSTR: /* Const string key. */ | ||
1407 | | addidx BASE, dest | ||
1408 | | call ->GETTABLE_KSTR | ||
1409 | break; | ||
1410 | case TKEY_STR: /* Variable string key. */ | ||
1411 | | addidx BASE, dest | ||
1412 | | call ->GETTABLE_STR | ||
1413 | break; | ||
1414 | case TKEY_ANY: /* Generic gettable fallback. */ | ||
1415 | if (ISK(rkey)) { /* Key address: constant table entry or stack slot. */ | ||
1416 | | mov ecx, &&J->pt->k[INDEXK(rkey)] | ||
1417 | } else { | ||
1418 | | lea ecx, BASE[rkey] | ||
1419 | } | ||
1420 | | lea edx, BASE[tab] | ||
1421 | | addidx BASE, dest | ||
1422 | | mov L->savedpc, &J->nextins | ||
1423 | | call &luaV_gettable, L, edx, ecx, BASE | ||
1424 | | mov BASE, L->base | ||
1425 | break; | ||
1426 | default: /* Array key. jit_keylookup() has already emitted the array bounds check. */ | ||
1427 | |// This is really copyslot BASE[dest], TVALUE:eax[k-1] mixed with compare. | ||
1428 | |1: | ||
1429 | | mov edx, TVALUE:eax[k-1].tt | ||
1430 | | test edx, edx; je >6 // Array has nil value? | ||
1431 | if (J->flags & JIT_F_CPU_SSE2) { /* Copy the 8 byte value with one SSE2 load/store pair. */ | ||
1432 | | movq xmm0, qword TVALUE:eax[k-1].value | ||
1433 | | movq qword BASE[dest].value, xmm0 | ||
1434 | } else { /* Copy the value as two 32 bit halves. */ | ||
1435 | | mov ecx, TVALUE:eax[k-1].value | ||
1436 | | mov eax, TVALUE:eax[k-1].value.na[1] | ||
1437 | | mov BASE[dest].value, ecx | ||
1438 | | mov BASE[dest].value.na[1], eax | ||
1439 | } | ||
1440 | |2: | ||
1441 | | mov BASE[dest].tt, edx | ||
1442 | |.tail | ||
1443 | |5: // Fallback to hash part. TABLE:edi is callee-saved. | ||
1444 | if (ISK(rkey)) { | ||
1445 | | call ->GETTABLE_KNUM | ||
1446 | } else { | ||
1447 | | call ->GETTABLE_NUM | ||
1448 | } | ||
1449 | | jmp <1 // Slot is at TVALUE:eax[k-1]. | ||
1450 | | | ||
1451 | |6: // Shortcut for tables without an __index metamethod. | ||
1452 | | mov TABLE:ecx, TABLE:edi->metatable | ||
1453 | | test TABLE:ecx, TABLE:ecx | ||
1454 | | jz <2 // No metatable? | ||
1455 | | test byte TABLE:ecx->flags, 1<<TM_INDEX | ||
1456 | | jnz <2 // Or 'no __index' flag set? | ||
1457 | | | ||
1458 | |9: // Otherwise deoptimize. | ||
1459 | | mov edx, &J->nextins | ||
1460 | | jmp ->DEOPTIMIZE | ||
1461 | |.code | ||
1462 | break; | ||
1463 | } | ||
1464 | |||
1465 | |.jsub GETTABLE_KNUM // Gettable fallback for const numeric keys. | ||
1466 | | mov TMP2, ecx // Save k. | ||
1467 | | sub esp, FRAME_OFFSET | ||
1468 | | call &luaH_getnum, TABLE:edi, ecx | ||
1469 | | add esp, FRAME_OFFSET | ||
1470 | | mov ecx, TMP2 // Restore k. | ||
1471 | | TValuemul ecx | ||
1472 | | sub TVALUE:eax, ecx // Compensate for TVALUE:eax[k-1]. | ||
1473 | | add TVALUE:eax, #TVALUE | ||
1474 | | ret | ||
1475 | |.endjsub | ||
1476 | | | ||
1477 | |.jsub GETTABLE_NUM // Gettable fallback for variable numeric keys. | ||
1478 | | inc eax | ||
1479 | | mov ARG2, TABLE:edi // Really ARG1 and ARG2. | ||
1480 | | mov ARG3, eax | ||
1481 | | jmp &luaH_getnum // Chain to C code. | ||
1482 | |.endjsub | ||
1483 | } | ||
1484 | |||
1485 | static void jit_op_settable(jit_State *J, int tab, int rkey, int rval) | ||
1486 | { /* Emit code for tab[rkey] = rval, specialized by the result of jit_keylookup(). Also defines the SETTABLE_KNUM and SETTABLE_NUM jsubs. */ | ||
1487 | const TValue *val = ISK(rval) ? &J->pt->k[INDEXK(rval)] : NULL; /* The constant value, or NULL when rval is a stack slot. */ | ||
1488 | int k = jit_keylookup(J, tab, rkey); /* TKEY_* selector, or a positive value for array keys. */ | ||
1489 | switch (k) { | ||
1490 | case TKEY_KSTR: /* Const string key. */ | ||
1491 | case TKEY_STR: /* Variable string key. */ | ||
1492 | if (ISK(rval)) { /* BASE is pointed at the value: a constant or a stack slot. */ | ||
1493 | | mov BASE, &val | ||
1494 | } else { | ||
1495 | | addidx BASE, rval | ||
1496 | } | ||
1497 | if (k == TKEY_KSTR) { | ||
1498 | | call ->SETTABLE_KSTR | ||
1499 | } else { | ||
1500 | | call ->SETTABLE_STR | ||
1501 | } | ||
1502 | break; | ||
1503 | case TKEY_ANY: /* Generic settable fallback. */ | ||
1504 | if (ISK(rkey)) { /* Key address: constant table entry or stack slot. */ | ||
1505 | | mov ecx, &&J->pt->k[INDEXK(rkey)] | ||
1506 | } else { | ||
1507 | | lea ecx, BASE[rkey] | ||
1508 | } | ||
1509 | if (ISK(rval)) { /* Value address: constant table entry or stack slot. */ | ||
1510 | | mov edx, &val | ||
1511 | } else { | ||
1512 | | lea edx, BASE[rval] | ||
1513 | } | ||
1514 | | addidx BASE, tab | ||
1515 | | mov L->savedpc, &J->nextins | ||
1516 | | call &luaV_settable, L, BASE, ecx, edx | ||
1517 | | mov BASE, L->base | ||
1518 | break; | ||
1519 | default: /* Array key. jit_keylookup() has already emitted the array bounds check. */ | ||
1520 | |1: | ||
1521 | | tvisnil TVALUE:eax[k-1]; je >6 // Previous value is nil? | ||
1522 | |2: | ||
1523 | |.tail | ||
1524 | |5: // Fallback to hash part. TABLE:edi is callee-saved. | ||
1525 | if (ISK(rkey)) { | ||
1526 | | call ->SETTABLE_KNUM | ||
1527 | } else { | ||
1528 | | call ->SETTABLE_NUM | ||
1529 | } | ||
1530 | | jmp <1 // Slot is at TVALUE:eax[k-1]. | ||
1531 | | | ||
1532 | |6: // Shortcut for tables without a __newindex metamethod. | ||
1533 | | mov TABLE:ecx, TABLE:edi->metatable | ||
1534 | | test TABLE:ecx, TABLE:ecx | ||
1535 | | jz <2 // No metatable? | ||
1536 | | test byte TABLE:ecx->flags, 1<<TM_NEWINDEX | ||
1537 | | jnz <2 // Or 'no __newindex' flag set? | ||
1538 | | | ||
1539 | |9: // Otherwise deoptimize. | ||
1540 | | mov edx, &J->nextins | ||
1541 | | jmp ->DEOPTIMIZE | ||
1542 | |.code | ||
1543 | if (!ISK(rval) || iscollectable(val)) { /* The barrier check is only emitted for stack values or collectable constants. */ | ||
1544 | | test byte TABLE:edi->marked, bitmask(BLACKBIT) // isblack(table) | ||
1545 | | jnz >7 // Unlikely, but set barrier back. | ||
1546 | |3: | ||
1547 | |.tail | ||
1548 | |7: // Avoid valiswhite() check -- black2gray(table) is ok. | ||
1549 | | call ->BARRIERBACK | ||
1550 | | jmp <3 | ||
1551 | |.code | ||
1552 | } | ||
1553 | if (ISK(rval)) { /* Finally store the value into the array slot. */ | ||
1554 | | copyconst TVALUE:eax[k-1], val | ||
1555 | } else { | ||
1556 | | copyslot TVALUE:eax[k-1], BASE[rval], ecx, edx, TOP | ||
1557 | } | ||
1558 | break; | ||
1559 | } | ||
1560 | |||
1561 | |.jsub SETTABLE_KNUM // Settable fallback for const numeric keys. | ||
1562 | | mov TMP2, ecx // Save k. | ||
1563 | | sub esp, FRAME_OFFSET | ||
1564 | | call &luaH_setnum, L, TABLE:edi, ecx | ||
1565 | | add esp, FRAME_OFFSET | ||
1566 | | mov ecx, TMP2 // Restore k. | ||
1567 | | TValuemul ecx | ||
1568 | | sub TVALUE:eax, ecx // Compensate for TVALUE:eax[k-1]. | ||
1569 | | add TVALUE:eax, #TVALUE | ||
1570 | | ret | ||
1571 | |.endjsub | ||
1572 | | | ||
1573 | |.jsub SETTABLE_NUM // Settable fallback for variable numeric keys. | ||
1574 | | inc eax | ||
1575 | | mov ARG2, L // Really ARG1, ARG2 and ARG3. | ||
1576 | | mov ARG3, TABLE:edi | ||
1577 | | mov ARG4, eax | ||
1578 | | jmp &luaH_setnum // Chain to C code. | ||
1579 | |.endjsub | ||
1580 | } | ||
1581 | |||
1582 | static void jit_op_self(jit_State *J, int dest, int tab, int rkey) | ||
1583 | { /* Emit code that copies slot tab to slot dest+1, then performs dest = tab[rkey] via jit_op_gettable(). */ | ||
1584 | | copyslot BASE[dest+1], BASE[tab] | ||
1585 | jit_op_gettable(J, dest, tab, rkey); | ||
1586 | } | ||
1587 | |||
1588 | /* ------------------------------------------------------------------------ */ | ||
1589 | |||
1590 | static void jit_op_setlist(jit_State *J, int ra, int num, int batch) | ||
1591 | { /* Emit code to copy the stack slots following ra into the array part of the table in slot ra. num == 0 means the count is derived from TOP at runtime. */ | ||
1592 | if (batch == 0) { batch = (int)(*J->nextins); J->combine++; } /* batch == 0: the batch number is read from the next instruction word, which is then combined with this one. */ | ||
1593 | batch = (batch-1)*LFIELDS_PER_FLUSH; /* From here on batch is the array index offset of this batch. */ | ||
1594 | if (num == 0) { /* Previous op was open and set TOP: {f()} or {...}. */ | ||
1595 | | mov L->env.value, TOP // Need to save TOP (edi). | ||
1596 | | lea eax, BASE[ra+1] | ||
1597 | | sub eax, TOP | ||
1598 | | neg eax | ||
1599 | | TValuediv eax // num = (TOP-ra-1)/sizeof(TValue). | ||
1600 | | mov TABLE:edi, BASE[ra].value | ||
1601 | | jz >4 // Nothing to set? | ||
1602 | if (batch > 0) { | ||
1603 | | add eax, batch | ||
1604 | } | ||
1605 | | cmp dword TABLE:edi->sizearray, eax | ||
1606 | | jae >1 // Skip resize if not needed. | ||
1607 | | // A resize is likely, so inline it. | ||
1608 | | call &luaH_resizearray, L, TABLE:edi, eax | ||
1609 | |1: | ||
1610 | | test byte TABLE:edi->marked, bitmask(BLACKBIT) // isblack(table) | ||
1611 | | mov edx, TABLE:edi->array | ||
1612 | | jnz >6 // Unlikely, but set barrier back. | ||
1613 | | mov TOP, L->env.value | ||
1614 | | | ||
1615 | |.tail | ||
1616 | |6: // Avoid lots of valiswhite() checks -- black2gray(table) is ok. | ||
1617 | | call ->BARRIERBACK | ||
1618 | | jmp <1 // Need to reload edx. | ||
1619 | |.code | ||
1620 | } else { /* Set fixed number of args. */ | ||
1621 | | mov TABLE:edi, BASE[ra].value // edi is callee-save. | ||
1622 | | cmp dword TABLE:edi->sizearray, batch+num | ||
1623 | | jb >5 // Need to resize array? | ||
1624 | |1: | ||
1625 | | test byte TABLE:edi->marked, bitmask(BLACKBIT) // isblack(table) | ||
1626 | | mov edx, TABLE:edi->array | ||
1627 | | jnz >6 // Unlikely, but set barrier back. | ||
1628 | | lea TOP, BASE[ra+1+num] // Careful: TOP is edi. | ||
1629 | | | ||
1630 | |.tail | ||
1631 | |5: // A resize is unlikely (impossible?). NEWTABLE should've done it. | ||
1632 | | call &luaH_resizearray, L, TABLE:edi, batch+num | ||
1633 | | jmp <1 | ||
1634 | |6: // Avoid lots of valiswhite() checks -- black2gray(table) is ok. | ||
1635 | | call ->BARRIERBACK | ||
1636 | | jmp <1 // Need to reload edx. | ||
1637 | |.code | ||
1638 | } | ||
1639 | if (batch > 0) { /* Later batches start further into the array. */ | ||
1640 | | add edx, batch*#TVALUE // edx = &t->array[(batch+1)-1] | ||
1641 | } | ||
1642 | | lea ecx, BASE[ra+1] | ||
1643 | |3: // Copy stack slots to array. | ||
1644 | | mov eax, [ecx] | ||
1645 | | add ecx, aword*1 | ||
1646 | | mov [edx], eax | ||
1647 | | add edx, aword*1 | ||
1648 | | cmp ecx, TOP | ||
1649 | | jb <3 | ||
1650 | | | ||
1651 | |4: | ||
1652 | if (num == 0) { /* Previous op was open. Restore L->top. */ | ||
1653 | | lea TOP, BASE[J->pt->maxstacksize] // Faster than getting L->ci->top. | ||
1654 | | mov L->top, TOP | ||
1655 | } | ||
1656 | } | ||
1657 | |||
1658 | /* ------------------------------------------------------------------------ */ | ||
1659 | |||
/*
** Emit code for an arithmetic operation or an ordered comparison.
**   dest     - destination slot for arithmetic results; for TM_LT/TM_LE it
**              is really the condition flag (0 or 1).
**   rkb, rkc - operands, each either a stack slot or a constant (ISK).
**   ev       - TM_* event selecting the operation.
** Strategy: emit inline x87 code guarded by number type checks. Depending
** on the TYPE hint the guard either deoptimizes or branches to a generic
** fallback placed in the .tail section. The fallback calls luaV_arith,
** luaV_lessthan or luaV_lessequal. Cases that are not inlined at all emit
** only the fallback.
*/
1660 | static void jit_op_arith(jit_State *J, int dest, int rkb, int rkc, int ev) | ||
1661 | { | ||
/* kkb/kkc point to the constant operands, or are NULL for stack slots. */
1662 | const TValue *kkb = ISK(rkb) ? &J->pt->k[INDEXK(rkb)] : NULL; | ||
1663 | const TValue *kkc = ISK(rkc) ? &J->pt->k[INDEXK(rkc)] : NULL; | ||
1664 | const Value *kval; | ||
1665 | int idx, rev; | ||
/* Only comparisons branch; target stays 0 for plain arithmetic. */
1666 | int target = (ev == TM_LT || ev == TM_LE) ? jit_jmp_target(J) : 0; | ||
/* hastail != 0: inline code is followed by an out-of-line fallback. */
1667 | int hastail = 0; | ||
1668 | |||
1669 | /* The bytecode compiler already folds constants except for: k/0, k%0, */ | ||
1670 | /* NaN results, k1<k2, k1<=k2. No point in optimizing these cases. */ | ||
1671 | if (ISK(rkb&rkc)) goto fallback; | ||
1672 | |||
1673 | /* Avoid optimization when non-numeric constants are present. */ | ||
1674 | if (kkb ? !ttisnumber(kkb) : (kkc && !ttisnumber(kkc))) goto fallback; | ||
1675 | |||
1676 | /* The TYPE hint selects numeric inlining and/or fallback encoding. */ | ||
1677 | switch (ttype(hint_get(J, TYPE))) { | ||
1678 | case LUA_TNIL: hastail = 1; break; /* No hint: numeric + fallback. */ | ||
1679 | case LUA_TNUMBER: break; /* Numbers: numeric + deoptimization. */ | ||
1680 | default: goto fallback; /* Mixed/other types: fallback only. */ | ||
1681 | } | ||
1682 | |||
1683 | /* The checks above ensure: at most one of the operands is a constant. */ | ||
1684 | /* Reverse operation and swap operands so the 2nd operand is a variable. */ | ||
1685 | if (kkc) { kval = &kkc->value; idx = rkb; rev = 1; } | ||
1686 | else { kval = kkb ? &kkb->value : NULL; idx = rkc; rev = 0; } | ||
1687 | |||
1688 | /* Special handling for some operators. */ | ||
1689 | switch (ev) { | ||
1690 | case TM_MOD: | ||
1691 | /* Check for modulo with positive numbers, so we can use fprem. */ | ||
1692 | if (kval) { | ||
1693 | if (kval->na[1] < 0) { hastail = 0; goto fallback; } /* x%-k, -k%x */ | ||
1694 | | isnumber idx | ||
1695 | | mov eax, BASE[idx].value.na[1] | ||
1696 | | jne L_DEOPTIMIZEF | ||
1697 | | test eax, eax; js L_DEOPTIMIZEF | ||
1698 | |// This will trigger deoptimization in some benchmarks (pidigits). | ||
1699 | |// But it's still a win. | ||
/* Load order: divisor first, dividend last (dividend ends up in st0). */
1700 | if (kkb) { | ||
1701 | | fld qword BASE[rkc].value | ||
1702 | | fld qword [kval] | ||
1703 | } else { | ||
1704 | | fld qword [kval] | ||
1705 | | fld qword BASE[rkb].value | ||
1706 | } | ||
1707 | } else { | ||
1708 | | isnumber2 rkb, rkc | ||
1709 | | mov eax, BASE[rkb].value.na[1] | ||
1710 | | jne L_DEOPTIMIZEF | ||
1711 | | or eax, BASE[rkc].value.na[1]; js L_DEOPTIMIZEF | ||
1712 | | fld qword BASE[rkc].value | ||
1713 | | fld qword BASE[rkb].value | ||
1714 | } | ||
/* Repeat fprem until the parity flag (taken from the FPU status) clears. */
1715 | |1: ; fprem; fnstsw ax; sahf; jp <1 | ||
1716 | | fstp st1 | ||
1717 | goto fpstore; | ||
1718 | case TM_POW: | ||
1719 | if (hastail || !kval) break; /* Avoid this if not optimizing. */ | ||
1720 | if (rev) { /* x^k for k > 0, k integer. */ | ||
1721 | lua_Number n = kval->n; | ||
1722 | int k; | ||
1723 | lua_number2int(k, n); | ||
1724 | /* All positive integers would work. But need to limit code explosion. */ | ||
1725 | if (k > 0 && k <= 65536 && (lua_Number)k == n) { | ||
1726 | | isnumber idx; jne L_DEOPTIMIZEF | ||
1727 | | fld qword BASE[idx] | ||
/* Square once per trailing zero bit of k, shifting the bit out. */
1728 | for (; (k & 1) == 0; k >>= 1) { /* Handle leading zeroes (2^k). */ | ||
1729 | | fmul st0 | ||
1730 | } | ||
/* Remaining bits: st0 is the running square, st1 accumulates the result. */
1731 | if ((k >>= 1) != 0) { /* Handle trailing bits. */ | ||
1732 | | fld st0 | ||
1733 | | fmul st0 | ||
1734 | for (; k != 1; k >>= 1) { | ||
1735 | if (k & 1) { | ||
1736 | | fmul st1, st0 | ||
1737 | } | ||
1738 | | fmul st0 | ||
1739 | } | ||
1740 | | fmulp st1 | ||
1741 | } | ||
1742 | goto fpstore; | ||
1743 | } | ||
1744 | } else if (kval->n > (lua_Number)0) { /* k^x for k > 0. */ | ||
1745 | int log2kval[3]; /* Enough storage for a tword (80 bits). */ | ||
1746 | log2kval[2] = 0; /* Avoid leaking garbage. */ | ||
1747 | /* Double precision log2(k) doesn't cut it (3^x != 3 for x = 1). */ | ||
/* This calls the LOG2_TWORD jsub from C while generating code; */
/* the three result words are then used as operands of the movs below. */
1748 | ((void (*)(int *, double))J->jsub[JSUB_LOG2_TWORD])(log2kval, kval->n); | ||
1749 | | mov ARG1, log2kval[0] // Abuse stack for tword const. | ||
1750 | | mov ARG2, log2kval[1] | ||
1751 | | mov ARG3, log2kval[2] // TODO: store2load fwd stall. | ||
1752 | | isnumber idx; jne L_DEOPTIMIZEF | ||
1753 | | fld tword [esp] | ||
1754 | | fmul qword BASE[idx].value // log2(k)*x | ||
1755 | | fld st0; frndint; fsub st1, st0; fxch // Split into fract/int part. | ||
1756 | | f2xm1; fld1; faddp st1; fscale // (2^fract-1 +1) << int. | ||
1757 | | fstp st1 | ||
1758 | |||
1759 | |.jsub LOG2_TWORD // Calculate log2(k) with max. precision. | ||
1760 | |// Called with (int *ptr, double k). | ||
1761 | | fld1; fld FPARG2 // Offset ok due to retaddr. | ||
1762 | | fyl2x | ||
1763 | | mov eax, ARG2 // Really ARG1. | ||
1764 | | fstp tword [eax] | ||
1765 | | ret | ||
1766 | |.endjsub | ||
1767 | goto fpstore; | ||
1768 | } | ||
1769 | break; | ||
1770 | } | ||
1771 | |||
1772 | /* Check number type and load 1st operand. */ | ||
1773 | if (kval) { | ||
1774 | | isnumber idx; jne L_DEOPTIMIZEF | ||
1775 | | loadnvaluek kval | ||
1776 | } else { | ||
1777 | if (rkb == rkc) { | ||
1778 | | isnumber rkb | ||
1779 | } else { | ||
1780 | | isnumber2 rkb, rkc | ||
1781 | } | ||
1782 | | jne L_DEOPTIMIZEF | ||
1783 | | fld qword BASE[rkb].value | ||
1784 | } | ||
1785 | |||
1786 | /* Encode arithmetic operation with 2nd operand. */ | ||
/* The low bit of the selector is rev: set when the constant is operand c. */
1787 | switch ((ev<<1)+rev) { | ||
1788 | case TM_ADD<<1: case (TM_ADD<<1)+1: | ||
1789 | if (rkb == rkc) { | ||
1790 | | fadd st0 | ||
1791 | } else { | ||
1792 | | fadd qword BASE[idx].value | ||
1793 | } | ||
1794 | break; | ||
1795 | case TM_SUB<<1: | ||
1796 | | fsub qword BASE[idx].value | ||
1797 | break; | ||
1798 | case (TM_SUB<<1)+1: | ||
1799 | | fsubr qword BASE[idx].value | ||
1800 | break; | ||
1801 | case TM_MUL<<1: case (TM_MUL<<1)+1: | ||
1802 | if (rkb == rkc) { | ||
1803 | | fmul st0 | ||
1804 | } else { | ||
1805 | | fmul qword BASE[idx].value | ||
1806 | } | ||
1807 | break; | ||
1808 | case TM_DIV<<1: | ||
1809 | | fdiv qword BASE[idx].value | ||
1810 | break; | ||
1811 | case (TM_DIV<<1)+1: | ||
1812 | | fdivr qword BASE[idx].value | ||
1813 | break; | ||
/* Generic pow: pass both operands as FP arguments to the C pow(). */
1814 | case TM_POW<<1: | ||
1815 | | sub esp, S2LFRAME_OFFSET | ||
1816 | | fstp FPARG1 | ||
1817 | | fld qword BASE[idx].value | ||
1818 | | fstp FPARG2 | ||
1819 | | call &pow | ||
1820 | | add esp, S2LFRAME_OFFSET | ||
1821 | break; | ||
1822 | case (TM_POW<<1)+1: | ||
1823 | | sub esp, S2LFRAME_OFFSET | ||
1824 | | fstp FPARG2 | ||
1825 | | fld qword BASE[idx].value | ||
1826 | | fstp FPARG1 | ||
1827 | | call &pow | ||
1828 | | add esp, S2LFRAME_OFFSET | ||
1829 | break; | ||
1830 | case TM_UNM<<1: case (TM_UNM<<1)+1: | ||
1831 | | fchs // No 2nd operand. | ||
1832 | break; | ||
1833 | default: /* TM_LT or TM_LE. */ | ||
1834 | | fld qword BASE[idx].value | ||
1835 | | fcomparepp | ||
1836 | | jp =>dest?(J->nextpc+1):target // Unordered means false. | ||
1837 | jit_assert(dest == 0 || dest == 1); /* Really cond. */ | ||
/* Selector bit 1 picks the below (0) or above (1) branch family, */
/* bit 0 picks the strict (0) or the or-equal (1) variant. */
1838 | switch (((rev^dest)<<1)+(dest^(ev == TM_LT))) { | ||
1839 | case 0: | ||
1840 | | jb =>target | ||
1841 | break; | ||
1842 | case 1: | ||
1843 | | jbe =>target | ||
1844 | break; | ||
1845 | case 2: | ||
1846 | | ja =>target | ||
1847 | break; | ||
1848 | case 3: | ||
1849 | | jae =>target | ||
1850 | break; | ||
1851 | } | ||
1852 | goto skipstore; | ||
1853 | } | ||
1854 | fpstore: | ||
1855 | /* Store result and set result type (if necessary). */ | ||
1856 | | fstp qword BASE[dest].value | ||
1857 | if (dest != rkb && dest != rkc) { | ||
1858 | | settt BASE[dest], LUA_TNUMBER | ||
1859 | } | ||
1860 | |||
1861 | skipstore: | ||
/* Without a tail there is no fallback code to emit. */
/* NOTE(review): jit_deopt_target() is defined elsewhere -- confirm its role. */
1862 | if (!hastail) { | ||
1863 | jit_deopt_target(J, 0); | ||
1864 | return; | ||
1865 | } | ||
1866 | |||
1867 | |4: | ||
1868 | |.tail | ||
1869 | |L_DEOPTLABEL: // Recycle as fallback label. | ||
1870 | |||
1871 | fallback: | ||
1872 | /* Generic fallback for arithmetic ops. */ | ||
/* ecx/edx receive pointers to operand b and c (constant or stack slot). */
1873 | if (kkb) { | ||
1874 | | mov ecx, &kkb | ||
1875 | } else { | ||
1876 | | lea ecx, BASE[rkb] | ||
1877 | } | ||
1878 | if (kkc) { | ||
1879 | | mov edx, &kkc | ||
1880 | } else { | ||
1881 | | lea edx, BASE[rkc] | ||
1882 | } | ||
1883 | if (target) { /* TM_LT or TM_LE. */ | ||
1884 | | mov L->savedpc, &(J->nextins+1) | ||
1885 | | call &ev==TM_LT?luaV_lessthan:luaV_lessequal, L, ecx, edx | ||
1886 | | test eax, eax | ||
1887 | | mov BASE, L->base | ||
1888 | if (dest) { /* cond */ | ||
1889 | | jnz =>target | ||
1890 | } else { | ||
1891 | | jz =>target | ||
1892 | } | ||
1893 | } else { | ||
1894 | | addidx BASE, dest | ||
1895 | | mov L->savedpc, &J->nextins | ||
1896 | | call &luaV_arith, L, BASE, ecx, edx, ev | ||
1897 | | mov BASE, L->base | ||
1898 | } | ||
1899 | |||
/* With a tail: jump back to label 4 in the main code, then leave .tail. */
1900 | if (hastail) { | ||
1901 | | jmp <4 | ||
1902 | |.code | ||
1903 | } | ||
1904 | } | ||
1905 | |||
1906 | /* ------------------------------------------------------------------------ */ | ||
1907 | |||
/*
** Runtime helper for the length operator, called from generated code when
** no type-specialized variant was emitted. Stores the length of *rb in *ra:
**   table  - result of luaH_getn()
**   string - the string length field
**   other  - calls the TM_LEN metamethod if it is a function, otherwise
**            raises a type error via luaG_typeerror().
*/
1908 | static void jit_fallback_len(lua_State *L, StkId ra, const TValue *rb) | ||
1909 | { | ||
1910 | switch (ttype(rb)) { | ||
1911 | case LUA_TTABLE: | ||
1912 | setnvalue(ra, cast_num(luaH_getn(hvalue(rb)))); | ||
1913 | break; | ||
1914 | case LUA_TSTRING: | ||
1915 | setnvalue(ra, cast_num(tsvalue(rb)->len)); | ||
1916 | break; | ||
1917 | default: { | ||
1918 | const TValue *tm = luaT_gettmbyobj(L, rb, TM_LEN); | ||
1919 | if (ttisfunction(tm)) { | ||
/* Remember ra in saved form; it is restored after the call. */
/* NOTE(review): presumably needed because the stack may move -- confirm. */
1920 | ptrdiff_t rasave = savestack(L, ra); | ||
/* The metamethod and its argument are written above top first, */
/* and only then is the stack checked and top advanced. */
1921 | setobj2s(L, L->top, tm); | ||
1922 | setobj2s(L, L->top+1, rb); | ||
1923 | luaD_checkstack(L, 2); | ||
1924 | L->top += 2; | ||
1925 | luaD_call(L, L->top - 2, 1); | ||
1926 | ra = restorestack(L, rasave); | ||
/* Pop the value left on top of the stack into the destination. */
1927 | L->top--; | ||
1928 | setobjs2s(L, ra, L->top); | ||
1929 | } else { | ||
1930 | luaG_typeerror(L, rb, "get length of"); | ||
1931 | } | ||
1932 | break; | ||
1933 | } | ||
1934 | } | ||
1935 | } | ||
1936 | |||
/*
** Emit code for the length operator: BASE[dest] = length of BASE[rb].
** The TYPE hint selects the variant:
**   table  - type check, inline call to luaH_getn(), integer result
**            converted to a number with fild/fstp.
**   string - type check, length field loaded directly with fild.
**   other  - call to jit_fallback_len() with savedpc set beforehand.
** The two specialized variants branch to L_DEOPTIMIZE when the type check
** fails.
** NOTE(review): jit_deopt_target() is defined elsewhere -- confirm its role.
*/
1937 | static void jit_op_len(jit_State *J, int dest, int rb) | ||
1938 | { | ||
1939 | switch (ttype(hint_get(J, TYPE))) { | ||
1940 | case LUA_TTABLE: | ||
1941 | jit_deopt_target(J, 0); | ||
1942 | | istable rb | ||
1943 | | mov TABLE:ecx, BASE[rb].value | ||
1944 | | jne L_DEOPTIMIZE // TYPE hint was wrong? | ||
1945 | | call &luaH_getn, TABLE:ecx | ||
1946 | | mov TMP1, eax | ||
1947 | | fild dword TMP1 | ||
1948 | | fstp qword BASE[dest].value | ||
1949 | | settt BASE[dest], LUA_TNUMBER | ||
1950 | break; | ||
1951 | case LUA_TSTRING: | ||
1952 | jit_deopt_target(J, 0); | ||
1953 | | isstring rb | ||
1954 | | mov TSTRING:ecx, BASE[rb].value | ||
1955 | | jne L_DEOPTIMIZE // TYPE hint was wrong? | ||
1956 | | fild aword TSTRING:ecx->tsv.len // size_t | ||
1957 | | fstp qword BASE[dest].value | ||
1958 | | settt BASE[dest], LUA_TNUMBER | ||
1959 | break; | ||
1960 | default: | ||
1961 | | lea TVALUE:ecx, BASE[rb] | ||
1962 | | addidx BASE, dest | ||
1963 | | mov L->savedpc, &J->nextins | ||
1964 | | call &jit_fallback_len, L, BASE, TVALUE:ecx | ||
1965 | | mov BASE, L->base | ||
1966 | break; | ||
1967 | } | ||
1968 | } | ||
1969 | |||
/*
** Emit code for logical NOT: BASE[dest] = boolean negation of BASE[rb].
** The result is computed without a branch. ecx ends up zero only for
** nil/false; comparing it against edx (1) sets the carry flag exactly in
** that case, and adc on a cleared eax turns the carry into 0 or 1.
** edx (1) is also stored as the result type tag, relying on the stated
** assumption LUA_TBOOLEAN == 1.
*/
1970 | static void jit_op_not(jit_State *J, int dest, int rb) | ||
1971 | { | ||
1972 | /* l_isfalse() without a branch -- truly devious. */ | ||
1973 | /* ((value & tt) | (tt>>1)) is only zero for nil/false. */ | ||
1974 | /* Assumes: LUA_TNIL == 0, LUA_TBOOLEAN == 1, bvalue() == 0/1 */ | ||
1975 | | mov eax, BASE[rb].tt | ||
1976 | | mov ecx, BASE[rb].value | ||
1977 | | mov edx, 1 | ||
1978 | | and ecx, eax | ||
1979 | | shr eax, 1 | ||
1980 | | or ecx, eax | ||
1981 | | xor eax, eax | ||
1982 | | cmp ecx, edx | ||
1983 | | adc eax, eax | ||
1984 | | mov BASE[dest].tt, edx | ||
1985 | | mov BASE[dest].value, eax | ||
1986 | } | ||
1987 | |||
1988 | /* ------------------------------------------------------------------------ */ | ||
1989 | |||
/*
** Emit code for CONCAT over the stack slots first..last, result in dest.
** Two operands with a string TYPE hint use the CONCAT_STR2 jsub, which
** returns the resulting string in eax. Everything else calls luaV_concat()
** and then copies BASE[first] to BASE[dest] unless they are the same slot.
** A GC check is emitted in both cases.
** The CONCAT_STR2 jsub itself is defined at the end of this function.
*/
1990 | static void jit_op_concat(jit_State *J, int dest, int first, int last) | ||
1991 | { | ||
1992 | int num = last-first+1; | ||
1993 | if (num == 2 && ttisstring(hint_get(J, TYPE))) { /* Optimize common case. */ | ||
1994 | | addidx BASE, first | ||
1995 | | call ->CONCAT_STR2 | ||
1996 | | setsvalue BASE[dest], eax | ||
1997 | } else { /* Generic fallback. */ | ||
1998 | | mov L->savedpc, &J->nextins | ||
1999 | | call &luaV_concat, L, num, last | ||
2000 | | mov BASE, L->base | ||
2001 | if (dest != first) { | ||
2002 | | copyslot BASE[dest], BASE[first] | ||
2003 | } | ||
2004 | } | ||
2005 | jit_checkGC(J); /* Always do this, even for the optimized variant. */ | ||
2006 | |||
2007 | |.jsub CONCAT_STR2 // Concatenate two strings. | ||
2008 | |// Call with: BASE (first). Destroys all regs. L and BASE restored. | ||
2009 | | mov ARG2, L // Save L (esi). | ||
2010 | | mov eax, BASE[0].tt; shl eax, 4; or eax, BASE[1].tt | ||
2011 | | sub eax, LUA_TSTR_STR // eax = 0 on success. | ||
2012 | | jne ->DEOPTIMIZE_CALLER // Wrong types? Deoptimize. | ||
2013 | | | ||
2014 | |1: | ||
2015 | | mov GL:edi, L->l_G | ||
2016 | | mov TSTRING:esi, BASE[0].value // Caveat: L (esi) is gone now! | ||
2017 | | mov TSTRING:edx, BASE[1].value | ||
2018 | | mov ecx, TSTRING:esi->tsv.len // size_t | ||
2019 | | test ecx, ecx | ||
2020 | | jz >2 // 1st string is empty? | ||
2021 | | or eax, TSTRING:edx->tsv.len // eax is known to be zero. | ||
2022 | | jz >4 // 2nd string is empty? | ||
2023 | | add eax, ecx | ||
2024 | | jc >9 // Length overflow? | ||
2025 | | cmp eax, GL:edi->buff.buffsize // size_t | ||
2026 | | ja >5 // Temp buffer overflow? | ||
2027 | | mov edi, GL:edi->buff.buffer | ||
2028 | | add esi, #TSTRING | ||
2029 | | rep; movsb // Copy first string. | ||
2030 | | mov ecx, TSTRING:edx->tsv.len | ||
2031 | | lea esi, TSTRING:edx[1] | ||
2032 | | rep; movsb // Copy second string. | ||
2033 | | | ||
2034 | | sub edi, eax // start = end - total. | ||
2035 | | mov L, ARG2 // Restore L (esi). Reuse as 1st arg. | ||
2036 | | mov ARG3, edi | ||
2037 | | mov ARG4, eax | ||
2038 | | mov BASE, L->base // Restore BASE. | ||
2039 | | jmp &luaS_newlstr | ||
2040 | | | ||
2041 | |2: // 1st string is empty. | ||
2042 | | mov eax, TSTRING:edx // Return 2nd string. | ||
2043 | |3: | ||
2044 | | mov L, ARG2 // Restore L (esi) and BASE. | ||
2045 | | mov BASE, L->base | ||
2046 | | ret | ||
2047 | | | ||
2048 | |4: // 2nd string is empty. | ||
2049 | | mov eax, TSTRING:esi // Return 1st string. | ||
2050 | | jmp <3 | ||
2051 | | | ||
2052 | |5: // Resize temp buffer. | ||
2053 | | // No need for setting L->savedpc since only LUA_ERRMEM may be thrown. | ||
2054 | | mov L, ARG2 // Restore L. | ||
2055 | | lea ecx, GL:edi->buff | ||
2056 | | sub esp, FRAME_OFFSET | ||
2057 | | call &luaZ_openspace, L, ecx, eax | ||
2058 | | add esp, FRAME_OFFSET | ||
2059 | | xor eax, eax // BASE (first) and L saved. eax = 0. | ||
2060 | | jmp <1 // Just restart. | ||
2061 | | | ||
2062 | |9: // Length overflow errors are rare (> 2 GB string required). | ||
2063 | | mov L, ARG2 // Need L for deoptimization. | ||
2064 | | jmp ->DEOPTIMIZE_CALLER | ||
2065 | |.endjsub | ||
2066 | } | ||
2067 | |||
2068 | /* ------------------------------------------------------------------------ */ | ||
2069 | |||
/*
** Emit code for an equality test followed by a conditional jump.
**   cond     - 0 or 1; the jump to target is taken when the outcome of
**              the comparison (equal = 1) matches cond.
**   rkb, rkc - operands, each either a stack slot or a constant (ISK).
** Two constants are decided while generating code. A variable against a
** constant gets a test specialized on the constant type. Two variables
** first compare their type tags and then use the TYPE hint to pick a
** number test, a string pointer test or a call to luaV_equalval().
** condtarget is the destination used as soon as the operands are known to
** be unequal (type mismatch or unordered FP compare).
*/
2070 | static void jit_op_eq(jit_State *J, int cond, int rkb, int rkc) | ||
2071 | { | ||
2072 | int target = jit_jmp_target(J); | ||
2073 | int condtarget = cond ? (J->nextpc+1) : target; | ||
2074 | jit_assert(cond == 0 || cond == 1); | ||
2075 | |||
2076 | /* Comparison of two constants. Evaluate at compile time. */ | ||
2077 | if (ISK(rkb&rkc)) { | ||
2078 | if ((rkb == rkc) == cond) { /* Constants are already unique. */ | ||
2079 | | jmp =>target | ||
2080 | } | ||
2081 | return; | ||
2082 | } | ||
2083 | |||
2084 | if (ISK(rkb|rkc)) { /* Compare a variable and a constant. */ | ||
2085 | const TValue *kk; | ||
2086 | if (ISK(rkb)) { int t = rkc; rkc = rkb; rkb = t; } /* rkc holds const. */ | ||
2087 | kk = &J->pt->k[INDEXK(rkc)]; | ||
/* Each case leaves the zero flag set when the operands are equal. */
2088 | switch (ttype(kk)) { | ||
2089 | case LUA_TNIL: | ||
2090 | | isnil rkb | ||
2091 | break; | ||
2092 | case LUA_TBOOLEAN: | ||
/* true: (tt-1)|(value-1) is zero only for tt == 1 and value == 1. */
/* false: (tt-1)|value is zero only for tt == 1 and value == 0. */
2093 | if (bvalue(kk)) { | ||
2094 | | mov eax, BASE[rkb].tt | ||
2095 | | mov ecx, BASE[rkb].value | ||
2096 | | dec eax | ||
2097 | | dec ecx | ||
2098 | | or eax, ecx | ||
2099 | } else { | ||
2100 | | mov eax, BASE[rkb].tt | ||
2101 | | dec eax | ||
2102 | | or eax, BASE[rkb].value | ||
2103 | } | ||
2104 | break; | ||
2105 | case LUA_TNUMBER: | ||
2106 | |// Note: bitwise comparison is not faster (and needs to handle -0 == 0). | ||
2107 | | isnumber rkb | ||
2108 | | jne =>condtarget | ||
2109 | | fld qword BASE[rkb].value | ||
2110 | | fld qword [&kk->value] | ||
2111 | | fcomparepp | ||
2112 | | jp =>condtarget // Unordered means not equal. | ||
2113 | break; | ||
2114 | case LUA_TSTRING: | ||
2115 | | isstring rkb | ||
2116 | | jne =>condtarget | ||
2117 | | cmp aword BASE[rkb].value, &rawtsvalue(kk) | ||
2118 | break; | ||
2119 | default: jit_assert(0); break; | ||
2120 | } | ||
2121 | } else { /* Compare two variables. */ | ||
2122 | | mov eax, BASE[rkb].tt | ||
2123 | | cmp eax, BASE[rkc].tt | ||
2124 | | jne =>condtarget | ||
2125 | switch (ttype(hint_get(J, TYPE))) { | ||
2126 | case LUA_TNUMBER: | ||
2127 | jit_deopt_target(J, 0); | ||
2128 | |// Note: bitwise comparison is not an option (-0 == 0, NaN ~= NaN). | ||
2129 | | cmp eax, LUA_TNUMBER; jne L_DEOPTIMIZE | ||
2130 | | fld qword BASE[rkb].value | ||
2131 | | fld qword BASE[rkc].value | ||
2132 | | fcomparepp | ||
2133 | | jp =>condtarget // Unordered means not equal. | ||
2134 | break; | ||
2135 | case LUA_TSTRING: | ||
2136 | jit_deopt_target(J, 0); | ||
2137 | | cmp eax, LUA_TSTRING; jne L_DEOPTIMIZE | ||
2138 | | mov ecx, BASE[rkb].value | ||
2139 | | cmp ecx, BASE[rkc].value | ||
2140 | break; | ||
2141 | default: | ||
2142 | |// Generic equality comparison fallback. | ||
2143 | | lea edx, BASE[rkc] | ||
2144 | | lea ecx, BASE[rkb] | ||
2145 | | mov L->savedpc, &J->nextins | ||
2146 | | call &luaV_equalval, L, ecx, edx | ||
2147 | | dec eax | ||
2148 | | mov BASE, L->base | ||
2149 | break; | ||
2150 | } | ||
2151 | } | ||
/* Final branch on the zero flag produced by the selected test. */
2152 | if (cond) { | ||
2153 | | je =>target | ||
2154 | } else { | ||
2155 | | jne =>target | ||
2156 | } | ||
2157 | } | ||
2158 | |||
2159 | /* ------------------------------------------------------------------------ */ | ||
2160 | |||
/*
** Emit code for a truth test of BASE[src] with a conditional jump.
** The jump to target is taken when the truth value of BASE[src] matches
** cond. When dest differs from src, the slot is copied to BASE[dest]
** right before the jump; otherwise no copy is emitted.
** The test leaves edx zero only for nil/false and keeps tt (eax) and
** value (ecx) intact for the copy.
*/
2161 | static void jit_op_test(jit_State *J, int cond, int dest, int src) | ||
2162 | { | ||
2163 | int target = jit_jmp_target(J); | ||
2164 | |||
2165 | /* l_isfalse() without a branch. But this time preserve tt/value. */ | ||
2166 | /* (((value & tt) * 2 + tt) >> 1) is only zero for nil/false. */ | ||
2167 | /* Assumes: 3*tt < 2^32, LUA_TNIL == 0, LUA_TBOOLEAN == 1, bvalue() == 0/1 */ | ||
2168 | | mov eax, BASE[src].tt | ||
2169 | | mov ecx, BASE[src].value | ||
2170 | | mov edx, eax | ||
2171 | | and edx, ecx | ||
2172 | | lea edx, [eax+edx*2] | ||
2173 | | shr edx, 1 | ||
2174 | |||
2175 | /* Check if we can omit the stack copy. */ | ||
2176 | if (dest == src) { /* Yes, invert branch condition. */ | ||
2177 | if (cond) { | ||
2178 | | jnz =>target | ||
2179 | } else { | ||
2180 | | jz =>target | ||
2181 | } | ||
2182 | } else { /* No, jump around copy code. */ | ||
2183 | if (cond) { | ||
2184 | | jz >1 | ||
2185 | } else { | ||
2186 | | jnz >1 | ||
2187 | } | ||
/* edx is free again here; use it for the high dword of the value. */
2188 | | mov edx, BASE[src].value.na[1] | ||
2189 | | mov BASE[dest].tt, eax | ||
2190 | | mov BASE[dest].value, ecx | ||
2191 | | mov BASE[dest].value.na[1], edx | ||
2192 | | jmp =>target | ||
2193 | |1: | ||
2194 | } | ||
2195 | } | ||
2196 | |||
/* Emit an unconditional jump to the dynamic label given by target. */
2197 | static void jit_op_jmp(jit_State *J, int target) | ||
2198 | { | ||
2199 | | jmp =>target | ||
2200 | } | ||
2201 | |||
2202 | /* ------------------------------------------------------------------------ */ | ||
2203 | |||
/*
** Slot offsets of the numeric for loop control values, relative to the
** base slot ra of the loop: index, limit, step and the external copy of
** the index (written as extidx by the loop code).
*/
2204 | enum { FOR_IDX, FOR_LIM, FOR_STP, FOR_EXT }; | ||
2205 | |||
/* Error messages for non-numeric loop values, indexed by FOR_IDX..FOR_STP. */
2206 | static const char *const jit_for_coerce_error[] = { | ||
2207 | LUA_QL("for") " initial value must be a number", | ||
2208 | LUA_QL("for") " limit must be a number", | ||
2209 | LUA_QL("for") " step must be a number", | ||
2210 | }; | ||
2211 | |||
2212 | /* Try to coerce for slots with strings to numbers in place or complain. */ | ||
/*
** Runtime helper for the for loop setup fallback.
** o points to the first of the three control slots (index, limit, step).
** Slots that already hold numbers are left alone. A string slot that
** luaO_str2d() can convert is replaced in place by the number. Any other
** slot raises the matching error from jit_for_coerce_error.
*/
2213 | static void jit_for_coerce(lua_State *L, TValue *o) | ||
2214 | { | ||
2215 | int i; | ||
/* i doubles as the index into the error message table. */
2216 | for (i = FOR_IDX; i <= FOR_STP; i++, o++) { | ||
2217 | lua_Number num; | ||
2218 | if (ttisnumber(o)) continue; | ||
2219 | if (ttisstring(o) && luaO_str2d(svalue(o), &num)) { | ||
2220 | setnvalue(o, num); | ||
2221 | } else { | ||
2222 | luaG_runerror(L, jit_for_coerce_error[i]); | ||
2223 | } | ||
2224 | } | ||
2225 | } | ||
2226 | |||
/*
** Emit code for the setup of a numeric for loop whose control slots start
** at ra. The code checks that the control values are numbers, copies the
** index to the external index slot and jumps to target+1 when the loop
** must not run at all.
** With a numeric FOR_STEP_K hint the direction of the comparison is fixed
** while generating code. Otherwise the sign of the step is tested at run
** time.
** A failed number check either deoptimizes (numeric TYPE hint) or enters
** a fallback in the .tail section that calls jit_for_coerce() and then
** retries at label 4.
*/
2227 | static void jit_op_forprep(jit_State *J, int ra, int target) | ||
2228 | { | ||
2229 | const TValue *step = hint_get(J, FOR_STEP_K); | ||
2230 | if (ttisnumber(step)) { | ||
2231 | | isnumber2 ra+FOR_IDX, ra+FOR_LIM; jne L_DEOPTIMIZEF | ||
2232 | |4: | ||
2233 | | fld qword BASE[ra+FOR_LIM].value // [lim] | ||
2234 | | fld qword BASE[ra+FOR_IDX].value // [idx lim] | ||
2235 | | fst qword BASE[ra+FOR_EXT].value // extidx = idx | ||
2236 | | fcomparepp // idx >< lim ? | ||
2237 | | settt BASE[ra+FOR_EXT], LUA_TNUMBER | ||
2238 | if (nvalue(step) < (lua_Number)0) { | ||
2239 | | jb =>target+1 // step < 0 && idx < lim: skip loop. | ||
2240 | } else { | ||
2241 | | ja =>target+1 // step >= 0 && idx > lim: skip loop. | ||
2242 | } | ||
2243 | } else { | ||
2244 | |4: | ||
2245 | | isnumber3 ra+FOR_IDX, ra+FOR_LIM, ra+FOR_STP | ||
2246 | | mov eax, BASE[ra+FOR_STP].value.na[1] // Sign bit is in hi dword. | ||
2247 | | jne L_DEOPTIMIZEF | ||
2248 | | fld qword BASE[ra+FOR_LIM].value // [lim] (FP stack notation) | ||
2249 | | fld qword BASE[ra+FOR_IDX].value // [idx lim] | ||
2250 | | test eax, eax // step >< 0 ? | ||
2251 | | fst qword BASE[ra+FOR_EXT].value // extidx = idx | ||
2252 | | js >1 | ||
2253 | | fxch // if (step > 0) [lim idx] | ||
2254 | |1: | ||
2255 | | fcomparepp // step > 0 ? lim < idx : idx < lim | ||
2256 | | settt BASE[ra+FOR_EXT], LUA_TNUMBER | ||
2257 | | jb =>target+1 // Skip loop. | ||
2258 | } | ||
/* NOTE(review): jit_deopt_target() is defined elsewhere -- confirm its role. */
2259 | if (ttisnumber(hint_get(J, TYPE))) { | ||
2260 | jit_deopt_target(J, 0); | ||
2261 | } else { | ||
2262 | |.tail | ||
2263 | |L_DEOPTLABEL: // Recycle as fallback label. | ||
2264 | | // Fallback for strings as loop vars. No need to make this fast. | ||
2265 | | lea eax, BASE[ra] | ||
2266 | | mov L->savedpc, &J->nextins | ||
2267 | | call &jit_for_coerce, L, eax // Coerce strings or throw error. | ||
2268 | | jmp <4 // Easier than reloading eax. | ||
2269 | |.code | ||
2270 | } | ||
2271 | } | ||
2272 | |||
/*
** Emit code for one iteration step of a numeric for loop whose control
** slots start at ra: add the step to the index, store the new index in
** both the index and the external index slot, and jump to target while
** the loop continues.
** The FOR_STEP_K hint is looked up at target-1. With a numeric hint the
** direction of the comparison is fixed while generating code. Otherwise
** the sign of the step is tested at run time.
*/
2273 | static void jit_op_forloop(jit_State *J, int ra, int target) | ||
2274 | { | ||
2275 | const TValue *step = hint_getpc(J, FOR_STEP_K, target-1); | ||
2276 | if (ttisnumber(step)) { | ||
2277 | | fld qword BASE[ra+FOR_LIM].value // [lim] (FP stack notation) | ||
2278 | | fld qword BASE[ra+FOR_IDX].value // [idx lim] | ||
2279 | | fadd qword BASE[ra+FOR_STP].value // [nidx lim] | ||
2280 | | fst qword BASE[ra+FOR_EXT].value // extidx = nidx | ||
2281 | | fst qword BASE[ra+FOR_IDX].value // idx = nidx | ||
2282 | | settt BASE[ra+FOR_EXT], LUA_TNUMBER | ||
2283 | | fcomparepp // nidx >< lim ? | ||
2284 | if (nvalue(step) < (lua_Number)0) { | ||
2285 | | jae =>target // step < 0 && nidx >= lim: loop again. | ||
2286 | } else { | ||
2287 | | jbe =>target // step >= 0 && nidx <= lim: loop again. | ||
2288 | } | ||
2289 | } else { | ||
2290 | | mov eax, BASE[ra+FOR_STP].value.na[1] // Sign bit is in hi dword. | ||
2291 | | fld qword BASE[ra+FOR_LIM].value // [lim] (FP stack notation) | ||
2292 | | fld qword BASE[ra+FOR_IDX].value // [idx lim] | ||
2293 | | fld qword BASE[ra+FOR_STP].value // [stp idx lim] | ||
2294 | | faddp st1 // [nidx lim] | ||
2295 | | fst qword BASE[ra+FOR_IDX].value // idx = nidx | ||
2296 | | fst qword BASE[ra+FOR_EXT].value // extidx = nidx | ||
2297 | | settt BASE[ra+FOR_EXT], LUA_TNUMBER | ||
2298 | | test eax, eax // step >< 0 ? | ||
2299 | | js >1 | ||
2300 | | fxch // if (step > 0) [lim nidx] | ||
2301 | |1: | ||
2302 | | fcomparepp // step > 0 ? lim >= nidx : nidx >= lim | ||
2303 | | jae =>target // Loop again. | ||
2304 | } | ||
2305 | } | ||
2306 | |||
2307 | /* ------------------------------------------------------------------------ */ | ||
2308 | |||
/*
** Emit code for one step of a generic for loop whose control slots start
** at ra. Nothing more is emitted when jit_inline_tforloop() reports that
** it handled the loop. Otherwise the slots ra..ra+2 are copied to
** ra+3..ra+5 and a call is emitted through jit_op_call() at ra+3. After
** the call, a nil in BASE[ra+3] ends the loop; any other value is copied
** to BASE[ra+2] and execution jumps to target.
*/
2309 | static void jit_op_tforloop(jit_State *J, int ra, int nresults) | ||
2310 | { | ||
2311 | int target = jit_jmp_target(J); | ||
2312 | int i; | ||
2313 | if (jit_inline_tforloop(J, ra, nresults, target)) return; /* Inlined? */ | ||
/* The copies are emitted for i = 2, 1, 0 in this order. */
2314 | for (i = 2; i >= 0; i--) { | ||
2315 | | copyslot BASE[ra+i+3], BASE[ra+i] // Copy ctlvar/state/callable. | ||
2316 | } | ||
2317 | jit_op_call(J, ra+3, 2, nresults); | ||
2318 | | isnil ra+3; je >1 | ||
2319 | | copyslot BASE[ra+2], BASE[ra+3] // Save control variable. | ||
2320 | | jmp =>target | ||
2321 | |1: | ||
2322 | } | ||
2323 | |||
2324 | /* ------------------------------------------------------------------------ */ | ||
2325 | |||
/*
** Emit code for CLOSE: call luaF_close() with L and the address of
** BASE[ra] as the level argument. For ra == 0 BASE is passed directly,
** which saves the lea.
*/
2326 | static void jit_op_close(jit_State *J, int ra) | ||
2327 | { | ||
2328 | if (ra) { | ||
2329 | | lea eax, BASE[ra] | ||
2330 | | mov ARG2, eax | ||
2331 | } else { | ||
2332 | | mov ARG2, BASE | ||
2333 | } | ||
2334 | | call &luaF_close, L // , StkId level (ARG2) | ||
2335 | } | ||
2336 | |||
/*
** Emit code for CLOSURE: create a new Lua closure for the child prototype
** J->pt->p[ptidx] and store it in BASE[dest].
** The new closure gets the environment of the running closure. Its
** upvalues are described by the nup pseudo-instructions that follow in
** the bytecode (J->nextins):
**   OP_GETUPVAL - share upvalue B of the running closure.
**   OP_MOVE     - find or create an upvalue for stack slot B through
**                 luaF_findupval().
** The pseudo-instructions are skipped by adding nup to J->combine, and a
** GC check is emitted at the end.
*/
2337 | static void jit_op_closure(jit_State *J, int dest, int ptidx) | ||
2338 | { | ||
2339 | Proto *npt = J->pt->p[ptidx]; | ||
2340 | int nup = npt->nups; | ||
2341 | | getLCL edi // LCL:edi is callee-saved. | ||
2342 | | mov edx, LCL:edi->env | ||
2343 | | call &luaF_newLclosure, L, nup, edx | ||
2344 | | mov LCL->p, &npt // Store new proto in returned closure. | ||
2345 | | mov aword BASE[dest].value, LCL // setclvalue() | ||
2346 | | settt BASE[dest], LUA_TFUNCTION | ||
2347 | /* Process pseudo-instructions for upvalues. */ | ||
2348 | if (nup > 0) { | ||
2349 | const Instruction *uvcode = J->nextins; | ||
2350 | int i, uvuv; | ||
2351 | /* Check which of the two types we need. */ | ||
2352 | for (i = 0, uvuv = 0; i < nup; i++) | ||
2353 | if (GET_OPCODE(uvcode[i]) == OP_GETUPVAL) uvuv++; | ||
2354 | /* Copy upvalues from parent first. */ | ||
2355 | if (uvuv) { | ||
2356 | /* LCL:eax->upvals (new closure) <-- LCL:edi->upvals (own closure). */ | ||
2357 | for (i = 0; i < nup; i++) | ||
2358 | if (GET_OPCODE(uvcode[i]) == OP_GETUPVAL) { | ||
2359 | | mov UPVAL:edx, LCL:edi->upvals[GETARG_B(uvcode[i])] | ||
2360 | | mov LCL->upvals[i], UPVAL:edx | ||
2361 | } | ||
2362 | } | ||
2363 | /* Next find or create upvalues for our own stack slots. */ | ||
2364 | if (nup > uvuv) { | ||
2365 | | mov LCL:edi, LCL // Move new closure to callee-save register. | ||
2366 | /* LCL:edi->upvals (new closure) <-- upvalue for stack slot. */ | ||
2367 | for (i = 0; i < nup; i++) | ||
2368 | if (GET_OPCODE(uvcode[i]) == OP_MOVE) { | ||
2369 | int rb = GETARG_B(uvcode[i]); | ||
2370 | if (rb) { | ||
2371 | | lea eax, BASE[rb] | ||
2372 | | mov ARG2, eax | ||
2373 | } else { | ||
2374 | | mov ARG2, BASE | ||
2375 | } | ||
2376 | | call &luaF_findupval, L // , StkId level (ARG2) | ||
2377 | | mov LCL:edi->upvals[i], UPVAL:eax | ||
2378 | } | ||
2379 | } | ||
2380 | J->combine += nup; /* Skip pseudo-instructions. */ | ||
2381 | } | ||
2382 | jit_checkGC(J); | ||
2383 | } | ||
2384 | |||
2385 | /* ------------------------------------------------------------------------ */ | ||
2386 | |||
/*
** Emit code for VARARG: copy the variable arguments of the running
** function to the stack slots starting at BASE[dest].
**   num < 0 - copy all varargs. The stack is grown first when needed, and
**             TOP is left past the last copied slot for the next op.
**   num > 0 - copy at most num varargs and fill the remaining destination
**             slots with nil.
**   num == 0 - no code is emitted.
** The varargs start at ci->func + 1 + numparams and end at BASE.
*/
2387 | static void jit_op_vararg(jit_State *J, int dest, int num) | ||
2388 | { | ||
2389 | if (num < 0) { /* Copy all varargs. */ | ||
2390 | |// Copy [ci->func+1+pt->numparams, BASE) -> [BASE+dest, *). | ||
2391 | |1: | ||
2392 | | mov CI, L->ci | ||
2393 | | mov edx, CI->func | ||
2394 | | add edx, (1+J->pt->numparams)*#TVALUE // Start of varargs. | ||
2395 | | | ||
2396 | | // luaD_checkstack(L, nvararg) with nvararg = L->base - vastart. | ||
2397 | | // This is a slight overallocation (BASE[dest+nvararg] would be enough). | ||
2398 | | // We duplicate OP_VARARG behaviour so we can use luaD_growstack(). | ||
2399 | | lea eax, [BASE+BASE+J->pt->maxstacksize*#TVALUE] // L->base + L->top | ||
2400 | | sub eax, edx // L->top + (L->base - vastart) | ||
2401 | | cmp eax, L->stack_last | ||
2402 | | jae >5 // Need to grow stack? | ||
2403 | | | ||
2404 | | lea TOP, BASE[dest] | ||
2405 | | cmp edx, BASE | ||
2406 | | jnb >3 | ||
2407 | |2: // Copy loop. | ||
2408 | | mov eax, [edx] | ||
2409 | | add edx, aword*1 | ||
2410 | | mov [TOP], eax | ||
2411 | | add TOP, aword*1 | ||
2412 | | cmp edx, BASE | ||
2413 | | jb <2 | ||
2414 | |3: | ||
2415 | |// This is an open ins. Must keep TOP for next instruction. | ||
2416 | | | ||
2417 | |.tail | ||
2418 | |5: // Grow stack for varargs. | ||
2419 | | sub eax, L->top | ||
2420 | | TValuediv eax | ||
2421 | | call &luaD_growstack, L, eax | ||
2422 | | mov BASE, L->base | ||
2423 | | jmp <1 // Just restart op to avoid saving/restoring regs. | ||
2424 | |.code | ||
2425 | } else if (num > 0) { /* Copy limited number of varargs. */ | ||
2426 | |// Copy [ci->func+1+pt->numparams, BASE) -> [BASE+dest, BASE+dest+num). | ||
2427 | | mov CI, L->ci | ||
2428 | | mov edx, CI->func | ||
2429 | | add edx, (1+J->pt->numparams)*#TVALUE | ||
2430 | | lea TOP, BASE[dest] | ||
2431 | | lea ecx, BASE[dest+num] | ||
2432 | | cmp edx, BASE // No varargs present: only fill. | ||
2433 | | jnb >2 | ||
2434 | | | ||
2435 | |1: // Copy loop. | ||
2436 | | mov eax, [edx] | ||
2437 | | add edx, aword*1 | ||
2438 | | mov [TOP], eax | ||
2439 | | add TOP, aword*1 | ||
2440 | | cmp TOP, ecx // Stop if all dest slots got a vararg. | ||
2441 | | jnb >4 | ||
2442 | | cmp edx, BASE // Continue if more varargs present. | ||
2443 | | jb <1 | ||
2444 | | | ||
2445 | |2: // Fill remaining slots with nils. | ||
2446 | | xor eax, eax // Assumes: LUA_TNIL == 0 | ||
2447 | |3: // Fill loop. | ||
2448 | | settt TOP[0], eax | ||
2449 | | add TOP, #TVALUE | ||
2450 | | cmp TOP, ecx | ||
2451 | | jb <3 | ||
2452 | |4: | ||
2453 | } | ||
2454 | } | ||
2455 | |||
2456 | /* ------------------------------------------------------------------------ */ | ||
2457 | |||