about | summary | refs | log | tree | commit | diff | stats | homepage
path: root/libraries/luajit-2.0/src/buildvm_x86.dasc
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/luajit-2.0/src/buildvm_x86.dasc')
-rw-r--r-- libraries/luajit-2.0/src/buildvm_x86.dasc 6458
1 files changed, 6458 insertions, 0 deletions
diff --git a/libraries/luajit-2.0/src/buildvm_x86.dasc b/libraries/luajit-2.0/src/buildvm_x86.dasc
new file mode 100644
index 0000000..d6dfde8
--- /dev/null
+++ b/libraries/luajit-2.0/src/buildvm_x86.dasc
@@ -0,0 +1,6458 @@
1|// Low-level VM code for x86 CPUs.
2|// Bytecode interpreter, fast functions and helper functions.
3|// Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
4|
5|.if X64
6|.arch x64
7|.else
8|.arch x86
9|.endif
10|.section code_op, code_sub
11|
12|.actionlist build_actionlist
13|.globals GLOB_
14|.globalnames globnames
15|.externnames extnames
16|
17|//-----------------------------------------------------------------------
18|
19|// Fixed register assignments for the interpreter.
20|// This is very fragile and has many dependencies. Caveat emptor.
21|.define BASE, edx // Not C callee-save, refetched anyway.
22|.if not X64 // x86: the address-sized aliases (…a) are the same 32 bit registers.
23|.define KBASE, edi // Must be C callee-save.
24|.define KBASEa, KBASE // Address-sized alias of KBASE.
25|.define PC, esi // Must be C callee-save.
26|.define PCa, PC // Address-sized alias of PC.
27|.define DISPATCH, ebx // Must be C callee-save.
28|.elif X64WIN // x64/Windows: same 32 bit names as x86, 64 bit views for addresses.
29|.define KBASE, edi // Must be C callee-save.
30|.define KBASEa, rdi // 64 bit view of KBASE.
31|.define PC, esi // Must be C callee-save.
32|.define PCa, rsi // 64 bit view of PC.
33|.define DISPATCH, ebx // Must be C callee-save.
34|.else // x64/POSIX: different registers are used for KBASE, PC and DISPATCH.
35|.define KBASE, r15d // Must be C callee-save.
36|.define KBASEa, r15 // 64 bit view of KBASE.
37|.define PC, ebx // Must be C callee-save.
38|.define PCa, rbx // 64 bit view of PC.
39|.define DISPATCH, r14d // Must be C callee-save.
40|.endif
41|
42|.define RA, ecx // Operand register A.
43|.define RAH, ch // Bits 8-15 of RA.
44|.define RAL, cl // Bits 0-7 of RA.
45|.define RB, ebp // Must be ebp (C callee-save).
46|.define RC, eax // Must be eax (fcomparepp and others).
47|.define RCW, ax // Bits 0-15 of RC.
48|.define RCH, ah // Bits 8-15 of RC.
49|.define RCL, al // Bits 0-7 of RC.
50|.define OP, RB // OP is an alias: it shares the register with RB.
51|.define RD, RC // RD is an alias: it shares the register with RC.
52|.define RDW, RCW
53|.define RDL, RCL
54|.if X64 // Address-sized (64 bit) views of the operand registers.
55|.define RAa, rcx
56|.define RBa, rbp
57|.define RCa, rax
58|.define RDa, rax
59|.else // x86: address size equals operand size, so these are plain aliases.
60|.define RAa, RA
61|.define RBa, RB
62|.define RCa, RC
63|.define RDa, RD
64|.endif
65|
66|.if not X64 // x86: only the two fastcall registers are named; other args go via ARGn stack slots.
67|.define FCARG1, ecx // x86 fastcall arguments.
68|.define FCARG2, edx
69|.elif X64WIN
70|.define CARG1, rcx // x64/WIN64 C call arguments.
71|.define CARG2, rdx
72|.define CARG3, r8
73|.define CARG4, r9
74|.define CARG1d, ecx // 32 bit views of CARG1..CARG4.
75|.define CARG2d, edx
76|.define CARG3d, r8d
77|.define CARG4d, r9d
78|.define FCARG1, CARG1d // Upwards compatible to x86 fastcall.
79|.define FCARG2, CARG2d
80|.else
81|.define CARG1, rdi // x64/POSIX C call arguments.
82|.define CARG2, rsi
83|.define CARG3, rdx
84|.define CARG4, rcx
85|.define CARG5, r8
86|.define CARG6, r9
87|.define CARG1d, edi // 32 bit views of CARG1..CARG6.
88|.define CARG2d, esi
89|.define CARG3d, edx
90|.define CARG4d, ecx
91|.define CARG5d, r8d
92|.define CARG6d, r9d
93|.define FCARG1, CARG1d // Simulate x86 fastcall.
94|.define FCARG2, CARG2d
95|.endif
96|
97|// Type definitions for typed operands (e.g. L:RB->base). Some of these are only used for documentation.
98|.type L, lua_State
99|.type GL, global_State
100|.type TVALUE, TValue
101|.type GCOBJ, GCobj
102|.type STR, GCstr
103|.type TAB, GCtab
104|.type LFUNC, GCfuncL
105|.type CFUNC, GCfuncC
106|.type PROTO, GCproto
107|.type UPVAL, GCupval
108|.type NODE, Node
109|.type NARGS, int // Tags a register holding an argument count (e.g. NARGS:RD).
110|.type TRACE, GCtrace
111|
112|// Stack layout while in interpreter. Must match with lj_frame.h.
113|//-----------------------------------------------------------------------
114|.if not X64 // x86 stack layout.
115|
116|.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
117|.macro saveregs_ // Push edi/esi/ebx (not ebp), then reserve CFRAME_SPACE bytes.
118| push edi; push esi; push ebx
119| sub esp, CFRAME_SPACE
120|.endmacro
121|.macro saveregs // Full save: ebp first, then everything saveregs_ does.
122| push ebp; saveregs_
123|.endmacro
124|.macro restoreregs // Inverse of saveregs: release the frame, pop in reverse push order.
125| add esp, CFRAME_SPACE
126| pop ebx; pop esi; pop edi; pop ebp
127|.endmacro
128|
129|.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
130|.define SAVE_NRES, aword [esp+aword*14]
131|.define SAVE_CFRAME, aword [esp+aword*13]
132|.define SAVE_L, aword [esp+aword*12]
133|//----- 16 byte aligned, ^^^ arguments from C caller
134|.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
135|.define SAVE_R4, aword [esp+aword*10]
136|.define SAVE_R3, aword [esp+aword*9]
137|.define SAVE_R2, aword [esp+aword*8]
138|//----- 16 byte aligned
139|.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
140|.define SAVE_PC, aword [esp+aword*6]
141|.define TMP2, aword [esp+aword*5]
142|.define TMP1, aword [esp+aword*4]
143|//----- 16 byte aligned
144|.define ARG4, aword [esp+aword*3]
145|.define ARG3, aword [esp+aword*2]
146|.define ARG2, aword [esp+aword*1]
147|.define ARG1, aword [esp] //<-- esp while in interpreter.
148|//----- 16 byte aligned, ^^^ arguments for C callee
149|
150|// FPARGx overlaps ARGx and ARG(x+1) on x86.
151|.define FPARG3, qword [esp+qword*1] // Same bytes as ARG3/ARG4.
152|.define FPARG1, qword [esp] // Same bytes as ARG1/ARG2.
153|// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ).
154|.define TMPQ, qword [esp+aword*4] // 8 byte view starting at TMP1.
155|.define TMP3, ARG4 // Third temp reuses the ARG4 slot.
156|.define ARG5, TMP1
157|.define TMPa, TMP1 // Address-sized temp; same slot as TMP1 on x86.
158|.define MULTRES, TMP2
159|
160|// Arguments for vm_call and vm_pcall.
161|.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME!
162|
163|// Arguments for vm_cpcall.
164|.define INARG_CP_CALL, SAVE_ERRF // 4th C argument; vm_cpcall calls through it.
165|.define INARG_CP_UD, SAVE_NRES // 3rd C argument.
166|.define INARG_CP_FUNC, SAVE_CFRAME // 2nd C argument.
167|
168|//-----------------------------------------------------------------------
169|.elif X64WIN // x64/Windows stack layout
170|
171|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
172|.macro saveregs_ // Push rdi/rsi/rbx (not rbp), then reserve CFRAME_SPACE bytes.
173| push rdi; push rsi; push rbx
174| sub rsp, CFRAME_SPACE
175|.endmacro
176|.macro saveregs // Full save: rbp first, then everything saveregs_ does.
177| push rbp; saveregs_
178|.endmacro
179|.macro restoreregs // Inverse of saveregs: release the frame, pop in reverse push order.
180| add rsp, CFRAME_SPACE
181| pop rbx; pop rsi; pop rdi; pop rbp
182|.endmacro
183|
184|.define SAVE_CFRAME, aword [rsp+aword*13]
185|.define SAVE_PC, dword [rsp+dword*25] // The dword slots below pack two per 8 byte stack word.
186|.define SAVE_L, dword [rsp+dword*24]
187|.define SAVE_ERRF, dword [rsp+dword*23]
188|.define SAVE_NRES, dword [rsp+dword*22]
189|.define TMP2, dword [rsp+dword*21]
190|.define TMP1, dword [rsp+dword*20]
191|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
192|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
193|.define SAVE_R4, aword [rsp+aword*8]
194|.define SAVE_R3, aword [rsp+aword*7]
195|.define SAVE_R2, aword [rsp+aword*6]
196|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
197|.define ARG5, aword [rsp+aword*4]
198|.define CSAVE_4, aword [rsp+aword*3]
199|.define CSAVE_3, aword [rsp+aword*2]
200|.define CSAVE_2, aword [rsp+aword*1]
201|.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
202|//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
203|
204|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
205|.define TMPQ, qword [rsp+aword*10] // Same address as TMP1 (aword*10 == dword*20).
206|.define MULTRES, TMP2
207|.define TMPa, ARG5 // Address-sized temp shares the ARG5 slot.
208|.define ARG5d, dword [rsp+aword*4] // 32 bit view of the ARG5 slot.
209|.define TMP3, ARG5d // Third temp shares the ARG5 slot, too.
210|
211|//-----------------------------------------------------------------------
212|.else // x64/POSIX stack layout
213|
214|.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
215|.macro saveregs_ // Push rbx/r15/r14 (not rbp), then reserve CFRAME_SPACE bytes.
216| push rbx; push r15; push r14
217| sub rsp, CFRAME_SPACE
218|.endmacro
219|.macro saveregs // Full save: rbp first, then everything saveregs_ does.
220| push rbp; saveregs_
221|.endmacro
222|.macro restoreregs // Inverse of saveregs: release the frame, pop in reverse push order.
223| add rsp, CFRAME_SPACE
224| pop r14; pop r15; pop rbx; pop rbp
225|.endmacro
226|
227|//----- 16 byte aligned,
228|.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
229|.define SAVE_R4, aword [rsp+aword*8]
230|.define SAVE_R3, aword [rsp+aword*7]
231|.define SAVE_R2, aword [rsp+aword*6]
232|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
233|.define SAVE_CFRAME, aword [rsp+aword*4]
234|.define SAVE_PC, dword [rsp+dword*7] // The dword slots below pack two per 8 byte stack word.
235|.define SAVE_L, dword [rsp+dword*6]
236|.define SAVE_ERRF, dword [rsp+dword*5]
237|.define SAVE_NRES, dword [rsp+dword*4]
238|.define TMPa, aword [rsp+aword*1] // Address-sized temp.
239|.define TMP2, dword [rsp+dword*1]
240|.define TMP1, dword [rsp] //<-- rsp while in interpreter.
241|//----- 16 byte aligned
242|
243|// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
244|.define TMPQ, qword [rsp] // 8 byte view covering TMP1 and TMP2.
245|.define TMP3, dword [rsp+aword*1] // 32 bit view of the TMPa slot.
246|.define MULTRES, TMP2
247|
248|.endif
249|
250|//-----------------------------------------------------------------------
251|
252|// Instruction headers. Run at the start of an opcode handler, after ins_NEXT left the operand bits in RC/RD.
253|.macro ins_A; .endmacro // Nothing to decode.
254|.macro ins_AD; .endmacro // Nothing to decode: RD is used as delivered.
255|.macro ins_AJ; .endmacro // Nothing to decode.
256|.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro // RB = bits 8-15 of RC first, then RC = its low byte.
257|.macro ins_AB_; movzx RB, RCH; .endmacro // Only RB is extracted; RC is left as is.
258|.macro ins_A_C; movzx RC, RCL; .endmacro // Only RC is reduced to its low byte.
259|.macro ins_AND; not RDa; .endmacro // Bitwise complement of RD (address-sized).
260|
261|// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
262|.macro ins_NEXT
263| mov RC, [PC] // Load the 32 bit instruction word at PC.
264| movzx RA, RCH // RA = bits 8-15 of the instruction.
265| movzx OP, RCL // OP = bits 0-7 of the instruction (OP shares its register with RB).
266| add PC, 4 // PC now points past this instruction.
267| shr RC, 16 // RC (= RD) = bits 16-31 of the instruction.
268|.if X64
269| jmp aword [DISPATCH+OP*8] // Indirect jump via the table at DISPATCH; 8 byte entries.
270|.else
271| jmp aword [DISPATCH+OP*4] // Indirect jump via the table at DISPATCH; 4 byte entries.
272|.endif
273|.endmacro
274|
275|// Instruction footer. ins_next ends an opcode handler; ins_next_ is the variant that also hosts the shared dispatch label in the common-dispatch build.
276|.if 1 // Build-time switch: 1 selects replicated dispatch, 0 the common dispatch below.
277| // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use.
278| .define ins_next, ins_NEXT
279| .define ins_next_, ins_NEXT
280|.else
281| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
282| // Affects only certain kinds of benchmarks (and only with -j off).
283| // Around 10%-30% slower on Core2, much slower on P4.
284| .macro ins_next // Jump to the single shared copy of the dispatch code.
285| jmp ->ins_next
286| .endmacro
287| .macro ins_next_ // Defines the shared ->ins_next label and emits the dispatch code there.
288| ->ins_next:
289| ins_NEXT
290| .endmacro
291|.endif
292|
293|// Call decode and dispatch.
294|.macro ins_callt
295| // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
296| mov PC, LFUNC:RB->pc // PC = pc field of the function in RB.
297| mov RA, [PC] // Load the first instruction word; RD is left untouched.
298| movzx OP, RAL // OP = bits 0-7 (this overwrites RB, which shares the register).
299| movzx RA, RAH // RA = bits 8-15.
300| add PC, 4 // PC now points past the first instruction.
301|.if X64
302| jmp aword [DISPATCH+OP*8] // Indirect jump via the table at DISPATCH; 8 byte entries.
303|.else
304| jmp aword [DISPATCH+OP*4] // Indirect jump via the table at DISPATCH; 4 byte entries.
305|.endif
306|.endmacro
307|
308|.macro ins_call
309| // BASE = new base, RB = LFUNC, RD = nargs+1
310| mov [BASE-4], PC // Store the current PC at [BASE-4]; ins_callt overwrites PC next.
311| ins_callt
312|.endmacro
313|
314|//-----------------------------------------------------------------------
315|
316|// Macros to test operand types. reg indexes an 8 byte slot relative to BASE; the dword at +4 of the slot is compared.
317|.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro // Compare only; the caller branches on the flags.
318|.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro // Go to target if the dword is >= LJ_TISNUM (unsigned).
319|.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro // Go to target if the dword is != LJ_TISNUM.
320|.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro // Go to target if the dword is != LJ_TSTR.
321|.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro // Go to target if the dword is != LJ_TTAB.
322|
323|// These operands must be used with movzx. They address fields of the instruction just before PC.
324|.define PC_OP, byte [PC-4] // Byte 0 of that instruction (what ins_NEXT loads into OP).
325|.define PC_RA, byte [PC-3] // Byte 1 (what ins_NEXT loads into RA).
326|.define PC_RB, byte [PC-1] // Byte 3.
327|.define PC_RC, byte [PC-2] // Byte 2.
328|.define PC_RD, word [PC-2] // Bytes 2-3 as one 16 bit operand.
329|
330|.macro branchPC, reg // PC += (reg - BCBIAS_J) * 4; lea leaves the flags untouched.
331| lea PC, [PC+reg*4-BCBIAS_J*4]
332|.endmacro
333|
334|// Assumes DISPATCH is relative to GL. Both macros yield a displacement to add to DISPATCH to reach the given field.
335#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
336#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
337|// PC2PROTO: offset of a GCproto field measured back from the end of the GCproto struct (always negative).
338#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
339|
340|// Decrement hashed hotcount and trigger trace recorder if zero. Overwrites reg and the flags.
341|.macro hotloop, reg
342| mov reg, PC
343| shr reg, 1
344| and reg, HOTCOUNT_PCMASK // reg = (PC >> 1) & HOTCOUNT_PCMASK: offset of the counter for this PC.
345| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP // 16 bit counter -= HOTCOUNT_LOOP.
346| jb ->vm_hotloop // Taken when the subtraction borrows (counter was below HOTCOUNT_LOOP).
347|.endmacro
348|// Same scheme as hotloop, but subtracts HOTCOUNT_CALL and jumps to vm_hotcall.
349|.macro hotcall, reg
350| mov reg, PC
351| shr reg, 1
352| and reg, HOTCOUNT_PCMASK // reg = (PC >> 1) & HOTCOUNT_PCMASK: offset of the counter for this PC.
353| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL // 16 bit counter -= HOTCOUNT_CALL.
354| jb ->vm_hotcall // Taken when the subtraction borrows (counter was below HOTCOUNT_CALL).
355|.endmacro
356|
357|// Set current VM state. st is the suffix of an LJ_VMST_* constant (e.g. C, INTERP).
358|.macro set_vmstate, st
359| mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st // Stores the complement of LJ_VMST_<st>; '..' pastes the argument onto the prefix.
360|.endmacro
361|
362|// Annoying x87 stuff: support for two compare variants. The C variable cmov must be in scope where the macro is expanded.
363|.macro fcomparepp // Compare and pop st0 >< st1.
364||if (cmov) {
365| fucomip st1 // Compare st0 with st1 straight into EFLAGS and pop once.
366| fpop // Pop the second operand.
367||} else {
368| fucompp // Compare and pop both operands; result is in the x87 status word.
369| fnstsw ax // eax modified!
370| sahf // Copy ah into EFLAGS so the usual conditional jumps work.
371||}
372|.endmacro
373|
374|.macro fdup; fld st0; .endmacro // Push a copy of st0.
375|.macro fpop1; fstp st1; .endmacro // Store st0 into st1 and pop: drops the old st1, keeps the old st0.
376|
377|// Synthesize SSE FP constants. reg is the destination xmm register; tmp is a scratch GPR (unused by the x86 variant of sseconst_abs).
378|.macro sseconst_abs, reg, tmp // Synthesize abs mask.
379|.if X64
380| mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp // Load the 64 bit constant through tmp.
381|.else
382| pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1 // All ones, then each qword >> 1 = 0x7fffffffffffffff.
383|.endif
384|.endmacro
385|
386|.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const.
387|.if X64
388| mov64 tmp, U64x(val,00000000); movd reg, tmp // val in bits 32-63, low 32 bits zero.
389|.else
390| mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51 // movd puts val in dword 0; the shuffle moves it to dword 1 and zeroes the rest.
391|.endif
392|.endmacro
393|
394|.macro sseconst_sign, reg, tmp // Synthesize sign mask.
395| sseconst_hi reg, tmp, 80000000
396|.endmacro
397|.macro sseconst_1, reg, tmp // Synthesize 1.0.
398| sseconst_hi reg, tmp, 3ff00000
399|.endmacro
400|.macro sseconst_m1, reg, tmp // Synthesize -1.0.
401| sseconst_hi reg, tmp, bff00000
402|.endmacro
403|.macro sseconst_2p52, reg, tmp // Synthesize 2^52.
404| sseconst_hi reg, tmp, 43300000
405|.endmacro
406|.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51.
407| sseconst_hi reg, tmp, 43380000
408|.endmacro
409|
410|// Move table write barrier back. Overwrites reg. Pushes tab onto the front of the gc.grayagain list.
411|.macro barrierback, tab, reg
412| and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
413| mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = current list head.
414| mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab becomes the new head.
415| mov tab->gclist, reg // The old head is linked behind tab.
416|.endmacro
417|
418|//-----------------------------------------------------------------------
419
420/* Generate subroutines used by opcodes and other parts of the VM. */
421/* The .code_sub section should be last to help static branch prediction. */
422static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
423{
424 |.code_sub
425 |
426 |//-----------------------------------------------------------------------
427 |//-- Return handling ----------------------------------------------------
428 |//-----------------------------------------------------------------------
429 |
430 |->vm_returnp:
431 | test PC, FRAME_P // FRAME_P clear: not a pcall frame, hand over to cont_dispatch.
432 | jz ->cont_dispatch
433 |
434 | // Return from pcall or xpcall fast func.
435 | and PC, -8 // Clear the low 3 bits of PC.
436 | sub BASE, PC // Restore caller base.
437 | lea RAa, [RA+PC-8] // Rebase RA and prepend one result.
438 | mov PC, [BASE-4] // Fetch PC of previous frame.
439 | // Prepending may overwrite the pcall frame, so do it at the end.
440 | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
441 |
442 |->vm_returnc:
443 | add RD, 1 // RD = nresults+1
444 | mov MULTRES, RD
445 | test PC, FRAME_TYPE
446 | jz ->BC_RET_Z // Handle regular return to Lua.
447 |
448 |->vm_return:
449 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
450 | xor PC, FRAME_C
451 | test PC, FRAME_TYPE // Type bits still set after the xor: not a plain C frame.
452 | jnz ->vm_returnp
453 |
454 | // Return to C.
455 | set_vmstate C
456 | and PC, -8
457 | sub PC, BASE
458 | neg PC // Previous base = BASE - delta.
459 |
460 | sub RD, 1 // RD = nresults; zero means there is nothing to move.
461 | jz >2
462 |1: // Move results down.
463 |.if X64
464 | mov RBa, [BASE+RA] // One 64 bit copy per 8 byte slot.
465 | mov [BASE-8], RBa
466 |.else
467 | mov RB, [BASE+RA] // Two 32 bit copies per 8 byte slot.
468 | mov [BASE-8], RB
469 | mov RB, [BASE+RA+4]
470 | mov [BASE-4], RB
471 |.endif
472 | add BASE, 8
473 | sub RD, 1
474 | jnz <1
475 |2:
476 | mov L:RB, SAVE_L
477 | mov L:RB->base, PC
478 |3:
479 | mov RD, MULTRES
480 | mov RA, SAVE_NRES // RA = wanted nresults+1
481 |4:
482 | cmp RA, RD
483 | jne >6 // More/less results wanted?
484 |5:
485 | sub BASE, 8
486 | mov L:RB->top, BASE
487 |
488 |->vm_leave_cp:
489 | mov RAa, SAVE_CFRAME // Restore previous C frame.
490 | mov L:RB->cframe, RAa
491 | xor eax, eax // Ok return status for vm_pcall.
492 |
493 |->vm_leave_unw:
494 | restoreregs // Undo saveregs, then return to the C caller with the status in eax.
495 | ret
496 |
497 |6:
498 | jb >7 // Less results wanted?
499 | // More results wanted. Check stack size and fill up results with nil.
500 | cmp BASE, L:RB->maxstack
501 | ja >8
502 | mov dword [BASE-4], LJ_TNIL
503 | add BASE, 8
504 | add RD, 1
505 | jmp <4 // Re-compare: loops until RD reaches the wanted count.
506 |
507 |7: // Less results wanted.
508 | test RA, RA
509 | jz <5 // But check for LUA_MULTRET+1.
510 | sub RA, RD // Negative result!
511 | lea BASE, [BASE+RA*8] // Correct top.
512 | jmp <5
513 |
514 |8: // Corner case: need to grow stack for filling up results.
515 | // This can happen if:
516 | // - A C function grows the stack (a lot).
517 | // - The GC shrinks the stack in between.
518 | // - A return back from a lua_call() with (high) nresults adjustment.
519 | mov L:RB->top, BASE // Save current top held in BASE (yes).
520 | mov MULTRES, RD // Need to fill only remainder with nil.
521 | mov FCARG2, RA
522 | mov FCARG1, L:RB
523 | call extern lj_state_growstack@8 // (lua_State *L, int n)
524 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
525 | jmp <3 // Reload RD/RA and retry the fill.
526 |
527 |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall.
528 | // (void *cframe, int errcode)
529 |.if X64
530 | mov eax, CARG2d // Error return status for vm_pcall.
531 | mov rsp, CARG1 // Stack pointer = cframe argument.
532 |.else
533 | mov eax, FCARG2 // Error return status for vm_pcall.
534 | mov esp, FCARG1 // Stack pointer = cframe argument.
535 |.endif
536 |->vm_unwind_c_eh: // Landing pad for external unwinder.
537 | mov L:RB, SAVE_L
538 | mov GL:RB, L:RB->glref
539 | mov dword GL:RB->vmstate, ~LJ_VMST_C // vmstate = ~LJ_VMST_C, addressed via GL:RB instead of DISPATCH.
540 | jmp ->vm_leave_unw // eax still holds the status; restore registers and return.
541 |
542 |->vm_unwind_rethrow: // Code is emitted on x64/POSIX only; elsewhere the label falls through to vm_unwind_ff.
543 |.if X64 and not X64WIN
544 | mov FCARG1, SAVE_L
545 | mov FCARG2, eax
546 | restoreregs // Drop our frame first: the jmp below is a tail call.
547 | jmp extern lj_err_throw@8 // (lua_State *L, int errcode)
548 |.endif
549 |
550 |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall.
551 | // (void *cframe)
552 |.if X64
553 | and CARG1, CFRAME_RAWMASK // Mask the cframe argument before using it as stack pointer.
554 | mov rsp, CARG1
555 |.else
556 | and FCARG1, CFRAME_RAWMASK // Mask the cframe argument before using it as stack pointer.
557 | mov esp, FCARG1
558 |.endif
559 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
560 | mov L:RB, SAVE_L
561 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
562 | mov RD, 1+1 // Really 1+2 results, incr. later.
563 | mov BASE, L:RB->base
564 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
565 | add DISPATCH, GG_G2DISP
566 | mov PC, [BASE-4] // Fetch PC of previous frame.
567 | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
568 | set_vmstate INTERP
569 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
570 |
571 |//-----------------------------------------------------------------------
572 |//-- Grow stack for calls -----------------------------------------------
573 |//-----------------------------------------------------------------------
574 |
575 |->vm_growstack_c: // Grow stack for C function.
576 | mov FCARG2, LUA_MINSTACK // n = LUA_MINSTACK.
577 | jmp >2
578 |
579 |->vm_growstack_v: // Grow stack for vararg Lua function.
580 | sub RD, 8
581 | jmp >1
582 |
583 |->vm_growstack_f: // Grow stack for fixarg Lua function.
584 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
585 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8.
586 |1:
587 | movzx RA, byte [PC-4+PC2PROTO(framesize)] // RA = 8 bit framesize field, located via PC2PROTO relative to PC-4.
588 | add PC, 4 // Must point after first instruction.
589 | mov L:RB->base, BASE
590 | mov L:RB->top, RD
591 | mov SAVE_PC, PC
592 | mov FCARG2, RA // n = framesize.
593 |2:
594 | // RB = L, L->base = new base, L->top = top
595 | mov FCARG1, L:RB
596 | call extern lj_state_growstack@8 // (lua_State *L, int n)
597 | mov BASE, L:RB->base // Reload base and top from L after the call.
598 | mov RD, L:RB->top
599 | mov LFUNC:RB, [BASE-8]
600 | sub RD, BASE
601 | shr RD, 3
602 | add NARGS:RD, 1 // RD = (top - base) / 8 + 1.
603 | // BASE = new base, RB = LFUNC, RD = nargs+1
604 | ins_callt // Just retry the call.
605 |
606 |//-----------------------------------------------------------------------
607 |//-- Entry points into the assembler VM ---------------------------------
608 |//-----------------------------------------------------------------------
609 |
610 |->vm_resume: // Setup C frame and resume thread.
611 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
612 | saveregs
613 |.if X64
614 | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
615 | mov SAVE_L, CARG1d
616 | mov RA, CARG2d
617 |.else
618 | mov L:RB, SAVE_L
619 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
620 |.endif
621 | mov PC, FRAME_CP
622 | xor RD, RD // RD = 0: reused below as the zero source for several stores and the status compare.
623 | lea KBASEa, [esp+CFRAME_RESUME] // Stack pointer plus the CFRAME_RESUME displacement.
624 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
625 | add DISPATCH, GG_G2DISP
626 | mov L:RB->cframe, KBASEa
627 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
628 | mov SAVE_CFRAME, RDa
629 |.if X64
630 | mov SAVE_NRES, RD
631 | mov SAVE_ERRF, RD
632 |.endif
633 | cmp byte L:RB->status, RDL // L->status == 0?
634 | je >3 // Initial resume (like a call).
635 |
636 | // Resume after yield (like a return).
637 | set_vmstate INTERP
638 | mov byte L:RB->status, RDL // L->status = 0.
639 | mov BASE, L:RB->base
640 | mov RD, L:RB->top
641 | sub RD, RA
642 | shr RD, 3
643 | add RD, 1 // RD = nresults+1
644 | sub RA, BASE // RA = resultofs
645 | mov PC, [BASE-4]
646 | mov MULTRES, RD
647 | test PC, FRAME_TYPE
648 | jz ->BC_RET_Z
649 | jmp ->vm_return
650 |
651 |->vm_pcall: // Setup protected C frame and enter VM.
652 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
653 | saveregs
654 | mov PC, FRAME_CP
655 |.if X64
656 | mov SAVE_ERRF, CARG4d
657 |.endif
658 | jmp >1 // Share the remaining setup with vm_call.
659 |
660 |->vm_call: // Setup C frame and enter VM.
661 | // (lua_State *L, TValue *base, int nres1)
662 | saveregs
663 | mov PC, FRAME_C
664 |
665 |1: // Entry point for vm_pcall above (PC = ftype).
666 |.if X64
667 | mov SAVE_NRES, CARG3d
668 | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
669 | mov SAVE_L, CARG1d
670 | mov RA, CARG2d
671 |.else
672 | mov L:RB, SAVE_L
673 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
674 |.endif
675 |
676 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
677 | mov SAVE_CFRAME, KBASEa
678 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
679 |.if X64
680 | mov L:RB->cframe, rsp
681 |.else
682 | mov L:RB->cframe, esp
683 |.endif
684 |
685 |2: // Entry point for vm_cpcall below (RA = base, RB = L, PC = ftype).
686 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
687 | add DISPATCH, GG_G2DISP
688 |
689 |3: // Entry point for vm_resume above (RA = base, RB = L, PC = ftype).
690 | set_vmstate INTERP
691 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
692 | add PC, RA
693 | sub PC, BASE // PC = frame delta + frame type
694 |
695 | mov RD, L:RB->top
696 | sub RD, RA
697 | shr NARGS:RD, 3
698 | add NARGS:RD, 1 // RD = nargs+1
699 |
700 |->vm_call_dispatch:
701 | mov LFUNC:RB, [RA-8]
702 | cmp dword [RA-4], LJ_TFUNC
703 | jne ->vmeta_call // Ensure KBASE defined and != BASE.
704 |
705 |->vm_call_dispatch_f:
706 | mov BASE, RA
707 | ins_call
708 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
709 |
710 |->vm_cpcall: // Setup protected C frame, call C.
711 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
712 | saveregs
713 |.if X64
714 | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
715 | mov SAVE_L, CARG1d
716 |.else
717 | mov L:RB, SAVE_L
718 | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap!
719 | mov RC, INARG_CP_UD // Get args before they are overwritten.
720 | mov RA, INARG_CP_FUNC
721 | mov BASE, INARG_CP_CALL
722 |.endif
723 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
724 |
725 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
726 | sub KBASE, L:RB->top
727 | mov SAVE_ERRF, 0 // No error function.
728 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
729 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
730 |
731 |.if X64
732 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
733 | mov SAVE_CFRAME, KBASEa
734 | mov L:RB->cframe, rsp
735 |
736 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
737 |.else
738 | mov ARG3, RC // Have to copy args downwards.
739 | mov ARG2, RA
740 | mov ARG1, L:RB
741 |
742 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
743 | mov SAVE_CFRAME, KBASE
744 | mov L:RB->cframe, esp
745 |
746 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
747 |.endif
748 | // TValue * (new base) or NULL returned in eax (RC).
749 | test RC, RC
750 | jz ->vm_leave_cp // No base? Just remove C frame.
751 | mov RA, RC // RA = returned base, as expected at label 2.
752 | mov PC, FRAME_CP
753 | jmp <2 // Else continue with the call.
754 |
755 |//-----------------------------------------------------------------------
756 |//-- Metamethod handling ------------------------------------------------
757 |//-----------------------------------------------------------------------
758 |
759 |//-- Continuation dispatch ----------------------------------------------
760 |
761 |->cont_dispatch:
762 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
763 | add RA, BASE // RA = address of the first result.
764 | and PC, -8 // Clear the low 3 bits of PC.
765 | mov RB, BASE // Keep the meta base in RB.
766 | sub BASE, PC // Restore caller BASE.
767 | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg.
768 | mov RC, RA // ... in [RC]
769 | mov PC, [RB-12] // Restore PC from [cont|PC].
770 |.if X64
771 | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug.
772#if LJ_HASFFI
773 | cmp RA, 1
774 | jbe >1
775#endif
776 | lea KBASEa, qword [=>0] // The stored value is relative to this address; add it to get the target.
777 | add RAa, KBASEa
778 |.else
779 | mov RA, dword [RB-16] // x86: the stored continuation is used as the jump target directly.
780#if LJ_HASFFI
781 | cmp RA, 1
782 | jbe >1
783#endif
784 |.endif
785 | mov LFUNC:KBASE, [BASE-8]
786 | mov KBASE, LFUNC:KBASE->pc
787 | mov KBASE, [KBASE+PC2PROTO(k)]
788 | // BASE = base, RC = result, RB = meta base
789 | jmp RAa // Jump to continuation.
790 |
791#if LJ_HASFFI
792 |1:
793 | je ->cont_ffi_callback // cont = 1: return from FFI callback.
794 | // cont = 0: Tail call from C function.
795 | sub RB, BASE
796 | shr RB, 3
797 | lea RD, [RB-1] // RD = (meta base - BASE) / 8 - 1.
798 | jmp ->vm_call_tail
799#endif
800 |
801 |->cont_cat: // BASE = base, RC = result, RB = mbase
802 | movzx RA, PC_RB
803 | sub RB, 16
804 | lea RA, [BASE+RA*8] // RA = address of the slot named by the RB operand of the instruction.
805 | sub RA, RB
806 | je ->cont_ra // RA == RB: store the result via cont_ra instead.
807 | neg RA
808 | shr RA, 3 // RA = (RB - slot address) / 8.
809 |.if X64WIN
810 | mov CARG3d, RA
811 | mov L:CARG1d, SAVE_L
812 | mov L:CARG1d->base, BASE
813 | mov RCa, [RC] // Copy the 8 byte result to [RB].
814 | mov [RB], RCa
815 | mov CARG2d, RB
816 |.elif X64
817 | mov L:CARG1d, SAVE_L
818 | mov L:CARG1d->base, BASE
819 | mov CARG3d, RA
820 | mov RAa, [RC] // Copy the 8 byte result to [RB].
821 | mov [RB], RAa
822 | mov CARG2d, RB
823 |.else
824 | mov ARG3, RA
825 | mov RA, [RC+4] // Copy the 8 byte result to [RB] as two dwords.
826 | mov RC, [RC]
827 | mov [RB+4], RA
828 | mov [RB], RC
829 | mov ARG2, RB
830 |.endif
831 | jmp ->BC_CAT_Z // Continue at BC_CAT_Z with the arguments prepared above.
832 |
833 |//-- Table indexing metamethods -----------------------------------------
834 |
835 |->vmeta_tgets:
836 | mov TMP1, RC // RC = GCstr *
837 | mov TMP2, LJ_TSTR
838 | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
839 | cmp PC_OP, BC_GGET
840 | jne >1 // Not BC_GGET: the table is reloaded from the RB operand at label 1.
841 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
842 | mov [RA], TAB:RB // RB = GCtab *
843 | mov dword [RA+4], LJ_TTAB
844 | mov RB, RA
845 | jmp >2
846 |
847 |->vmeta_tgetb:
848 | movzx RC, PC_RC
849 if (LJ_DUALNUM) {
850 | mov TMP2, LJ_TISNUM
851 | mov TMP1, RC
852 } else if (sse) {
853 | cvtsi2sd xmm0, RC // Convert the integer key to a double.
854 | movsd TMPQ, xmm0
855 } else {
856 |.if not X64
857 | mov ARG4, RC // x87 path: go through memory to convert the integer key to a double.
858 | fild ARG4
859 | fstp TMPQ
860 |.endif
861 }
862 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
863 | jmp >1
864 |
865 |->vmeta_tgetv:
866 | movzx RC, PC_RC // Reload TValue *k from RC.
867 | lea RC, [BASE+RC*8]
868 |1:
869 | movzx RB, PC_RB // Reload TValue *t from RB.
870 | lea RB, [BASE+RB*8]
871 |2:
872 |.if X64
873 | mov L:CARG1d, SAVE_L
874 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
875 | mov CARG2d, RB
876 | mov CARG3, RCa // May be 64 bit ptr to stack.
877 | mov L:RB, L:CARG1d // Keep L in RB across the call.
878 |.else
879 | mov ARG2, RB
880 | mov L:RB, SAVE_L
881 | mov ARG3, RC
882 | mov ARG1, L:RB
883 | mov L:RB->base, BASE
884 |.endif
885 | mov SAVE_PC, PC
886 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
887 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
888 | mov BASE, L:RB->base // Reload BASE from L after the call.
889 | test RC, RC
890 | jz >3
891 |->cont_ra: // BASE = base, RC = result
892 | movzx RA, PC_RA
893 |.if X64
894 | mov RBa, [RC] // Copy the 8 byte result into slot RA.
895 | mov [BASE+RA*8], RBa
896 |.else
897 | mov RB, [RC+4] // Copy the 8 byte result into slot RA as two dwords.
898 | mov RC, [RC]
899 | mov [BASE+RA*8+4], RB
900 | mov [BASE+RA*8], RC
901 |.endif
902 | ins_next
903 |
904 |3: // Call __index metamethod.
905 | // BASE = base, L->top = new base, stack = cont/func/t/k
906 | mov RA, L:RB->top
907 | mov [RA-12], PC // [cont|PC]
908 | lea PC, [RA+FRAME_CONT]
909 | sub PC, BASE // PC = (new base - BASE) + FRAME_CONT.
910 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
911 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
912 | jmp ->vm_call_dispatch_f
913 |
914 |//-----------------------------------------------------------------------
915 |
916 |->vmeta_tsets:
917 | mov TMP1, RC // RC = GCstr *
918 | mov TMP2, LJ_TSTR
919 | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
920 | cmp PC_OP, BC_GSET
921 | jne >1 // Not BC_GSET: the table is reloaded from the RB operand at label 1.
922 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
923 | mov [RA], TAB:RB // RB = GCtab *
924 | mov dword [RA+4], LJ_TTAB
925 | mov RB, RA
926 | jmp >2
927 |
928 |->vmeta_tsetb:
929 | movzx RC, PC_RC
930 if (LJ_DUALNUM) {
931 | mov TMP2, LJ_TISNUM
932 | mov TMP1, RC
933 } else if (sse) {
934 | cvtsi2sd xmm0, RC // Convert the integer key to a double.
935 | movsd TMPQ, xmm0
936 } else {
937 |.if not X64
938 | mov ARG4, RC // x87 path: go through memory to convert the integer key to a double.
939 | fild ARG4
940 | fstp TMPQ
941 |.endif
942 }
943 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
944 | jmp >1
945 |
946 |->vmeta_tsetv:
947 | movzx RC, PC_RC // Reload TValue *k from RC.
948 | lea RC, [BASE+RC*8]
949 |1:
950 | movzx RB, PC_RB // Reload TValue *t from RB.
951 | lea RB, [BASE+RB*8]
952 |2:
953 |.if X64
954 | mov L:CARG1d, SAVE_L
955 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
956 | mov CARG2d, RB
957 | mov CARG3, RCa // May be 64 bit ptr to stack.
958 | mov L:RB, L:CARG1d // Keep L in RB across the call.
959 |.else
960 | mov ARG2, RB
961 | mov L:RB, SAVE_L
962 | mov ARG3, RC
963 | mov ARG1, L:RB
964 | mov L:RB->base, BASE
965 |.endif
966 | mov SAVE_PC, PC
967 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
968 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
969 | mov BASE, L:RB->base // Reload BASE from L after the call.
970 | test RC, RC
971 | jz >3
972 | // NOBARRIER: lj_meta_tset ensures the table is not black.
973 | movzx RA, PC_RA
974 |.if X64
975 | mov RBa, [BASE+RA*8] // Copy the 8 byte value from slot RA to the returned location.
976 | mov [RC], RBa
977 |.else
978 | mov RB, [BASE+RA*8+4] // Copy the 8 byte value from slot RA to the returned location as two dwords.
979 | mov RA, [BASE+RA*8]
980 | mov [RC+4], RB
981 | mov [RC], RA
982 |.endif
983 |->cont_nop: // BASE = base, (RC = result)
984 | ins_next
985 |
986 |3: // Call __newindex metamethod.
987 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
988 | mov RA, L:RB->top
989 | mov [RA-12], PC // [cont|PC]
990 | movzx RC, PC_RA
991 | // Copy value to third argument.
992 |.if X64
993 | mov RBa, [BASE+RC*8]
994 | mov [RA+16], RBa
995 |.else
996 | mov RB, [BASE+RC*8+4]
997 | mov RC, [BASE+RC*8]
998 | mov [RA+20], RB
999 | mov [RA+16], RC
1000 |.endif
1001 | lea PC, [RA+FRAME_CONT]
1002 | sub PC, BASE // PC = (new base - BASE) + FRAME_CONT.
1003 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
1004 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1005 | jmp ->vm_call_dispatch_f
1006 |
1007 |//-- Comparison metamethods ---------------------------------------------
1008 |
1009 |->vmeta_comp:
1010 |.if X64
1011 | mov L:RB, SAVE_L
1012 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE.
1013 |.if X64WIN
1014 | lea CARG3d, [BASE+RD*8]
1015 | lea CARG2d, [BASE+RA*8]
1016 |.else
1017 | lea CARG2d, [BASE+RA*8]
1018 | lea CARG3d, [BASE+RD*8]
1019 |.endif
1020 | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA.
1021 | movzx CARG4d, PC_OP
1022 |.else
1023 | movzx RB, PC_OP
1024 | lea RD, [BASE+RD*8]
1025 | lea RA, [BASE+RA*8]
1026 | mov ARG4, RB
1027 | mov L:RB, SAVE_L
1028 | mov ARG3, RD
1029 | mov ARG2, RA
1030 | mov ARG1, L:RB
1031 | mov L:RB->base, BASE
1032 |.endif
1033 | mov SAVE_PC, PC
1034 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
1035 | // 0/1 or TValue * (metamethod) returned in eax (RC).
1036 |3:
1037 | mov BASE, L:RB->base
1038 | cmp RC, 1
1039 | ja ->vmeta_binop
1040 |4:
1041 | lea PC, [PC+4]
1042 | jb >6
1043 |5:
1044 | movzx RD, PC_RD
1045 | branchPC RD
1046 |6:
1047 | ins_next
1048 |
1049 |->cont_condt: // BASE = base, RC = result. Continuation that branches when the result is true.
1050 | add PC, 4
1051 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true.
1052 | jb <5 // Type tag below LJ_TISTRUECOND: take the branch (label 5 of vmeta_comp).
1053 | jmp <6 // Otherwise dispatch the next instruction (label 6 of vmeta_comp).
1054 |
1055 |->cont_condf: // BASE = base, RC = result. Continuation that branches when the result is false.
1056 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false.
1057 | jmp <4 // Label 4 of vmeta_comp advances PC and skips the branch on 'below'.
1058 |
1059 |->vmeta_equal: // Equality slow path: calls lj_meta_equal(L, RA, RD, RB) and reuses the vmeta_comp tail.
1060 | sub PC, 4 // Step PC back by 4; label 4 of vmeta_comp adds 4 again after the call.
1061 |.if X64WIN
1062 | mov CARG3d, RD
1063 | mov CARG4d, RB
1064 | mov L:RB, SAVE_L
1065 | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
1066 | mov CARG2d, RA
1067 | mov CARG1d, L:RB // Caveat: CARG1d == RA.
1068 |.elif X64
1069 | mov CARG2d, RA
1070 | mov CARG4d, RB // Caveat: CARG4d == RA.
1071 | mov L:RB, SAVE_L
1072 | mov L:RB->base, BASE // Caveat: CARG3d == BASE.
1073 | mov CARG3d, RD
1074 | mov CARG1d, L:RB
1075 |.else
1076 | mov ARG4, RB // RB (the 'ne' flag argument) is stored before RB is reused for L.
1077 | mov L:RB, SAVE_L
1078 | mov ARG3, RD
1079 | mov ARG2, RA
1080 | mov ARG1, L:RB
1081 | mov L:RB->base, BASE
1082 |.endif
1083 | mov SAVE_PC, PC
1084 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
1085 | // 0/1 or TValue * (metamethod) returned in eax (RC).
1086 | jmp <3 // Continue with the shared result handling in vmeta_comp.
1087 |
1088 |->vmeta_equal_cd: // Equality slow path taking the raw instruction; body is only emitted with LJ_HASFFI.
1089#if LJ_HASFFI
1090 | sub PC, 4
1091 | mov L:RB, SAVE_L
1092 | mov L:RB->base, BASE
1093 | mov FCARG1, L:RB
1094 | mov FCARG2, dword [PC-4] // Second argument is the instruction word at PC-4 (after the adjustment above).
1095 | mov SAVE_PC, PC
1096 | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins)
1097 | // 0/1 or TValue * (metamethod) returned in eax (RC).
1098 | jmp <3 // Continue with the shared result handling in vmeta_comp.
1099#endif
1100 |
1101 |//-- Arithmetic metamethods ---------------------------------------------
1102 |
1103 |->vmeta_arith_vno: // Arithmetic fallback entry points; each leaves RB/RC pointing at the two operands.
1104#if LJ_DUALNUM
1105 | movzx RB, PC_RB
1106#endif
1107 |->vmeta_arith_vn: // Second operand is a constant: RC = &KBASE[RC].
1108 | lea RC, [KBASE+RC*8]
1109 | jmp >1
1110 |
1111 |->vmeta_arith_nvo:
1112#if LJ_DUALNUM
1113 | movzx RC, PC_RC
1114#endif
1115 |->vmeta_arith_nv: // Constant and slot addresses are computed, then swapped between RB and RC.
1116 | lea RC, [KBASE+RC*8]
1117 | lea RB, [BASE+RB*8]
1118 | xchg RB, RC
1119 | jmp >2
1120 |
1121 |->vmeta_unm: // Unary minus: both operand pointers refer to BASE[RD].
1122 | lea RC, [BASE+RD*8]
1123 | mov RB, RC
1124 | jmp >2
1125 |
1126 |->vmeta_arith_vvo:
1127#if LJ_DUALNUM
1128 | movzx RB, PC_RB
1129#endif
1130 |->vmeta_arith_vv: // Both operands are stack slots.
1131 | lea RC, [BASE+RC*8]
1132 |1:
1133 | lea RB, [BASE+RB*8]
1134 |2: // Common part: RB and RC hold operand addresses.
1135 | lea RA, [BASE+RA*8] // RA = &BASE[RA], passed as the 'ra' argument.
1136 |.if X64WIN
1137 | mov CARG3d, RB
1138 | mov CARG4d, RC
1139 | movzx RC, PC_OP
1140 | mov ARG5d, RC
1141 | mov L:RB, SAVE_L
1142 | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
1143 | mov CARG2d, RA
1144 | mov CARG1d, L:RB // Caveat: CARG1d == RA.
1145 |.elif X64
1146 | movzx CARG5d, PC_OP
1147 | mov CARG2d, RA
1148 | mov CARG4d, RC // Caveat: CARG4d == RA.
1149 | mov L:CARG1d, SAVE_L
1150 | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE.
1151 | mov CARG3d, RB
1152 | mov L:RB, L:CARG1d // Keep L in RB for use after the call.
1153 |.else
1154 | mov ARG3, RB
1155 | mov L:RB, SAVE_L
1156 | mov ARG4, RC
1157 | movzx RC, PC_OP
1158 | mov ARG2, RA
1159 | mov ARG5, RC
1160 | mov ARG1, L:RB
1161 | mov L:RB->base, BASE
1162 |.endif
1163 | mov SAVE_PC, PC
1164 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1165 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
1166 | mov BASE, L:RB->base
1167 | test RC, RC
1168 | jz ->cont_nop // NULL: finished. Otherwise fall through to vmeta_binop.
1169 |
1170 | // Call metamethod for binary op.
1171 |->vmeta_binop: // Sets up a continuation frame at the new base and dispatches the call.
1172 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1173 | mov RA, RC
1174 | sub RC, BASE // RC = offset of the new base relative to BASE.
1175 | mov [RA-12], PC // [cont|PC]
1176 | lea PC, [RC+FRAME_CONT] // PC = frame offset combined with FRAME_CONT.
1177 | mov NARGS:RD, 2+1 // 2 args for func(o1, o2).
1178 | jmp ->vm_call_dispatch
1179 |
1180 |->vmeta_len: // Length operator fallback: calls lj_meta_len(L, &BASE[RD]).
1181 | mov L:RB, SAVE_L
1182 | mov L:RB->base, BASE
1183 | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE
1184 | mov L:FCARG1, L:RB
1185 | mov SAVE_PC, PC
1186 | call extern lj_meta_len@8 // (lua_State *L, TValue *o)
1187 | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
1188 | mov BASE, L:RB->base
1189#ifdef LUAJIT_ENABLE_LUA52COMPAT
1190 | test RC, RC
1191 | jne ->vmeta_binop // Binop call for compatibility.
1192 | movzx RD, PC_RD // NULL result: reload the operand and retry via BC_LEN_Z.
1193 | mov TAB:FCARG1, [BASE+RD*8]
1194 | jmp ->BC_LEN_Z
1195#else
1196 | jmp ->vmeta_binop // Binop call for compatibility.
1197#endif
1198 |
1199 |//-- Call metamethod ----------------------------------------------------
1200 |
1201 |->vmeta_call_ra: // Variant that first computes the new base from the RA operand.
1202 | lea RA, [BASE+RA*8+8]
1203 |->vmeta_call: // Resolve and call __call metamethod.
1204 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1205 | mov TMP2, RA // Save RA, RC for us.
1206 | mov TMP1, NARGS:RD
1207 | sub RA, 8 // RA now points at the slot below the new base, passed as 'func'.
1208 |.if X64
1209 | mov L:RB, SAVE_L
1210 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1211 | mov CARG2d, RA
1212 | lea CARG3d, [RA+NARGS:RD*8]
1213 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
1214 |.else
1215 | lea RC, [RA+NARGS:RD*8]
1216 | mov L:RB, SAVE_L
1217 | mov ARG2, RA
1218 | mov ARG3, RC
1219 | mov ARG1, L:RB
1220 | mov L:RB->base, BASE // This is the caller's base!
1221 |.endif
1222 | mov SAVE_PC, PC
1223 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1224 | mov BASE, L:RB->base
1225 | mov RA, TMP2 // Restore the values saved before the call.
1226 | mov NARGS:RD, TMP1
1227 | mov LFUNC:RB, [RA-8]
1228 | add NARGS:RD, 1 // The argument count grows by one after lj_meta_call.
1229 | // This is fragile. L->base must not move, KBASE must always be defined.
1230 | cmp KBASE, BASE // Continue with CALLT if flag set.
1231 | je ->BC_CALLT_Z
1232 | mov BASE, RA
1233 | ins_call // Otherwise call resolved metamethod.
1234 |
1235 |//-- Argument coercion for 'for' statement ------------------------------
1236 |
1237 |->vmeta_for: // 'for' loop argument coercion: calls lj_meta_for(L, RA) and re-dispatches the instruction.
1238 | mov L:RB, SAVE_L
1239 | mov L:RB->base, BASE
1240 | mov FCARG2, RA // Caveat: FCARG2 == BASE
1241 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
1242 | mov SAVE_PC, PC
1243 | call extern lj_meta_for@8 // (lua_State *L, TValue *base)
1244 | mov BASE, L:RB->base
1245 | mov RC, [PC-4] // Reload the instruction word at PC-4 and decode it again.
1246 | movzx RA, RCH // Second byte -> RA.
1247 | movzx OP, RCL // Lowest byte -> OP.
1248 | shr RC, 16 // Upper 16 bits stay in RC.
1249 |.if X64
1250 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1251 |.else
1252 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI.
1253 |.endif
1254 |
1255 |//-----------------------------------------------------------------------
1256 |//-- Fast functions -----------------------------------------------------
1257 |//-----------------------------------------------------------------------
1258 |
1259 |.macro .ffunc, name // Defines the global label ff_<name> without any argument check.
1260 |->ff_ .. name:
1261 |.endmacro
1262 |
1263 |.macro .ffunc_1, name // ff_<name>; falls back unless at least 1 argument was passed.
1264 |->ff_ .. name:
1265 | cmp NARGS:RD, 1+1; jb ->fff_fallback // NARGS:RD holds nargs+1. Flags of this cmp stay live for the user.
1266 |.endmacro
1267 |
1268 |.macro .ffunc_2, name // ff_<name>; falls back unless at least 2 arguments were passed.
1269 |->ff_ .. name:
1270 | cmp NARGS:RD, 2+1; jb ->fff_fallback
1271 |.endmacro
1272 |
1273 |.macro .ffunc_n, name // 1-argument fast function; loads a number argument onto the x87 stack.
1274 | .ffunc_1 name
1275 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback // Type tag not below LJ_TISNUM: fall back.
1276 | fld qword [BASE]
1277 |.endmacro
1278 |
1279 |.macro .ffunc_n, name, op // Same, but emits 'op' before loading the argument.
1280 | .ffunc_1 name
1281 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1282 | op
1283 | fld qword [BASE]
1284 |.endmacro
1285 |
1286 |.macro .ffunc_nsse, name, op // SSE variant: applies 'op xmm0, <argument 1>'.
1287 | .ffunc_1 name
1288 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1289 | op xmm0, qword [BASE]
1290 |.endmacro
1291 |
1292 |.macro .ffunc_nsse, name // SSE variant that just loads argument 1 into xmm0.
1293 | .ffunc_nsse name, movsd
1294 |.endmacro
1295 |
1296 |.macro .ffunc_nn, name // 2-argument fast function; x87 loads leave arg 2 in st0 and arg 1 in st1.
1297 | .ffunc_2 name
1298 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1299 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1300 | fld qword [BASE]
1301 | fld qword [BASE+8]
1302 |.endmacro
1303 |
1304 |.macro .ffunc_nnsse, name // SSE variant: xmm0 = argument 1, xmm1 = argument 2.
1305 | .ffunc_2 name
1306 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1307 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1308 | movsd xmm0, qword [BASE]
1309 | movsd xmm1, qword [BASE+8]
1310 |.endmacro
1311 |
1312 |.macro .ffunc_nnr, name // Reversed x87 load order: arg 1 ends up in st0 and arg 2 in st1.
1313 | .ffunc_2 name
1314 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1315 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1316 | fld qword [BASE+8]
1317 | fld qword [BASE]
1318 |.endmacro
1319 |
1320 |// Inlined GC threshold check. Caveat: uses label 1.
1321 |.macro ffgccheck // Calls fff_gcstep unless gc.total is below gc.threshold. Overwrites RB.
1322 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1323 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1324 | jb >1 // Below the threshold: skip the GC step.
1325 | call ->fff_gcstep
1326 |1:
1327 |.endmacro
1328 |
1329 |//-- Base library: checks -----------------------------------------------
1330 |
1331 |.ffunc_1 assert // Fast path for a true first argument: all arguments are moved down one slot as results.
1332 | mov RB, [BASE+4]
1333 | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback // Not a true value: fall back.
1334 | mov PC, [BASE-4]
1335 | mov MULTRES, RD // Remember nargs+1; it is reloaded as the result count at label 2.
1336 | mov [BASE-4], RB // Copy the first argument to the slot at BASE-8.
1337 | mov RB, [BASE]
1338 | mov [BASE-8], RB
1339 | sub RD, 2 // RD = number of remaining arguments.
1340 | jz >2
1341 | mov RA, BASE
1342 |1: // Move each remaining argument down by one slot (8 bytes).
1343 | add RA, 8
1344 |.if X64
1345 | mov RBa, [RA]
1346 | mov [RA-8], RBa
1347 |.else
1348 | mov RB, [RA+4]
1349 | mov [RA-4], RB
1350 | mov RB, [RA]
1351 | mov [RA-8], RB
1352 |.endif
1353 | sub RD, 1
1354 | jnz <1
1355 |2:
1356 | mov RD, MULTRES
1357 | jmp ->fff_res_ // Enter after the MULTRES store, which already holds this value.
1358 |
1359 |.ffunc_1 type // Returns a string taken from the C function's upvalues, indexed by the inverted type tag.
1360 | mov RB, [BASE+4]
1361 |.if X64
1362 | mov RA, RB
1363 | sar RA, 15
1364 | cmp RA, -2
1365 | je >3 // x64: tags with (tag >> 15) == -2 use the LJ_TLIGHTUD index (label 3).
1366 |.endif
1367 | mov RC, ~LJ_TNUMX
1368 | not RB
1369 | cmp RC, RB // RC = unsigned minimum of ~LJ_TNUMX and ~tag.
1370 ||if (cmov) {
1371 | cmova RC, RB
1372 ||} else {
1373 | jbe >1; mov RC, RB; 1:
1374 ||}
1375 |2:
1376 | mov CFUNC:RB, [BASE-8]
1377 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] // Load upvalue[RC] of the function.
1378 | mov PC, [BASE-4]
1379 | mov dword [BASE-4], LJ_TSTR // Result (at BASE-8) is tagged as a string.
1380 | mov [BASE-8], STR:RC
1381 | jmp ->fff_res1
1382 |.if X64
1383 |3:
1384 | mov RC, ~LJ_TLIGHTUD
1385 | jmp <2
1386 |.endif
1387 |
1388 |//-- Base library: getters and setters ---------------------------------
1389 |
1390 |.ffunc_1 getmetatable // Returns mt.__metatable if present and non-nil, else the metatable, else nil.
1391 | mov RB, [BASE+4]
1392 | mov PC, [BASE-4]
1393 | cmp RB, LJ_TTAB; jne >6
1394 |1: // Field metatable must be at same offset for GCtab and GCudata!
1395 | mov TAB:RB, [BASE]
1396 | mov TAB:RB, TAB:RB->metatable
1397 |2: // TAB:RB = metatable or NULL. Also entered from label 8 with a base metatable.
1398 | test TAB:RB, TAB:RB
1399 | mov dword [BASE-4], LJ_TNIL // mov does not change the flags set by test.
1400 | jz ->fff_res1 // No metatable: return nil.
1401 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)]
1402 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
1403 | mov [BASE-8], TAB:RB
1404 | mov RA, TAB:RB->hmask // Locate the hash chain head: node[hmask & str->hash].
1405 | and RA, STR:RC->hash
1406 | imul RA, #NODE
1407 | add NODE:RA, TAB:RB->node
1408 |3: // Rearranged logic, because we expect _not_ to find the key.
1409 | cmp dword NODE:RA->key.it, LJ_TSTR
1410 | jne >4
1411 | cmp dword NODE:RA->key.gcr, STR:RC
1412 | je >5
1413 |4: // Follow the chain until the next pointer is NULL.
1414 | mov NODE:RA, NODE:RA->next
1415 | test NODE:RA, NODE:RA
1416 | jnz <3
1417 | jmp ->fff_res1 // Not found, keep default result.
1418 |5: // Key found: the node's value is read from the first 8 bytes of the node.
1419 | mov RB, [RA+4]
1420 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1421 | mov RC, [RA]
1422 | mov [BASE-4], RB // Return value of mt.__metatable.
1423 | mov [BASE-8], RC
1424 | jmp ->fff_res1
1425 |
1426 |6: // Not a table: userdata uses the same path, other types use a base metatable.
1427 | cmp RB, LJ_TUDATA; je <1
1428 |.if X64
1429 | cmp RB, LJ_TNUMX; ja >8
1430 | cmp RB, LJ_TISNUM; jbe >7
1431 | mov RB, LJ_TLIGHTUD
1432 | jmp >8
1433 |7:
1434 |.else
1435 | cmp RB, LJ_TISNUM; ja >8
1436 |.endif
1437 | mov RB, LJ_TNUMX
1438 |8: // RB = type tag used to select the base metatable.
1439 | not RB
1440 | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])] // Index the base metatable roots by ~tag.
1441 | jmp <2
1442 |
1443 |.ffunc_2 setmetatable // Fast path only; every other case goes to fff_fallback.
1444 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1445 | // Fast path: no mt for table yet and not clearing the mt.
1446 | mov TAB:RB, [BASE]
1447 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback // Table already has a metatable.
1448 | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback // Second argument is not a table.
1449 | mov TAB:RC, [BASE+8]
1450 | mov TAB:RB->metatable, TAB:RC
1451 | mov PC, [BASE-4]
1452 | mov dword [BASE-4], LJ_TTAB // Return original table.
1453 | mov [BASE-8], TAB:RB
1454 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1455 | jz >1
1456 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1457 | barrierback TAB:RB, RC
1458 |1:
1459 | jmp ->fff_res1
1460 |
1461 |.ffunc_2 rawget // For a table first argument: calls lj_tab_get(L, t, &key) and returns the slot value.
1462 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1463 |.if X64WIN
1464 | mov RB, BASE // Save BASE.
1465 | lea CARG3d, [BASE+8]
1466 | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
1467 | mov CARG1d, SAVE_L
1468 |.elif X64
1469 | mov RB, BASE // Save BASE.
1470 | mov CARG2d, [BASE]
1471 | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
1472 | mov CARG1d, SAVE_L
1473 |.else
1474 | mov TAB:RD, [BASE]
1475 | mov L:RB, SAVE_L
1476 | mov ARG2, TAB:RD
1477 | mov ARG1, L:RB
1478 | mov RB, BASE // Save BASE.
1479 | add BASE, 8 // BASE is reused as scratch to form the key address.
1480 | mov ARG3, BASE
1481 |.endif
1482 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1483 | // cTValue * returned in eax (RD).
1484 | mov BASE, RB // Restore BASE.
1485 | // Copy table slot.
1486 |.if X64
1487 | mov RBa, [RD]
1488 | mov PC, [BASE-4] // PC is loaded before the result overwrites the slot at BASE-8.
1489 | mov [BASE-8], RBa
1490 |.else
1491 | mov RB, [RD]
1492 | mov RD, [RD+4]
1493 | mov PC, [BASE-4] // PC is loaded before the result tag overwrites [BASE-4].
1494 | mov [BASE-8], RB
1495 | mov [BASE-4], RD
1496 |.endif
1497 | jmp ->fff_res1
1498 |
1499 |//-- Base library: conversions ------------------------------------------
1500 |
1501 |.ffunc tonumber // Returns a number argument unchanged; everything else falls back.
1502 | // Only handles the number case inline (without a base argument).
1503 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
1504 | cmp dword [BASE+4], LJ_TISNUM
1505 if (LJ_DUALNUM) {
1506 | jne >1
1507 | mov RB, dword [BASE]; jmp ->fff_resi // Tag equals LJ_TISNUM: return the 32 bit value via fff_resi.
1508 |1:
1509 | ja ->fff_fallback // Flags are still those of the cmp above.
1510 } else {
1511 | jae ->fff_fallback
1512 }
1513 if (sse) {
1514 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0
1515 } else {
1516 | fld qword [BASE]; jmp ->fff_resn
1517 }
1518 |
1519 |.ffunc_1 tostring // Returns a string argument as is; converts numbers with a C helper.
1520 | // Only handles the string or number case inline.
1521 | mov PC, [BASE-4]
1522 | cmp dword [BASE+4], LJ_TSTR; jne >3
1523 | // A __tostring method in the string base metatable is ignored.
1524 | mov STR:RD, [BASE]
1525 |2: // Return the string in STR:RD.
1526 | mov dword [BASE-4], LJ_TSTR
1527 | mov [BASE-8], STR:RD
1528 | jmp ->fff_res1
1529 |3: // Handle numbers inline, unless a number base metatable is present.
1530 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1531 | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1532 | jne ->fff_fallback
1533 | ffgccheck // Caveat: uses label 1.
1534 | mov L:RB, SAVE_L
1535 | mov L:RB->base, BASE // Add frame since C call can throw.
1536 | mov SAVE_PC, PC // Redundant (but a defined value).
1537 |.if X64 and not X64WIN
1538 | mov FCARG2, BASE // Otherwise: FCARG2 == BASE
1539 |.endif
1540 | mov L:FCARG1, L:RB
1541 if (LJ_DUALNUM) {
1542 | call extern lj_str_fromnumber@8 // (lua_State *L, cTValue *o)
1543 } else {
1544 | call extern lj_str_fromnum@8 // (lua_State *L, lua_Number *np)
1545 }
1546 | // GCstr returned in eax (RD).
1547 | mov BASE, L:RB->base
1548 | jmp <2 // Store the new string as the result.
1549 |
1550 |//-- Base library: iterators -------------------------------------------
1551 |
1552 |.ffunc_1 next // For a table first argument: calls lj_tab_next and returns key and value, or nil at the end.
1553 | je >2 // Missing 2nd arg?
1554 |1:
1555 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1556 | mov L:RB, SAVE_L
1557 | mov L:RB->base, BASE // Add frame since C call can throw.
1558 | mov L:RB->top, BASE // Dummy frame length is ok.
1559 | mov PC, [BASE-4]
1560 |.if X64WIN
1561 | lea CARG3d, [BASE+8]
1562 | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
1563 | mov CARG1d, L:RB
1564 |.elif X64
1565 | mov CARG2d, [BASE]
1566 | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
1567 | mov CARG1d, L:RB
1568 |.else
1569 | mov TAB:RD, [BASE]
1570 | mov ARG2, TAB:RD
1571 | mov ARG1, L:RB
1572 | add BASE, 8 // BASE is scratch here; it is reloaded from L->base after the call.
1573 | mov ARG3, BASE
1574 |.endif
1575 | mov SAVE_PC, PC // Redundant (but a defined value).
1576 | call extern lj_tab_next // (lua_State *L, GCtab *t, TValue *key)
1577 | // Flag returned in eax (RD).
1578 | mov BASE, L:RB->base
1579 | test RD, RD; jz >3 // End of traversal?
1580 | // Copy key and value to results.
1581 |.if X64
1582 | mov RBa, [BASE+8] // The two slots starting at BASE+8 are copied to BASE-8 and BASE.
1583 | mov RDa, [BASE+16]
1584 | mov [BASE-8], RBa
1585 | mov [BASE], RDa
1586 |.else
1587 | mov RB, [BASE+8]
1588 | mov RD, [BASE+12]
1589 | mov [BASE-8], RB
1590 | mov [BASE-4], RD
1591 | mov RB, [BASE+16]
1592 | mov RD, [BASE+20]
1593 | mov [BASE], RB
1594 | mov [BASE+4], RD
1595 |.endif
1596 |->fff_res2: // Shared exit: return two results.
1597 | mov RD, 1+2
1598 | jmp ->fff_res
1599 |2: // Set missing 2nd arg to nil.
1600 | mov dword [BASE+12], LJ_TNIL
1601 | jmp <1
1602 |3: // End of traversal: return nil.
1603 | mov dword [BASE-4], LJ_TNIL
1604 | jmp ->fff_res1
1605 |
1606 |.ffunc_1 pairs // For a table argument: returns upvalue[0] of this function, the table and nil.
1607 | mov TAB:RB, [BASE]
1608 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1609#ifdef LUAJIT_ENABLE_LUA52COMPAT
1610 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
1611#endif
1612 | mov CFUNC:RB, [BASE-8]
1613 | mov CFUNC:RD, CFUNC:RB->upvalue[0]
1614 | mov PC, [BASE-4]
1615 | mov dword [BASE-4], LJ_TFUNC // First result: the function from upvalue[0].
1616 | mov [BASE-8], CFUNC:RD
1617 | mov dword [BASE+12], LJ_TNIL // Third result: nil. The table at BASE stays in place as the second result.
1618 | mov RD, 1+3
1619 | jmp ->fff_res
1620 |
1621 |.ffunc_1 ipairs_aux // Iterator step: returns index+1 and t[index+1], or nothing if that value is nil.
1622 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1623 | cmp dword [BASE+12], LJ_TISNUM // NOTE(review): only 1 argument is checked above, yet the 2nd slot is read here - confirm this is intended.
1624 if (LJ_DUALNUM) {
1625 | jne ->fff_fallback
1626 } else {
1627 | jae ->fff_fallback
1628 }
1629 | mov PC, [BASE-4]
1630 if (LJ_DUALNUM) {
1631 | mov RD, dword [BASE+8]
1632 | add RD, 1
1633 | mov dword [BASE-4], LJ_TISNUM
1634 | mov dword [BASE-8], RD
1635 } else if (sse) {
1636 | movsd xmm0, qword [BASE+8]
1637 | sseconst_1 xmm1, RBa
1638 | addsd xmm0, xmm1
1639 | cvtsd2si RD, xmm0 // RD = incremented index as an integer.
1640 | movsd qword [BASE-8], xmm0 // The incremented index is the first result.
1641 } else {
1642 |.if not X64
1643 | fld qword [BASE+8]
1644 | fld1
1645 | faddp st1
1646 | fist ARG1 // ARG1 is used as a scratch slot for the integer conversion.
1647 | fstp qword [BASE-8]
1648 | mov RD, ARG1
1649 |.endif
1650 }
1651 | mov TAB:RB, [BASE]
1652 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
1653 | shl RD, 3 // RD = address of array slot: t->array + index*8.
1654 | add RD, TAB:RB->array
1655 |1: // RD points to the value slot.
1656 | cmp dword [RD+4], LJ_TNIL; je ->fff_res0
1657 | // Copy array slot.
1658 |.if X64
1659 | mov RBa, [RD]
1660 | mov [BASE], RBa
1661 |.else
1662 | mov RB, [RD]
1663 | mov RD, [RD+4]
1664 | mov [BASE], RB
1665 | mov [BASE+4], RD
1666 |.endif
1667 | jmp ->fff_res2
1668 |2: // Check for empty hash part first. Otherwise call C function.
1669 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1670 | mov FCARG1, TAB:RB
1671 | mov RB, BASE // Save BASE.
1672 | mov FCARG2, RD // Caveat: FCARG2 == BASE
1673 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
1674 | // cTValue * or NULL returned in eax (RD).
1675 | mov BASE, RB
1676 | test RD, RD
1677 | jnz <1
1678 |->fff_res0: // Shared exit: return no results.
1679 | mov RD, 1+0
1680 | jmp ->fff_res
1681 |
1682 |.ffunc_1 ipairs // For a table argument: returns upvalue[0] of this function, the table and 0.
1683 | mov TAB:RB, [BASE]
1684 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1685#ifdef LUAJIT_ENABLE_LUA52COMPAT
1686 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
1687#endif
1688 | mov CFUNC:RB, [BASE-8]
1689 | mov CFUNC:RD, CFUNC:RB->upvalue[0]
1690 | mov PC, [BASE-4]
1691 | mov dword [BASE-4], LJ_TFUNC
1692 | mov [BASE-8], CFUNC:RD
1693 if (LJ_DUALNUM) {
1694 | mov dword [BASE+12], LJ_TISNUM // Third result: integer 0.
1695 | mov dword [BASE+8], 0
1696 } else if (sse) {
1697 | xorps xmm0, xmm0 // Third result: number 0.
1698 | movsd qword [BASE+8], xmm0
1699 } else {
1700 | fldz
1701 | fstp qword [BASE+8]
1702 }
1703 | mov RD, 1+3
1704 | jmp ->fff_res
1705 |
1706 |//-- Base library: catch errors ----------------------------------------
1707 |
1708 |.ffunc_1 pcall // Calls argument 1 with the remaining arguments in a FRAME_PCALL frame.
1709 | lea RA, [BASE+8] // New base is one slot above BASE.
1710 | sub NARGS:RD, 1
1711 | mov PC, 8+FRAME_PCALL // Frame offset (8) combined with the frame type.
1712 |1: // xpcall joins here.
1713 | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)]
1714 | shr RB, HOOK_ACTIVE_SHIFT
1715 | and RB, 1 // RB = 1 if the hook-active bit is set, else 0.
1716 | add PC, RB // Remember active hook before pcall.
1717 | jmp ->vm_call_dispatch
1718 |
1719 |.ffunc_2 xpcall // Requires a function as 2nd argument; swaps the first two slots and joins pcall.
1720 | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback
1721 | mov RB, [BASE+4] // Swap function and traceback.
1722 | mov [BASE+12], RB
1723 | mov dword [BASE+4], LJ_TFUNC
1724 | mov LFUNC:RB, [BASE]
1725 | mov PC, [BASE+8] // PC is only a scratch register for the swap.
1726 | mov [BASE+8], LFUNC:RB
1727 | mov [BASE], PC
1728 | lea RA, [BASE+16] // New base is two slots above BASE.
1729 | sub NARGS:RD, 2
1730 | mov PC, 16+FRAME_PCALL
1731 | jmp <1 // Continue at label 1 of pcall.
1732 |
1733 |//-- Coroutine library --------------------------------------------------
1734 |
1735 |.macro coroutine_resume_wrap, resume // Emits ff_coroutine_resume (resume=1) or ff_coroutine_wrap_aux (resume=0).
1736 |.if resume
1737 |.ffunc_1 coroutine_resume
1738 | mov L:RB, [BASE] // Coroutine is the first argument.
1739 |.else
1740 |.ffunc coroutine_wrap_aux
1741 | mov CFUNC:RB, [BASE-8]
1742 | mov L:RB, CFUNC:RB->upvalue[0].gcr // Coroutine is upvalue[0] of the wrapper function.
1743 |.endif
1744 | mov PC, [BASE-4]
1745 | mov SAVE_PC, PC
1746 |.if X64
1747 | mov TMP1, L:RB // Keep the coroutine pointer across the call to vm_resume.
1748 |.else
1749 | mov ARG1, L:RB
1750 |.endif
1751 |.if resume
1752 | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback
1753 |.endif
1754 | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1755 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1756 | mov RA, L:RB->top // mov keeps the flags of the status compare for the je below.
1757 | je >1 // Status != LUA_YIELD (i.e. 0)?
1758 | cmp RA, L:RB->base // Check for presence of initial func.
1759 | je ->fff_fallback
1760 |1:
1761 |.if resume
1762 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1763 |.else
1764 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1765 |.endif
1766 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1767 | mov L:RB->top, PC // New top of the coroutine stack.
1768 |
1769 | mov L:RB, SAVE_L
1770 | mov L:RB->base, BASE
1771 |.if resume
1772 | add BASE, 8 // Keep resumed thread in stack for GC.
1773 |.endif
1774 | mov L:RB->top, BASE
1775 |.if resume
1776 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1777 |.else
1778 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1779 |.endif
1780 | sub RBa, PCa // Relative to PC.
1781 |
1782 | cmp PC, RA
1783 | je >3 // Nothing to move.
1784 |2: // Move args to coroutine.
1785 |.if X64
1786 | mov RCa, [PC+RB]
1787 | mov [PC-8], RCa
1788 |.else
1789 | mov RC, [PC+RB+4]
1790 | mov [PC-4], RC
1791 | mov RC, [PC+RB]
1792 | mov [PC-8], RC
1793 |.endif
1794 | sub PC, 8 // Copy downwards, one 8 byte slot at a time, until PC reaches RA.
1795 | cmp PC, RA
1796 | jne <2
1797 |3:
1798 |.if X64
1799 | mov CARG2d, RA
1800 | mov CARG1d, TMP1
1801 |.else
1802 | mov ARG2, RA
1803 | xor RA, RA
1804 | mov ARG4, RA
1805 | mov ARG3, RA
1806 |.endif
1807 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1808 | set_vmstate INTERP
1809 |
1810 | mov L:RB, SAVE_L
1811 |.if X64
1812 | mov L:PC, TMP1
1813 |.else
1814 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1815 |.endif
1816 | mov BASE, L:RB->base
1817 | cmp eax, LUA_YIELD
1818 | ja >8 // Status above LUA_YIELD: error path.
1819 |4: // L:PC = coroutine, L:RB = current state. Re-entered from label 9 after growing the stack.
1820 | mov RA, L:PC->base
1821 | mov KBASE, L:PC->top
1822 | mov L:PC->top, RA // Clear coroutine stack.
1823 | mov PC, KBASE
1824 | sub PC, RA // PC = size of the results in bytes.
1825 | je >6 // No results?
1826 | lea RD, [BASE+PC]
1827 | shr PC, 3 // PC = number of result slots.
1828 | cmp RD, L:RB->maxstack
1829 | ja >9 // Need to grow stack?
1830 |
1831 | mov RB, BASE
1832 | sub RBa, RAa // RB = destination offset relative to the source pointer RA.
1833 |5: // Move results from coroutine.
1834 |.if X64
1835 | mov RDa, [RA]
1836 | mov [RA+RB], RDa
1837 |.else
1838 | mov RD, [RA]
1839 | mov [RA+RB], RD
1840 | mov RD, [RA+4]
1841 | mov [RA+RB+4], RD
1842 |.endif
1843 | add RA, 8
1844 | cmp RA, KBASE // KBASE holds the old top of the coroutine stack.
1845 | jne <5
1846 |6:
1847 |.if resume
1848 | lea RD, [PC+2] // nresults+1 = 1 + true + results.
1849 | mov dword [BASE-4], LJ_TTRUE // Prepend true to results.
1850 |.else
1851 | lea RD, [PC+1] // nresults+1 = 1 + results.
1852 |.endif
1853 |7: // Return RD-1 results to the caller.
1854 | mov PC, SAVE_PC
1855 | mov MULTRES, RD
1856 |.if resume
1857 | mov RAa, -8 // resume: results start at BASE-8.
1858 |.else
1859 | xor RA, RA // wrap: results start at BASE.
1860 |.endif
1861 | test PC, FRAME_TYPE
1862 | jz ->BC_RET_Z
1863 | jmp ->vm_return
1864 |
1865 |8: // Coroutine returned with error (at co->top-1).
1866 |.if resume
1867 | mov dword [BASE-4], LJ_TFALSE // Prepend false to results.
1868 | mov RA, L:PC->top
1869 | sub RA, 8
1870 | mov L:PC->top, RA // Clear error from coroutine stack.
1871 | // Copy error message.
1872 |.if X64
1873 | mov RDa, [RA]
1874 | mov [BASE], RDa
1875 |.else
1876 | mov RD, [RA]
1877 | mov [BASE], RD
1878 | mov RD, [RA+4]
1879 | mov [BASE+4], RD
1880 |.endif
1881 | mov RD, 1+2 // nresults+1 = 1 + false + error.
1882 | jmp <7
1883 |.else
1884 | mov FCARG2, L:PC
1885 | mov FCARG1, L:RB
1886 | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co)
1887 | // Error function does not return.
1888 |.endif
1889 |
1890 |9: // Handle stack expansion on return from yield.
1891 |.if X64
1892 | mov L:RA, TMP1
1893 |.else
1894 | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
1895 |.endif
1896 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1897 | mov FCARG2, PC // PC = number of result slots (see label 4).
1898 | mov FCARG1, L:RB
1899 | call extern lj_state_growstack@8 // (lua_State *L, int n)
1900 |.if X64
1901 | mov L:PC, TMP1
1902 |.else
1903 | mov L:PC, ARG1
1904 |.endif
1905 | mov BASE, L:RB->base
1906 | jmp <4 // Retry the stack move.
1907 |.endmacro
1908 |
1909 | coroutine_resume_wrap 1 // coroutine.resume
1910 | coroutine_resume_wrap 0 // coroutine.wrap
1911 |
1912 |.ffunc coroutine_yield // Marks the current state as yielded and leaves the VM via vm_leave_unw.
1913 | mov L:RB, SAVE_L
1914 | test aword L:RB->cframe, CFRAME_RESUME
1915 | jz ->fff_fallback // CFRAME_RESUME bit not set in L->cframe: fall back.
1916 | mov L:RB->base, BASE
1917 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, since NARGS:RD holds nargs+1.
1918 | mov L:RB->top, RD
1919 | xor RD, RD
1920 | mov aword L:RB->cframe, RDa // L->cframe = 0.
1921 | mov al, LUA_YIELD // al is the low byte of RD; the value is also left in eax.
1922 | mov byte L:RB->status, al
1923 | jmp ->vm_leave_unw
1924 |
1925 |//-- Math library -------------------------------------------------------
1926 |
1927 if (!LJ_DUALNUM) {
1928 |->fff_resi: // Dummy.
1929 }
1930 if (sse) {
1931 |->fff_resn: // x87 result handler for SSE builds: store st0 as the single result.
1932 | mov PC, [BASE-4]
1933 | fstp qword [BASE-8]
1934 | jmp ->fff_res1
1935 }
1936 | .ffunc_1 math_abs // math.abs; its tail doubles as the common result handlers (fff_res*).
1937 if (LJ_DUALNUM) {
1938 | cmp dword [BASE+4], LJ_TISNUM; jne >2
1939 | mov RB, dword [BASE]
1940 | cmp RB, 0; jns ->fff_resi // Non-negative integer: return it unchanged.
1941 | neg RB; js >1 // Still negative after negation: handled at label 1.
1942 |->fff_resbit:
1943 |->fff_resi: // Return the integer in RB as the single result.
1944 | mov PC, [BASE-4]
1945 | mov dword [BASE-4], LJ_TISNUM
1946 | mov dword [BASE-8], RB
1947 | jmp ->fff_res1
1948 |1: // Return 2^31 as a double (hi word 0x41e00000, lo word 0).
1949 | mov PC, [BASE-4]
1950 | mov dword [BASE-4], 0x41e00000 // 2^31.
1951 | mov dword [BASE-8], 0
1952 | jmp ->fff_res1
1953 |2:
1954 | ja ->fff_fallback // Flags are still those of the type compare above.
1955 } else {
1956 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1957 }
1958 if (sse) {
1959 | movsd xmm0, qword [BASE]
1960 | sseconst_abs xmm1, RDa
1961 | andps xmm0, xmm1
1962 |->fff_resxmm0: // Return xmm0 as the single result.
1963 | mov PC, [BASE-4]
1964 | movsd qword [BASE-8], xmm0
1965 | // fallthrough
1966 } else {
1967 | fld qword [BASE]
1968 | fabs
1969 | // fallthrough
1970 |->fff_resxmm0: // Dummy.
1971 |->fff_resn: // Return st0 as the single result.
1972 | mov PC, [BASE-4]
1973 | fstp qword [BASE-8]
1974 }
1975 |->fff_res1: // Return one result.
1976 | mov RD, 1+1
1977 |->fff_res: // Return RD-1 results, already stored from BASE-8 upwards; PC must be loaded.
1978 | mov MULTRES, RD
1979 |->fff_res_: // Same, for callers that have already stored MULTRES.
1980 | test PC, FRAME_TYPE
1981 | jnz >7
1982 |5:
1983 | cmp PC_RB, RDL // More results expected?
1984 | ja >6
1985 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
1986 | movzx RA, PC_RA
1987 | not RAa // Note: ~RA = -(RA+1)
1988 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
1989 | ins_next
1990 |
1991 |6: // Fill up results with nil.
1992 | mov dword [BASE+RD*8-12], LJ_TNIL // Tag word of the next result slot.
1993 | add RD, 1
1994 | jmp <5
1995 |
1996 |7: // Non-standard return case.
1997 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
1998 | jmp ->vm_return
1999 |
2000 |.macro math_round, func // Emits ff_math_<func>; the rounding itself is done by the helper vm_<func>.
2001 | .ffunc math_ .. func // NOTE(review): no argument count check before [BASE+4] is read - confirm this is intended.
2002 ||if (LJ_DUALNUM) {
2003 | cmp dword [BASE+4], LJ_TISNUM; jne >1
2004 | mov RB, dword [BASE]; jmp ->fff_resi // Integer argument: returned unchanged.
2005 |1:
2006 | ja ->fff_fallback
2007 ||} else {
2008 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2009 ||}
2010 ||if (sse) {
2011 | movsd xmm0, qword [BASE]
2012 | call ->vm_ .. func
2013 || if (LJ_DUALNUM) {
2014 | cvtsd2si RB, xmm0
2015 | cmp RB, 0x80000000
2016 | jne ->fff_resi // Any other conversion result is returned as an integer.
2017 | cvtsi2sd xmm1, RB // 0x80000000: convert back and compare to decide between integer and number.
2018 | ucomisd xmm0, xmm1
2019 | jp ->fff_resxmm0
2020 | je ->fff_resi
2021 || }
2022 | jmp ->fff_resxmm0
2023 ||} else {
2024 | fld qword [BASE]
2025 | call ->vm_ .. func
2026 || if (LJ_DUALNUM) {
2027 |.if not X64
2028 | fist ARG1 // ARG1 is used as a scratch slot for the integer conversion.
2029 | mov RB, ARG1
2030 | cmp RB, 0x80000000; jne >2
2031 | fdup
2032 | fild ARG1
2033 | fcomparepp
2034 | jp ->fff_resn
2035 | jne ->fff_resn
2036 |2:
2037 | fpop // Drop the x87 value; the integer in RB is returned instead.
2038 | jmp ->fff_resi
2039 |.endif
2040 || } else {
2041 | jmp ->fff_resn
2042 || }
2043 ||}
2044 |.endmacro
2045 |
2046 | math_round floor
2047 | math_round ceil
2048 |
2049 if (sse) { // One-argument math fast functions; math_sqrt has an SSE and an x87 form.
2050 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0
2051 } else {
2052 |.ffunc_n math_sqrt; fsqrt; jmp ->fff_resn
2053 }
2054 |.ffunc_n math_log, fldln2; fyl2x; jmp ->fff_resn // The constant is loaded before the argument (2-argument .ffunc_n form).
2055 |.ffunc_n math_log10, fldlg2; fyl2x; jmp ->fff_resn
2056 |.ffunc_n math_exp; call ->vm_exp_x87; jmp ->fff_resn
2057 |
2058 |.ffunc_n math_sin; fsin; jmp ->fff_resn
2059 |.ffunc_n math_cos; fcos; jmp ->fff_resn
2060 |.ffunc_n math_tan; fptan; fpop; jmp ->fff_resn // fptan is followed by one pop before the result is stored.
2061 |
2062 |.ffunc_n math_asin
2063 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fpatan // Squares a copy of x, combines it with 1.0, takes the root, then fpatan.
2064 | jmp ->fff_resn
2065 |.ffunc_n math_acos
2066 | fdup; fmul st0; fld1; fsubrp st1; fsqrt; fxch; fpatan // Same as asin, with the fpatan operands exchanged.
2067 | jmp ->fff_resn
2068 |.ffunc_n math_atan; fld1; fpatan; jmp ->fff_resn
2069 |
2070 |.macro math_extern, func // Emits ff_math_<func>, which calls the external helper lj_vm_<func>.
2071 ||if (sse) {
2072 | .ffunc_nsse math_ .. func
2073 | .if not X64
2074 | movsd FPARG1, xmm0 // On x86 the argument is passed in FPARG1.
2075 | .endif
2076 ||} else {
2077 | .if not X64
2078 | .ffunc_n math_ .. func
2079 | fstp FPARG1
2080 | .endif
2081 ||}
2082 | mov RB, BASE // Save BASE in RB across the call.
2083 | call extern lj_vm_ .. func
2084 | mov BASE, RB
2085 | .if X64
2086 | jmp ->fff_resxmm0 // x64: result is taken from xmm0.
2087 | .else
2088 | jmp ->fff_resn // x86: result is taken from st0.
2089 | .endif
2090 |.endmacro
2091 |
2092 | math_extern sinh
2093 | math_extern cosh
2094 | math_extern tanh
2095 |
2096 |->ff_math_deg: // Shares its code with math_rad: multiply the argument by upvalue[0] of the called function.
2097 if (sse) {
2098 |.ffunc_nsse math_rad
2099 | mov CFUNC:RB, [BASE-8]
2100 | mulsd xmm0, qword CFUNC:RB->upvalue[0]
2101 | jmp ->fff_resxmm0
2102 } else {
2103 |.ffunc_n math_rad
2104 | mov CFUNC:RB, [BASE-8]
2105 | fmul qword CFUNC:RB->upvalue[0]
2106 | jmp ->fff_resn
2107 }
2108 |
2109 |.ffunc_nn math_atan2; fpatan; jmp ->fff_resn // st0 = arg 2, st1 = arg 1 on entry to fpatan.
2110 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn // st0 = arg 1, st1 = arg 2; fpop1 then removes st1.
2111 |
2112 |.ffunc_1 math_frexp // Returns mantissa and exponent, computed from the bit pattern of the argument.
2113 | mov RB, [BASE+4]
2114 | cmp RB, LJ_TISNUM; jae ->fff_fallback
2115 | mov PC, [BASE-4]
2116 | mov RC, [BASE]
2117 | mov [BASE-4], RB; mov [BASE-8], RC // Copy the argument to the first result slot.
2118 | shl RB, 1; cmp RB, 0xffe00000; jae >3 // Shift out the sign; all exponent bits set: label 3.
2119 | or RC, RB; jz >3 // All remaining bits zero: label 3.
2120 | mov RC, 1022 // Value subtracted from the raw exponent at label 1.
2121 | cmp RB, 0x00200000; jb >4 // Exponent field is zero: label 4.
2122 |1:
2123 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2124 if (sse) {
2125 | cvtsi2sd xmm0, RB
2126 } else {
2127 | mov TMP1, RB; fild TMP1
2128 }
2129 | mov RB, [BASE-4]
2130 | and RB, 0x800fffff // Mask off exponent.
2131 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2132 | mov [BASE-4], RB
2133 |2: // Store the exponent as the second result.
2134 if (sse) {
2135 | movsd qword [BASE], xmm0
2136 } else {
2137 | fstp qword [BASE]
2138 }
2139 | mov RD, 1+2
2140 | jmp ->fff_res
2141 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2142 if (sse) {
2143 | xorps xmm0, xmm0; jmp <2
2144 } else {
2145 | fldz; jmp <2
2146 }
2147 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2148 if (sse) {
2149 | movsd xmm0, qword [BASE]
2150 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2151 | mulsd xmm0, xmm1
2152 | movsd qword [BASE-8], xmm0
2153 } else {
2154 | fld qword [BASE]
2155 | mov TMP1, 0x5a800000; fmul TMP1 // x = x*2^54
2156 | fstp qword [BASE-8]
2157 }
2158 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 // 1076 = 1022 + 54; redo the extraction on the scaled value.
2159 |
2160 if (sse) { // math.modf: returns the truncated value and the remaining fraction.
2161 |.ffunc_nsse math_modf
2162 } else {
2163 |.ffunc_n math_modf
2164 }
2165 | mov RB, [BASE+4]
2166 | mov PC, [BASE-4]
2167 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2168 if (sse) {
2169 | movaps xmm4, xmm0 // Keep a copy of x; the code relies on it surviving the vm_trunc call.
2170 | call ->vm_trunc
2171 | subsd xmm4, xmm0 // Fraction = x - trunc(x).
2172 |1:
2173 | movsd qword [BASE-8], xmm0
2174 | movsd qword [BASE], xmm4
2175 } else {
2176 | fdup
2177 | call ->vm_trunc
2178 | fsub st1, st0
2179 |1:
2180 | fstp qword [BASE-8]
2181 | fstp qword [BASE]
2182 }
2183 | mov RC, [BASE-4]; mov RB, [BASE+4]
2184 | xor RC, RB; js >3 // Need to adjust sign?
2185 |2:
2186 | mov RD, 1+2
2187 | jmp ->fff_res
2188 |3:
2189 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2190 | jmp <2
2191 |4:
2192 if (sse) {
2193 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2194 } else {
2195 | fldz; fxch; jmp <1 // Return +-Inf and +-0.
2196 }
2197 |
2198 |.ffunc_nnr math_fmod // x87 only: st0 = arg 1, st1 = arg 2 on entry.
2199 |1: ; fprem; fnstsw ax; sahf; jp <1 // Repeat fprem while the status bit mapped to PF by sahf is set.
2200 | fpop1 // Remove st1, keeping the remainder in st0.
2201 | jmp ->fff_resn
2202 |
2203 if (sse) { // math.pow: both forms call the helper vm_pow after loading two number arguments.
2204 |.ffunc_nnsse math_pow; call ->vm_pow; jmp ->fff_resxmm0
2205 } else {
2206 |.ffunc_nn math_pow; call ->vm_pow; jmp ->fff_resn
2207 }
2208 |
2209 |.macro math_minmax, name, cmovop, fcmovop, nofcmovop, sseop // Emits a variadic min/max fast function.
2210 | .ffunc name
2211 | mov RA, 2 // RA = index of the next argument, compared against RD (nargs+1).
2212 | cmp dword [BASE+4], LJ_TISNUM
2213 ||if (LJ_DUALNUM) {
2214 | jne >4
2215 | mov RB, dword [BASE] // RB = running integer result.
2216 |1: // Handle integers.
2217 | cmp RA, RD; jae ->fff_resi // All arguments processed.
2218 | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3
2219 | cmp RB, dword [BASE+RA*8-8]
2220 | cmovop RB, dword [BASE+RA*8-8] // Replace the running result when the condition of cmovop holds.
2221 | add RA, 1
2222 | jmp <1
2223 |3:
2224 | ja ->fff_fallback
2225 | // Convert intermediate result to number and continue below.
2226 ||if (sse) {
2227 | cvtsi2sd xmm0, RB
2228 ||} else {
2229 |.if not X64
2230 | mov TMP1, RB
2231 | fild TMP1
2232 |.endif
2233 ||}
2234 | jmp >6
2235 |4:
2236 | ja ->fff_fallback
2237 ||} else {
2238 | jae ->fff_fallback
2239 ||}
2240 |
2241 ||if (sse) {
2242 | movsd xmm0, qword [BASE] // xmm0 = running number result.
2243 |5: // Handle numbers or integers.
2244 | cmp RA, RD; jae ->fff_resxmm0
2245 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2246 ||if (LJ_DUALNUM) {
2247 | jb >6
2248 | ja ->fff_fallback
2249 | cvtsi2sd xmm1, dword [BASE+RA*8-8] // Integer argument: convert it to a number.
2250 | jmp >7
2251 ||} else {
2252 | jae ->fff_fallback
2253 ||}
2254 |6:
2255 | movsd xmm1, qword [BASE+RA*8-8]
2256 |7:
2257 | sseop xmm0, xmm1
2258 | add RA, 1
2259 | jmp <5
2260 ||} else {
2261 |.if not X64
2262 | fld qword [BASE] // st0 = running number result.
2263 |5: // Handle numbers or integers.
2264 | cmp RA, RD; jae ->fff_resn
2265 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2266 ||if (LJ_DUALNUM) {
2267 | jb >6
2268 | ja >9
2269 | fild dword [BASE+RA*8-8]
2270 | jmp >7
2271 ||} else {
2272 | jae >9 // Label 9 follows the macro instantiations; it pops st0 before falling back.
2273 ||}
2274 |6:
2275 | fld qword [BASE+RA*8-8]
2276 |7:
2277 ||if (cmov) {
2278 | fucomi st1; fcmovop st1; fpop1
2279 ||} else {
2280 | push eax // eax is preserved around the status word read.
2281 | fucom st1; fnstsw ax; test ah, 1; nofcmovop >2; fxch; 2: ; fpop
2282 | pop eax
2283 ||}
2284 | add RA, 1
2285 | jmp <5
2286 |.endif
2287 ||}
2288 |.endmacro
2289 |
2290 | math_minmax math_min, cmovg, fcmovnbe, jz, minsd
2291 | math_minmax math_max, cmovl, fcmovbe, jnz, maxsd
2292 if (!sse) {
2293 |9: // Shared x87 error exit for both instantiations above.
2294 | fpop; jmp ->fff_fallback
2295 }
2296 |
2297 |//-- String library -----------------------------------------------------
2298 |
2299 |.ffunc_1 string_len // string.len(s) fast function: returns STR->len; non-strings take the fallback.
2300 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2301 | mov STR:RB, [BASE]
2302 if (LJ_DUALNUM) {
2303 | mov RB, dword STR:RB->len; jmp ->fff_resi // Integer result in RB.
2304 } else if (sse) {
2305 | cvtsi2sd xmm0, dword STR:RB->len; jmp ->fff_resxmm0 // Number result in xmm0.
2306 } else {
2307 | fild dword STR:RB->len; jmp ->fff_resn // Number result on the x87 stack.
2308 }
2309 |
2310 |.ffunc string_byte // Only handle the 1-arg case here.
2311 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument (RD = nargs+1)?
2312 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2313 | mov STR:RB, [BASE]
2314 | mov PC, [BASE-4]
2315 | cmp dword STR:RB->len, 1
2316 | jb ->fff_res0 // Return no results for empty string.
2317 | movzx RB, byte STR:RB[1] // First byte of the character data following the string header.
2318 if (LJ_DUALNUM) {
2319 | jmp ->fff_resi
2320 } else if (sse) {
2321 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0
2322 } else {
2323 | mov TMP1, RB; fild TMP1; jmp ->fff_resn
2324 }
2325 |
2326 |.ffunc string_char // Only handle the 1-arg case here.
2327 | ffgccheck
2328 | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
2329 | cmp dword [BASE+4], LJ_TISNUM
2330 if (LJ_DUALNUM) {
2331 | jne ->fff_fallback
2332 | mov RB, dword [BASE]
2333 | cmp RB, 255; ja ->fff_fallback // Unsigned compare: also rejects negative values.
2334 | mov TMP2, RB
2335 } else if (sse) {
2336 | jae ->fff_fallback
2337 | cvttsd2si RB, qword [BASE]
2338 | cmp RB, 255; ja ->fff_fallback // Unsigned compare: also rejects negative values.
2339 | mov TMP2, RB
2340 } else {
2341 | jae ->fff_fallback
2342 | fld qword [BASE]
2343 | fistp TMP2
2344 | cmp TMP2, 255; ja ->fff_fallback // Unsigned compare: also rejects negative values.
2345 }
2346 |.if X64
2347 | mov TMP3, 1 // String length = 1.
2348 |.else
2349 | mov ARG3, 1 // String length = 1.
2350 |.endif
2351 | lea RDa, TMP2 // Points to stack. Little-endian.
2352 |->fff_newstr: // Shared tail: RD = pointer to chars, TMP3 (x64) or ARG3 (x86) = length.
2353 | mov L:RB, SAVE_L
2354 | mov L:RB->base, BASE
2355 |.if X64
2356 | mov CARG3d, TMP3 // Zero-extended to size_t.
2357 | mov CARG2, RDa // May be 64 bit ptr to stack.
2358 | mov CARG1d, L:RB
2359 |.else
2360 | mov ARG2, RD
2361 | mov ARG1, L:RB
2362 |.endif
2363 | mov SAVE_PC, PC
2364 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2365 | // GCstr * returned in eax (RD).
2366 | mov BASE, L:RB->base // Reload BASE after the C call.
2367 | mov PC, [BASE-4]
2368 | mov dword [BASE-4], LJ_TSTR // Store the result: type tag ...
2369 | mov [BASE-8], STR:RD // ... and string pointer.
2370 | jmp ->fff_res1
2371 |
2372 |.ffunc string_sub // string.sub(s, start [, end]) fast function; result is built via ->fff_newstr.
2373 | ffgccheck
2374 | mov TMP2, -1 // Default end = -1.
2375 | cmp NARGS:RD, 1+2; jb ->fff_fallback
2376 | jna >1 // Exactly 2 args: keep the default end.
2377 | cmp dword [BASE+20], LJ_TISNUM
2378 if (LJ_DUALNUM) {
2379 | jne ->fff_fallback
2380 | mov RB, dword [BASE+16]
2381 | mov TMP2, RB
2382 } else if (sse) {
2383 | jae ->fff_fallback
2384 | cvttsd2si RB, qword [BASE+16]
2385 | mov TMP2, RB
2386 } else {
2387 | jae ->fff_fallback
2388 | fld qword [BASE+16]
2389 | fistp TMP2
2390 }
2391 |1:
2392 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2393 | cmp dword [BASE+12], LJ_TISNUM
2394 if (LJ_DUALNUM) {
2395 | jne ->fff_fallback
2396 } else {
2397 | jae ->fff_fallback
2398 }
2399 | mov STR:RB, [BASE]
2400 | mov TMP3, STR:RB // Save the string pointer; RB is reused for the length.
2401 | mov RB, STR:RB->len
2402 if (LJ_DUALNUM) {
2403 | mov RA, dword [BASE+8]
2404 } else if (sse) {
2405 | cvttsd2si RA, qword [BASE+8]
2406 } else {
2407 |.if not X64
2408 | fld qword [BASE+8]
2409 | fistp ARG3
2410 | mov RA, ARG3
2411 |.endif
2412 }
2413 | mov RC, TMP2 // RC = end, RA = start, RB = len.
2414 | cmp RB, RC // len < end? (unsigned compare)
2415 | jb >5
2416 |2:
2417 | test RA, RA // start <= 0?
2418 | jle >7
2419 |3:
2420 | mov STR:RB, TMP3 // Reload the string pointer.
2421 | sub RC, RA // start > end?
2422 | jl ->fff_emptystr
2423 | lea RB, [STR:RB+RA+#STR-1] // Pointer to the character at index start.
2424 | add RC, 1 // Length = end - start + 1.
2425 |4:
2426 |.if X64
2427 | mov TMP3, RC
2428 |.else
2429 | mov ARG3, RC
2430 |.endif
2431 | mov RD, RB
2432 | jmp ->fff_newstr
2433 |
2434 |5: // Negative end or overflow.
2435 | jl >6
2436 | lea RC, [RC+RB+1] // end = end+(len+1)
2437 | jmp <2
2438 |6: // Overflow.
2439 | mov RC, RB // end = len
2440 | jmp <2
2441 |
2442 |7: // Negative start or underflow.
2443 | je >8
2444 | add RA, RB // start = start+(len+1)
2445 | add RA, 1
2446 | jg <3 // start > 0?
2447 |8: // Underflow.
2448 | mov RA, 1 // start = 1
2449 | jmp <3
2450 |
2451 |->fff_emptystr: // Range underflow.
2452 | xor RC, RC // Zero length. Any ptr in RB is ok.
2453 | jmp <4
2454 |
2455 |.ffunc_2 string_rep // Only handle the 1-char case inline.
2456 | ffgccheck
2457 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2458 | cmp dword [BASE+12], LJ_TISNUM
2459 | mov STR:RB, [BASE] // mov leaves the flags of the type compare intact.
2460 if (LJ_DUALNUM) {
2461 | jne ->fff_fallback
2462 | mov RC, dword [BASE+8]
2463 } else if (sse) {
2464 | jae ->fff_fallback
2465 | cvttsd2si RC, qword [BASE+8]
2466 } else {
2467 | jae ->fff_fallback
2468 | fld qword [BASE+8]
2469 | fistp TMP2
2470 | mov RC, TMP2
2471 }
2472 | test RC, RC
2473 | jle ->fff_emptystr // Count <= 0? (or non-int)
2474 | cmp dword STR:RB->len, 1
2475 | jb ->fff_emptystr // Zero length string?
2476 | jne ->fff_fallback_2 // Fallback for > 1-char strings.
2477 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_2 // Temp buffer smaller than count?
2478 | movzx RA, byte STR:RB[1] // The single character to repeat.
2479 | mov RB, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2480 |.if X64
2481 | mov TMP3, RC // Result length for ->fff_newstr.
2482 |.else
2483 | mov ARG3, RC // Result length for ->fff_newstr.
2484 |.endif
2485 |1: // Fill buffer with char. Yes, this is suboptimal code (do you care?).
2486 | mov [RB], RAL
2487 | add RB, 1
2488 | sub RC, 1
2489 | jnz <1
2490 | mov RD, [DISPATCH+DISPATCH_GL(tmpbuf.buf)] // Source pointer for ->fff_newstr.
2491 | jmp ->fff_newstr
2492 |
2493 |.ffunc_1 string_reverse // string.reverse(s) fast function: reverses into the global temp buffer.
2494 | ffgccheck
2495 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2496 | mov STR:RB, [BASE]
2497 | mov RC, STR:RB->len
2498 | test RC, RC
2499 | jz ->fff_emptystr // Zero length string?
2500 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 // Temp buffer too small?
2501 | add RB, #STR // RB = start of the character data.
2502 | mov TMP2, PC // Need another temp register.
2503 |.if X64
2504 | mov TMP3, RC // Result length for ->fff_newstr.
2505 |.else
2506 | mov ARG3, RC // Result length for ->fff_newstr.
2507 |.endif
2508 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2509 |1:
2510 | movzx RA, byte [RB] // Read the source forwards ...
2511 | add RB, 1
2512 | sub RC, 1
2513 | mov [PC+RC], RAL // ... and write the buffer backwards (mov keeps the flags of sub).
2514 | jnz <1
2515 | mov RD, PC
2516 | mov PC, TMP2 // Restore PC.
2517 | jmp ->fff_newstr
2518 |
2519 |.macro ffstring_case, name, lo, hi // Emits string.lower/upper: bytes in [lo, hi] get bit 0x20 toggled.
2520 | .ffunc_1 name
2521 | ffgccheck
2522 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback
2523 | mov STR:RB, [BASE]
2524 | mov RC, STR:RB->len
2525 | cmp [DISPATCH+DISPATCH_GL(tmpbuf.sz)], RC; jb ->fff_fallback_1 // Temp buffer too small?
2526 | add RB, #STR // RB = start of the character data.
2527 | mov TMP2, PC // Need another temp register.
2528 |.if X64
2529 | mov TMP3, RC // Result length for ->fff_newstr.
2530 |.else
2531 | mov ARG3, RC // Result length for ->fff_newstr.
2532 |.endif
2533 | mov PC, [DISPATCH+DISPATCH_GL(tmpbuf.buf)]
2534 | jmp >3 // Enter at the loop test, so a zero length copies nothing.
2535 |1: // ASCII case conversion. Yes, this is suboptimal code (do you care?).
2536 | movzx RA, byte [RB+RC]
2537 | cmp RA, lo
2538 | jb >2
2539 | cmp RA, hi
2540 | ja >2
2541 | xor RA, 0x20
2542 |2:
2543 | mov [PC+RC], RAL
2544 |3:
2545 | sub RC, 1 // Index runs from len-1 down to 0.
2546 | jns <1
2547 | mov RD, PC
2548 | mov PC, TMP2 // Restore PC.
2549 | jmp ->fff_newstr
2550 |.endmacro
2551 |
2552 |ffstring_case string_lower, 0x41, 0x5a
2553 |ffstring_case string_upper, 0x61, 0x7a
2554 |
2555 |//-- Table library ------------------------------------------------------
2556 |
2557 |.ffunc_1 table_getn // table.getn(t) fast function: returns the result of lj_tab_len(t).
2558 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
2559 | mov RB, BASE // Save BASE.
2560 | mov TAB:FCARG1, [BASE]
2561 | call extern lj_tab_len@4 // LJ_FASTCALL (GCtab *t)
2562 | // Length of table returned in eax (RD).
2563 | mov BASE, RB // Restore BASE.
2564 if (LJ_DUALNUM) {
2565 | mov RB, RD; jmp ->fff_resi
2566 } else if (sse) {
2567 | cvtsi2sd xmm0, RD; jmp ->fff_resxmm0
2568 } else {
2569 |.if not X64
2570 | mov ARG1, RD; fild ARG1; jmp ->fff_resn
2571 |.endif
2572 }
2573 |
2574 |//-- Bit library --------------------------------------------------------
2575 |
2576 |.define TOBIT_BIAS, 0x59c00000 // 2^52 + 2^51 (float, not double!).
2577 |
2578 |.macro .ffunc_bit, name, kind // Converts arg 1 to a 32 bit int. kind 0: tobit, 1: result in RB, 2: bias preloaded for more args.
2579 | .ffunc_1 name
2580 |.if kind == 2
2581 ||if (sse) {
2582 | sseconst_tobit xmm1, RBa
2583 ||} else {
2584 | mov TMP1, TOBIT_BIAS
2585 ||}
2586 |.endif
2587 | cmp dword [BASE+4], LJ_TISNUM
2588 ||if (LJ_DUALNUM) {
2589 | jne >1
2590 | mov RB, dword [BASE] // Integer argument: use it as is.
2591 |.if kind > 0
2592 | jmp >2
2593 |.else
2594 | jmp ->fff_resbit
2595 |.endif
2596 |1:
2597 | ja ->fff_fallback
2598 ||} else {
2599 | jae ->fff_fallback
2600 ||}
2601 ||if (sse) {
2602 | movsd xmm0, qword [BASE]
2603 |.if kind < 2
2604 | sseconst_tobit xmm1, RBa
2605 |.endif
2606 | addsd xmm0, xmm1 // Add the bias ...
2607 | movd RB, xmm0 // ... and take the low 32 bits of the double as the result.
2608 ||} else {
2609 |.if not X64
2610 | fld qword [BASE]
2611 |.if kind < 2
2612 | mov TMP1, TOBIT_BIAS
2613 |.endif
2614 | fadd TMP1
2615 | fstp FPARG1
2616 |.if kind > 0
2617 | mov RB, ARG1 // kind 0 leaves the value in ARG1 for the code after the macro.
2618 |.endif
2619 |.endif
2620 ||}
2621 |2:
2622 |.endmacro
2623 |
2624 |.ffunc_bit bit_tobit, 0 // bit.tobit(x) fast function: the conversion in the macro is the whole operation.
2625 if (LJ_DUALNUM || sse) {
2626 if (!sse) {
2627 |.if not X64
2628 | mov RB, ARG1 // x87 path: fetch the converted int left in ARG1 by .ffunc_bit.
2629 |.endif
2630 }
2631 | jmp ->fff_resbit
2632 } else {
2633 |.if not X64
2634 | fild ARG1 // Reload the converted int as a number result.
2635 | jmp ->fff_resn
2636 |.endif
2637 }
2638 |
2639 |.macro .ffunc_bit_op, name, ins // Emits bit.band/bor/bxor: folds all args into RB with ins.
2640 | .ffunc_bit name, 2
2641 | mov TMP2, NARGS:RD // Save for fallback.
2642 | lea RD, [BASE+NARGS:RD*8-16] // RD = address of the last argument.
2643 |1:
2644 | cmp RD, BASE
2645 | jbe ->fff_resbit // Walked back down to arg 1: done.
2646 | cmp dword [RD+4], LJ_TISNUM
2647 ||if (LJ_DUALNUM) {
2648 | jne >2
2649 | ins RB, dword [RD]
2650 | sub RD, 8
2651 | jmp <1
2652 |2:
2653 | ja ->fff_fallback_bit_op
2654 ||} else {
2655 | jae ->fff_fallback_bit_op
2656 ||}
2657 ||if (sse) {
2658 | movsd xmm0, qword [RD]
2659 | addsd xmm0, xmm1 // xmm1 still holds the bias loaded by .ffunc_bit (kind 2).
2660 | movd RA, xmm0
2661 | ins RB, RA
2662 ||} else {
2663 |.if not X64
2664 | fld qword [RD]
2665 | fadd TMP1 // TMP1 still holds TOBIT_BIAS from .ffunc_bit (kind 2).
2666 | fstp FPARG1
2667 | ins RB, ARG1
2668 |.endif
2669 ||}
2670 | sub RD, 8
2671 | jmp <1
2672 |.endmacro
2673 |
2674 |.ffunc_bit_op bit_band, and
2675 |.ffunc_bit_op bit_bor, or
2676 |.ffunc_bit_op bit_bxor, xor
2677 |
2678 |.ffunc_bit bit_bswap, 1 // bit.bswap(x) fast function: converted argument arrives in RB.
2679 | bswap RB
2680 | jmp ->fff_resbit
2681 |
2682 |.ffunc_bit bit_bnot, 1 // bit.bnot(x) fast function: converted argument arrives in RB.
2683 | not RB
2684 if (LJ_DUALNUM) {
2685 | jmp ->fff_resbit // Label is not defined in this branch; presumably provided elsewhere for LJ_DUALNUM.
2686 } else if (sse) {
2687 |->fff_resbit: // Shared exit of the bit functions: return the int in RB as a number.
2688 | cvtsi2sd xmm0, RB
2689 | jmp ->fff_resxmm0
2690 } else {
2691 |.if not X64
2692 |->fff_resbit: // Shared exit of the bit functions: return the int in RB as a number.
2693 | mov ARG1, RB
2694 | fild ARG1
2695 | jmp ->fff_resn
2696 |.endif
2697 }
2698 |
2699 |->fff_fallback_bit_op: // Fallback for .ffunc_bit_op, which reuses RD as its argument pointer.
2700 | mov NARGS:RD, TMP2 // Restore for fallback
2701 | jmp ->fff_fallback
2702 |
2703 |.macro .ffunc_bit_sh, name, ins // Emits a two-argument shift/rotate: value in RB, count in RA (cl).
2704 ||if (LJ_DUALNUM) {
2705 | .ffunc_bit name, 1
2706 | // Note: no inline conversion from number for 2nd argument!
2707 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2708 | mov RA, dword [BASE+8]
2709 ||} else if (sse) {
2710 | .ffunc_nnsse name
2711 | sseconst_tobit xmm2, RBa
2712 | addsd xmm0, xmm2 // Bias both arguments ...
2713 | addsd xmm1, xmm2
2714 | movd RB, xmm0 // ... and take their low 32 bits.
2715 | movd RA, xmm1
2716 ||} else {
2717 |.if not X64
2718 | .ffunc_nn name
2719 | mov TMP1, TOBIT_BIAS
2720 | fadd TMP1
2721 | fstp FPARG3 // Value popped first ends up in RA (shift count).
2722 | fadd TMP1
2723 | fstp FPARG1 // Value popped second ends up in RB (value to shift).
2724 | mov RA, ARG3
2725 | mov RB, ARG1
2726 |.endif
2727 ||}
2728 | ins RB, cl // Assumes RA is ecx.
2729 | jmp ->fff_resbit
2730 |.endmacro
2731 |
2732 |.ffunc_bit_sh bit_lshift, shl
2733 |.ffunc_bit_sh bit_rshift, shr
2734 |.ffunc_bit_sh bit_arshift, sar
2735 |.ffunc_bit_sh bit_rol, rol
2736 |.ffunc_bit_sh bit_ror, ror
2737 |
2738 |//-----------------------------------------------------------------------
2739 |
2740 |->fff_fallback_2: // Fallback entry that first forces nargs = 2.
2741 | mov NARGS:RD, 1+2 // Other args are ignored, anyway.
2742 | jmp ->fff_fallback
2743 |->fff_fallback_1: // Fallback entry that first forces nargs = 1.
2744 | mov NARGS:RD, 1+1 // Other args are ignored, anyway.
2745 |->fff_fallback: // Call fast function fallback handler.
2746 | // BASE = new base, RD = nargs+1
2747 | mov L:RB, SAVE_L
2748 | mov PC, [BASE-4] // Fallback may overwrite PC.
2749 | mov SAVE_PC, PC // Redundant (but a defined value).
2750 | mov L:RB->base, BASE
2751 | lea RD, [BASE+NARGS:RD*8-8] // RD = address just past the last argument.
2752 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2753 | mov L:RB->top, RD
2754 | mov CFUNC:RD, [BASE-8] // The function object of the current frame.
2755 | cmp RA, L:RB->maxstack
2756 | ja >5 // Need to grow stack.
2757 |.if X64
2758 | mov CARG1d, L:RB
2759 |.else
2760 | mov ARG1, L:RB
2761 |.endif
2762 | call aword CFUNC:RD->f // (lua_State *L)
2763 | mov BASE, L:RB->base
2764 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2765 | test RD, RD; jg ->fff_res // Returned nresults+1?
2766 |1:
2767 | mov RA, L:RB->top
2768 | sub RA, BASE
2769 | shr RA, 3 // Number of 8 byte slots between base and top.
2770 | test RD, RD
2771 | lea NARGS:RD, [RA+1] // lea keeps the flags of the preceding test.
2772 | mov LFUNC:RB, [BASE-8]
2773 | jne ->vm_call_tail // Returned -1?
2774 | ins_callt // Returned 0: retry fast path.
2775 |
2776 |// Reconstruct previous base for vmeta_call during tailcall.
2777 |->vm_call_tail:
2778 | mov RA, BASE
2779 | test PC, FRAME_TYPE
2780 | jnz >3 // Frame type bits set: delta is encoded in PC itself.
2781 | movzx RB, PC_RA
2782 | not RBa // Note: ~RB = -(RB+1)
2783 | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8
2784 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2785 |3:
2786 | mov RB, PC
2787 | and RB, -8 // Mask off the low 3 bits to get the delta.
2788 | sub BASE, RB
2789 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2790 |
2791 |5: // Grow stack for fallback handler.
2792 | mov FCARG2, LUA_MINSTACK
2793 | mov FCARG1, L:RB
2794 | call extern lj_state_growstack@8 // (lua_State *L, int n)
2795 | mov BASE, L:RB->base
2796 | xor RD, RD // Simulate a return 0.
2797 | jmp <1 // Dumb retry (goes through ff first).
2798 |
2799 |->fff_gcstep: // Call GC step function.
2800 | // BASE = new base, RD = nargs+1
2801 | pop RBa // Must keep stack at same level.
2802 | mov TMPa, RBa // Save return address
2803 | mov L:RB, SAVE_L
2804 | mov SAVE_PC, PC // Redundant (but a defined value).
2805 | mov L:RB->base, BASE
2806 | lea RD, [BASE+NARGS:RD*8-8] // RD = address just past the last argument.
2807 | mov FCARG1, L:RB
2808 | mov L:RB->top, RD
2809 | call extern lj_gc_step@4 // (lua_State *L)
2810 | mov BASE, L:RB->base // Reload BASE after the C call.
2811 | mov RD, L:RB->top
2812 | sub RD, BASE
2813 | shr RD, 3
2814 | add NARGS:RD, 1 // Recompute RD = nargs+1 from top - base.
2815 | mov RBa, TMPa
2816 | push RBa // Restore return address.
2817 | ret
2818 |
2819 |//-----------------------------------------------------------------------
2820 |//-- Special dispatch targets -------------------------------------------
2821 |//-----------------------------------------------------------------------
2822 |
2823 |->vm_record: // Dispatch target for recording phase.
2824#if LJ_HASJIT
2825 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
2826 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2827 | jnz >5
2828 | // Decrement the hookcount for consistency, but always do the call.
2829 | test RDL, HOOK_ACTIVE
2830 | jnz >1
2831 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2832 | jz >1
2833 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2834 | jmp >1
2835#endif
2836 |
2837 |->vm_rethook: // Dispatch target for return hooks.
2838 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
2839 | test RDL, HOOK_ACTIVE // Hook already active?
2840 | jnz >5
2841 | jmp >1
2842 |
2843 |->vm_inshook: // Dispatch target for instr/line hooks.
2844 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)]
2845 | test RDL, HOOK_ACTIVE // Hook already active?
2846 | jnz >5
2847 |
2848 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2849 | jz >5
2850 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2851 | jz >1 // Count reached zero: always call the dispatcher.
2852 | test RDL, LUA_MASKLINE
2853 | jz >5 // No line hook either: just re-dispatch.
2854 |1: // Shared by all entry points above: call the C dispatcher.
2855 | mov L:RB, SAVE_L
2856 | mov L:RB->base, BASE
2857 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2858 | mov FCARG1, L:RB
2859 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2860 | call extern lj_dispatch_ins@8 // (lua_State *L, BCIns *pc)
2861 |3:
2862 | mov BASE, L:RB->base // Reload BASE after the C call.
2863 |4:
2864 | movzx RA, PC_RA
2865 |5: // Decode the current instruction again and run its static handler.
2866 | movzx OP, PC_OP
2867 | movzx RD, PC_RD
2868 |.if X64
2869 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2870 |.else
2871 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins.
2872 |.endif
2873 |
2874 |->cont_hook: // Continue from hook yield.
2875 | add PC, 4
2876 | mov RA, [RB-24]
2877 | mov MULTRES, RA // Restore MULTRES for *M ins.
2878 | jmp <4
2879 |
2880 |->vm_hotloop: // Hot loop counter underflow.
2881#if LJ_HASJIT
2882 | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L).
2883 | mov RB, LFUNC:RB->pc
2884 | movzx RD, byte [RB+PC2PROTO(framesize)]
2885 | lea RD, [BASE+RD*8] // top = base + framesize slots.
2886 | mov L:RB, SAVE_L
2887 | mov L:RB->base, BASE
2888 | mov L:RB->top, RD
2889 | mov FCARG2, PC
2890 | lea FCARG1, [DISPATCH+GG_DISP2J]
2891 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2892 | mov SAVE_PC, PC
2893 | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
2894 | jmp <3 // Reload BASE and re-dispatch the current instruction.
2895#endif
2896 |
2897 |->vm_callhook: // Dispatch target for call hooks.
2898 | mov SAVE_PC, PC
2899#if LJ_HASJIT
2900 | jmp >1
2901#endif
2902 |
2903 |->vm_hotcall: // Hot call counter underflow.
2904#if LJ_HASJIT
2905 | mov SAVE_PC, PC
2906 | or PC, 1 // Marker for hot call.
2907 |1:
2908#endif
2909 | lea RD, [BASE+NARGS:RD*8-8] // RD = address just past the last argument.
2910 | mov L:RB, SAVE_L
2911 | mov L:RB->base, BASE
2912 | mov L:RB->top, RD
2913 | mov FCARG2, PC
2914 | mov FCARG1, L:RB
2915 | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc)
2916 | // ASMFunction returned in eax/rax (RDa).
2917 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2918#if LJ_HASJIT
2919 | and PC, -2 // Clear the hot call marker bit again.
2920#endif
2921 | mov BASE, L:RB->base
2922 | mov RAa, RDa // Park the returned function address; RD is recomputed below.
2923 | mov RD, L:RB->top
2924 | sub RD, BASE
2925 | mov RBa, RAa
2926 | movzx RA, PC_RA
2927 | shr RD, 3
2928 | add NARGS:RD, 1 // RD = nargs+1 from top - base.
2929 | jmp RBa // Continue at the returned ASMFunction.
2930 |
2931 |//-----------------------------------------------------------------------
2932 |//-- Trace exit handler -------------------------------------------------
2933 |//-----------------------------------------------------------------------
2934 |
2935 |// Called from an exit stub with the exit number on the stack.
2936 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2937 |->vm_exit_handler:
2938#if LJ_HASJIT
2939 |.if X64
2940 | push r13; push r12
2941 | push r11; push r10; push r9; push r8
2942 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp // 88 = 9 pushes + 2 exit number slots.
2943 | push rbx; push rdx; push rcx; push rax
2944 | movzx RC, byte [rbp-8] // Reconstruct exit number.
2945 | mov RCH, byte [rbp-16]
2946 | mov [rbp-8], r15; mov [rbp-16], r14 // Reuse the exit number slots to save r15/r14.
2947 |.else
2948 | push ebp; lea ebp, [esp+12]; push ebp // 12 = 1 push + 2 exit number slots.
2949 | push ebx; push edx; push ecx; push eax
2950 | movzx RC, byte [ebp-4] // Reconstruct exit number.
2951 | mov RCH, byte [ebp-8]
2952 | mov [ebp-4], edi; mov [ebp-8], esi // Reuse the exit number slots to save edi/esi.
2953 |.endif
2954 | // Caveat: DISPATCH is ebx.
2955 | mov DISPATCH, [ebp]
2956 | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
2957 | set_vmstate EXIT
2958 | mov [DISPATCH+DISPATCH_J(exitno)], RC
2959 | mov [DISPATCH+DISPATCH_J(parent)], RA
2960 |.if X64
2961 |.if X64WIN
2962 | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
2963 |.else
2964 | sub rsp, 16*8 // Room for SSE regs.
2965 |.endif
2966 | add rbp, -128
2967 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
2968 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
2969 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
2970 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
2971 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
2972 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
2973 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
2974 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
2975 |.else
2976 | sub esp, 8*8+16 // Room for SSE regs + args.
2977 | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
2978 | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
2979 | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
2980 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2981 |.endif
2982 | // Caveat: RB is ebp.
2983 | mov L:RB, [DISPATCH+DISPATCH_GL(jit_L)]
2984 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2985 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa
2986 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
2987 | mov L:RB->base, BASE
2988 |.if X64WIN
2989 | lea CARG2, [rsp+4*8] // ExitState starts above the 4*8 byte save area.
2990 |.elif X64
2991 | mov CARG2, rsp
2992 |.else
2993 | lea FCARG2, [esp+16] // ExitState starts above the 16 bytes reserved for args.
2994 |.endif
2995 | lea FCARG1, [DISPATCH+GG_DISP2J]
2996 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
2997 | // MULTRES or negated error code returned in eax (RD).
2998 | mov RAa, L:RB->cframe
2999 | and RAa, CFRAME_RAWMASK
3000 |.if X64WIN
3001 | // Reposition stack later.
3002 |.elif X64
3003 | mov rsp, RAa // Reposition stack to C frame.
3004 |.else
3005 | mov esp, RAa // Reposition stack to C frame.
3006 |.endif
3007 | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
3008 | mov BASE, L:RB->base
3009 | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC.
3010 |.if X64
3011 | jmp >1 // Skip the stack adjustment at the start of ->vm_exit_interp.
3012 |.endif
3013#endif
3014 |->vm_exit_interp:
3015 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
3016#if LJ_HASJIT
3017 |.if X64
3018 | // Restore additional callee-save registers only used in compiled code.
3019 |.if X64WIN
3020 | lea RAa, [rsp+9*16+4*8]
3021 |1:
3022 | movdqa xmm15, [RAa-9*16]
3023 | movdqa xmm14, [RAa-8*16]
3024 | movdqa xmm13, [RAa-7*16]
3025 | movdqa xmm12, [RAa-6*16]
3026 | movdqa xmm11, [RAa-5*16]
3027 | movdqa xmm10, [RAa-4*16]
3028 | movdqa xmm9, [RAa-3*16]
3029 | movdqa xmm8, [RAa-2*16]
3030 | movdqa xmm7, [RAa-1*16]
3031 | mov rsp, RAa // Reposition stack to C frame.
3032 | movdqa xmm6, [RAa]
3033 | mov r15, CSAVE_3
3034 | mov r14, CSAVE_4
3035 |.else
3036 | add rsp, 16 // Reposition stack to C frame.
3037 |1:
3038 |.endif
3039 | mov r13, TMPa
3040 | mov r12, TMPQ
3041 |.endif
3042 | test RD, RD; js >3 // Check for error from exit.
3043 | mov MULTRES, RD
3044 | mov LFUNC:KBASE, [BASE-8]
3045 | mov KBASE, LFUNC:KBASE->pc
3046 | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = constants of the current function's prototype.
3047 | mov dword [DISPATCH+DISPATCH_GL(jit_L)], 0
3048 | set_vmstate INTERP
3049 | // Modified copy of ins_next which handles function header dispatch, too.
3050 | mov RC, [PC]
3051 | movzx RA, RCH
3052 | movzx OP, RCL
3053 | add PC, 4
3054 | shr RC, 16
3055 | cmp OP, BC_FUNCF // Function header?
3056 | jb >2
3057 | mov RC, MULTRES // RC/RD holds nres+1.
3058 |2:
3059 |.if X64
3060 | jmp aword [DISPATCH+OP*8]
3061 |.else
3062 | jmp aword [DISPATCH+OP*4]
3063 |.endif
3064 |
3065 |3: // Rethrow error from the right C frame.
3066 | neg RD // Error code was returned negated.
3067 | mov FCARG1, L:RB
3068 | mov FCARG2, RD
3069 | call extern lj_err_throw@8 // (lua_State *L, int errcode)
3070#endif
3071 |
3072 |//-----------------------------------------------------------------------
3073 |//-- Math helper functions ----------------------------------------------
3074 |//-----------------------------------------------------------------------
3075 |
3076 |// FP value rounding. Called by math.floor/math.ceil fast functions
3077 |// and from JIT code.
3078 |
3079 |// x87 variant: Arg/ret on x87 stack. No int/xmm registers modified.
3080 |.macro vm_round_x87, mode1, mode2 // mode1 is ORed into, mode2 ANDed with the x87 control word (rounding control bits).
3081 | fnstcw word [esp+4] // Caveat: overwrites ARG1 and ARG2.
3082 | mov [esp+8], eax // Preserve eax; ax is used as scratch below.
3083 | mov ax, mode1
3084 | or ax, [esp+4]
3085 |.if mode2 ~= 0xffff
3086 | and ax, mode2
3087 |.endif
3088 | mov [esp+6], ax
3089 | fldcw word [esp+6] // Switch to the requested rounding mode ...
3090 | frndint
3091 | fldcw word [esp+4] // ... and restore the original control word.
3092 | mov eax, [esp+8]
3093 | ret
3094 |.endmacro
3095 |
3096 |// SSE variant: arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
3097 |.macro vm_round_sse, mode // mode: 0 = floor, 1 = ceil, 2 = trunc.
3098 | sseconst_abs xmm2, RDa
3099 | sseconst_2p52 xmm3, RDa
3100 | movaps xmm1, xmm0
3101 | andpd xmm1, xmm2 // |x|
3102 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
3103 | jbe >1
3104 | andnpd xmm2, xmm0 // Isolate sign bit.
3105 |.if mode == 2 // trunc(x)?
3106 | movaps xmm0, xmm1
3107 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
3108 | subsd xmm1, xmm3
3109 | sseconst_1 xmm3, RDa
3110 | cmpsd xmm0, xmm1, 1 // |x| < result?
3111 | andpd xmm0, xmm3
3112 | subsd xmm1, xmm0 // If yes, subtract 1 (masked constant from sseconst_1).
3113 | orpd xmm1, xmm2 // Merge sign bit back in.
3114 |.else
3115 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
3116 | subsd xmm1, xmm3
3117 | orpd xmm1, xmm2 // Merge sign bit back in.
3118 | .if mode == 1 // ceil(x)?
3119 | sseconst_m1 xmm2, RDa // Must subtract -1 to preserve -0.
3120 | cmpsd xmm0, xmm1, 6 // x > result?
3121 | .else // floor(x)?
3122 | sseconst_1 xmm2, RDa
3123 | cmpsd xmm0, xmm1, 1 // x < result?
3124 | .endif
3125 | andpd xmm0, xmm2
3126 | subsd xmm1, xmm0 // If yes, subtract +-1.
3127 |.endif
3128 | movaps xmm0, xmm1
3129 |1:
3130 | ret
3131 |.endmacro
3132 |
3133 |.macro vm_round, name, ssemode, mode1, mode2 // Emits ->name and ->name_sse entry points for one rounding mode.
3134 |->name:
3135 ||if (!sse) {
3136 | vm_round_x87 mode1, mode2
3137 ||}
3138 |->name .. _sse: // With sse enabled no x87 body is emitted, so ->name falls through to here.
3139 | vm_round_sse ssemode
3140 |.endmacro
3141 |
3142 | vm_round vm_floor, 0, 0x0400, 0xf7ff
3143 | vm_round vm_ceil, 1, 0x0800, 0xfbff
3144 | vm_round vm_trunc, 2, 0x0c00, 0xffff
3145 |
3146 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3147 |->vm_mod: // Computes x - y*floor(x/y) in both variants.
3148 if (sse) {
3149 |// Args in xmm0/xmm1, return value in xmm0.
3150 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3151 | movaps xmm5, xmm0 // Keep x in xmm5.
3152 | divsd xmm0, xmm1
3153 | sseconst_abs xmm2, RDa
3154 | sseconst_2p52 xmm3, RDa
3155 | movaps xmm4, xmm0
3156 | andpd xmm4, xmm2 // |x/y|
3157 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
3158 | jbe >1
3159 | andnpd xmm2, xmm0 // Isolate sign bit.
3160 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
3161 | subsd xmm4, xmm3
3162 | orpd xmm4, xmm2 // Merge sign bit back in.
3163 | sseconst_1 xmm2, RDa
3164 | cmpsd xmm0, xmm4, 1 // x/y < result?
3165 | andpd xmm0, xmm2
3166 | subsd xmm4, xmm0 // If yes, subtract 1.0.
3167 | movaps xmm0, xmm5
3168 | mulsd xmm1, xmm4 // y * floor(x/y)
3169 | subsd xmm0, xmm1 // x - y*floor(x/y)
3170 | ret
3171 |1: // Quotient needs no rounding: use x/y directly.
3172 | mulsd xmm1, xmm0
3173 | movaps xmm0, xmm5
3174 | subsd xmm0, xmm1
3175 | ret
3176 } else {
3177 |// Args/ret on x87 stack (y on top). No xmm registers modified.
3178 |// Caveat: needs 3 slots on x87 stack! RC (eax) modified!
3179 | fld st1
3180 | fdiv st1 // x/y
3181 | fnstcw word [esp+4]
3182 | mov ax, 0x0400 // Same control word masks as used for vm_floor.
3183 | or ax, [esp+4]
3184 | and ax, 0xf7ff
3185 | mov [esp+6], ax
3186 | fldcw word [esp+6]
3187 | frndint
3188 | fldcw word [esp+4] // Restore the original control word.
3189 | fmulp st1 // y * floor(x/y)
3190 | fsubp st1 // x - y*floor(x/y)
3191 | ret
3192 }
3193 |
3194 |// FP exponentiation e^x and 2^x. Called by math.exp fast function and
3195 |// from JIT code. Arg/ret on x87 stack. No int/xmm regs modified.
3196 |// Caveat: needs 3 slots on x87 stack!
3197 |->vm_exp_x87:
3198 | fldl2e; fmulp st1 // e^x ==> 2^(x*log2(e))
3199 |->vm_exp2_x87:
3200 | .if X64WIN
3201 | .define expscratch, dword [rsp+8] // Use scratch area.
3202 | .elif X64
3203 | .define expscratch, dword [rsp-8] // Use red zone.
3204 | .else
3205 | .define expscratch, dword [esp+4] // Needs 4 byte scratch area.
3206 | .endif
3207 | fst expscratch // Caveat: overwrites ARG1.
3208 | cmp expscratch, 0x7f800000; je >1 // Special case: e^+Inf = +Inf
3209 | cmp expscratch, 0xff800000; je >2 // Special case: e^-Inf = 0
3210 |->vm_exp2raw: // Entry point for vm_pow. Without +-Inf check.
3211 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3212 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3213 |1:
3214 | ret
3215 |2:
3216 | fpop; fldz; ret // Replace the argument with 0.
3217 |
3218 |// Generic power function x^y. Called by BC_POW, math.pow fast function,
3219 |// and vm_arith.
3220 if (!sse) {
3221 |.if not X64
3222 |// Args/ret on x87 stack (y on top). RC (eax) modified.
3223 |// Caveat: needs 3 slots on x87 stack!
3224 |->vm_pow:
3225 | fist dword [esp+4] // Store/reload int before comparison.
3226 | fild dword [esp+4] // Integral exponent used in vm_powi.
3227 ||if (cmov) {
3228 | fucomip st1
3229 ||} else {
3230 | fucomp st1; fnstsw ax; sahf
3231 ||}
3232 | jnz >8 // Branch for FP exponents.
3233 | jp >9 // Branch for NaN exponent.
3234 | fpop // Pop y and fallthrough to vm_powi.
3235 |
3236 |// FP/int power function x^i. Arg1/ret on x87 stack.
3237 |// Arg2 (int) on C stack. RC (eax) modified.
3238 |// Caveat: needs 2 slots on x87 stack!
3239 | mov eax, [esp+4]
3240 | cmp eax, 1; jle >6 // i<=1?
3241 | // Now 1 < (unsigned)i <= 0x80000000.
3242 |1: // Handle leading zeros.
3243 | test eax, 1; jnz >2
3244 | fmul st0 // Square x while the low exponent bit is zero.
3245 | shr eax, 1
3246 | jmp <1
3247 |2:
3248 | shr eax, 1; jz >5 // No exponent bits left: result is in st0.
3249 | fdup
3250 |3: // Handle trailing bits.
3251 | fmul st0
3252 | shr eax, 1; jz >4
3253 | jnc <3 // Bit shifted out was zero: only square.
3254 | fmul st1, st0
3255 | jmp <3
3256 |4:
3257 | fmulp st1
3258 |5:
3259 | ret
3260 |6:
3261 | je <5 // x^1 ==> x
3262 | jb >7 // Carry from cmp eax, 1 is only set for i == 0.
3263 | fld1; fdivrp st1
3264 | neg eax
3265 | cmp eax, 1; je <5 // x^-1 ==> 1/x
3266 | jmp <1 // x^-i ==> (1/x)^i
3267 |7:
3268 | fpop; fld1 // x^0 ==> 1
3269 | ret
3270 |
3271 |8: // FP/FP power function x^y.
3272 | fst dword [esp+4] // y as a float, for the special case checks.
3273 | fxch
3274 | fst dword [esp+8] // x as a float, for the special case checks.
3275 | mov eax, [esp+4]; shl eax, 1
3276 | cmp eax, 0xff000000; je >2 // x^+-Inf?
3277 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3278 | cmp eax, 0xff000000; je >4 // +-Inf^y?
3279 | fyl2x
3280 | jmp ->vm_exp2raw
3281 |
3282 |9: // Handle x^NaN.
3283 | fld1
3284 ||if (cmov) {
3285 | fucomip st2
3286 ||} else {
3287 | fucomp st2; fnstsw ax; sahf
3288 ||}
3289 | je >1 // 1^NaN ==> 1
3290 | fxch // x^NaN ==> NaN
3291 |1:
3292 | fpop
3293 | ret
3294 |
3295 |2: // Handle x^+-Inf.
3296 | fabs
3297 | fld1
3298 ||if (cmov) {
3299 | fucomip st1
3300 ||} else {
3301 | fucomp st1; fnstsw ax; sahf
3302 ||}
3303 | je >3 // +-1^+-Inf ==> 1
3304 | fpop; fabs; fldz; mov eax, 0; setc al
3305 | ror eax, 1; xor eax, [esp+4]; jns >3 // |x|<>1, x^+-Inf ==> +Inf/0
3306 | fxch
3307 |3:
3308 | fpop1; fabs
3309 | ret
3310 |
3311 |4: // Handle +-0^y or +-Inf^y.
3312 | cmp dword [esp+4], 0; jge <3 // y >= 0, x^y ==> |x|
3313 | fpop; fpop
3314 | test eax, eax; jz >5 // y < 0, +-0^y ==> +Inf
3315 | fldz // y < 0, +-Inf^y ==> 0
3316 | ret
3317 |5:
3318 | mov dword [esp+4], 0x7f800000 // Return +Inf.
3319 | fld dword [esp+4]
3320 | ret
3321 |.endif
3322 } else {
3323 |->vm_pow: // SSE build: vm_pow labels the same address as the vm_pow_sse code that follows.
3324 }
3325 |
3326 |// Args in xmm0/xmm1. Ret in xmm0. xmm0-xmm2 and RC (eax) modified.
3327 |// Needs 16 byte scratch area for x86. Also called from JIT code.
3328 |->vm_pow_sse:
3329 | cvtsd2si eax, xmm1 // eax = exponent y converted to int32.
3330 | cvtsi2sd xmm2, eax // Convert back to double for a round-trip check.
3331 | ucomisd xmm1, xmm2 // Compare y with its int32 round-trip.
3332 | jnz >8 // Branch for FP exponents.
3333 | jp >9 // Branch for NaN exponent.
3334 | // Fallthrough to vm_powi_sse.
3335 |
3336 |// Args in xmm0/eax. Ret in xmm0. xmm0-xmm1 and eax modified.
3337 |->vm_powi_sse:
3338 | cmp eax, 1; jle >6 // i<=1?
3339 | // Now 1 < (unsigned)i <= 0x80000000.
3340 |1: // Handle leading zeros: square x while the low bit of i is clear.
3341 | test eax, 1; jnz >2
3342 | mulsd xmm0, xmm0
3343 | shr eax, 1
3344 | jmp <1
3345 |2: // Low bit of i is set.
3346 | shr eax, 1; jz >5 // Shift it out. No bits left ==> xmm0 is the result.
3347 | movaps xmm1, xmm0 // xmm1 accumulates the product of the selected squares.
3348 |3: // Handle trailing bits.
3349 | mulsd xmm0, xmm0 // Next square.
3350 | shr eax, 1; jz >4 // CF = bit shifted out. Leave the loop once i is zero.
3351 | jnc <3 // Bit clear: only square.
3352 | mulsd xmm1, xmm0 // Bit set: multiply the square into the accumulator.
3353 | jmp <3
3354 |4: // Last bit consumed: combine the final square with the accumulator.
3355 | mulsd xmm0, xmm1
3356 |5:
3357 | ret
3358 |6: // i <= 1. Flags are still those of cmp eax, 1.
3359 | je <5 // x^1 ==> x
3360 | jb >7 // x^0 ==> 1
3361 | neg eax // i < 0: compute x^(-i) first.
3362 | call <1 // Reuse the loop above as a subroutine (returns via label 5).
3363 | sseconst_1 xmm1, RDa
3364 | divsd xmm1, xmm0 // xmm1 = xmm1 / x^(-i).
3365 | movaps xmm0, xmm1
3366 | ret
3367 |7:
3368 | sseconst_1 xmm0, RDa
3369 | ret
3370 |
3371 |8: // FP/FP power function x^y.
3372 |.if X64
3373 | movd rax, xmm1; shl rax, 1 // Raw bits of y with the sign bit shifted out.
3374 | rol rax, 12; cmp rax, 0xffe; je >2 // x^+-Inf?
3375 | movd rax, xmm0; shl rax, 1; je >4 // +-0^y?
3376 | rol rax, 12; cmp rax, 0xffe; je >5 // +-Inf^y?
3377 | .if X64WIN
3378 | movsd qword [rsp+16], xmm1 // Use scratch area.
3379 | movsd qword [rsp+8], xmm0
3380 | fld qword [rsp+16] // st0 = y.
3381 | fld qword [rsp+8] // st0 = x, st1 = y.
3382 | .else
3383 | movsd qword [rsp-16], xmm1 // Use red zone.
3384 | movsd qword [rsp-8], xmm0
3385 | fld qword [rsp-16] // st0 = y.
3386 | fld qword [rsp-8] // st0 = x, st1 = y.
3387 | .endif
3388 |.else
3389 | movsd qword [esp+12], xmm1 // Needs 16 byte scratch area.
3390 | movsd qword [esp+4], xmm0
3391 | cmp dword [esp+12], 0; jne >1 // Low dword of y nonzero ==> y is not +-Inf.
3392 | mov eax, [esp+16]; shl eax, 1 // High dword of y without the sign bit.
3393 | cmp eax, 0xffe00000; je >2 // x^+-Inf?
3394 |1:
3395 | cmp dword [esp+4], 0; jne >1 // Low dword of x nonzero ==> neither +-0 nor +-Inf.
3396 | mov eax, [esp+8]; shl eax, 1; je >4 // +-0^y?
3397 | cmp eax, 0xffe00000; je >5 // +-Inf^y?
3398 |1:
3399 | fld qword [esp+12] // st0 = y.
3400 | fld qword [esp+4] // st0 = x, st1 = y.
3401 |.endif
3402 | fyl2x // y*log2(x)
3403 | fdup; frndint; fsub st1, st0; fxch // Split into frac/int part.
3404 | f2xm1; fld1; faddp st1; fscale; fpop1 // ==> (2^frac-1 +1) << int
3405 |.if X64WIN
3406 | fstp qword [rsp+8] // Use scratch area.
3407 | movsd xmm0, qword [rsp+8]
3408 |.elif X64
3409 | fstp qword [rsp-8] // Use red zone.
3410 | movsd xmm0, qword [rsp-8]
3411 |.else
3412 | fstp qword [esp+4] // Needs 8 byte scratch area.
3413 | movsd xmm0, qword [esp+4]
3414 |.endif
3415 | ret
3416 |
3417 |9: // Handle x^NaN.
3418 | sseconst_1 xmm2, RDa
3419 | ucomisd xmm0, xmm2; je >1 // 1^NaN ==> 1
3420 | movaps xmm0, xmm1 // x^NaN ==> NaN
3421 |1:
3422 | ret
3423 |
3424 |2: // Handle x^+-Inf.
3425 | sseconst_abs xmm2, RDa
3426 | andpd xmm0, xmm2 // |x|
3427 | sseconst_1 xmm2, RDa
3428 | ucomisd xmm0, xmm2; je <1 // +-1^+-Inf ==> 1
3429 | movmskpd eax, xmm1 // Sign mask of y.
3430 | xorps xmm0, xmm0 // Preload a zero result; the flags from ucomisd stay intact.
3431 | mov ah, al; setc al; xor al, ah; jne <1 // |x|<>1, x^+-Inf ==> +Inf/0
3432 |3:
3433 | sseconst_hi xmm0, RDa, 7ff00000 // +Inf
3434 | ret
3435 |
3436 |4: // Handle +-0^y.
3437 | movmskpd eax, xmm1; test eax, eax; jnz <3 // y < 0, +-0^y ==> +Inf
3438 | xorps xmm0, xmm0 // y >= 0, +-0^y ==> 0
3439 | ret
3440 |
3441 |5: // Handle +-Inf^y.
3442 | movmskpd eax, xmm1; test eax, eax; jz <3 // y >= 0, +-Inf^y ==> +Inf
3443 | xorps xmm0, xmm0 // y < 0, +-Inf^y ==> 0
3444 | ret
3445 |
3446 |// Callable from C: double lj_vm_foldfpm(double x, int fpm)
3447 |// Computes fpm(x) for extended math functions. ORDER FPM.
3448 |->vm_foldfpm:
3449#if LJ_HASJIT
3450 if (sse) {
3451 |.if X64
3452 |
3453 | .if X64WIN
3454 | .define fpmop, CARG2d
3455 | .else
3456 | .define fpmop, CARG1d
3457 | .endif
3458 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil // fpm 0 ==> floor, 1 ==> ceil (tail jumps).
3459 | cmp fpmop, 3; jb ->vm_trunc; ja >2 // fpm 2 ==> trunc, fpm > 3 ==> x87 ops below.
3460 | sqrtsd xmm0, xmm0; ret // fpm 3 ==> sqrt.
3461 |2: // fpm >= 4: move x from xmm0 onto the x87 stack.
3462 | .if X64WIN
3463 | movsd qword [rsp+8], xmm0 // Use scratch area.
3464 | fld qword [rsp+8]
3465 | .else
3466 | movsd qword [rsp-8], xmm0 // Use red zone.
3467 | fld qword [rsp-8]
3468 | .endif
3469 | cmp fpmop, 5; ja >2
3470 | .if X64WIN; pop rax; .endif
3471 | je >1 // fpm 5 ==> exp2, fpm 4 ==> exp. pop does not change the flags.
3472 | call ->vm_exp_x87
3473 | .if X64WIN; push rax; .endif
3474 | jmp >7
3475 |1:
3476 | call ->vm_exp2_x87
3477 | .if X64WIN; push rax; .endif
3478 | jmp >7
3479 |2: ; cmp fpmop, 7; je >1; ja >2
3480 | fldln2; fxch; fyl2x; jmp >7 // fpm 6: ln(2)*log2(x).
3481 |1: ; fld1; fxch; fyl2x; jmp >7 // fpm 7: 1*log2(x).
3482 |2: ; cmp fpmop, 9; je >1; ja >2
3483 | fldlg2; fxch; fyl2x; jmp >7 // fpm 8: log10(2)*log2(x).
3484 |1: ; fsin; jmp >7 // fpm 9.
3485 |2: ; cmp fpmop, 11; je >1; ja >9
3486 | fcos; jmp >7 // fpm 10.
3487 |1: ; fptan; fpop // fpm 11. fptan leaves an extra 1.0 on top, hence the fpop.
3488 |7: // Move the x87 result back to xmm0.
3489 | .if X64WIN
3490 | fstp qword [rsp+8] // Use scratch area.
3491 | movsd xmm0, qword [rsp+8]
3492 | .else
3493 | fstp qword [rsp-8] // Use red zone.
3494 | movsd xmm0, qword [rsp-8]
3495 | .endif
3496 | ret
3497 |
3498 |.else // x86 calling convention.
3499 |
3500 | .define fpmop, eax
3501 | mov fpmop, [esp+12] // fpm argument from the stack.
3502 | movsd xmm0, qword [esp+4] // x argument from the stack.
3503 | cmp fpmop, 1; je >1; ja >2
3504 | call ->vm_floor; jmp >7 // fpm 0.
3505 |1: ; call ->vm_ceil; jmp >7 // fpm 1.
3506 |2: ; cmp fpmop, 3; je >1; ja >2
3507 | call ->vm_trunc; jmp >7 // fpm 2.
3508 |1:
3509 | sqrtsd xmm0, xmm0 // fpm 3.
3510 |7: // Return the SSE result on the x87 stack.
3511 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3512 | fld qword [esp+4]
3513 | ret
3514 |2: ; fld qword [esp+4] // fpm >= 4: x87 ops work on st0 and return it directly.
3515 | cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 // fpm 4 ==> exp, 5 ==> exp2 (tail jumps).
3516 |2: ; cmp fpmop, 7; je >1; ja >2
3517 | fldln2; fxch; fyl2x; ret // fpm 6: ln(2)*log2(x).
3518 |1: ; fld1; fxch; fyl2x; ret // fpm 7: 1*log2(x).
3519 |2: ; cmp fpmop, 9; je >1; ja >2
3520 | fldlg2; fxch; fyl2x; ret // fpm 8: log10(2)*log2(x).
3521 |1: ; fsin; ret // fpm 9.
3522 |2: ; cmp fpmop, 11; je >1; ja >9
3523 | fcos; ret // fpm 10.
3524 |1: ; fptan; fpop; ret // fpm 11. fptan leaves an extra 1.0 on top, hence the fpop.
3525 |
3526 |.endif
3527 } else {
3528 | mov fpmop, [esp+12] // fpm argument from the stack.
3529 | fld qword [esp+4] // st0 = x.
3530 | cmp fpmop, 1; jb ->vm_floor; je ->vm_ceil // fpm 0 ==> floor, 1 ==> ceil (tail jumps).
3531 | cmp fpmop, 3; jb ->vm_trunc; ja >2 // fpm 2 ==> trunc.
3532 | fsqrt; ret // fpm 3.
3533 |2: ; cmp fpmop, 5; jb ->vm_exp_x87; je ->vm_exp2_x87 // fpm 4 ==> exp, 5 ==> exp2.
3534 | cmp fpmop, 7; je >1; ja >2
3535 | fldln2; fxch; fyl2x; ret // fpm 6: ln(2)*log2(x).
3536 |1: ; fld1; fxch; fyl2x; ret // fpm 7: 1*log2(x).
3537 |2: ; cmp fpmop, 9; je >1; ja >2
3538 | fldlg2; fxch; fyl2x; ret // fpm 8: log10(2)*log2(x).
3539 |1: ; fsin; ret // fpm 9.
3540 |2: ; cmp fpmop, 11; je >1; ja >9
3541 | fcos; ret // fpm 10.
3542 |1: ; fptan; fpop; ret // fpm 11. fptan leaves an extra 1.0 on top, hence the fpop.
3543 }
3544 |9: ; int3 // Bad fpm.
3545#endif
3546 |
3547 |// Callable from C: double lj_vm_foldarith(double x, double y, int op)
3548 |// Compute x op y for basic arithmetic operators (+ - * / % ^ and unary -)
3549 |// and basic math functions. ORDER ARITH
3550 |->vm_foldarith:
3551 if (sse) {
3552 |.if X64
3553 |
3554 | .if X64WIN
3555 | .define foldop, CARG3d
3556 | .else
3557 | .define foldop, CARG1d
3558 | .endif
3559 | cmp foldop, 1; je >1; ja >2
3560 | addsd xmm0, xmm1; ret // op 0: x + y.
3561 |1: ; subsd xmm0, xmm1; ret // op 1: x - y.
3562 |2: ; cmp foldop, 3; je >1; ja >2
3563 | mulsd xmm0, xmm1; ret // op 2: x * y.
3564 |1: ; divsd xmm0, xmm1; ret // op 3: x / y.
3565 |2: ; cmp foldop, 5; jb ->vm_mod; je ->vm_pow // op 4 ==> mod, op 5 ==> pow (tail jumps).
3566 | cmp foldop, 7; je >1; ja >2
3567 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; ret // op 6: flip the sign of x.
3568 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; ret // op 7: clear the sign of x.
3569 |2: ; cmp foldop, 9; ja >2
3570 |.if X64WIN
3571 | movsd qword [rsp+8], xmm0 // Use scratch area.
3572 | movsd qword [rsp+16], xmm1
3573 | fld qword [rsp+8] // st0 = x.
3574 | fld qword [rsp+16] // st0 = y, st1 = x.
3575 |.else
3576 | movsd qword [rsp-8], xmm0 // Use red zone.
3577 | movsd qword [rsp-16], xmm1
3578 | fld qword [rsp-8] // st0 = x.
3579 | fld qword [rsp-16] // st0 = y, st1 = x.
3580 |.endif
3581 | je >1 // Flags still from cmp foldop, 9: op 9 ==> fscale path.
3582 | fpatan // op 8: arctangent of st1/st0, i.e. of x/y.
3583 |7: // Move the x87 result back to xmm0.
3584 |.if X64WIN
3585 | fstp qword [rsp+8] // Use scratch area.
3586 | movsd xmm0, qword [rsp+8]
3587 |.else
3588 | fstp qword [rsp-8] // Use red zone.
3589 | movsd xmm0, qword [rsp-8]
3590 |.endif
3591 | ret
3592 |1: ; fxch; fscale; fpop1; jmp <7 // op 9: scale x by a power of two taken from y.
3593 |2: ; cmp foldop, 11; je >1; ja >9
3594 | minsd xmm0, xmm1; ret // op 10: min.
3595 |1: ; maxsd xmm0, xmm1; ret // op 11: max.
3596 |9: ; int3 // Bad op.
3597 |
3598 |.else // x86 calling convention.
3599 |
3600 | .define foldop, eax
3601 | mov foldop, [esp+20] // op argument from the stack.
3602 | movsd xmm0, qword [esp+4] // x argument.
3603 | movsd xmm1, qword [esp+12] // y argument.
3604 | cmp foldop, 1; je >1; ja >2
3605 | addsd xmm0, xmm1 // op 0: x + y.
3606 |7: // Return the SSE result on the x87 stack.
3607 | movsd qword [esp+4], xmm0 // Overwrite callee-owned args.
3608 | fld qword [esp+4]
3609 | ret
3610 |1: ; subsd xmm0, xmm1; jmp <7 // op 1: x - y.
3611 |2: ; cmp foldop, 3; je >1; ja >2
3612 | mulsd xmm0, xmm1; jmp <7 // op 2: x * y.
3613 |1: ; divsd xmm0, xmm1; jmp <7 // op 3: x / y.
3614 |2: ; cmp foldop, 5
3615 | je >1; ja >2
3616 | call ->vm_mod; jmp <7 // op 4: mod.
3617 |1: ; pop edx; call ->vm_pow; push edx; jmp <7 // Writes to scratch area.
3618 |2: ; cmp foldop, 7; je >1; ja >2
3619 | sseconst_sign xmm1, RDa; xorps xmm0, xmm1; jmp <7 // op 6: flip the sign of x.
3620 |1: ; sseconst_abs xmm1, RDa; andps xmm0, xmm1; jmp <7 // op 7: clear the sign of x.
3621 |2: ; cmp foldop, 9; ja >2
3622 | fld qword [esp+4] // Reload from stack
3623 | fld qword [esp+12] // st0 = y, st1 = x.
3624 | je >1 // Flags still from cmp foldop, 9: op 9 ==> fscale path.
3625 | fpatan; ret // op 8: arctangent of st1/st0, i.e. of x/y.
3626 |1: ; fxch; fscale; fpop1; ret // op 9: scale x by a power of two taken from y.
3627 |2: ; cmp foldop, 11; je >1; ja >9
3628 | minsd xmm0, xmm1; jmp <7 // op 10: min.
3629 |1: ; maxsd xmm0, xmm1; jmp <7 // op 11: max.
3630 |9: ; int3 // Bad op.
3631 |
3632 |.endif
3633 } else {
3634 | mov eax, [esp+20] // op argument from the stack.
3635 | fld qword [esp+4] // st0 = x.
3636 | fld qword [esp+12] // st0 = y, st1 = x.
3637 | cmp eax, 1; je >1; ja >2
3638 | faddp st1; ret // op 0.
3639 |1: ; fsubp st1; ret // op 1.
3640 |2: ; cmp eax, 3; je >1; ja >2
3641 | fmulp st1; ret // op 2.
3642 |1: ; fdivp st1; ret // op 3.
3643 |2: ; cmp eax, 5; jb ->vm_mod; je ->vm_pow // op 4 ==> mod, op 5 ==> pow (tail jumps).
3644 | cmp eax, 7; je >1; ja >2
3645 | fpop; fchs; ret // op 6: drop y, negate x.
3646 |1: ; fpop; fabs; ret // op 7: drop y, |x|.
3647 |2: ; cmp eax, 9; je >1; ja >2
3648 | fpatan; ret // op 8.
3649 |1: ; fxch; fscale; fpop1; ret // op 9.
3650 |2: ; cmp eax, 11; je >1; ja >9
3651 ||if (cmov) {
3652 | fucomi st1; fcmovnbe st1; fpop1; ret // op 10.
3653 |1: ; fucomi st1; fcmovbe st1; fpop1; ret // op 11.
3654 ||} else {
3655 | fucom st1; fnstsw ax; test ah, 1; jz >2; fxch; 2: ; fpop; ret // op 10.
3656 |1: ; fucom st1; fnstsw ax; test ah, 1; jnz >2; fxch; 2: ; fpop; ret // op 11.
3657 ||}
3658 |9: ; int3 // Bad op.
3659 }
3660 |
3661 |//-----------------------------------------------------------------------
3662 |//-- Miscellaneous functions --------------------------------------------
3663 |//-----------------------------------------------------------------------
3664 |
3665 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
3666 |->vm_cpuid:
3667 |.if X64
3668 | mov eax, CARG1d // Function number for cpuid.
3669 | .if X64WIN; push rsi; mov rsi, CARG2; .endif
3670 | push rbx // cpuid overwrites ebx; saved here and restored below.
3671 | cpuid
3672 | mov [rsi], eax // Store eax/ebx/ecx/edx into res[0..3].
3673 | mov [rsi+4], ebx
3674 | mov [rsi+8], ecx
3675 | mov [rsi+12], edx
3676 | pop rbx
3677 | .if X64WIN; pop rsi; .endif
3678 | ret
3679 |.else
3680 | pushfd
3681 | pop edx // edx = current flags.
3682 | mov ecx, edx // Keep a copy for the comparison below.
3683 | xor edx, 0x00200000 // Toggle ID bit in flags.
3684 | push edx
3685 | popfd
3686 | pushfd
3687 | pop edx // Read the flags back.
3688 | xor eax, eax // Zero means no features supported.
3689 | cmp ecx, edx
3690 | jz >1 // No ID toggle means no CPUID support.
3691 | mov eax, [esp+4] // Argument 1 is function number.
3692 | push edi
3693 | push ebx
3694 | cpuid
3695 | mov edi, [esp+16] // Argument 2 is result area.
3696 | mov [edi], eax // Store eax/ebx/ecx/edx into res[0..3].
3697 | mov [edi+4], ebx
3698 | mov [edi+8], ecx
3699 | mov [edi+12], edx
3700 | pop ebx
3701 | pop edi
3702 |1:
3703 | ret
3704 |.endif
3705 |
3706 |//-----------------------------------------------------------------------
3707 |//-- Assertions ---------------------------------------------------------
3708 |//-----------------------------------------------------------------------
3709 |
3710 |->assert_bad_for_arg_type:
3711#ifdef LUA_USE_ASSERT
3712 | int3 // Breakpoint trap, only emitted when assertions are enabled.
3713#endif
3714 | int3 // Always emitted, so the label is followed by at least one trap.
3715 |
3716 |//-----------------------------------------------------------------------
3717 |//-- FFI helper functions -----------------------------------------------
3718 |//-----------------------------------------------------------------------
3719 |
3720 |// Handler for callback functions. Callback slot number in ah/al.
3721 |->vm_ffi_callback:
3722#if LJ_HASFFI
3723 |.type CTSTATE, CTState, PC
3724 |.if not X64
3725 | sub esp, 16 // Leave room for SAVE_ERRF etc.
3726 |.endif
3727 | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
3728 | lea DISPATCH, [ebp+GG_G2DISP]
3729 | mov CTSTATE, GL:ebp->ctype_state
3730 | movzx eax, ax // Keep only the 16 bit slot number.
3731 | mov CTSTATE->cb.slot, eax
3732 |.if X64
3733 | mov CTSTATE->cb.gpr[0], CARG1 // Save the incoming integer argument registers.
3734 | mov CTSTATE->cb.gpr[1], CARG2
3735 | mov CTSTATE->cb.gpr[2], CARG3
3736 | mov CTSTATE->cb.gpr[3], CARG4
3737 | movsd qword CTSTATE->cb.fpr[0], xmm0 // Save the incoming FP argument registers.
3738 | movsd qword CTSTATE->cb.fpr[1], xmm1
3739 | movsd qword CTSTATE->cb.fpr[2], xmm2
3740 | movsd qword CTSTATE->cb.fpr[3], xmm3
3741 |.if X64WIN
3742 | lea rax, [rsp+CFRAME_SIZE+4*8] // Skips 4 extra slots (presumably the WIN64 shadow space).
3743 |.else
3744 | lea rax, [rsp+CFRAME_SIZE]
3745 | mov CTSTATE->cb.gpr[4], CARG5
3746 | mov CTSTATE->cb.gpr[5], CARG6
3747 | movsd qword CTSTATE->cb.fpr[4], xmm4
3748 | movsd qword CTSTATE->cb.fpr[5], xmm5
3749 | movsd qword CTSTATE->cb.fpr[6], xmm6
3750 | movsd qword CTSTATE->cb.fpr[7], xmm7
3751 |.endif
3752 | mov CTSTATE->cb.stack, rax // Address computed by the lea above.
3753 | mov CARG2, rsp // Second argument (cf) for lj_ccallback_enter.
3754 |.else
3755 | lea eax, [esp+CFRAME_SIZE+16]
3756 | mov CTSTATE->cb.gpr[0], FCARG1
3757 | mov CTSTATE->cb.gpr[1], FCARG2
3758 | mov CTSTATE->cb.stack, eax
3759 | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp.
3760 | mov FCARG2, [esp+CFRAME_SIZE+8]
3761 | mov SAVE_RET, FCARG1
3762 | mov SAVE_R4, FCARG2
3763 | mov FCARG2, esp // Second argument (cf) for lj_ccallback_enter.
3764 |.endif
3765 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
3766 | mov FCARG1, CTSTATE
3767 | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf)
3768 | // lua_State * returned in eax (RD).
3769 | set_vmstate INTERP
3770 | mov BASE, L:RD->base
3771 | mov RD, L:RD->top
3772 | sub RD, BASE // RD = top - base in bytes.
3773 | mov LFUNC:RB, [BASE-8] // Function object from the slot below BASE.
3774 | shr RD, 3 // Bytes to 8 byte slots.
3775 | add RD, 1 // RD = slot count + 1.
3776 | ins_callt
3777#endif
3778 |
3779 |->cont_ffi_callback: // Return from FFI callback.
3780#if LJ_HASFFI
3781 | mov L:RA, SAVE_L
3782 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)]
3783 | mov aword CTSTATE->L, L:RAa
3784 | mov L:RA->base, BASE // Sync base/top back into the lua_State before the C call.
3785 | mov L:RA->top, RB
3786 | mov FCARG1, CTSTATE
3787 | mov FCARG2, RC
3788 | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o)
3789 |.if X64
3790 | mov rax, CTSTATE->cb.gpr[0] // Integer return value.
3791 | movsd xmm0, qword CTSTATE->cb.fpr[0] // FP return value.
3792 | jmp ->vm_leave_unw
3793 |.else
3794 | mov L:RB, SAVE_L
3795 | mov eax, CTSTATE->cb.gpr[0] // Integer return value in eax (and edx).
3796 | mov edx, CTSTATE->cb.gpr[1]
3797 | cmp dword CTSTATE->cb.gpr[2], 1 // gpr[2] selects the x87 result: 0 none, 1 float, else double.
3798 | jb >7
3799 | je >6
3800 | fld qword CTSTATE->cb.fpr[0].d
3801 | jmp >7
3802 |6:
3803 | fld dword CTSTATE->cb.fpr[0].f
3804 |7:
3805 | mov ecx, L:RB->top
3806 | movzx ecx, word [ecx+6] // Get stack adjustment and copy up.
3807 | mov SAVE_L, ecx // Must be one slot above SAVE_RET
3808 | restoreregs
3809 | pop ecx // Move return addr from SAVE_RET.
3810 | add esp, [esp] // Adjust stack.
3811 | add esp, 16 // Drop the 16 bytes reserved on entry to vm_ffi_callback.
3812 | push ecx // Put the return address back for the ret below.
3813 | ret
3814 |.endif
3815#endif
3816 |
3817 |->vm_ffi_call@4: // Call C function via FFI.
3818 | // Caveat: needs special frame unwinding, see below.
3819#if LJ_HASFFI
3820 |.if X64
3821 | .type CCSTATE, CCallState, rbx
3822 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 // Frame setup; rbx holds the CCallState *.
3823 |.else
3824 | .type CCSTATE, CCallState, ebx
3825 | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1 // Frame setup; ebx holds the CCallState *.
3826 |.endif
3827 |
3828 | // Readjust stack.
3829 |.if X64
3830 | mov eax, CCSTATE->spadj
3831 | sub rsp, rax
3832 |.else
3833 | sub esp, CCSTATE->spadj
3834#if LJ_TARGET_WINDOWS
3835 | mov CCSTATE->spadj, esp // Remember esp before the call; compared again after it.
3836#endif
3837 |.endif
3838 |
3839 | // Copy stack slots.
3840 | movzx ecx, byte CCSTATE->nsp // ecx = number of stack slots.
3841 | sub ecx, 1
3842 | js >2 // Nothing to copy if nsp == 0.
3843 |1: // Copy loop, from the last slot down to slot 0.
3844 |.if X64
3845 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
3846 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
3847 |.else
3848 | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
3849 | mov [esp+ecx*4], eax
3850 |.endif
3851 | sub ecx, 1
3852 | jns <1
3853 |2:
3854 |
3855 |.if X64
3856 | movzx eax, byte CCSTATE->nfpr // eax = number of FP register arguments.
3857 | mov CARG1, CCSTATE->gpr[0] // Load the integer argument registers.
3858 | mov CARG2, CCSTATE->gpr[1]
3859 | mov CARG3, CCSTATE->gpr[2]
3860 | mov CARG4, CCSTATE->gpr[3]
3861 |.if not X64WIN
3862 | mov CARG5, CCSTATE->gpr[4]
3863 | mov CARG6, CCSTATE->gpr[5]
3864 |.endif
3865 | test eax, eax; jz >5 // No FP register arguments: skip loading xmm regs.
3866 | movaps xmm0, CCSTATE->fpr[0]
3867 | movaps xmm1, CCSTATE->fpr[1]
3868 | movaps xmm2, CCSTATE->fpr[2]
3869 | movaps xmm3, CCSTATE->fpr[3]
3870 |.if not X64WIN
3871 | cmp eax, 4; jbe >5 // At most 4 FP regs used: skip xmm4-xmm7.
3872 | movaps xmm4, CCSTATE->fpr[4]
3873 | movaps xmm5, CCSTATE->fpr[5]
3874 | movaps xmm6, CCSTATE->fpr[6]
3875 | movaps xmm7, CCSTATE->fpr[7]
3876 |.endif
3877 |5:
3878 |.else
3879 | mov FCARG1, CCSTATE->gpr[0]
3880 | mov FCARG2, CCSTATE->gpr[1]
3881 |.endif
3882 |
3883 | call aword CCSTATE->func // Indirect call to the target C function.
3884 |
3885 |.if X64
3886 | mov CCSTATE->gpr[0], rax // Store the return registers back into the call state.
3887 | movaps CCSTATE->fpr[0], xmm0
3888 |.if not X64WIN
3889 | mov CCSTATE->gpr[1], rdx
3890 | movaps CCSTATE->fpr[1], xmm1
3891 |.endif
3892 |.else
3893 | mov CCSTATE->gpr[0], eax // Store the return registers back into the call state.
3894 | mov CCSTATE->gpr[1], edx
3895 | cmp byte CCSTATE->resx87, 1 // resx87: 0 no x87 result, 1 float, else double.
3896 | jb >7
3897 | je >6
3898 | fstp qword CCSTATE->fpr[0].d[0]
3899 | jmp >7
3900 |6:
3901 | fstp dword CCSTATE->fpr[0].f[0]
3902 |7:
3903#if LJ_TARGET_WINDOWS
3904 | sub CCSTATE->spadj, esp // spadj = esp saved before the call - esp after the call.
3905#endif
3906 |.endif
3907 |
3908 |.if X64
3909 | mov rbx, [rbp-8]; leave; ret // Restore rbx from its slot below the saved rbp.
3910 |.else
3911 | mov ebx, [ebp-4]; leave; ret // Restore ebx from its slot below the saved ebp.
3912 |.endif
3913#endif
3914 |// Note: vm_ffi_call must be the last function in this object file!
3915 |
3916 |//-----------------------------------------------------------------------
3917}
3918
3919/* Generate the code for a single instruction. */
3920static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
3921{
3922 int vk = 0;
3923 |// Note: aligning all instructions does not pay off.
3924 |=>defop:
3925
3926 switch (op) {
3927
3928 /* -- Comparison ops ---------------------------------------------------- */
3929
3930 /* Remember: all ops branch for a true comparison, fall through otherwise. */
3931
3932 |.macro jmp_comp, lt, ge, le, gt, target // Emit one of four jump mnemonics, selected by the bytecode op.
3933 ||switch (op) {
3934 ||case BC_ISLT:
3935 | lt target // Jump used for BC_ISLT.
3936 ||break;
3937 ||case BC_ISGE:
3938 | ge target // Jump used for BC_ISGE.
3939 ||break;
3940 ||case BC_ISLE:
3941 | le target // Jump used for BC_ISLE.
3942 ||break;
3943 ||case BC_ISGT:
3944 | gt target // Jump used for BC_ISGT.
3945 ||break;
3946 ||default: break; /* Shut up GCC. */
3947 ||}
3948 |.endmacro
3949
3950 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
3951 | // RA = src1, RD = src2, JMP with RD = target
3952 | ins_AD
3953 if (LJ_DUALNUM) {
3954 | checkint RA, >7
3955 | checkint RD, >8
3956 | mov RB, dword [BASE+RA*8] // Both operands are integers: signed integer compare.
3957 | add PC, 4
3958 | cmp RB, dword [BASE+RD*8]
3959 | jmp_comp jge, jl, jg, jle, >9 // Inverted condition: skip the branch if the comparison fails.
3960 |6: // Comparison holds: take the jump encoded in the following instruction.
3961 | movzx RD, PC_RD
3962 | branchPC RD
3963 |9:
3964 | ins_next
3965 |
3966 |7: // RA is not an integer.
3967 | ja ->vmeta_comp
3968 | // RA is a number.
3969 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3970 | // RA is a number, RD is an integer.
3971 if (sse) {
3972 | cvtsi2sd xmm0, dword [BASE+RD*8] // Convert integer RD to double, then join the FP compare at 2.
3973 | jmp >2
3974 } else {
3975 | fld qword [BASE+RA*8]
3976 | fild dword [BASE+RD*8]
3977 | jmp >3
3978 }
3979 |
3980 |8: // RA is an integer, RD is not an integer.
3981 | ja ->vmeta_comp
3982 | // RA is an integer, RD is a number.
3983 if (sse) {
3984 | cvtsi2sd xmm1, dword [BASE+RA*8] // Convert integer RA to double.
3985 | movsd xmm0, qword [BASE+RD*8]
3986 | add PC, 4
3987 | ucomisd xmm0, xmm1 // Compares RD with RA, same operand order as the FP path below.
3988 | jmp_comp jbe, ja, jb, jae, <9
3989 | jmp <6
3990 } else {
3991 | fild dword [BASE+RA*8]
3992 | jmp >2
3993 }
3994 } else {
3995 | checknum RA, ->vmeta_comp
3996 | checknum RD, ->vmeta_comp
3997 }
3998 if (sse) {
3999 |1:
4000 | movsd xmm0, qword [BASE+RD*8]
4001 |2:
4002 | add PC, 4
4003 | ucomisd xmm0, qword [BASE+RA*8] // Compares RD with RA (reverse operand order).
4004 |3:
4005 } else {
4006 |1:
4007 | fld qword [BASE+RA*8] // Reverse order, i.e like cmp D, A.
4008 |2:
4009 | fld qword [BASE+RD*8]
4010 |3:
4011 | add PC, 4
4012 | fcomparepp // eax (RD) modified!
4013 }
4014 | // Unordered: all of ZF CF PF set, ordered: PF clear.
4015 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
4016 if (LJ_DUALNUM) {
4017 | jmp_comp jbe, ja, jb, jae, <9 // Skip the branch (label 9 above) if the comparison fails.
4018 | jmp <6
4019 } else {
4020 | jmp_comp jbe, ja, jb, jae, >1 // Skip the branch if the comparison fails.
4021 | movzx RD, PC_RD
4022 | branchPC RD
4023 |1:
4024 | ins_next
4025 }
4026 break;
4027
4028 case BC_ISEQV: case BC_ISNEV:
4029 vk = op == BC_ISEQV;
4030 | ins_AD // RA = src1, RD = src2, JMP with RD = target
4031 | mov RB, [BASE+RD*8+4] // RB = type tag of RD.
4032 | add PC, 4
4033 if (LJ_DUALNUM) {
4034 | cmp RB, LJ_TISNUM; jne >7
4035 | checkint RA, >8
4036 | mov RB, dword [BASE+RD*8] // Both operands are integers: compare the payloads.
4037 | cmp RB, dword [BASE+RA*8]
4038 if (vk) {
4039 | jne >9 // ISEQV: not equal ==> no branch.
4040 } else {
4041 | je >9 // ISNEV: equal ==> no branch.
4042 }
4043 | movzx RD, PC_RD
4044 | branchPC RD
4045 |9:
4046 | ins_next
4047 |
4048 |7: // RD is not an integer.
4049 | ja >5
4050 | // RD is a number.
4051 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
4052 | // RD is a number, RA is an integer.
4053 if (sse) {
4054 | cvtsi2sd xmm0, dword [BASE+RA*8]
4055 } else {
4056 | fild dword [BASE+RA*8]
4057 }
4058 | jmp >2
4059 |
4060 |8: // RD is an integer, RA is not an integer.
4061 | ja >5
4062 | // RD is an integer, RA is a number.
4063 if (sse) {
4064 | cvtsi2sd xmm0, dword [BASE+RD*8]
4065 | ucomisd xmm0, qword [BASE+RA*8]
4066 } else {
4067 | fild dword [BASE+RD*8]
4068 | fld qword [BASE+RA*8]
4069 }
4070 | jmp >4
4071 |
4072 } else {
4073 | cmp RB, LJ_TISNUM; jae >5
4074 | checknum RA, >5
4075 }
4076 if (sse) {
4077 |1:
4078 | movsd xmm0, qword [BASE+RA*8]
4079 |2:
4080 | ucomisd xmm0, qword [BASE+RD*8]
4081 |4:
4082 } else {
4083 |1:
4084 | fld qword [BASE+RA*8]
4085 |2:
4086 | fld qword [BASE+RD*8]
4087 |4:
4088 | fcomparepp // eax (RD) modified!
4089 }
4090 iseqne_fp:
4091 if (vk) {
4092 | jp >2 // Unordered means not equal.
4093 | jne >2
4094 } else {
4095 | jp >2 // Unordered means not equal.
4096 | je >1
4097 }
4098 iseqne_end:
4099 if (vk) {
4100 |1: // EQ: Branch to the target.
4101 | movzx RD, PC_RD
4102 | branchPC RD
4103 |2: // NE: Fallthrough to next instruction.
4104 if (!LJ_HASFFI) {
4105 |3:
4106 }
4107 } else {
4108 if (!LJ_HASFFI) {
4109 |3:
4110 }
4111 |2: // NE: Branch to the target.
4112 | movzx RD, PC_RD
4113 | branchPC RD
4114 |1: // EQ: Fallthrough to next instruction.
4115 }
4116 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
4117 op == BC_ISEQN || op == BC_ISNEN)) {
4118 | jmp <9 // Reuse the ins_next emitted at label 9 of the integer path.
4119 } else {
4120 | ins_next
4121 }
4122 |
4123 if (op == BC_ISEQV || op == BC_ISNEV) {
4124 |5: // Either or both types are not numbers.
4125 if (LJ_HASFFI) {
4126 | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
4127 | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd
4128 }
4129 | checktp RA, RB // Compare types.
4130 | jne <2 // Not the same type?
4131 | cmp RB, LJ_TISPRI
4132 | jae <1 // Same type and primitive type?
4133 |
4134 | // Same types and not a primitive type. Compare GCobj or pvalue.
4135 | mov RA, [BASE+RA*8]
4136 | mov RD, [BASE+RD*8]
4137 | cmp RA, RD
4138 | je <1 // Same GCobjs or pvalues?
4139 | cmp RB, LJ_TISTABUD
4140 | ja <2 // Different objects and not table/ud?
4141 |.if X64
4142 | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata.
4143 | jb <2
4144 |.endif
4145 |
4146 | // Different tables or userdatas. Need to check __eq metamethod.
4147 | // Field metatable must be at same offset for GCtab and GCudata!
4148 | mov TAB:RB, TAB:RA->metatable
4149 | test TAB:RB, TAB:RB
4150 | jz <2 // No metatable?
4151 | test byte TAB:RB->nomm, 1<<MM_eq
4152 | jnz <2 // Or 'no __eq' flag set?
4153 if (vk) {
4154 | xor RB, RB // ne = 0
4155 } else {
4156 | mov RB, 1 // ne = 1
4157 }
4158 | jmp ->vmeta_equal // Handle __eq metamethod.
4159 } else if (LJ_HASFFI) {
4160 |3: // Reached via goto iseqne_end from other cases: operand is not of the expected type.
4161 | cmp RB, LJ_TCDATA
4162 if (LJ_DUALNUM && vk) {
4163 | jne <9
4164 } else {
4165 | jne <2
4166 }
4167 | jmp ->vmeta_equal_cd
4168 }
4169 break;
4170 case BC_ISEQS: case BC_ISNES:
4171 vk = op == BC_ISEQS;
4172 | ins_AND // RA = src, RD = str const, JMP with RD = target
4173 | mov RB, [BASE+RA*8+4] // RB = type tag of RA.
4174 | add PC, 4
4175 | cmp RB, LJ_TSTR; jne >3 // Not a string: handled at label 3.
4176 | mov RA, [BASE+RA*8]
4177 | cmp RA, [KBASE+RD*4] // Compare the string object pointers.
4178 iseqne_test:
4179 if (vk) {
4180 | jne >2 // EQ variant: not equal ==> label 2.
4181 } else {
4182 | je >1 // NE variant: equal ==> label 1.
4183 }
4184 goto iseqne_end;
4185 case BC_ISEQN: case BC_ISNEN:
4186 vk = op == BC_ISEQN;
4187 | ins_AD // RA = src, RD = num const, JMP with RD = target
4188 | mov RB, [BASE+RA*8+4] // RB = type tag of RA.
4189 | add PC, 4
4190 if (LJ_DUALNUM) {
4191 | cmp RB, LJ_TISNUM; jne >7
4192 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8
4193 | mov RB, dword [KBASE+RD*8] // Both are integers: compare the payloads.
4194 | cmp RB, dword [BASE+RA*8]
4195 if (vk) {
4196 | jne >9 // ISEQN: not equal ==> no branch.
4197 } else {
4198 | je >9 // ISNEN: equal ==> no branch.
4199 }
4200 | movzx RD, PC_RD
4201 | branchPC RD
4202 |9:
4203 | ins_next
4204 |
4205 |7: // RA is not an integer.
4206 | ja >3
4207 | // RA is a number.
4208 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
4209 | // RA is a number, RD is an integer.
4210 if (sse) {
4211 | cvtsi2sd xmm0, dword [KBASE+RD*8]
4212 } else {
4213 | fild dword [KBASE+RD*8]
4214 }
4215 | jmp >2
4216 |
4217 |8: // RA is an integer, RD is a number.
4218 if (sse) {
4219 | cvtsi2sd xmm0, dword [BASE+RA*8]
4220 | ucomisd xmm0, qword [KBASE+RD*8]
4221 } else {
4222 | fild dword [BASE+RA*8]
4223 | fld qword [KBASE+RD*8]
4224 }
4225 | jmp >4
4226 } else {
4227 | cmp RB, LJ_TISNUM; jae >3 // Not a number: handled at label 3.
4228 }
4229 if (sse) {
4230 |1:
4231 | movsd xmm0, qword [KBASE+RD*8] // xmm0 = number constant.
4232 |2:
4233 | ucomisd xmm0, qword [BASE+RA*8]
4234 |4:
4235 } else {
4236 |1:
4237 | fld qword [KBASE+RD*8]
4238 |2:
4239 | fld qword [BASE+RA*8]
4240 |4:
4241 | fcomparepp // eax (RD) modified!
4242 }
4243 goto iseqne_fp;
4244 case BC_ISEQP: case BC_ISNEP:
4245 vk = op == BC_ISEQP;
4246 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
4247 | mov RB, [BASE+RA*8+4] // RB = type tag of RA.
4248 | add PC, 4
4249 | cmp RB, RD // Compare the tag against the primitive type operand.
4250 if (!LJ_HASFFI) goto iseqne_test;
4251 if (vk) {
4252 | jne >3 // Tags differ: check for cdata at label 3.
4253 | movzx RD, PC_RD
4254 | branchPC RD
4255 |2:
4256 | ins_next
4257 |3:
4258 | cmp RB, LJ_TCDATA; jne <2 // Not cdata: plain fallthrough.
4259 | jmp ->vmeta_equal_cd
4260 } else {
4261 | je >2 // Tags equal: no branch.
4262 | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
4263 | movzx RD, PC_RD
4264 | branchPC RD
4265 |2:
4266 | ins_next
4267 }
4268 break;
4269
4270 /* -- Unary test and copy ops ------------------------------------------- */
4271
4272 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
4273 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
4274 | mov RB, [BASE+RD*8+4] // RB = type tag of the source slot.
4275 | add PC, 4
4276 | cmp RB, LJ_TISTRUECOND
4277 if (op == BC_IST || op == BC_ISTC) {
4278 | jae >1 // IST/ISTC: tag >= LJ_TISTRUECOND ==> no branch.
4279 } else {
4280 | jb >1 // ISF/ISFC: tag < LJ_TISTRUECOND ==> no branch.
4281 }
4282 if (op == BC_ISTC || op == BC_ISFC) {
4283 | mov [BASE+RA*8+4], RB // Copy variants: copy tag and payload from RD to RA.
4284 | mov RB, [BASE+RD*8]
4285 | mov [BASE+RA*8], RB
4286 }
4287 | movzx RD, PC_RD
4288 | branchPC RD
4289 |1: // Fallthrough to the next instruction.
4290 | ins_next
4291 break;
4292
4293 /* -- Unary ops --------------------------------------------------------- */
4294
4295 case BC_MOV:
4296 | ins_AD // RA = dst, RD = src
4297 |.if X64
4298 | mov RBa, [BASE+RD*8] // Copy the whole 8 byte slot in one move.
4299 | mov [BASE+RA*8], RBa
4300 |.else
4301 | mov RB, [BASE+RD*8+4] // Copy the slot as two 32 bit halves (+4 half first).
4302 | mov RD, [BASE+RD*8]
4303 | mov [BASE+RA*8+4], RB
4304 | mov [BASE+RA*8], RD
4305 |.endif
4306 | ins_next_
4307 break;
4308 case BC_NOT:
4309 | ins_AD // RA = dst, RD = src
4310 | xor RB, RB // RB = 0.
4311 | checktp RD, LJ_TISTRUECOND
4312 | adc RB, LJ_TTRUE // RB = LJ_TTRUE + carry flag left by the type check.
4313 | mov [BASE+RA*8+4], RB // Only the type tag of the destination is written.
4314 | ins_next
4315 break;
4316 case BC_UNM:
4317 | ins_AD // RA = dst, RD = src
4318 if (LJ_DUALNUM) {
4319 | checkint RD, >5
4320 | mov RB, [BASE+RD*8]
4321 | neg RB
4322 | jo >4 // Negation overflowed: store the result as a double instead.
4323 | mov dword [BASE+RA*8+4], LJ_TISNUM
4324 | mov dword [BASE+RA*8], RB
4325 |9:
4326 | ins_next
4327 |4:
4328 | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31.
4329 | mov dword [BASE+RA*8], 0
4330 | jmp <9
4331 |5: // Not an integer. Flags still from the type check.
4332 | ja ->vmeta_unm
4333 } else {
4334 | checknum RD, ->vmeta_unm
4335 }
4336 if (sse) {
4337 | movsd xmm0, qword [BASE+RD*8]
4338 | sseconst_sign xmm1, RDa
4339 | xorps xmm0, xmm1 // Flip the sign bit.
4340 | movsd qword [BASE+RA*8], xmm0
4341 } else {
4342 | fld qword [BASE+RD*8]
4343 | fchs
4344 | fstp qword [BASE+RA*8]
4345 }
4346 if (LJ_DUALNUM) {
4347 | jmp <9 // Reuse the ins_next emitted at label 9.
4348 } else {
4349 | ins_next
4350 }
4351 break;
4352 case BC_LEN:
4353 | ins_AD // RA = dst, RD = src
4354 | checkstr RD, >2
4355 | mov STR:RD, [BASE+RD*8]
4356 if (LJ_DUALNUM) {
4357 | mov RD, dword STR:RD->len
4358 |1: // Store an integer result. Also the join point for the table path.
4359 | mov dword [BASE+RA*8+4], LJ_TISNUM
4360 | mov dword [BASE+RA*8], RD
4361 } else if (sse) {
4362 | xorps xmm0, xmm0
4363 | cvtsi2sd xmm0, dword STR:RD->len
4364 |1: // Store a double result. Also the join point for the table path.
4365 | movsd qword [BASE+RA*8], xmm0
4366 } else {
4367 | fild dword STR:RD->len
4368 |1: // Store a double result. Also the join point for the table path.
4369 | fstp qword [BASE+RA*8]
4370 }
4371 | ins_next
4372 |2: // Not a string.
4373 | checktab RD, ->vmeta_len
4374 | mov TAB:FCARG1, [BASE+RD*8]
4375#ifdef LUAJIT_ENABLE_LUA52COMPAT
4376 | mov TAB:RB, TAB:FCARG1->metatable
4377 | cmp TAB:RB, 0
4378 | jnz >9 // Table has a metatable: check for __len at label 9.
4379 |3:
4380#endif
4381 |->BC_LEN_Z:
4382 | mov RB, BASE // Save BASE.
4383 | call extern lj_tab_len@4 // (GCtab *t)
4384 | // Length of table returned in eax (RD).
4385 if (LJ_DUALNUM) {
4386 | // Nothing to do.
4387 } else if (sse) {
4388 | cvtsi2sd xmm0, RD
4389 } else {
4390 |.if not X64
4391 | mov ARG1, RD // Go through a stack slot to load the integer onto the x87 stack.
4392 | fild ARG1
4393 |.endif
4394 }
4395 | mov BASE, RB // Restore BASE.
4396 | movzx RA, PC_RA // Refetch RA from the instruction.
4397 | jmp <1
4398#ifdef LUAJIT_ENABLE_LUA52COMPAT
4399 |9: // Check for __len.
4400 | test byte TAB:RB->nomm, 1<<MM_len
4401 | jnz <3
4402 | jmp ->vmeta_len // 'no __len' flag NOT set: check.
4403#endif
4404 break;
4405
4406 /* -- Binary ops -------------------------------------------------------- */
4407
4408 |.macro ins_arithpre, x87ins, sseins, ssereg // Type checks and operand load/op; the result is stored by the caller.
4409 | ins_ABC
4410 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4411 ||switch (vk) {
4412 ||case 0:
4413 | checknum RB, ->vmeta_arith_vn
4414 ||if (LJ_DUALNUM) {
4415 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
4416 ||}
4417 ||if (sse) {
4418 | movsd xmm0, qword [BASE+RB*8] // First operand: stack slot RB.
4419 | sseins ssereg, qword [KBASE+RC*8] // Second operand: constant RC.
4420 ||} else {
4421 | fld qword [BASE+RB*8]
4422 | x87ins qword [KBASE+RC*8]
4423 ||}
4424 || break;
4425 ||case 1:
4426 | checknum RB, ->vmeta_arith_nv
4427 ||if (LJ_DUALNUM) {
4428 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
4429 ||}
4430 ||if (sse) {
4431 | movsd xmm0, qword [KBASE+RC*8] // First operand: constant RC.
4432 | sseins ssereg, qword [BASE+RB*8] // Second operand: stack slot RB.
4433 ||} else {
4434 | fld qword [KBASE+RC*8]
4435 | x87ins qword [BASE+RB*8]
4436 ||}
4437 || break;
4438 ||default:
4439 | checknum RB, ->vmeta_arith_vv
4440 | checknum RC, ->vmeta_arith_vv
4441 ||if (sse) {
4442 | movsd xmm0, qword [BASE+RB*8] // First operand: stack slot RB.
4443 | sseins ssereg, qword [BASE+RC*8] // Second operand: stack slot RC.
4444 ||} else {
4445 | fld qword [BASE+RB*8]
4446 | x87ins qword [BASE+RC*8]
4447 ||}
4448 || break;
4449 ||}
4450 |.endmacro
4451 |
4452 |.macro ins_arithdn, intins // Integer arithmetic; non-integer operands and overflow go to vmeta_arith_* labels.
4453 | ins_ABC
4454 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
4455 ||switch (vk) {
4456 ||case 0:
4457 | checkint RB, ->vmeta_arith_vn
4458 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn
4459 | mov RB, [BASE+RB*8]
4460 | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno // Result in RB. Overflow leaves the integer path.
4461 || break;
4462 ||case 1:
4463 | checkint RB, ->vmeta_arith_nv
4464 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv
4465 | mov RC, [KBASE+RC*8]
4466 | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo // Result in RC. Overflow leaves the integer path.
4467 || break;
4468 ||default:
4469 | checkint RB, ->vmeta_arith_vv
4470 | checkint RC, ->vmeta_arith_vv
4471 | mov RB, [BASE+RB*8]
4472 | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo // Result in RB. Overflow leaves the integer path.
4473 || break;
4474 ||}
4475 | mov dword [BASE+RA*8+4], LJ_TISNUM // Tag the destination slot as an integer.
4476 ||if (vk == 1) {
4477 | mov dword [BASE+RA*8], RC
4478 ||} else {
4479 | mov dword [BASE+RA*8], RB
4480 ||}
4481 | ins_next
4482 |.endmacro
4483 |
4484 |.macro ins_arithpost // Store the FP result (xmm0 or st0) into the destination slot RA.
4485 ||if (sse) {
4486 | movsd qword [BASE+RA*8], xmm0
4487 ||} else {
4488 | fstp qword [BASE+RA*8]
4489 ||}
4490 |.endmacro
4491 |
4492 |.macro ins_arith, x87ins, sseins // Two-argument form: FP-only arithmetic.
4493 | ins_arithpre x87ins, sseins, xmm0
4494 | ins_arithpost
4495 | ins_next
4496 |.endmacro
4497 |
4498 |.macro ins_arith, intins, x87ins, sseins // Three-argument form: integer variant under LJ_DUALNUM, else FP.
4499 ||if (LJ_DUALNUM) {
4500 | ins_arithdn intins
4501 ||} else {
4502 | ins_arith, x87ins, sseins
4503 ||}
4504 |.endmacro
4505
4506 | // RA = dst, RB = src1 or num const, RC = src2 or num const
4507 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
4508 | ins_arith add, fadd, addsd
4509 break;
4510 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
4511 | ins_arith sub, fsub, subsd
4512 break;
4513 case BC_MULVN: case BC_MULNV: case BC_MULVV:
4514 | ins_arith imul, fmul, mulsd
4515 break;
4516 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
4517 | ins_arith fdiv, divsd // No integer instruction is passed: two-argument (FP-only) form.
4518 break;
4519 case BC_MODVN:
4520 | ins_arithpre fld, movsd, xmm1 // Only loads the operands (xmm0/xmm1 or two x87 loads).
4521 |->BC_MODVN_Z:
4522 | call ->vm_mod
4523 | ins_arithpost
4524 | ins_next
4525 break;
4526 case BC_MODNV: case BC_MODVV:
4527 | ins_arithpre fld, movsd, xmm1 // Only loads the operands, then shares the MODVN tail.
4528 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
4529 break;
4530 case BC_POW:
4531 | ins_arithpre fld, movsd, xmm1 // Only loads the operands (xmm0/xmm1 or two x87 loads).
4532 | call ->vm_pow
4533 | ins_arithpost
4534 | ins_next
4535 break;
4536
4537 case BC_CAT:
4538 | ins_ABC // RA = dst, RB = src_start, RC = src_end
4539 |.if X64
4540 | mov L:CARG1d, SAVE_L
4541 | mov L:CARG1d->base, BASE
4542 | lea CARG2d, [BASE+RC*8] // Second argument: address of slot RC.
4543 | mov CARG3d, RC
4544 | sub CARG3d, RB // Third argument: RC - RB.
4545 |->BC_CAT_Z:
4546 | mov L:RB, L:CARG1d // Keep L in RB across the call.
4547 |.else
4548 | lea RA, [BASE+RC*8] // Second argument: address of slot RC.
4549 | sub RC, RB // Third argument: RC - RB.
4550 | mov ARG2, RA
4551 | mov ARG3, RC
4552 |->BC_CAT_Z:
4553 | mov L:RB, SAVE_L
4554 | mov ARG1, L:RB
4555 | mov L:RB->base, BASE
4556 |.endif
4557 | mov SAVE_PC, PC
4558 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
4559 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
4560 | mov BASE, L:RB->base // Reload BASE from L after the call.
4561 | test RC, RC
4562 | jnz ->vmeta_binop
4563 | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
4564 | movzx RA, PC_RA
4565 |.if X64
4566 | mov RCa, [BASE+RB*8]
4567 | mov [BASE+RA*8], RCa
4568 |.else
4569 | mov RC, [BASE+RB*8+4]
4570 | mov RB, [BASE+RB*8]
4571 | mov [BASE+RA*8+4], RC
4572 | mov [BASE+RA*8], RB
4573 |.endif
4574 | ins_next
4575 break;
4576
4577 /* -- Constant ops ------------------------------------------------------ */
4578
4579 case BC_KSTR: /* Store a string constant with tag LJ_TSTR in slot RA. */
4580 | ins_AND // RA = dst, RD = str const (~)
4581 | mov RD, [KBASE+RD*4] // Fetch the constant (32 bit reference) via KBASE.
4582 | mov dword [BASE+RA*8+4], LJ_TSTR // Hi dword of the slot = type tag.
4583 | mov [BASE+RA*8], RD // Lo dword of the slot = reference.
4584 | ins_next
4585 break;
4586 case BC_KCDATA: /* Store a cdata constant with tag LJ_TCDATA in slot RA. No code is emitted without LJ_HASFFI. */
4587#if LJ_HASFFI
4588 | ins_AND // RA = dst, RD = cdata const (~)
4589 | mov RD, [KBASE+RD*4] // Fetch the constant (32 bit reference) via KBASE.
4590 | mov dword [BASE+RA*8+4], LJ_TCDATA // Hi dword of the slot = type tag.
4591 | mov [BASE+RA*8], RD // Lo dword of the slot = reference.
4592 | ins_next
4593#endif
4594 break;
4595 case BC_KSHORT: /* Store a signed 16 bit literal in slot RA: as integer under LJ_DUALNUM, else as double. */
4596 | ins_AD // RA = dst, RD = signed int16 literal
4597 if (LJ_DUALNUM) {
4598 | movsx RD, RDW // Sign-extend literal.
4599 | mov dword [BASE+RA*8+4], LJ_TISNUM // Hi dword = LJ_TISNUM tag.
4600 | mov dword [BASE+RA*8], RD // Lo dword = integer value.
4601 } else if (sse) {
4602 | movsx RD, RDW // Sign-extend literal.
4603 | cvtsi2sd xmm0, RD // Convert to double.
4604 | movsd qword [BASE+RA*8], xmm0
4605 } else {
4606 | fild PC_RD // Refetch signed RD from instruction.
4607 | fstp qword [BASE+RA*8]
4608 }
4609 | ins_next
4610 break;
4611 case BC_KNUM: /* Copy the 8 byte number constant KBASE[RD] to slot RA. */
4612 | ins_AD // RA = dst, RD = num const
4613 if (sse) {
4614 | movsd xmm0, qword [KBASE+RD*8]
4615 | movsd qword [BASE+RA*8], xmm0
4616 } else {
4617 | fld qword [KBASE+RD*8] // x87: load and store/pop through the FPU stack.
4618 | fstp qword [BASE+RA*8]
4619 }
4620 | ins_next
4621 break;
4622 case BC_KPRI: /* Store a primitive type in slot RA. Only the tag dword is written. */
4623 | ins_AND // RA = dst, RD = primitive type (~)
4624 | mov [BASE+RA*8+4], RD // Hi dword of the slot = type tag. Lo dword is left untouched.
4625 | ins_next
4626 break;
4627 case BC_KNIL: /* Set the tag dwords of slots dst_start..dst_end to LJ_TNIL. */
4628 | ins_AD // RA = dst_start, RD = dst_end
4629 | lea RA, [BASE+RA*8+12] // RA = address of the tag dword of slot dst_start+1.
4630 | lea RD, [BASE+RD*8+4] // RD = address of the tag dword of slot dst_end.
4631 | mov RB, LJ_TNIL
4632 | mov [RA-8], RB // Sets minimum 2 slots.
4633 |1:
4634 | mov [RA], RB
4635 | add RA, 8 // Advance to the tag dword of the next slot.
4636 | cmp RA, RD
4637 | jbe <1 // Loop while not past dst_end (unsigned compare of addresses).
4638 | ins_next
4639 break;
4640
4641 /* -- Upvalue and function ops ------------------------------------------ */
4642
4643 case BC_UGET: /* Copy the value of upvalue #RD of the current function to slot RA. */
4644 | ins_AD // RA = dst, RD = upvalue #
4645 | mov LFUNC:RB, [BASE-8] // Function object reference is stored at BASE-8.
4646 | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)] // uvptr[RD]
4647 | mov RB, UPVAL:RB->v // Pointer to the upvalue's value.
4648 |.if X64
4649 | mov RDa, [RB] // One 64 bit move copies the whole slot.
4650 | mov [BASE+RA*8], RDa
4651 |.else
4652 | mov RD, [RB+4] // Copy hi and lo dwords separately.
4653 | mov RB, [RB]
4654 | mov [BASE+RA*8+4], RD
4655 | mov [BASE+RA*8], RB
4656 |.endif
4657 | ins_next
4658 break;
4659 case BC_USETV: /* Store slot RD to upvalue #RA, with a GC write barrier check for closed upvalues. TV2MARKOFS is the offset from GCupval.tv to GCupval.marked. */
4660#define TV2MARKOFS \
4661 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
4662 | ins_AD // RA = upvalue #, RD = src
4663 | mov LFUNC:RB, [BASE-8] // Function object reference is stored at BASE-8.
4664 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] // uvptr[RA]
4665 | cmp byte UPVAL:RB->closed, 0 // Flags are consumed by the jz below (the movs in between do not alter flags).
4666 | mov RB, UPVAL:RB->v // Pointer to the upvalue's value.
4667 | mov RA, [BASE+RD*8] // RA = lo dword of src.
4668 | mov RD, [BASE+RD*8+4] // RD = hi dword (type tag) of src.
4669 | mov [RB], RA
4670 | mov [RB+4], RD
4671 | jz >1 // closed == 0: skip the barrier check.
4672 | // Check barrier for closed upvalue.
4673 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
4674 | jnz >2
4675 |1:
4676 | ins_next
4677 |
4678 |2: // Upvalue is black. Check if new value is collectable and white.
4679 | sub RD, LJ_TISGCV
4680 | cmp RD, LJ_TISNUM - LJ_TISGCV // tvisgcv(v)
4681 | jbe <1 // Not a GC value: no barrier.
4682 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
4683 | jz <1 // Not white: no barrier.
4684 | // Crossed a write barrier. Move the barrier forward.
4685 |.if X64 and not X64WIN
4686 | mov FCARG2, RB // tv argument.
4687 | mov RB, BASE // Save BASE.
4688 |.else
4689 | xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
4690 |.endif
4691 | lea GL:FCARG1, [DISPATCH+GG_DISP2G] // g argument.
4692 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
4693 | mov BASE, RB // Restore BASE.
4694 | jmp <1
4695 break;
4696#undef TV2MARKOFS
4697 case BC_USETS: /* Store a string constant to upvalue #RA. Barrier only if the upvalue is black and closed and the string is white. */
4698 | ins_AND // RA = upvalue #, RD = str const (~)
4699 | mov LFUNC:RB, [BASE-8] // Function object reference is stored at BASE-8.
4700 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] // uvptr[RA]
4701 | mov GCOBJ:RA, [KBASE+RD*4] // Fetch the string constant via KBASE.
4702 | mov RD, UPVAL:RB->v // Pointer to the upvalue's value.
4703 | mov [RD], GCOBJ:RA // Lo dword = string reference.
4704 | mov dword [RD+4], LJ_TSTR // Hi dword = type tag.
4705 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
4706 | jnz >2
4707 |1:
4708 | ins_next
4709 |
4710 |2: // Check if string is white and ensure upvalue is closed.
4711 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
4712 | jz <1
4713 | cmp byte UPVAL:RB->closed, 0
4714 | jz <1 // closed == 0: no barrier.
4715 | // Crossed a write barrier. Move the barrier forward.
4716 | mov RB, BASE // Save BASE (FCARG2 == BASE).
4717 | mov FCARG2, RD // tv argument.
4718 | lea GL:FCARG1, [DISPATCH+GG_DISP2G] // g argument.
4719 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
4720 | mov BASE, RB // Restore BASE.
4721 | jmp <1
4722 break;
4723 case BC_USETN: /* Store the 8 byte number constant KBASE[RD] to upvalue #RA. No barrier code is emitted. */
4724 | ins_AD // RA = upvalue #, RD = num const
4725 | mov LFUNC:RB, [BASE-8] // Function object reference is stored at BASE-8.
4726 if (sse) {
4727 | movsd xmm0, qword [KBASE+RD*8] // Load the constant.
4728 } else {
4729 | fld qword [KBASE+RD*8] // Load the constant onto the x87 stack.
4730 }
4731 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] // uvptr[RA]
4732 | mov RA, UPVAL:RB->v // Pointer to the upvalue's value.
4733 if (sse) {
4734 | movsd qword [RA], xmm0
4735 } else {
4736 | fstp qword [RA]
4737 }
4738 | ins_next
4739 break;
4740 case BC_USETP: /* Store a primitive type to upvalue #RA. Only the tag dword is written. */
4741 | ins_AND // RA = upvalue #, RD = primitive type (~)
4742 | mov LFUNC:RB, [BASE-8] // Function object reference is stored at BASE-8.
4743 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] // uvptr[RA]
4744 | mov RA, UPVAL:RB->v // Pointer to the upvalue's value.
4745 | mov [RA+4], RD // Hi dword = type tag.
4746 | ins_next
4747 break;
4748 case BC_UCLO: /* Branch to target. Calls lj_func_closeuv(L, &Stk[RA]) first if L->openupval is non-zero. */
4749 | ins_AD // RA = level, RD = target
4750 | branchPC RD // Do this first to free RD.
4751 | mov L:RB, SAVE_L
4752 | cmp dword L:RB->openupval, 0
4753 | je >1 // openupval is zero: skip the call.
4754 | mov L:RB->base, BASE
4755 | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE
4756 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
4757 | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level)
4758 | mov BASE, L:RB->base // Reload BASE (it was overwritten as FCARG2).
4759 |1:
4760 | ins_next
4761 break;
4762
4763 case BC_FNEW: /* Create a function object from prototype constant RD via lj_func_newL_gc. Store it with tag LJ_TFUNC in slot RA. */
4764 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
4765 |.if X64
4766 | mov L:RB, SAVE_L
4767 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
4768 | mov CARG3d, [BASE-8] // parent = function object reference stored at BASE-8.
4769 | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *.
4770 | mov CARG1d, L:RB
4771 |.else
4772 | mov LFUNC:RA, [BASE-8] // parent = function object reference stored at BASE-8.
4773 | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
4774 | mov L:RB, SAVE_L
4775 | mov ARG3, LFUNC:RA
4776 | mov ARG2, PROTO:RD
4777 | mov ARG1, L:RB
4778 | mov L:RB->base, BASE
4779 |.endif
4780 | mov SAVE_PC, PC
4781 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
4782 | call extern lj_func_newL_gc
4783 | // GCfuncL * returned in eax (RC).
4784 | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
4785 | movzx RA, PC_RA // Refetch RA from the instruction.
4786 | mov [BASE+RA*8], LFUNC:RC
4787 | mov dword [BASE+RA*8+4], LJ_TFUNC
4788 | ins_next
4789 break;
4790
4791 /* -- Table ops --------------------------------------------------------- */
4792
4793 case BC_TNEW: /* Create a table via lj_tab_new and store it with tag LJ_TTAB in slot RA. Calls lj_gc_step_fixtop first if gc.total >= gc.threshold. */
4794 | ins_AD // RA = dst, RD = hbits|asize
4795 | mov L:RB, SAVE_L
4796 | mov L:RB->base, BASE
4797 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
4798 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
4799 | mov SAVE_PC, PC // mov does not alter the flags of the cmp above.
4800 | jae >5 // total >= threshold: do a GC step first.
4801 |1:
4802 |.if X64
4803 | mov CARG3d, RD
4804 | and RD, 0x7ff // asize = low 11 bits of RD.
4805 | shr CARG3d, 11 // hbits = RD >> 11.
4806 |.else
4807 | mov RA, RD
4808 | and RD, 0x7ff // asize = low 11 bits of RD.
4809 | shr RA, 11 // hbits = RD >> 11.
4810 | mov ARG3, RA
4811 |.endif
4812 | cmp RD, 0x7ff
4813 | je >3
4814 |2:
4815 |.if X64
4816 | mov L:CARG1d, L:RB
4817 | mov CARG2d, RD
4818 |.else
4819 | mov ARG1, L:RB
4820 | mov ARG2, RD
4821 |.endif
4822 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
4823 | // Table * returned in eax (RC).
4824 | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
4825 | movzx RA, PC_RA // Refetch RA from the instruction.
4826 | mov [BASE+RA*8], TAB:RC
4827 | mov dword [BASE+RA*8+4], LJ_TTAB
4828 | ins_next
4829 |3: // Turn 0x7ff into 0x801.
4830 | mov RD, 0x801
4831 | jmp <2
4832 |5: // GC threshold reached.
4833 | mov L:FCARG1, L:RB
4834 | call extern lj_gc_step_fixtop@4 // (lua_State *L)
4835 | movzx RD, PC_RD // Reload RD from the instruction after the call.
4836 | jmp <1
4837 break;
4838 case BC_TDUP: /* Duplicate a template table constant via lj_tab_dup and store it with tag LJ_TTAB in slot RA. Calls lj_gc_step_fixtop first if gc.total >= gc.threshold. */
4839 | ins_AND // RA = dst, RD = table const (~) (holding template table)
4840 | mov L:RB, SAVE_L
4841 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
4842 | mov SAVE_PC, PC
4843 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
4844 | mov L:RB->base, BASE // mov does not alter the flags of the cmp above.
4845 | jae >3 // total >= threshold: do a GC step first.
4846 |2:
4847 | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
4848 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
4849 | call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
4850 | // Table * returned in eax (RC).
4851 | mov BASE, L:RB->base // Reload BASE (it was overwritten as FCARG2).
4852 | movzx RA, PC_RA // Refetch RA (it was overwritten as FCARG1).
4853 | mov [BASE+RA*8], TAB:RC
4854 | mov dword [BASE+RA*8+4], LJ_TTAB
4855 | ins_next
4856 |3: // GC threshold reached.
4857 | mov L:FCARG1, L:RB
4858 | call extern lj_gc_step_fixtop@4 // (lua_State *L)
4859 | movzx RD, PC_RD // Need to reload RD.
4860 | not RDa // Invert the reloaded operand again ((~) operand form).
4861 | jmp <2
4862 break;
4863
4864 case BC_GGET: /* Global get: look up a string constant key in the current function's env table via the BC_TGETS tail. */
4865 | ins_AND // RA = dst, RD = str const (~)
4866 | mov LFUNC:RB, [BASE-8] // Function object reference is stored at BASE-8.
4867 | mov TAB:RB, LFUNC:RB->env
4868 | mov STR:RC, [KBASE+RD*4] // Fetch the key string constant.
4869 | jmp ->BC_TGETS_Z // Expects RB = GCtab *, RC = GCstr *.
4870 break;
4871 case BC_GSET: /* Global set: store under a string constant key in the current function's env table via the BC_TSETS tail. */
4872 | ins_AND // RA = src, RD = str const (~)
4873 | mov LFUNC:RB, [BASE-8] // Function object reference is stored at BASE-8.
4874 | mov TAB:RB, LFUNC:RB->env
4875 | mov STR:RC, [KBASE+RD*4] // Fetch the key string constant.
4876 | jmp ->BC_TSETS_Z // Expects RB = GCtab *, RC = GCstr *.
4877 break;
4878
4879 case BC_TGETV: /* Table get with variable key. Fast path for integer keys inside the array part. String keys go to the BC_TGETS tail. Everything else goes to ->vmeta_tgetv. */
4880 | ins_ABC // RA = dst, RB = table, RC = key
4881 | checktab RB, ->vmeta_tgetv
4882 | mov TAB:RB, [BASE+RB*8]
4883 |
4884 | // Integer key?
4885 if (LJ_DUALNUM) {
4886 | checkint RC, >5
4887 | mov RC, dword [BASE+RC*8] // RC = integer key value.
4888 } else {
4889 | // Convert number to int and back and compare.
4890 | checknum RC, >5
4891 if (sse) {
4892 | movsd xmm0, qword [BASE+RC*8]
4893 | cvtsd2si RC, xmm0
4894 | cvtsi2sd xmm1, RC
4895 | ucomisd xmm0, xmm1 // Equal only if the key survived the round trip.
4896 } else {
4897 |.if not X64
4898 | fld qword [BASE+RC*8]
4899 | fist ARG1 // ARG1 stack slot is used as scratch for the integer.
4900 | fild ARG1
4901 | fcomparepp // eax (RC) modified!
4902 | mov RC, ARG1 // Reload the integer key into RC.
4903 |.endif
4904 }
4905 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4906 }
4907 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
4908 | jae ->vmeta_tgetv // Not in array part? Use fallback.
4909 | shl RC, 3 // Scale index by the 8 byte slot size.
4910 | add RC, TAB:RB->array // RC = address of the array slot.
4911 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
4912 | je >2
4913 | // Get array slot.
4914 |.if X64
4915 | mov RBa, [RC]
4916 | mov [BASE+RA*8], RBa
4917 |.else
4918 | mov RB, [RC]
4919 | mov RC, [RC+4]
4920 | mov [BASE+RA*8], RB
4921 | mov [BASE+RA*8+4], RC
4922 |.endif
4923 |1:
4924 | ins_next
4925 |
4926 |2: // Check for __index if table value is nil.
4927 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4928 | jz >3 // No metatable: result is nil.
4929 | mov TAB:RA, TAB:RB->metatable
4930 | test byte TAB:RA->nomm, 1<<MM_index
4931 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
4932 | movzx RA, PC_RA // Restore RA.
4933 |3:
4934 | mov dword [BASE+RA*8+4], LJ_TNIL // Store nil: only the tag dword is written.
4935 | jmp <1
4936 |
4937 |5: // String key?
4938 | checkstr RC, ->vmeta_tgetv
4939 | mov STR:RC, [BASE+RC*8]
4940 | jmp ->BC_TGETS_Z // Expects RB = GCtab *, RC = GCstr *.
4941 break;
4942 case BC_TGETS: /* Table get with string constant key: walks the hash chain inline. Goes to ->vmeta_tgets when the 'no __index' flag of the metatable is not set. */
4943 | ins_ABC // RA = dst, RB = table, RC = str const (~)
4944 | not RCa // Invert the operand ((~) operand form) before indexing via KBASE.
4945 | mov STR:RC, [KBASE+RC*4]
4946 | checktab RB, ->vmeta_tgets
4947 | mov TAB:RB, [BASE+RB*8]
4948 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
4949 | mov RA, TAB:RB->hmask
4950 | and RA, STR:RC->hash // Node index = hash & hmask.
4951 | imul RA, #NODE // Scale by the node size.
4952 | add NODE:RA, TAB:RB->node // RA = address of the first node in the chain.
4953 |1:
4954 | cmp dword NODE:RA->key.it, LJ_TSTR // Key must have the string tag ...
4955 | jne >4
4956 | cmp dword NODE:RA->key.gcr, STR:RC // ... and be the same string object.
4957 | jne >4
4958 | // Ok, key found. Assumes: offsetof(Node, val) == 0
4959 | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
4960 | je >5 // Key found, but nil value?
4961 | movzx RC, PC_RA // Refetch dst (RA holds the node pointer).
4962 | // Get node value.
4963 |.if X64
4964 | mov RBa, [RA]
4965 | mov [BASE+RC*8], RBa
4966 |.else
4967 | mov RB, [RA]
4968 | mov RA, [RA+4]
4969 | mov [BASE+RC*8], RB
4970 | mov [BASE+RC*8+4], RA
4971 |.endif
4972 |2:
4973 | ins_next
4974 |
4975 |3: // Store nil result.
4976 | movzx RC, PC_RA
4977 | mov dword [BASE+RC*8+4], LJ_TNIL // Only the tag dword is written.
4978 | jmp <2
4979 |
4980 |4: // Follow hash chain.
4981 | mov NODE:RA, NODE:RA->next
4982 | test NODE:RA, NODE:RA
4983 | jnz <1
4984 | // End of hash chain: key not found, nil result.
4985 |
4986 |5: // Check for __index if table value is nil.
4987 | mov TAB:RA, TAB:RB->metatable
4988 | test TAB:RA, TAB:RA
4989 | jz <3 // No metatable: done.
4990 | test byte TAB:RA->nomm, 1<<MM_index
4991 | jnz <3 // 'no __index' flag set: done.
4992 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
4993 break;
4994 case BC_TGETB: /* Table get with byte literal key. Fast path for keys inside the array part, otherwise ->vmeta_tgetb. */
4995 | ins_ABC // RA = dst, RB = table, RC = byte literal
4996 | checktab RB, ->vmeta_tgetb
4997 | mov TAB:RB, [BASE+RB*8]
4998 | cmp RC, TAB:RB->asize
4999 | jae ->vmeta_tgetb // Not in array part? Use fallback.
5000 | shl RC, 3 // Scale index by the 8 byte slot size.
5001 | add RC, TAB:RB->array // RC = address of the array slot.
5002 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
5003 | je >2
5004 | // Get array slot.
5005 |.if X64
5006 | mov RBa, [RC]
5007 | mov [BASE+RA*8], RBa
5008 |.else
5009 | mov RB, [RC]
5010 | mov RC, [RC+4]
5011 | mov [BASE+RA*8], RB
5012 | mov [BASE+RA*8+4], RC
5013 |.endif
5014 |1:
5015 | ins_next
5016 |
5017 |2: // Check for __index if table value is nil.
5018 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
5019 | jz >3 // No metatable: result is nil.
5020 | mov TAB:RA, TAB:RB->metatable
5021 | test byte TAB:RA->nomm, 1<<MM_index
5022 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
5023 | movzx RA, PC_RA // Restore RA.
5024 |3:
5025 | mov dword [BASE+RA*8+4], LJ_TNIL // Store nil: only the tag dword is written.
5026 | jmp <1
5027 break;
5028
5029 case BC_TSETV: /* Table set with variable key. Fast path for integer keys inside the array part. String keys go to the BC_TSETS tail. Everything else goes to ->vmeta_tsetv. */
5030 | ins_ABC // RA = src, RB = table, RC = key
5031 | checktab RB, ->vmeta_tsetv
5032 | mov TAB:RB, [BASE+RB*8]
5033 |
5034 | // Integer key?
5035 if (LJ_DUALNUM) {
5036 | checkint RC, >5
5037 | mov RC, dword [BASE+RC*8] // RC = integer key value.
5038 } else {
5039 | // Convert number to int and back and compare.
5040 | checknum RC, >5
5041 if (sse) {
5042 | movsd xmm0, qword [BASE+RC*8]
5043 | cvtsd2si RC, xmm0
5044 | cvtsi2sd xmm1, RC
5045 | ucomisd xmm0, xmm1 // Equal only if the key survived the round trip.
5046 } else {
5047 |.if not X64
5048 | fld qword [BASE+RC*8]
5049 | fist ARG1 // ARG1 stack slot is used as scratch for the integer.
5050 | fild ARG1
5051 | fcomparepp // eax (RC) modified!
5052 | mov RC, ARG1 // Reload the integer key into RC.
5053 |.endif
5054 }
5055 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
5056 }
5057 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
5058 | jae ->vmeta_tsetv // Not in array part? Use fallback.
5059 | shl RC, 3 // Scale index by the 8 byte slot size.
5060 | add RC, TAB:RB->array // RC = address of the array slot.
5061 | cmp dword [RC+4], LJ_TNIL
5062 | je >3 // Previous value is nil?
5063 |1:
5064 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
5065 | jnz >7
5066 |2: // Set array slot.
5067 |.if X64
5068 | mov RBa, [BASE+RA*8]
5069 | mov [RC], RBa
5070 |.else
5071 | mov RB, [BASE+RA*8+4]
5072 | mov RA, [BASE+RA*8]
5073 | mov [RC+4], RB
5074 | mov [RC], RA
5075 |.endif
5076 | ins_next
5077 |
5078 |3: // Check for __newindex if previous value is nil.
5079 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
5080 | jz <1 // No metatable: plain store.
5081 | mov TAB:RA, TAB:RB->metatable
5082 | test byte TAB:RA->nomm, 1<<MM_newindex
5083 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
5084 | movzx RA, PC_RA // Restore RA.
5085 | jmp <1
5086 |
5087 |5: // String key?
5088 | checkstr RC, ->vmeta_tsetv
5089 | mov STR:RC, [BASE+RC*8]
5090 | jmp ->BC_TSETS_Z // Expects RB = GCtab *, RC = GCstr *.
5091 |
5092 |7: // Possible table write barrier for the value. Skip valiswhite check.
5093 | barrierback TAB:RB, RA // RA is passed as second macro argument and restored below.
5094 | movzx RA, PC_RA // Restore RA.
5095 | jmp <2
5096 break;
5097 case BC_TSETS: /* Table set with string constant key: walks the hash chain inline. A missing key is added via lj_tab_newkey. Goes to ->vmeta_tsets when the 'no __newindex' flag of the metatable is not set. */
5098 | ins_ABC // RA = src, RB = table, RC = str const (~)
5099 | not RCa // Invert the operand ((~) operand form) before indexing via KBASE.
5100 | mov STR:RC, [KBASE+RC*4]
5101 | checktab RB, ->vmeta_tsets
5102 | mov TAB:RB, [BASE+RB*8]
5103 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
5104 | mov RA, TAB:RB->hmask
5105 | and RA, STR:RC->hash // Node index = hash & hmask.
5106 | imul RA, #NODE // Scale by the node size.
5107 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
5108 | add NODE:RA, TAB:RB->node // RA = address of the first node in the chain.
5109 |1:
5110 | cmp dword NODE:RA->key.it, LJ_TSTR // Key must have the string tag ...
5111 | jne >5
5112 | cmp dword NODE:RA->key.gcr, STR:RC // ... and be the same string object.
5113 | jne >5
5114 | // Ok, key found. Assumes: offsetof(Node, val) == 0
5115 | cmp dword [RA+4], LJ_TNIL
5116 | je >4 // Previous value is nil?
5117 |2:
5118 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
5119 | jnz >7
5120 |3: // Set node value.
5121 | movzx RC, PC_RA // Refetch src (RA holds the node pointer).
5122 |.if X64
5123 | mov RBa, [BASE+RC*8]
5124 | mov [RA], RBa
5125 |.else
5126 | mov RB, [BASE+RC*8+4]
5127 | mov RC, [BASE+RC*8]
5128 | mov [RA+4], RB
5129 | mov [RA], RC
5130 |.endif
5131 | ins_next
5132 |
5133 |4: // Check for __newindex if previous value is nil.
5134 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
5135 | jz <2 // No metatable: plain store.
5136 | mov TMP1, RA // Save RA.
5137 | mov TAB:RA, TAB:RB->metatable
5138 | test byte TAB:RA->nomm, 1<<MM_newindex
5139 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
5140 | mov RA, TMP1 // Restore RA.
5141 | jmp <2
5142 |
5143 |5: // Follow hash chain.
5144 | mov NODE:RA, NODE:RA->next
5145 | test NODE:RA, NODE:RA
5146 | jnz <1
5147 | // End of hash chain: key not found, add a new one.
5148 |
5149 | // But check for __newindex first.
5150 | mov TAB:RA, TAB:RB->metatable
5151 | test TAB:RA, TAB:RA
5152 | jz >6 // No metatable: continue.
5153 | test byte TAB:RA->nomm, 1<<MM_newindex
5154 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
5155 |6:
5156 | mov TMP1, STR:RC // Build a temporary string TValue: lo dword = string reference ...
5157 | mov TMP2, LJ_TSTR // ... hi dword = type tag.
5158 | mov TMP3, TAB:RB // Save TAB:RB for us.
5159 |.if X64
5160 | mov L:CARG1d, SAVE_L
5161 | mov L:CARG1d->base, BASE
5162 | lea CARG3, TMP1 // k = address of the temporary TValue.
5163 | mov CARG2d, TAB:RB
5164 | mov L:RB, L:CARG1d // Keep L in RB to reload BASE after the call.
5165 |.else
5166 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
5167 | mov ARG2, TAB:RB
5168 | mov L:RB, SAVE_L
5169 | mov ARG3, RC
5170 | mov ARG1, L:RB
5171 | mov L:RB->base, BASE
5172 |.endif
5173 | mov SAVE_PC, PC
5174 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
5175 | // Handles write barrier for the new key. TValue * returned in eax (RC).
5176 | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
5177 | mov TAB:RB, TMP3 // Need TAB:RB for barrier.
5178 | mov RA, eax // RA = returned TValue *, used as the store target at label 3.
5179 | jmp <2 // Must check write barrier for value.
5180 |
5181 |7: // Possible table write barrier for the value. Skip valiswhite check.
5182 | barrierback TAB:RB, RC // Destroys STR:RC.
5183 | jmp <3
5184 break;
5185 case BC_TSETB: /* Table set with byte literal key. Fast path for keys inside the array part, otherwise ->vmeta_tsetb. */
5186 | ins_ABC // RA = src, RB = table, RC = byte literal
5187 | checktab RB, ->vmeta_tsetb
5188 | mov TAB:RB, [BASE+RB*8]
5189 | cmp RC, TAB:RB->asize
5190 | jae ->vmeta_tsetb // Not in array part? Use fallback.
5191 | shl RC, 3 // Scale index by the 8 byte slot size.
5192 | add RC, TAB:RB->array // RC = address of the array slot.
5193 | cmp dword [RC+4], LJ_TNIL
5194 | je >3 // Previous value is nil?
5195 |1:
5196 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
5197 | jnz >7
5198 |2: // Set array slot.
5199 |.if X64
5200 | mov RAa, [BASE+RA*8]
5201 | mov [RC], RAa
5202 |.else
5203 | mov RB, [BASE+RA*8+4]
5204 | mov RA, [BASE+RA*8]
5205 | mov [RC+4], RB
5206 | mov [RC], RA
5207 |.endif
5208 | ins_next
5209 |
5210 |3: // Check for __newindex if previous value is nil.
5211 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
5212 | jz <1 // No metatable: plain store.
5213 | mov TAB:RA, TAB:RB->metatable
5214 | test byte TAB:RA->nomm, 1<<MM_newindex
5215 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
5216 | movzx RA, PC_RA // Restore RA.
5217 | jmp <1
5218 |
5219 |7: // Possible table write barrier for the value. Skip valiswhite check.
5220 | barrierback TAB:RB, RA // RA is passed as second macro argument and restored below.
5221 | movzx RA, PC_RA // Restore RA.
5222 | jmp <2
5223 break;
5224
5225 case BC_TSETM: /* Copy MULTRES-1 slots starting at Stk[RA] into the array part of the table at Stk[RA-1], beginning at the constant start index. Resizes via lj_tab_reasize and retries if they do not fit. */
5226 | ins_AD // RA = base (table at base-1), RD = num const (start index)
5227 | mov TMP1, KBASE // Need one more free register.
5228 | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word.
5229 |1:
5230 | lea RA, [BASE+RA*8] // RA = address of the first source slot.
5231 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
5232 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
5233 | jnz >7
5234 |2:
5235 | mov RD, MULTRES
5236 | sub RD, 1 // RD = number of slots to copy.
5237 | jz >4 // Nothing to copy?
5238 | add RD, KBASE // Compute needed size.
5239 | cmp RD, TAB:RB->asize
5240 | ja >5 // Doesn't fit into array part?
5241 | sub RD, KBASE // Back to the slot count.
5242 | shl KBASE, 3 // Scale start index by the 8 byte slot size.
5243 | add KBASE, TAB:RB->array // KBASE = address of the first destination slot.
5244 |3: // Copy result slots to table.
5245 |.if X64
5246 | mov RBa, [RA]
5247 | add RA, 8
5248 | mov [KBASE], RBa
5249 |.else
5250 | mov RB, [RA]
5251 | mov [KBASE], RB
5252 | mov RB, [RA+4]
5253 | add RA, 8
5254 | mov [KBASE+4], RB
5255 |.endif
5256 | add KBASE, 8
5257 | sub RD, 1
5258 | jnz <3
5259 |4:
5260 | mov KBASE, TMP1 // Restore KBASE.
5261 | ins_next
5262 |
5263 |5: // Need to resize array part.
5264 |.if X64
5265 | mov L:CARG1d, SAVE_L
5266 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
5267 | mov CARG2d, TAB:RB
5268 | mov CARG3d, RD // nasize = needed size computed above.
5269 | mov L:RB, L:CARG1d // Keep L in RB to reload BASE after the call.
5270 |.else
5271 | mov ARG2, TAB:RB
5272 | mov L:RB, SAVE_L
5273 | mov L:RB->base, BASE
5274 | mov ARG3, RD // nasize = needed size computed above.
5275 | mov ARG1, L:RB
5276 |.endif
5277 | mov SAVE_PC, PC
5278 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
5279 | mov BASE, L:RB->base // Reload BASE from L->base after the C call.
5280 | movzx RA, PC_RA // Restore RA.
5281 | jmp <1 // Retry.
5282 |
5283 |7: // Possible table write barrier for any value. Skip valiswhite check.
5284 | barrierback TAB:RB, RD // RD is reloaded from MULTRES at label 2.
5285 | jmp <2
5286 break;
5287
5288 /* -- Calls and vararg handling ----------------------------------------- */
5289
5290 case BC_CALL: case BC_CALLM: /* Call the function in slot RA. Goes to ->vmeta_call_ra if the slot does not hold a function. */
5291 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
5292 if (op == BC_CALLM) {
5293 | add NARGS:RD, MULTRES // CALLM only: add MULTRES to the argument count.
5294 }
5295 | cmp dword [BASE+RA*8+4], LJ_TFUNC
5296 | mov LFUNC:RB, [BASE+RA*8] // mov does not alter the flags of the cmp above.
5297 | jne ->vmeta_call_ra
5298 | lea BASE, [BASE+RA*8+8] // New base = slot following the function.
5299 | ins_call
5300 break;
5301
5302 case BC_CALLMT: /* Adds MULTRES to the argument count. No dispatch is emitted, so the generated code continues into the code of the following opcode. */
5303 | ins_AD // RA = base, RD = extra_nargs
5304 | add NARGS:RD, MULTRES
5305 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
5306 break;
5307 case BC_CALLT: /* Tail call: copies the function and its arguments down to the current frame, then calls via ins_callt. */
5308 | ins_AD // RA = base, RD = nargs+1
5309 | lea RA, [BASE+RA*8+8] // RA = address of the first argument.
5310 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
5311 | mov LFUNC:RB, [RA-8]
5312 | cmp dword [RA-4], LJ_TFUNC
5313 | jne ->vmeta_call
5314 |->BC_CALLT_Z:
5315 | mov PC, [BASE-4] // Load the frame link word stored at BASE-4.
5316 | test PC, FRAME_TYPE
5317 | jnz >7 // Frame type bits set: handle below.
5318 |1:
5319 | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
5320 | mov MULTRES, NARGS:RD // Save nargs+1 (RD is used as loop counter).
5321 | sub NARGS:RD, 1
5322 | jz >3 // No arguments to move.
5323 |2: // Move args down.
5324 |.if X64
5325 | mov RBa, [RA]
5326 | add RA, 8
5327 | mov [KBASE], RBa
5328 |.else
5329 | mov RB, [RA]
5330 | mov [KBASE], RB
5331 | mov RB, [RA+4]
5332 | add RA, 8
5333 | mov [KBASE+4], RB
5334 |.endif
5335 | add KBASE, 8
5336 | sub NARGS:RD, 1
5337 | jnz <2
5338 |
5339 | mov LFUNC:RB, [BASE-8] // Reload function (RB was used as copy temp).
5340 |3:
5341 | mov NARGS:RD, MULTRES // Restore nargs+1.
5342 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
5343 | ja >5
5344 |4:
5345 | ins_callt
5346 |
5347 |5: // Tailcall to a fast function.
5348 | test PC, FRAME_TYPE // Lua frame below?
5349 | jnz <4
5350 | movzx RA, PC_RA
5351 | not RAa // Note: ~RA = -(RA+1)
5352 | lea RA, [BASE+RA*8] // RA = BASE - (RA+1)*8
5353 | mov LFUNC:KBASE, [RA-8] // Need to prepare KBASE.
5354 | mov KBASE, LFUNC:KBASE->pc
5355 | mov KBASE, [KBASE+PC2PROTO(k)]
5356 | jmp <4
5357 |
5358 |7: // Tailcall from a vararg function.
5359 | sub PC, FRAME_VARG
5360 | test PC, FRAME_TYPEP
5361 | jnz >8 // Vararg frame below?
5362 | sub BASE, PC // Need to relocate BASE/KBASE down.
5363 | mov KBASE, BASE
5364 | mov PC, [BASE-4] // Load the frame link word of the relocated frame.
5365 | jmp <1
5366 |8:
5367 | add PC, FRAME_VARG // Undo the subtraction above.
5368 | jmp <1
5369 break;
5370
5371 case BC_ITERC: /* Iterator call: copies callable, state and control var up to a new frame base and calls with 2 arguments. */
5372 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
5373 | lea RA, [BASE+RA*8+8] // fb = base+1
5374 |.if X64
5375 | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3].
5376 | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2].
5377 | mov [RA], RBa
5378 | mov [RA+8], RCa
5379 |.else
5380 | mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
5381 | mov RC, [RA-20]
5382 | mov [RA], RB
5383 | mov [RA+4], RC
5384 | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
5385 | mov RC, [RA-12]
5386 | mov [RA+8], RB
5387 | mov [RA+12], RC
5388 |.endif
5389 | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
5390 | mov RC, [RA-28] // RC = type tag of the callable.
5391 | mov [RA-8], LFUNC:RB
5392 | mov [RA-4], RC
5393 | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
5394 | mov NARGS:RD, 2+1 // mov does not alter the flags of the cmp above.
5395 | jne ->vmeta_call
5396 | mov BASE, RA
5397 | ins_call
5398 break;
5399
5400 case BC_ITERN: /* Inline table traversal step: array part first, then hash part. The control var at Stk[RA-1] holds the next index. */
5401 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
5402#if LJ_HASJIT
5403 | // NYI: add hotloop, record BC_ITERN.
5404#endif
5405 | mov TMP1, KBASE // Need two more free registers.
5406 | mov TMP2, DISPATCH
5407 | mov TAB:RB, [BASE+RA*8-16] // Table reference from Stk[RA-2].
5408 | mov RC, [BASE+RA*8-8] // Get index from control var.
5409 | mov DISPATCH, TAB:RB->asize
5410 | add PC, 4 // Advance PC by one 4 byte instruction.
5411 | mov KBASE, TAB:RB->array
5412 |1: // Traverse array part.
5413 | cmp RC, DISPATCH; jae >5 // Index points after array part?
5414 | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4
5415 if (LJ_DUALNUM) {
5416 | mov dword [BASE+RA*8+4], LJ_TISNUM // Key = integer index with LJ_TISNUM tag.
5417 | mov dword [BASE+RA*8], RC
5418 } else if (sse) {
5419 | cvtsi2sd xmm0, RC // Key as double, stored below.
5420 } else {
5421 | fild dword [BASE+RA*8-8] // x87: key is loaded from the control var in memory, stored below.
5422 }
5423 | // Copy array slot to returned value.
5424 |.if X64
5425 | mov RBa, [KBASE+RC*8]
5426 | mov [BASE+RA*8+8], RBa
5427 |.else
5428 | mov RB, [KBASE+RC*8+4]
5429 | mov [BASE+RA*8+12], RB
5430 | mov RB, [KBASE+RC*8]
5431 | mov [BASE+RA*8+8], RB
5432 |.endif
5433 | add RC, 1 // Next index.
5434 | // Return array index as a numeric key.
5435 if (LJ_DUALNUM) {
5436 | // See above.
5437 } else if (sse) {
5438 | movsd qword [BASE+RA*8], xmm0
5439 } else {
5440 | fstp qword [BASE+RA*8]
5441 }
5442 | mov [BASE+RA*8-8], RC // Update control var.
5443 |2:
5444 | movzx RD, PC_RD // Get target from ITERL.
5445 | branchPC RD
5446 |3:
5447 | mov DISPATCH, TMP2 // Restore the two borrowed registers.
5448 | mov KBASE, TMP1
5449 | ins_next
5450 |
5451 |4: // Skip holes in array part.
5452 | add RC, 1
5453 if (!LJ_DUALNUM && !sse) { /* The x87 path reads the index from the control var in memory (fild above), so keep it current. */
5454 | mov [BASE+RA*8-8], RC
5455 }
5456 | jmp <1
5457 |
5458 |5: // Traverse hash part.
5459 | sub RC, DISPATCH // RC = index relative to the hash part (DISPATCH = asize).
5460 |6:
5461 | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
5462 | imul KBASE, RC, #NODE // Scale by the node size.
5463 | add NODE:KBASE, TAB:RB->node // KBASE = address of the node.
5464 | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7
5465 | lea DISPATCH, [RC+DISPATCH+1] // New control var = asize + hash index + 1.
5466 | // Copy key and value from hash slot.
5467 |.if X64
5468 | mov RBa, NODE:KBASE->key
5469 | mov RCa, NODE:KBASE->val
5470 | mov [BASE+RA*8], RBa
5471 | mov [BASE+RA*8+8], RCa
5472 |.else
5473 | mov RB, NODE:KBASE->key.gcr
5474 | mov RC, NODE:KBASE->key.it
5475 | mov [BASE+RA*8], RB
5476 | mov [BASE+RA*8+4], RC
5477 | mov RB, NODE:KBASE->val.gcr
5478 | mov RC, NODE:KBASE->val.it
5479 | mov [BASE+RA*8+8], RB
5480 | mov [BASE+RA*8+12], RC
5481 |.endif
5482 | mov [BASE+RA*8-8], DISPATCH // Update control var.
5483 | jmp <2
5484 |
5485 |7: // Skip holes in hash part.
5486 | add RC, 1
5487 | jmp <6
5488 break;
5489
5490 case BC_ISNEXT: /* Checks that the iterator slots hold a function with ffid FF_next_N, a table and nil. On failure this instruction and its target are rewritten to BC_JMP and BC_ITERC. */
5491 | ins_AD // RA = base, RD = target (points to ITERN)
5492 | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5 // Stk[RA-3] must be a function.
5493 | mov CFUNC:RB, [BASE+RA*8-24]
5494 | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5 // Stk[RA-2] must be a table.
5495 | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5 // Stk[RA-1] must be nil.
5496 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
5497 | branchPC RD
5498 | mov dword [BASE+RA*8-8], 0 // Initialize control var.
5499 |1:
5500 | ins_next
5501 |5: // Despecialize bytecode if any of the checks fail.
5502 | mov PC_OP, BC_JMP // Patch the opcode of this instruction.
5503 | branchPC RD
5504 | mov byte [PC], BC_ITERC // Patch the opcode of the instruction at the branch target.
5505 | jmp <1
5506 break;
5507
5508 case BC_VARG: /* Copy vararg slots to Stk[RA...]. Copies exactly RB-1 slots (nil-filled) if RB != 0, else copies all of them and sets MULTRES. */
5509 | ins_ABC // RA = base, RB = nresults+1, RC = numparams
5510 | mov TMP1, KBASE // Need one more free register.
5511 | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
5512 | lea RA, [BASE+RA*8] // RA = address of the first destination slot.
5513 | sub KBASE, [BASE-4] // Subtract the frame link word stored at BASE-4.
5514 | // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
5515 | test RB, RB
5516 | jz >5 // Copy all varargs?
5517 | lea RB, [RA+RB*8-8] // RB = end of the destination slots.
5518 | cmp KBASE, BASE // No vararg slots?
5519 | jnb >2
5520 |1: // Copy vararg slots to destination slots.
5521 |.if X64
5522 | mov RCa, [KBASE-8]
5523 | add KBASE, 8
5524 | mov [RA], RCa
5525 |.else
5526 | mov RC, [KBASE-8]
5527 | mov [RA], RC
5528 | mov RC, [KBASE-4]
5529 | add KBASE, 8
5530 | mov [RA+4], RC
5531 |.endif
5532 | add RA, 8
5533 | cmp RA, RB // All destination slots filled?
5534 | jnb >3
5535 | cmp KBASE, BASE // No more vararg slots?
5536 | jb <1
5537 |2: // Fill up remainder with nil.
5538 | mov dword [RA+4], LJ_TNIL // Only the tag dword is written.
5539 | add RA, 8
5540 | cmp RA, RB
5541 | jb <2
5542 |3:
5543 | mov KBASE, TMP1 // Restore KBASE.
5544 | ins_next
5545 |
5546 |5: // Copy all varargs.
5547 | mov MULTRES, 1 // MULTRES = 0+1
5548 | mov RC, BASE
5549 | sub RC, KBASE // RC = size of the vararg slots in bytes.
5550 | jbe <3 // No vararg slots?
5551 | mov RB, RC
5552 | shr RB, 3 // Bytes to slots.
5553 | add RB, 1
5554 | mov MULTRES, RB // MULTRES = #varargs+1
5555 | mov L:RB, SAVE_L
5556 | add RC, RA // RC = end of the destination area.
5557 | cmp RC, L:RB->maxstack
5558 | ja >7 // Need to grow stack?
5559 |6: // Copy all vararg slots.
5560 |.if X64
5561 | mov RCa, [KBASE-8]
5562 | add KBASE, 8
5563 | mov [RA], RCa
5564 |.else
5565 | mov RC, [KBASE-8]
5566 | mov [RA], RC
5567 | mov RC, [KBASE-4]
5568 | add KBASE, 8
5569 | mov [RA+4], RC
5570 |.endif
5571 | add RA, 8
5572 | cmp KBASE, BASE // No more vararg slots?
5573 | jb <6
5574 | jmp <3
5575 |
5576 |7: // Grow stack for varargs.
5577 | mov L:RB->base, BASE
5578 | mov L:RB->top, RA
5579 | mov SAVE_PC, PC
5580 | sub KBASE, BASE // Need delta, because BASE may change.
5581 | mov FCARG2, MULTRES
5582 | sub FCARG2, 1 // n = #varargs
5583 | mov FCARG1, L:RB
5584 | call extern lj_state_growstack@8 // (lua_State *L, int n)
5585 | mov BASE, L:RB->base // Reload BASE and RA from L after the call.
5586 | mov RA, L:RB->top
5587 | add KBASE, BASE // Rebase the source pointer from the saved delta.
5588 | jmp <6
5589 break;
5590
5591 /* -- Returns ----------------------------------------------------------- */
5592
5593 case BC_RETM: /* Adds MULTRES to the result count. No dispatch is emitted, so the generated code continues into the code of the following opcode. */
5594 | ins_AD // RA = results, RD = extra_nresults
5595 | add RD, MULTRES // MULTRES >=1, so RD >=1.
5596 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
5597 break;
5598
5599 case BC_RET: case BC_RET0: case BC_RET1: /* Return: moves results down below BASE, nil-fills missing results and restores the caller's BASE/KBASE. Non-standard frames go to ->vm_return or are relocated. */
5600 | ins_AD // RA = results, RD = nresults+1
5601 if (op != BC_RET0) {
5602 | shl RA, 3 // RA = byte offset of the results relative to BASE.
5603 }
5604 |1:
5605 | mov PC, [BASE-4] // Load the frame link word stored at BASE-4.
5606 | mov MULTRES, RD // Save nresults+1.
5607 | test PC, FRAME_TYPE // Check frame type marker.
5608 | jnz >7 // Not returning to a fixarg Lua func?
5609 switch (op) {
5610 case BC_RET:
5611 |->BC_RET_Z:
5612 | mov KBASE, BASE // Use KBASE for result move.
5613 | sub RD, 1
5614 | jz >3 // No results to move.
5615 |2: // Move results down.
5616 |.if X64
5617 | mov RBa, [KBASE+RA]
5618 | mov [KBASE-8], RBa
5619 |.else
5620 | mov RB, [KBASE+RA]
5621 | mov [KBASE-8], RB
5622 | mov RB, [KBASE+RA+4]
5623 | mov [KBASE-4], RB
5624 |.endif
5625 | add KBASE, 8
5626 | sub RD, 1
5627 | jnz <2
5628 |3:
5629 | mov RD, MULTRES // Note: MULTRES may be >255.
5630 | movzx RB, PC_RB // So cannot compare with RDL!
5631 |5:
5632 | cmp RB, RD // More results expected?
5633 | ja >6
5634 break;
5635 case BC_RET1:
5636 |.if X64
5637 | mov RBa, [BASE+RA] // Copy the single result to the slot at BASE-8.
5638 | mov [BASE-8], RBa
5639 |.else
5640 | mov RB, [BASE+RA+4] // Copy the single result to the slot at BASE-8.
5641 | mov [BASE-4], RB
5642 | mov RB, [BASE+RA]
5643 | mov [BASE-8], RB
5644 |.endif
5645 /* fallthrough */
5646 case BC_RET0:
5647 |5:
5648 | cmp PC_RB, RDL // More results expected?
5649 | ja >6
5650 default:
5651 break;
5652 }
5653 | movzx RA, PC_RA
5654 | not RAa // Note: ~RA = -(RA+1)
5655 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
5656 | mov LFUNC:KBASE, [BASE-8] // Function object reference of the restored frame.
5657 | mov KBASE, LFUNC:KBASE->pc
5658 | mov KBASE, [KBASE+PC2PROTO(k)] // Reload KBASE from the prototype's k field.
5659 | ins_next
5660 |
5661 |6: // Fill up results with nil.
5662 if (op == BC_RET) {
5663 | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base.
5664 | add KBASE, 8
5665 } else {
5666 | mov dword [BASE+RD*8-12], LJ_TNIL // Tag dword of the result slot indexed by RD.
5667 }
5668 | add RD, 1
5669 | jmp <5 // Recheck whether more results are expected.
5670 |
5671 |7: // Non-standard return case.
5672 | lea RB, [PC-FRAME_VARG]
5673 | test RB, FRAME_TYPEP
5674 | jnz ->vm_return
5675 | // Return from vararg function: relocate BASE down and RA up.
5676 | sub BASE, RB
5677 if (op != BC_RET0) {
5678 | add RA, RB
5679 }
5680 | jmp <1 // Retry with the relocated frame.
5681 break;
5682
5683 /* -- Loops and branches ------------------------------------------------ */
5684
5685 |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4] // Slot at RA+0 and its hi dword (compared against LJ_TISNUM as type tag).
5686 |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12] // Slot at RA+8 and its hi dword.
5687 |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20] // Slot at RA+16 and its hi dword.
5688 |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] // Slot at RA+24 and its hi dword. The loop ops write the current index here.
5689
5690 case BC_FORL: /* Emits only the hotloop check (with LJ_HASJIT). No dispatch is emitted, so the generated code continues into the code of the following opcode. */
5691#if LJ_HASJIT
5692 | hotloop RB
5693#endif
5694 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
5695 break;
5696
5697 case BC_JFORI:
5698 case BC_JFORL:
5699#if !LJ_HASJIT
5700 break;
5701#endif
5702 case BC_FORI:
5703 case BC_IFORL:
5704 vk = (op == BC_IFORL || op == BC_JFORL);
5705 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
5706 | lea RA, [BASE+RA*8]
5707 if (LJ_DUALNUM) {
5708 | cmp FOR_TIDX, LJ_TISNUM; jne >9
5709 if (!vk) {
5710 | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for
5711 | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for
5712 | mov RB, dword FOR_IDX
5713 | cmp dword FOR_STEP, 0; jl >5
5714 } else {
5715#ifdef LUA_USE_ASSERT
5716 | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type
5717 | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type
5718#endif
5719 | mov RB, dword FOR_STEP
5720 | test RB, RB; js >5
5721 | add RB, dword FOR_IDX; jo >1
5722 | mov dword FOR_IDX, RB
5723 }
5724 | cmp RB, dword FOR_STOP
5725 | mov FOR_TEXT, LJ_TISNUM
5726 | mov dword FOR_EXT, RB
5727 if (op == BC_FORI) {
5728 | jle >7
5729 |1:
5730 |6:
5731 | branchPC RD
5732 } else if (op == BC_JFORI) {
5733 | branchPC RD
5734 | movzx RD, PC_RD
5735 | jle =>BC_JLOOP
5736 |1:
5737 |6:
5738 } else if (op == BC_IFORL) {
5739 | jg >7
5740 |6:
5741 | branchPC RD
5742 |1:
5743 } else {
5744 | jle =>BC_JLOOP
5745 |1:
5746 |6:
5747 }
5748 |7:
5749 | ins_next
5750 |
5751 |5: // Invert check for negative step.
5752 if (vk) {
5753 | add RB, dword FOR_IDX; jo <1
5754 | mov dword FOR_IDX, RB
5755 }
5756 | cmp RB, dword FOR_STOP
5757 | mov FOR_TEXT, LJ_TISNUM
5758 | mov dword FOR_EXT, RB
5759 if (op == BC_FORI) {
5760 | jge <7
5761 } else if (op == BC_JFORI) {
5762 | branchPC RD
5763 | movzx RD, PC_RD
5764 | jge =>BC_JLOOP
5765 } else if (op == BC_IFORL) {
5766 | jl <7
5767 } else {
5768 | jge =>BC_JLOOP
5769 }
5770 | jmp <6
5771 |9: // Fallback to FP variant.
5772 } else if (!vk) {
5773 | cmp FOR_TIDX, LJ_TISNUM
5774 }
5775 if (!vk) {
5776 | jae ->vmeta_for
5777 | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for
5778 } else {
5779#ifdef LUA_USE_ASSERT
5780 | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type
5781 | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type
5782#endif
5783 }
5784 | mov RB, FOR_TSTEP // Load type/hiword of for step.
5785 if (!vk) {
5786 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5787 }
5788 if (sse) {
5789 | movsd xmm0, qword FOR_IDX
5790 | movsd xmm1, qword FOR_STOP
5791 if (vk) {
5792 | addsd xmm0, qword FOR_STEP
5793 | movsd qword FOR_IDX, xmm0
5794 | test RB, RB; js >3
5795 } else {
5796 | jl >3
5797 }
5798 | ucomisd xmm1, xmm0
5799 |1:
5800 | movsd qword FOR_EXT, xmm0
5801 } else {
5802 | fld qword FOR_STOP
5803 | fld qword FOR_IDX
5804 if (vk) {
5805 | fadd qword FOR_STEP // nidx = idx + step
5806 | fst qword FOR_IDX
5807 | fst qword FOR_EXT
5808 | test RB, RB; js >1
5809 } else {
5810 | fst qword FOR_EXT
5811 | jl >1
5812 }
5813 | fxch // Swap lim/(n)idx if step non-negative.
5814 |1:
5815 | fcomparepp // eax (RD) modified if !cmov.
5816 if (!cmov) {
5817 | movzx RD, PC_RD // Need to reload RD.
5818 }
5819 }
5820 if (op == BC_FORI) {
5821 if (LJ_DUALNUM) {
5822 | jnb <7
5823 } else {
5824 | jnb >2
5825 | branchPC RD
5826 }
5827 } else if (op == BC_JFORI) {
5828 | branchPC RD
5829 | movzx RD, PC_RD
5830 | jnb =>BC_JLOOP
5831 } else if (op == BC_IFORL) {
5832 if (LJ_DUALNUM) {
5833 | jb <7
5834 } else {
5835 | jb >2
5836 | branchPC RD
5837 }
5838 } else {
5839 | jnb =>BC_JLOOP
5840 }
5841 if (LJ_DUALNUM) {
5842 | jmp <6
5843 } else {
5844 |2:
5845 | ins_next
5846 }
5847 if (sse) {
5848 |3: // Invert comparison if step is negative.
5849 | ucomisd xmm0, xmm1
5850 | jmp <1
5851 }
5852 break;
5853
5854 case BC_ITERL:
5855#if LJ_HASJIT
5856 | hotloop RB
5857#endif
5858 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
5859 break;
5860
5861 case BC_JITERL:
5862#if !LJ_HASJIT
5863 break;
5864#endif
5865 case BC_IITERL:
5866 | ins_AJ // RA = base, RD = target
5867 | lea RA, [BASE+RA*8]
5868 | mov RB, [RA+4]
5869 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
5870 if (op == BC_JITERL) {
5871 | mov [RA-4], RB
5872 | mov RB, [RA]
5873 | mov [RA-8], RB
5874 | jmp =>BC_JLOOP
5875 } else {
5876 | branchPC RD // Otherwise save control var + branch.
5877 | mov RD, [RA]
5878 | mov [RA-4], RB
5879 | mov [RA-8], RD
5880 }
5881 |1:
5882 | ins_next
5883 break;
5884
5885 case BC_LOOP:
5886 | ins_A // RA = base, RD = target (loop extent)
5887 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5888 | // This opcode does NOT jump, its only purpose is to detect a hot loop.
5889#if LJ_HASJIT
5890 | hotloop RB
5891#endif
5892 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
5893 break;
5894
5895 case BC_ILOOP:
5896 | ins_A // RA = base, RD = target (loop extent)
5897 | ins_next
5898 break;
5899
5900 case BC_JLOOP:
5901#if LJ_HASJIT
5902 | ins_AD // RA = base (ignored), RD = traceno
5903 | mov RA, [DISPATCH+DISPATCH_J(trace)]
5904 | mov TRACE:RD, [RA+RD*4]
5905 | mov RDa, TRACE:RD->mcode
5906 | mov L:RB, SAVE_L
5907 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5908 | mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
5909 | // Save additional callee-save registers only used in compiled code.
5910 |.if X64WIN
5911 | mov TMPQ, r12
5912 | mov TMPa, r13
5913 | mov CSAVE_4, r14
5914 | mov CSAVE_3, r15
5915 | mov RAa, rsp
5916 | sub rsp, 9*16+4*8
5917 | movdqa [RAa], xmm6
5918 | movdqa [RAa-1*16], xmm7
5919 | movdqa [RAa-2*16], xmm8
5920 | movdqa [RAa-3*16], xmm9
5921 | movdqa [RAa-4*16], xmm10
5922 | movdqa [RAa-5*16], xmm11
5923 | movdqa [RAa-6*16], xmm12
5924 | movdqa [RAa-7*16], xmm13
5925 | movdqa [RAa-8*16], xmm14
5926 | movdqa [RAa-9*16], xmm15
5927 |.elif X64
5928 | mov TMPQ, r12
5929 | mov TMPa, r13
5930 | sub rsp, 16
5931 |.endif
5932 | jmp RDa
5933#endif
5934 break;
5935
5936 case BC_JMP:
5937 | ins_AJ // RA = unused, RD = target
5938 | branchPC RD
5939 | ins_next
5940 break;
5941
5942 /* -- Function headers -------------------------------------------------- */
5943
5944 /*
5945 ** Reminder: A function may be called with func/args above L->maxstack,
5946 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
5947 ** too. This means all FUNC* ops (including fast functions) must check
5948 ** for stack overflow _before_ adding more slots!
5949 */
5950
5951 case BC_FUNCF:
5952#if LJ_HASJIT
5953 | hotcall RB
5954#endif
5955 case BC_FUNCV: /* NYI: compiled vararg functions. */
5956 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
5957 break;
5958
5959 case BC_JFUNCF:
5960#if !LJ_HASJIT
5961 break;
5962#endif
5963 case BC_IFUNCF:
5964 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
5965 | mov KBASE, [PC-4+PC2PROTO(k)]
5966 | mov L:RB, SAVE_L
5967 | lea RA, [BASE+RA*8] // Top of frame.
5968 | cmp RA, L:RB->maxstack
5969 | ja ->vm_growstack_f
5970 | movzx RA, byte [PC-4+PC2PROTO(numparams)]
5971 | cmp NARGS:RD, RA // Check for missing parameters.
5972 | jbe >3
5973 |2:
5974 if (op == BC_JFUNCF) {
5975 | movzx RD, PC_RD
5976 | jmp =>BC_JLOOP
5977 } else {
5978 | ins_next
5979 }
5980 |
5981 |3: // Clear missing parameters.
5982 | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL
5983 | add NARGS:RD, 1
5984 | cmp NARGS:RD, RA
5985 | jbe <3
5986 | jmp <2
5987 break;
5988
5989 case BC_JFUNCV:
5990#if !LJ_HASJIT
5991 break;
5992#endif
5993 | int3 // NYI: compiled vararg functions
5994 break; /* NYI: compiled vararg functions. */
5995
5996 case BC_IFUNCV:
5997 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
5998 | lea RB, [NARGS:RD*8+FRAME_VARG]
5999 | lea RD, [BASE+NARGS:RD*8]
6000 | mov LFUNC:KBASE, [BASE-8]
6001 | mov [RD-4], RB // Store delta + FRAME_VARG.
6002 | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC.
6003 | mov L:RB, SAVE_L
6004 | lea RA, [RD+RA*8]
6005 | cmp RA, L:RB->maxstack
6006 | ja ->vm_growstack_v // Need to grow stack.
6007 | mov RA, BASE
6008 | mov BASE, RD
6009 | movzx RB, byte [PC-4+PC2PROTO(numparams)]
6010 | test RB, RB
6011 | jz >2
6012 |1: // Copy fixarg slots up to new frame.
6013 | add RA, 8
6014 | cmp RA, BASE
6015 | jnb >3 // Less args than parameters?
6016 | mov KBASE, [RA-8]
6017 | mov [RD], KBASE
6018 | mov KBASE, [RA-4]
6019 | mov [RD+4], KBASE
6020 | add RD, 8
6021 | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
6022 | sub RB, 1
6023 | jnz <1
6024 |2:
6025 if (op == BC_JFUNCV) {
6026 | movzx RD, PC_RD
6027 | jmp =>BC_JLOOP
6028 } else {
6029 | mov KBASE, [PC-4+PC2PROTO(k)]
6030 | ins_next
6031 }
6032 |
6033 |3: // Clear missing parameters.
6034 | mov dword [RD+4], LJ_TNIL
6035 | add RD, 8
6036 | sub RB, 1
6037 | jnz <3
6038 | jmp <2
6039 break;
6040
6041 case BC_FUNCC:
6042 case BC_FUNCCW:
6043 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
6044 | mov CFUNC:RB, [BASE-8]
6045 | mov KBASEa, CFUNC:RB->f
6046 | mov L:RB, SAVE_L
6047 | lea RD, [BASE+NARGS:RD*8-8]
6048 | mov L:RB->base, BASE
6049 | lea RA, [RD+8*LUA_MINSTACK]
6050 | cmp RA, L:RB->maxstack
6051 | mov L:RB->top, RD
6052 if (op == BC_FUNCC) {
6053 |.if X64
6054 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
6055 |.else
6056 | mov ARG1, L:RB
6057 |.endif
6058 } else {
6059 |.if X64
6060 | mov CARG2, KBASEa
6061 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
6062 |.else
6063 | mov ARG2, KBASEa
6064 | mov ARG1, L:RB
6065 |.endif
6066 }
6067 | ja ->vm_growstack_c // Need to grow stack.
6068 | set_vmstate C
6069 if (op == BC_FUNCC) {
6070 | call KBASEa // (lua_State *L)
6071 } else {
6072 | // (lua_State *L, lua_CFunction f)
6073 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
6074 }
6075 | set_vmstate INTERP
6076 | // nresults returned in eax (RD).
6077 | mov BASE, L:RB->base
6078 | lea RA, [BASE+RD*8]
6079 | neg RA
6080 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
6081 | mov PC, [BASE-4] // Fetch PC of caller.
6082 | jmp ->vm_returnc
6083 break;
6084
6085 /* ---------------------------------------------------------------------- */
6086
6087 default:
6088 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
6089 exit(2);
6090 break;
6091 }
6092}
6093
6094static int build_backend(BuildCtx *ctx)
6095{
6096 int op;
6097 int cmov = 1;
6098 int sse = 0;
6099#ifdef LUAJIT_CPU_NOCMOV
6100 cmov = 0;
6101#endif
6102#if defined(LUAJIT_CPU_SSE2) || defined(LJ_TARGET_X64)
6103 sse = 1;
6104#endif
6105
6106 dasm_growpc(Dst, BC__MAX);
6107
6108 build_subroutines(ctx, cmov, sse);
6109
6110 |.code_op
6111 for (op = 0; op < BC__MAX; op++)
6112 build_ins(ctx, (BCOp)op, op, cmov, sse);
6113
6114 return BC__MAX;
6115}
6116
6117/* Emit pseudo frame-info for all assembler functions. */
/*
** Writes hand-encoded DWARF call frame information (CIE and FDE records)
** as assembler text to ctx->fp. What is written depends on ctx->mode:
**
**   BUILD_elfasm   A .debug_frame section followed by an .eh_frame section.
**   BUILD_coffasm  An .eh_frame section with one CIE and one FDE.
**   BUILD_machasm  A __TEXT,__eh_frame section with one FDE per symbol.
**   anything else  Nothing is written.
**
** When LJ_HASFFI is set, lj_vm_ffi_call gets its own FDE (ELF and Mach-O
** only) because it uses a different frame layout: the CFA is redefined in
** terms of the frame pointer (def_cfa_register) instead of a fixed
** CFRAME_SIZE offset from the stack pointer.
**
** The function returns nothing; the return values of fprintf() are ignored.
*/
6118static void emit_asm_debug(BuildCtx *ctx)
6119{
  /* Byte offset of the vm_ffi_call global from the start of the code. */
6120 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
  /*
  ** String-valued constants spliced into the directive text below:
  **   SZPTR   pointer size in bytes (also the magnitude of the data
  **           alignment factor and the initial CFA offset).
  **   BSZPTR  log2 of the pointer size; used with .align in the Mach-O case.
  **   REG_SP  register number used in the initial def_cfa rule (0xc).
  **   REG_RA  return address column of the CIE.
  */
6121#if LJ_64
6122#define SZPTR "8"
6123#define BSZPTR "3"
6124#define REG_SP "0x7"
6125#define REG_RA "0x10"
6126#else
6127#define SZPTR "4"
6128#define BSZPTR "2"
6129#define REG_SP "0x4"
6130#define REG_RA "0x8"
6131#endif
6132 switch (ctx->mode) {
6133 case BUILD_elfasm:
  /* --- ELF, part 1: .debug_frame ----------------------------------- */
6134 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
  /*
  ** CIE .Lframe0: CIE id 0xffffffff, version 1, empty augmentation string,
  ** code alignment 1, data alignment -SZPTR. Initial rules: CFA = REG_SP +
  ** SZPTR (0xc = def_cfa) and REG_RA saved at slot 1 (0x80+reg = offset).
  */
6135 fprintf(ctx->fp,
6136 ".Lframe0:\n"
6137 "\t.long .LECIE0-.LSCIE0\n"
6138 ".LSCIE0:\n"
6139 "\t.long 0xffffffff\n"
6140 "\t.byte 0x1\n"
6141 "\t.string \"\"\n"
6142 "\t.uleb128 0x1\n"
6143 "\t.sleb128 -" SZPTR "\n"
6144 "\t.byte " REG_RA "\n"
6145 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
6146 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
6147 "\t.align " SZPTR "\n"
6148 ".LECIE0:\n\n");
  /*
  ** FDE 0: starts at .Lbegin and spans fcofs bytes (first %d), i.e. the
  ** code up to vm_ffi_call. The second %d is CFRAME_SIZE, used as the CFA
  ** offset. The remaining rules record the save slots of the callee-save
  ** registers named in the per-line comments.
  */
6149 fprintf(ctx->fp,
6150 ".LSFDE0:\n"
6151 "\t.long .LEFDE0-.LASFDE0\n"
6152 ".LASFDE0:\n"
6153 "\t.long .Lframe0\n"
6154#if LJ_64
6155 "\t.quad .Lbegin\n"
6156 "\t.quad %d\n"
6157 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
6158 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
6159 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
6160 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
6161 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
6162#else
6163 "\t.long .Lbegin\n"
6164 "\t.long %d\n"
6165 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
6166 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
6167 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
6168 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
6169 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
6170#endif
6171 "\t.align " SZPTR "\n"
6172 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
6173#if LJ_HASFFI
  /*
  ** FDE 1: lj_vm_ffi_call, spanning the rest of the code (codesz - fcofs
  ** bytes). This presumes vm_ffi_call is the last routine in the code --
  ** TODO confirm against the subroutine layout.
  */
6174 fprintf(ctx->fp,
6175 ".LSFDE1:\n"
6176 "\t.long .LEFDE1-.LASFDE1\n"
6177 ".LASFDE1:\n"
6178 "\t.long .Lframe0\n"
6179#if LJ_64
6180 "\t.quad lj_vm_ffi_call\n"
6181 "\t.quad %d\n"
6182 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
6183 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
6184 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
6185 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
6186#else
6187 "\t.long lj_vm_ffi_call\n"
6188 "\t.long %d\n"
6189 "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
6190 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
6191 "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
6192 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
6193#endif
6194 "\t.align " SZPTR "\n"
6195 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
6196#endif
  /* --- ELF, part 2: .eh_frame --------------------------------------- */
  /* On Solaris the section is emitted with the "aw" flags instead of "a". */
6197#if (defined(__sun__) && defined(__svr4__)) || defined(__solaris_)
6198 fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
6199#else
6200 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
6201#endif
  /*
  ** CIE .Lframe1: CIE id 0, augmentation "zPR". The 6 augmentation bytes
  ** are: personality encoding (1), pc-relative pointer to
  ** lj_err_unwind_dwarf (4) and the FDE pointer encoding (1).
  */
6202 fprintf(ctx->fp,
6203 ".Lframe1:\n"
6204 "\t.long .LECIE1-.LSCIE1\n"
6205 ".LSCIE1:\n"
6206 "\t.long 0\n"
6207 "\t.byte 0x1\n"
6208 "\t.string \"zPR\"\n"
6209 "\t.uleb128 0x1\n"
6210 "\t.sleb128 -" SZPTR "\n"
6211 "\t.byte " REG_RA "\n"
6212 "\t.uleb128 6\n" /* augmentation length */
6213 "\t.byte 0x1b\n" /* pcrel|sdata4 */
6214 "\t.long lj_err_unwind_dwarf-.\n"
6215 "\t.byte 0x1b\n" /* pcrel|sdata4 */
6216 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
6217 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
6218 "\t.align " SZPTR "\n"
6219 ".LECIE1:\n\n");
  /*
  ** FDE 2: same range and register rules as FDE 0, but the CIE pointer and
  ** the start address are emitted as differences (.LASFDE2-.Lframe1 and
  ** .Lbegin-.) and the length is a 32-bit .long on both targets.
  */
6220 fprintf(ctx->fp,
6221 ".LSFDE2:\n"
6222 "\t.long .LEFDE2-.LASFDE2\n"
6223 ".LASFDE2:\n"
6224 "\t.long .LASFDE2-.Lframe1\n"
6225 "\t.long .Lbegin-.\n"
6226 "\t.long %d\n"
6227 "\t.uleb128 0\n" /* augmentation length */
6228 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
6229#if LJ_64
6230 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
6231 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
6232 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
6233 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
6234#else
6235 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
6236 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
6237 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
6238 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
6239#endif
6240 "\t.align " SZPTR "\n"
6241 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
6242#if LJ_HASFFI
  /*
  ** CIE .Lframe2 for lj_vm_ffi_call: augmentation "zR" only, so it names
  ** no personality routine; the single augmentation byte is the FDE
  ** pointer encoding.
  */
6243 fprintf(ctx->fp,
6244 ".Lframe2:\n"
6245 "\t.long .LECIE2-.LSCIE2\n"
6246 ".LSCIE2:\n"
6247 "\t.long 0\n"
6248 "\t.byte 0x1\n"
6249 "\t.string \"zR\"\n"
6250 "\t.uleb128 0x1\n"
6251 "\t.sleb128 -" SZPTR "\n"
6252 "\t.byte " REG_RA "\n"
6253 "\t.uleb128 1\n" /* augmentation length */
6254 "\t.byte 0x1b\n" /* pcrel|sdata4 */
6255 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
6256 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
6257 "\t.align " SZPTR "\n"
6258 ".LECIE2:\n\n");
  /* FDE 3: .eh_frame counterpart of FDE 1 (same length and rules). */
6259 fprintf(ctx->fp,
6260 ".LSFDE3:\n"
6261 "\t.long .LEFDE3-.LASFDE3\n"
6262 ".LASFDE3:\n"
6263 "\t.long .LASFDE3-.Lframe2\n"
6264 "\t.long lj_vm_ffi_call-.\n"
6265 "\t.long %d\n"
6266 "\t.uleb128 0\n" /* augmentation length */
6267#if LJ_64
6268 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
6269 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
6270 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
6271 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
6272#else
6273 "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
6274 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
6275 "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
6276 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
6277#endif
6278 "\t.align " SZPTR "\n"
6279 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
6280#endif
6281 break;
6282 case BUILD_coffasm:
  /* --- COFF: .eh_frame only ----------------------------------------- */
6283 fprintf(ctx->fp, "\t.section .eh_frame,\"dr\"\n");
  /* Symbol names get a leading underscore when LJ_32 is true. */
6284 fprintf(ctx->fp,
6285 "\t.def %slj_err_unwind_dwarf; .scl 2; .type 32; .endef\n",
6286 LJ_32 ? "_" : "");
  /*
  ** CIE Lframe1: augmentation "zP". The 5 augmentation bytes are the
  ** personality encoding (absolute pointer) plus a 4-byte .long holding
  ** the address of lj_err_unwind_dwarf.
  */
6287 fprintf(ctx->fp,
6288 "Lframe1:\n"
6289 "\t.long LECIE1-LSCIE1\n"
6290 "LSCIE1:\n"
6291 "\t.long 0\n"
6292 "\t.byte 0x1\n"
6293 "\t.string \"zP\"\n"
6294 "\t.uleb128 0x1\n"
6295 "\t.sleb128 -" SZPTR "\n"
6296 "\t.byte " REG_RA "\n"
6297 "\t.uleb128 5\n" /* augmentation length */
6298 "\t.byte 0x00\n" /* absptr */
6299 "\t.long %slj_err_unwind_dwarf\n"
6300 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
6301 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
6302 "\t.align " SZPTR "\n"
6303 "LECIE1:\n\n", LJ_32 ? "_" : "");
  /*
  ** Single FDE starting at lj_vm_asm_begin and spanning all ctx->codesz
  ** bytes; unlike the ELF case there is no separate FDE for
  ** lj_vm_ffi_call. Arguments in order: symbol prefix, length, CFA offset.
  */
6304 fprintf(ctx->fp,
6305 "LSFDE1:\n"
6306 "\t.long LEFDE1-LASFDE1\n"
6307 "LASFDE1:\n"
6308 "\t.long LASFDE1-Lframe1\n"
6309 "\t.long %slj_vm_asm_begin\n"
6310 "\t.long %d\n"
6311 "\t.uleb128 0\n" /* augmentation length */
6312 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
6313#if LJ_64
6314 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
6315 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
6316 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
6317 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
6318#else
6319 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
6320 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
6321 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
6322 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
6323#endif
6324 "\t.align " SZPTR "\n"
6325 "LEFDE1:\n\n", LJ_32 ? "_" : "", (int)ctx->codesz, CFRAME_SIZE);
6326 break;
6327 /* Mental note: never let Apple design an assembler.
6328 ** Or a linker. Or a plastic case. But I digress.
6329 */
6330 case BUILD_machasm: {
  /* --- Mach-O: one FDE per symbol ----------------------------------- */
  /*
  ** This case uses plain .byte where the other cases use .uleb128 or
  ** .sleb128, so each such value must fit a single LEB128 byte (< 128).
  ** NOTE(review): that includes CFRAME_SIZE -- confirm.
  */
6331#if LJ_HASFFI
  /* Size of _lj_vm_ffi_call, recorded by the loop; 0 if never seen. */
6332 int fcsize = 0;
6333#endif
6334 int i;
6335 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
  /*
  ** CIE EH_frame1: augmentation "zPR" with an indirect pc-relative
  ** personality pointer (GOT entry on 64-bit, non-lazy pointer stub on
  ** 32-bit). "128-SZPTR" is the single-byte SLEB128 form of -SZPTR.
  */
6336 fprintf(ctx->fp,
6337 "EH_frame1:\n"
6338 "\t.set L$set$x,LECIEX-LSCIEX\n"
6339 "\t.long L$set$x\n"
6340 "LSCIEX:\n"
6341 "\t.long 0\n"
6342 "\t.byte 0x1\n"
6343 "\t.ascii \"zPR\\0\"\n"
6344 "\t.byte 0x1\n"
6345 "\t.byte 128-" SZPTR "\n"
6346 "\t.byte " REG_RA "\n"
6347 "\t.byte 6\n" /* augmentation length */
6348 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
6349#if LJ_64
6350 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
6351 "\t.byte 0x1b\n" /* pcrel|sdata4 */
6352 "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
6353#else
6354 "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n"
6355 "\t.byte 0x1b\n" /* pcrel|sdata4 */
6356 "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */
6357#endif
6358 "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
6359 "\t.align " BSZPTR "\n"
6360 "LECIEX:\n\n");
  /*
  ** One "<name>.eh" FDE per symbol. A symbol's size is the distance to
  ** the next symbol's offset, so sym[i+1] is read even for i == nsym-1;
  ** this presumes the table has a valid end entry at index nsym -- TODO
  ** confirm where ctx->sym is built.
  */
6361 for (i = 0; i < ctx->nsym; i++) {
6362 const char *name = ctx->sym[i].name;
6363 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
  /* Zero-sized symbols get no FDE. */
6364 if (size == 0) continue;
6365#if LJ_HASFFI
  /* _lj_vm_ffi_call is skipped here and gets its own CIE/FDE below. */
6366 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
6367#endif
  /* The loop index i makes the LSFDE/LASFDE/LEFDE/L$set$ labels unique. */
6368 fprintf(ctx->fp,
6369 "%s.eh:\n"
6370 "LSFDE%d:\n"
6371 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
6372 "\t.long L$set$%d\n"
6373 "LASFDE%d:\n"
6374 "\t.long LASFDE%d-EH_frame1\n"
6375 "\t.long %s-.\n"
6376 "\t.long %d\n"
6377 "\t.byte 0\n" /* augmentation length */
6378 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
6379#if LJ_64
6380 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
6381 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
6382 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
6383 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
6384#else
6385 "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
6386 "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */
6387 "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */
6388 "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */
6389#endif
6390 "\t.align " BSZPTR "\n"
6391 "LEFDE%d:\n\n",
6392 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
6393 }
6394#if LJ_HASFFI
  /* Emitted only if the loop above saw _lj_vm_ffi_call with nonzero size. */
6395 if (fcsize) {
  /* CIE EH_frame2: augmentation "zR", i.e. no personality routine. */
6396 fprintf(ctx->fp,
6397 "EH_frame2:\n"
6398 "\t.set L$set$y,LECIEY-LSCIEY\n"
6399 "\t.long L$set$y\n"
6400 "LSCIEY:\n"
6401 "\t.long 0\n"
6402 "\t.byte 0x1\n"
6403 "\t.ascii \"zR\\0\"\n"
6404 "\t.byte 0x1\n"
6405 "\t.byte 128-" SZPTR "\n"
6406 "\t.byte " REG_RA "\n"
6407 "\t.byte 1\n" /* augmentation length */
6408#if LJ_64
6409 "\t.byte 0x1b\n" /* pcrel|sdata4 */
6410 "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
6411#else
6412 "\t.byte 0x1b\n" /* pcrel|sdata4 */
6413 "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */
6414#endif
6415 "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
6416 "\t.align " BSZPTR "\n"
6417 "LECIEY:\n\n");
  /* FDE for _lj_vm_ffi_call; its length is the fcsize recorded above. */
6418 fprintf(ctx->fp,
6419 "_lj_vm_ffi_call.eh:\n"
6420 "LSFDEY:\n"
6421 "\t.set L$set$yy,LEFDEY-LASFDEY\n"
6422 "\t.long L$set$yy\n"
6423 "LASFDEY:\n"
6424 "\t.long LASFDEY-EH_frame2\n"
6425 "\t.long _lj_vm_ffi_call-.\n"
6426 "\t.long %d\n"
6427 "\t.byte 0\n" /* augmentation length */
6428#if LJ_64
6429 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
6430 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
6431 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
6432 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
6433#else
6434 "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */
6435 "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
6436 "\t.byte 0xd\n\t.uleb128 0x4\n" /* def_cfa_register ebp */
6437 "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */
6438#endif
6439 "\t.align " BSZPTR "\n"
6440 "LEFDEY:\n\n", fcsize);
6441 }
6442#endif
  /*
  ** Trailer: 64-bit ends with .subsections_via_symbols; 32-bit instead
  ** defines the non-lazy pointer referenced by the EH_frame1 personality.
  */
6443#if LJ_64
6444 fprintf(ctx->fp, "\t.subsections_via_symbols\n");
6445#else
6446 fprintf(ctx->fp,
6447 "\t.non_lazy_symbol_pointer\n"
6448 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
6449 ".indirect_symbol _lj_err_unwind_dwarf\n"
6450 ".long 0\n");
6451#endif
6452 }
6453 break;
6454 default: /* Difficult for other modes. */
6455 break;
6456 }
6457}
6458