aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/libraries/LuaJIT-1.1.7/src/ljit_x86.dash
blob: 432e1c83948c372bacbf52daec814d1d89f87e7a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
|//
|// Common DynASM definitions and macros for x86 CPUs.
|// Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h
|//
|
|// Standard DynASM declarations.
|// Select the x86 backend and declare the four named output sections
|// (code, deopt, tail, mfmap) used by the generated code.
|.arch x86
|.section code, deopt, tail, mfmap
|
|// Type definitions with (almost) global validity.
|// Each .type binds a name to a C type plus a default register, so that
|// NAME->field and NAME[idx].field resolve to offsets relative to that
|// register.
|.type L,		lua_State,	esi	// L.
|.type BASE,		TValue,		ebx	// L->base.
|.type TOP,		TValue,		edi	// L->top (calls/open ops).
|.type CI,		CallInfo,	ecx	// L->ci (calls, locally).
|.type LCL,		LClosure,	eax	// func->value (calls).
|
|// Type definitions with local validity.
|// These have no default register; the register is given at the point of
|// use with the NAME:reg syntax.
|.type GL,		global_State
|.type TVALUE,		TValue
|.type VALUE,		Value
|.type CINFO,		CallInfo
|.type GCOBJECT,	GCObject
|.type TSTRING,		TString
|.type TABLE,		Table
|.type CCLOSURE,	CClosure
|.type PROTO,		Proto
|.type UPVAL,		UpVal
|.type NODE,		Node
|
|// Definitions copied to DynASM domain to avoid unnecessary constant args.
|// CHECK: must match with the definitions in lua.h!
|.define LUA_TNIL,		0
|.define LUA_TBOOLEAN,		1
|.define LUA_TLIGHTUSERDATA,	2
|.define LUA_TNUMBER,		3
|.define LUA_TSTRING,		4
|.define LUA_TTABLE,		5
|.define LUA_TFUNCTION,		6
|.define LUA_TUSERDATA,		7
|.define LUA_TTHREAD,		8
|
|// Packed type tag combinations: one hex digit (4 bits) per operand, with
|// the first operand in the most significant digit. This is the encoding
|// built by isnumber2/isnumber3 (shl 4, then or in the next tag).
|.define LUA_TNUM_NUM,		0x33
|.define LUA_TNUM_NUM_NUM,	0x333
|.define LUA_TSTR_STR,		0x44
|.define LUA_TSTR_NUM,		0x43
|.define LUA_TSTR_NUM_NUM,	0x433
|.define LUA_TTABLE_NUM,	0x53
|.define LUA_TTABLE_STR,	0x54
|
|// Macros to test, set and copy stack slots.
|
|// Compare the type tag of stack slot BASE[idx] with tp. Only the CPU flags
|// are affected.
|.macro istt, idx, tp
|  cmp dword BASE[idx].tt, tp
|.endmacro
|
|// Shorthands of istt for the individual type tags.
|.macro isnil, idx
|  istt idx, LUA_TNIL
|.endmacro
|
|.macro isnumber, idx
|  istt idx, LUA_TNUMBER
|.endmacro
|
|.macro isstring, idx
|  istt idx, LUA_TSTRING
|.endmacro
|
|.macro istable, idx
|  istt idx, LUA_TTABLE
|.endmacro
|
|.macro isfunction, idx
|  istt idx, LUA_TFUNCTION
|.endmacro
|
|// Test two stack slots at once: the type tags of BASE[idx1] and BASE[idx2]
|// are packed into reg (4 bits each) and compared with LUA_TNUM_NUM.
|// reg is overwritten; the outcome is left in the CPU flags.
|.macro isnumber2, idx1, idx2, reg
|  mov reg, BASE[idx1].tt
|  shl reg, 4
|  or reg, BASE[idx2].tt
|  cmp reg, LUA_TNUM_NUM
|.endmacro
|
|// Variant using eax as the scratch register.
|.macro isnumber2, idx1, idx2
|  isnumber2, idx1, idx2, eax
|.endmacro
|
|// Test three stack slots at once: the type tags of BASE[idx1], BASE[idx2]
|// and BASE[idx3] are packed into reg (4 bits each) and compared with
|// LUA_TNUM_NUM_NUM. reg is overwritten; the outcome is left in the flags.
|.macro isnumber3, idx1, idx2, idx3, reg
|  mov reg, BASE[idx1].tt
|  shl reg, 4
|  or reg, BASE[idx2].tt
|  shl reg, 4
|  or reg, BASE[idx3].tt
|  cmp reg, LUA_TNUM_NUM_NUM
|.endmacro
|
|// Variant using eax as the scratch register.
|.macro isnumber3, idx1, idx2, idx3
|  isnumber3, idx1, idx2, idx3, eax
|.endmacro
|
|// Compare the type tag of the TValue operand tv with LUA_TNIL.
|.macro tvisnil, tv
|  cmp dword tv.tt, LUA_TNIL
|.endmacro
|
|// Store the type tag tp into the TValue operand tv.
|.macro settt, tv, tp
|  mov dword tv.tt, tp
|.endmacro
|
|// Turn tv into nil. Only the tag is written.
|.macro setnilvalue, tv
|  settt tv, LUA_TNIL
|.endmacro
|
|// Store a boolean into tv. `val' is a C expression tested when the code is
|// generated, so only one of the two instruction sequences gets emitted.
|.macro setbvalue, tv, val		// May use edx.
||if (val) {  /* true */
|   // A single register load serves as both the value and the tag.
|   mov edx, LUA_TBOOLEAN
|   mov dword tv.value, edx		// Assumes: LUA_TBOOLEAN == 1
|   mov dword tv.tt, edx
||} else {  /* false */
|   mov dword tv.value, 0
|   mov dword tv.tt, LUA_TBOOLEAN
||}
|.endmacro
|
|// Push the number constant at vptr onto the x87 stack. The constant is
|// inspected in C when the code is generated: 0 and 1 get the dedicated
|// load instructions, everything else is loaded from memory at vptr.
|// NOTE(review): if lua_Number is a floating-point type, a constant of -0.0
|// also compares equal to 0 and is loaded via fldz -- confirm this is ok.
|.macro loadnvaluek, vptr
||if ((vptr)->n == (lua_Number)1) {
|  fld1
||} else if ((vptr)->n == (lua_Number)0) {
|  fldz
||} else {
|  fld qword [vptr]
||}
|.endmacro
|
|// Store the number constant at vptr into tv via the x87 stack. The address
|// may end up inside the generated code (see loadnvaluek), hence it must
|// stay valid.
|.macro setnvaluek, tv, vptr		// Pass a Value *! With permanent addr.
|  // SSE2 does not pay off here (I tried).
|  loadnvaluek vptr
|  fstp qword tv.value
|  mov dword tv.tt, LUA_TNUMBER
|.endmacro
|
|// Store the number at vptr into tv as two 32 bit immediates. The halves
|// are read from vptr when the code is generated.
|.macro setnvalue, tv, vptr		// Pass a Value *! Temporary ok.
|  mov dword tv.value, (vptr)->na[0]
|  mov dword tv.value.na[1], (vptr)->na[1]
|  mov dword tv.tt, LUA_TNUMBER
|.endmacro
|
|// Store vptr as the value of tv and tag tv as a string.
|.macro setsvalue, tv, vptr
|  mov aword tv.value, vptr
|  mov dword tv.tt, LUA_TSTRING
|.endmacro
|
|// Store vptr as the value of tv and tag tv as a table.
|.macro sethvalue, tv, vptr
|  mov aword tv.value, vptr
|  mov dword tv.tt, LUA_TTABLE
|.endmacro
|
|// Copy the slot S to D: the tag travels through R1, the 8 byte value
|// through xmm0.
|.macro copyslotSSE, D, S, R1		// May use xmm0.
|  mov R1, S.tt
|  movq xmm0, qword S.value
|  mov D.tt, R1
|  movq qword D.value, xmm0
|.endmacro
|
|// Copy the slot S to D with three scratch registers. The SSE2 variant is
|// emitted when the JIT flags report SSE2; it only needs R1 (plus xmm0).
|.macro copyslot, D, S, R1, R2, R3
||if (J->flags & JIT_F_CPU_SSE2) {
|  copyslotSSE D, S, R1
||} else {
|  // Three loads followed by three stores.
|  mov R1, S.value
|  mov R2, S.value.na[1]
|  mov R3, S.tt
|  mov D.value, R1
|  mov D.value.na[1], R2
|  mov D.tt, R3
||}
|.endmacro
|
|// Copy the slot S to D with only two scratch registers. The SSE2 variant
|// is emitted when the JIT flags report SSE2; it only needs R1 (plus xmm0).
|.macro copyslot, D, S, R1, R2
||if (J->flags & JIT_F_CPU_SSE2) {
|  copyslotSSE D, S, R1
||} else {
|  mov R1, S.value
|  mov R2, S.value.na[1]
|  mov D.value, R1
|  mov R1, S.tt			// R1 is free again, reuse it for the tag.
|  mov D.value.na[1], R2
|  mov D.tt, R1
||}
|.endmacro
|
|// Copy the slot S to D using ecx, edx and eax as scratch registers.
|.macro copyslot, D, S
|  copyslot D, S, ecx, edx, eax
|.endmacro
|
|// Emit code that stores the constant TValue *tvk into the slot tv.
|// The switch runs in C when the code is generated, so only the sequence
|// for the actual type of the constant is emitted. Nil, boolean, number and
|// string constants are handled; any other type trips lua_assert.
|.macro copyconst, tv, tvk		// May use edx.
||switch (ttype(tvk)) {
||case LUA_TNIL:
|   setnilvalue tv
||  break;
||case LUA_TBOOLEAN:
|   setbvalue tv, bvalue(tvk)		// May use edx.
||  break;
||case LUA_TNUMBER: {
|   setnvaluek tv, &(tvk)->value
||  break;
||}
||case LUA_TSTRING:
|   // Store the pointer to the string object itself. Taking the address of
|   // gcvalue(tvk) would yield the address of the constant's value field.
|   setsvalue tv, &gcvalue(tvk)->ts
||  break;
||default: lua_assert(0); break;
||}
|.endmacro
|
|// Macros to get Lua structures.
|
|// Load the closure (the value of the function slot) into reg. Which
|// sequence is emitted depends on the is_vararg flag of the prototype J->pt.
|.macro getLCL, reg			// May use CI and TOP (edi).
||if (J->pt->is_vararg) {
|  // Vararg prototype: locate the function slot through L->ci->func.
|  mov CI, L->ci
|  mov TOP, CI->func
|  mov LCL:reg, TOP->value
||} else {
|  // Otherwise the function slot is the one right below BASE.
|  mov LCL:reg, BASE[-1].value
||}
|.endmacro
|
|// Variant loading the closure into eax.
|.macro getLCL
|  getLCL eax
|.endmacro
|
|// Macros to handle variants.
|
|// Advance the register bound to `type' by idx elements of that type.
|// Nothing is emitted when idx is zero at code generation time.
|.macro addidx, type, idx
||if ((idx) != 0) {
|  add type, idx*#type
||}
|.endmacro
|
|// Move the register bound to `type' back by idx elements of that type.
|// Nothing is emitted when idx is zero at code generation time.
|.macro subidx, type, idx
||if ((idx) != 0) {
|  sub type, idx*#type
||}
|.endmacro
|
|// Annoying x87 stuff: support for two compare variants.
|// Both variants pop st0 and st1 and leave the comparison result in the
|// CPU flags. The variant is chosen from the JIT flags at code generation
|// time.
|.macro fcomparepp			// Compare and pop st0 >< st1.
||if (!(J->flags & JIT_F_CPU_CMOV)) {
|  // Fallback: fetch the x87 status word and transfer it to the flags.
|  fucompp
|  fnstsw ax				// eax modified!
|  sahf
|  // Sometimes test ah, imm8 would be faster.
|  // But all following compares need to be changed then.
|  // Don't bother since this is only compatibility stuff for old CPUs.
||} else {
|  // fucomip sets the flags directly and pops once; fpop drops the rest.
|  fucomip st1
|  fpop
||}
|.endmacro
|
|// If you change LUA_TVALUE_ALIGN, be sure to change the Makefile, too:
|//   DASMFLAGS= -D TVALUE_SIZE=...
|// Then rerun make. Or change the default below:
|.if not TVALUE_SIZE; .define TVALUE_SIZE, 16; .endif
|
|// TValuemul: reg = reg * TVALUE_SIZE  (slot index -> byte offset).
|// TValuediv: reg = reg / TVALUE_SIZE  (byte offset -> slot index).
|// Nodemul:   reg = reg * 32 resp. 28  (presumably sizeof(Node) for the
|//            respective TValue layout -- verify against lobject.h).
|.if TVALUE_SIZE == 16
|  .macro TValuemul, reg;  sal reg, 4; .endmacro  // *16
|  .macro TValuediv, reg;  sar reg, 4; .endmacro  // /16
|  .macro Nodemul, reg;  sal reg, 5; .endmacro    // *32
|.elif TVALUE_SIZE == 12
|  .macro TValuemul, reg;  sal reg, 2;  lea reg, [reg+reg*2]; .endmacro  // *12
|  // Exact division of a multiple of 12: an arithmetic right shift divides
|  // by 4, then the multiplication by 0xaaaaaaab (the inverse of 3 modulo
|  // 2^32) divides by 3. A left shift here would yield 16*k for 12*k.
|  .macro TValuediv, reg;  sar reg, 2;  imul reg, 0xaaaaaaab; .endmacro  // /12
|  .macro Nodemul, reg;  imul reg, 28; .endmacro  // *28
|.else
|  .fatal Unsupported TValue size `TVALUE_SIZE'.
|.endif
|
|//
|// x86 C calling conventions and stack frame layout during a JIT call:
|//
|// ebp+aword*4  CARG3     nresults
|// ebp+aword*3  CARG2     func      (also used as SAVER3 for L)
|// ebp+aword*2  CARG1     L
|// -------------------------------  call to GATE_LJ
|// ebp+aword*1  retaddr
|// ebp+aword*0  frameptr       ebp
|// ebp-aword*1  SAVER1    TOP  edi
|// ebp-aword*2  SAVER2    BASE ebx
|// -------------------------------
|//              GATE_LJ retaddr
|// esp+aword*2  ARG3
|// esp+aword*1  ARG2
|// esp+aword*0  ARG1                <-- esp for first JIT frame
|// -------------------------------
|//              1st JIT frame retaddr
|// esp+aword*2  ARG3
|// esp+aword*1  ARG2
|// esp+aword*0  ARG1                <-- esp for second JIT frame
|// -------------------------------
|//              2nd JIT frame retaddr
|//
|// We could omit the fixed frame pointer (ebp) and have one more register
|// available. But there is no pressing need (could use it for CI).
|// And it's easier for debugging (gdb is still confused -- why?).
|//
|// The stack is aligned to 4 awords (16 bytes). Calls to C functions
|// with up to 3 arguments do not need any stack pointer adjustment.
|//
|
|// Incoming C arguments and saved registers, addressed relative to ebp.
|.define CARG3, [ebp+aword*4]
|.define CARG2, [ebp+aword*3]
|.define CARG1, [ebp+aword*2]
|.define SAVER1, [ebp-aword*1]
|.define SAVER2, [ebp-aword*2]
|// Outgoing argument slots, addressed relative to esp.
|.define ARG7, aword [esp+aword*6]	// Requires large frame.
|.define ARG6, aword [esp+aword*5]	// Requires large frame.
|.define ARG5, aword [esp+aword*4]	// Requires large frame.
|.define ARG4, aword [esp+aword*3]	// Requires large frame.
|.define ARG3, aword [esp+aword*2]
|.define ARG2, aword [esp+aword*1]
|.define ARG1, aword [esp]
|// Slot right above the three ARG slots (FRAME_OFFSET is aword*3).
|.define FRAME_RETADDR, aword [esp+aword*3]
|// TMP1..TMP3 address the same stack slots as ARG1..ARG3, but carry no
|// operand size.
|.define TMP3, [esp+aword*2]
|.define TMP2, [esp+aword*1]
|.define TMP1, [esp]
|// Outgoing floating point argument slots (8 bytes each).
|.define FPARG2, qword [esp+qword*1]	// Requires large frame.
|.define FPARG1, qword [esp]
|// Frame sizes. S2LFRAME_OFFSET equals LFRAME_OFFSET - FRAME_OFFSET.
|.define LJFRAME_OFFSET, aword*2	// 16 byte aligned with retaddr + ebp.
|.define FRAME_OFFSET, aword*3		// 16 byte aligned with retaddr.
|.define LFRAME_OFFSET, aword*7		// 16 byte aligned with retaddr.
|.define S2LFRAME_OFFSET, aword*4	// Delta to large frame.
|
|// C call helpers, selected by the number of arguments. Up to three
|// arguments are stored into the ARG slots of the current frame. With four
|// or five arguments the first four are pushed and esp is restored by
|// S2LFRAME_OFFSET after the call.
|.macro call, target, a1
|  mov ARG1, a1
|  call target
|.endmacro
|
|.macro call, target, a1, a2
|  mov ARG1, a1
|  mov ARG2, a2
|  call target
|.endmacro
|
|.macro call, target, a1, a2, a3
|  mov ARG1, a1
|  mov ARG2, a2
|  mov ARG3, a3
|  call target
|.endmacro
|
|.macro call, target, a1, a2, a3, a4
|  push a4
|  push a3
|  push a2
|  push a1
|  call target
|  add esp, S2LFRAME_OFFSET
|.endmacro
|
|.macro call, target, a1, a2, a3, a4, a5
|  mov ARG1, a5			// Becomes the 5th slot after the four pushes.
|  push a4
|  push a3
|  push a2
|  push a1
|  call target
|  add esp, S2LFRAME_OFFSET
|.endmacro
|
|// The following macros require a large frame.
|// All arguments are stored into ARG slots; esp is not adjusted.
|.macro call_LFRAME, target, a1, a2, a3, a4
|  mov ARG1, a1
|  mov ARG2, a2
|  mov ARG3, a3
|  mov ARG4, a4
|  call target
|.endmacro
|
|.macro call_LFRAME, target, a1, a2, a3, a4, a5
|  mov ARG1, a1
|  mov ARG2, a2
|  mov ARG3, a3
|  mov ARG4, a4
|  mov ARG5, a5
|  call target
|.endmacro
|