diff options
Diffstat (limited to '')
-rw-r--r-- | libraries/luajit-2.0/src/lj_str.c | 415 |
1 files changed, 415 insertions, 0 deletions
diff --git a/libraries/luajit-2.0/src/lj_str.c b/libraries/luajit-2.0/src/lj_str.c new file mode 100644 index 0000000..cd3a8b6 --- /dev/null +++ b/libraries/luajit-2.0/src/lj_str.c | |||
@@ -0,0 +1,415 @@ | |||
1 | /* | ||
2 | ** String handling. | ||
3 | ** Copyright (C) 2005-2011 Mike Pall. See Copyright Notice in luajit.h | ||
4 | ** | ||
5 | ** Portions taken verbatim or adapted from the Lua interpreter. | ||
6 | ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h | ||
7 | */ | ||
8 | |||
9 | #include <stdio.h> | ||
10 | |||
11 | #define lj_str_c | ||
12 | #define LUA_CORE | ||
13 | |||
14 | #include "lj_obj.h" | ||
15 | #include "lj_gc.h" | ||
16 | #include "lj_err.h" | ||
17 | #include "lj_str.h" | ||
18 | #include "lj_state.h" | ||
19 | #include "lj_char.h" | ||
20 | |||
21 | /* -- String interning ---------------------------------------------------- */ | ||
22 | |||
23 | /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ | ||
24 | int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) | ||
25 | { | ||
26 | MSize i, n = a->len > b->len ? b->len : a->len; | ||
27 | for (i = 0; i < n; i += 4) { | ||
28 | /* Note: innocuous access up to end of string + 3. */ | ||
29 | uint32_t va = *(const uint32_t *)(strdata(a)+i); | ||
30 | uint32_t vb = *(const uint32_t *)(strdata(b)+i); | ||
31 | if (va != vb) { | ||
32 | #if LJ_LE | ||
33 | va = lj_bswap(va); vb = lj_bswap(vb); | ||
34 | #endif | ||
35 | i -= n; | ||
36 | if ((int32_t)i >= -3) { | ||
37 | va >>= 32+(i<<3); vb >>= 32+(i<<3); | ||
38 | if (va == vb) break; | ||
39 | } | ||
40 | return va < vb ? -1 : 1; | ||
41 | } | ||
42 | } | ||
43 | return (int32_t)(a->len - b->len); | ||
44 | } | ||
45 | |||
46 | /* Fast string data comparison. Caveat: unaligned access to 1st string! */ | ||
47 | static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len) | ||
48 | { | ||
49 | MSize i = 0; | ||
50 | lua_assert(len > 0); | ||
51 | lua_assert((((uintptr_t)a + len) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4); | ||
52 | do { /* Note: innocuous access up to end of string + 3. */ | ||
53 | uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i); | ||
54 | if (v) { | ||
55 | i -= len; | ||
56 | #if LJ_LE | ||
57 | return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1; | ||
58 | #else | ||
59 | return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1; | ||
60 | #endif | ||
61 | } | ||
62 | i += 4; | ||
63 | } while (i < len); | ||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | /* Resize the string hash table (grow and shrink). */ | ||
68 | void lj_str_resize(lua_State *L, MSize newmask) | ||
69 | { | ||
70 | global_State *g = G(L); | ||
71 | GCRef *newhash; | ||
72 | MSize i; | ||
73 | if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) | ||
74 | return; /* No resizing during GC traversal or if already too big. */ | ||
75 | newhash = lj_mem_newvec(L, newmask+1, GCRef); | ||
76 | memset(newhash, 0, (newmask+1)*sizeof(GCRef)); | ||
77 | for (i = g->strmask; i != ~(MSize)0; i--) { /* Rehash old table. */ | ||
78 | GCobj *p = gcref(g->strhash[i]); | ||
79 | while (p) { /* Follow each hash chain and reinsert all strings. */ | ||
80 | MSize h = gco2str(p)->hash & newmask; | ||
81 | GCobj *next = gcnext(p); | ||
82 | /* NOBARRIER: The string table is a GC root. */ | ||
83 | setgcrefr(p->gch.nextgc, newhash[h]); | ||
84 | setgcref(newhash[h], p); | ||
85 | p = next; | ||
86 | } | ||
87 | } | ||
88 | lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef); | ||
89 | g->strmask = newmask; | ||
90 | g->strhash = newhash; | ||
91 | } | ||
92 | |||
93 | /* Intern a string and return string object. */ | ||
94 | GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) | ||
95 | { | ||
96 | global_State *g; | ||
97 | GCstr *s; | ||
98 | GCobj *o; | ||
99 | MSize len = (MSize)lenx; | ||
100 | MSize a, b, h = len; | ||
101 | if (lenx >= LJ_MAX_STR) | ||
102 | lj_err_msg(L, LJ_ERR_STROV); | ||
103 | g = G(L); | ||
104 | /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */ | ||
105 | if (len >= 4) { /* Caveat: unaligned access! */ | ||
106 | a = lj_getu32(str); | ||
107 | h ^= lj_getu32(str+len-4); | ||
108 | b = lj_getu32(str+(len>>1)-2); | ||
109 | h ^= b; h -= lj_rol(b, 14); | ||
110 | b += lj_getu32(str+(len>>2)-1); | ||
111 | } else if (len > 0) { | ||
112 | a = *(const uint8_t *)str; | ||
113 | h ^= *(const uint8_t *)(str+len-1); | ||
114 | b = *(const uint8_t *)(str+(len>>1)); | ||
115 | h ^= b; h -= lj_rol(b, 14); | ||
116 | } else { | ||
117 | return &g->strempty; | ||
118 | } | ||
119 | a ^= h; a -= lj_rol(h, 11); | ||
120 | b ^= a; b -= lj_rol(a, 25); | ||
121 | h ^= b; h -= lj_rol(b, 16); | ||
122 | /* Check if the string has already been interned. */ | ||
123 | o = gcref(g->strhash[h & g->strmask]); | ||
124 | if (LJ_LIKELY((((uintptr_t)str + len) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) { | ||
125 | while (o != NULL) { | ||
126 | GCstr *sx = gco2str(o); | ||
127 | if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) { | ||
128 | /* Resurrect if dead. Can only happen with fixstring() (keywords). */ | ||
129 | if (isdead(g, o)) flipwhite(o); | ||
130 | return sx; /* Return existing string. */ | ||
131 | } | ||
132 | o = gcnext(o); | ||
133 | } | ||
134 | } else { /* Slow path: end of string is too close to a page boundary. */ | ||
135 | while (o != NULL) { | ||
136 | GCstr *sx = gco2str(o); | ||
137 | if (sx->len == len && memcmp(str, strdata(sx), len) == 0) { | ||
138 | /* Resurrect if dead. Can only happen with fixstring() (keywords). */ | ||
139 | if (isdead(g, o)) flipwhite(o); | ||
140 | return sx; /* Return existing string. */ | ||
141 | } | ||
142 | o = gcnext(o); | ||
143 | } | ||
144 | } | ||
145 | /* Nope, create a new string. */ | ||
146 | s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr); | ||
147 | newwhite(g, s); | ||
148 | s->gct = ~LJ_TSTR; | ||
149 | s->len = len; | ||
150 | s->hash = h; | ||
151 | s->reserved = 0; | ||
152 | memcpy(strdatawr(s), str, len); | ||
153 | strdatawr(s)[len] = '\0'; /* Zero-terminate string. */ | ||
154 | /* Add it to string hash table. */ | ||
155 | h &= g->strmask; | ||
156 | s->nextgc = g->strhash[h]; | ||
157 | /* NOBARRIER: The string table is a GC root. */ | ||
158 | setgcref(g->strhash[h], obj2gco(s)); | ||
159 | if (g->strnum++ > g->strmask) /* Allow a 100% load factor. */ | ||
160 | lj_str_resize(L, (g->strmask<<1)+1); /* Grow string table. */ | ||
161 | return s; /* Return newly interned string. */ | ||
162 | } | ||
163 | |||
164 | void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) | ||
165 | { | ||
166 | g->strnum--; | ||
167 | lj_mem_free(g, s, sizestring(s)); | ||
168 | } | ||
169 | |||
170 | /* -- Type conversions ---------------------------------------------------- */ | ||
171 | |||
172 | /* Convert string object to number. */ | ||
173 | int LJ_FASTCALL lj_str_tonum(GCstr *str, TValue *n) | ||
174 | { | ||
175 | int ok = lj_str_numconv(strdata(str), n); | ||
176 | if (ok && tvisint(n)) | ||
177 | setnumV(n, (lua_Number)intV(n)); | ||
178 | return ok; | ||
179 | } | ||
180 | |||
181 | int LJ_FASTCALL lj_str_tonumber(GCstr *str, TValue *n) | ||
182 | { | ||
183 | return lj_str_numconv(strdata(str), n); | ||
184 | } | ||
185 | |||
186 | /* Convert string to number. */ | ||
187 | int LJ_FASTCALL lj_str_numconv(const char *s, TValue *n) | ||
188 | { | ||
189 | #if LJ_DUALNUM | ||
190 | int sign = 1; | ||
191 | #else | ||
192 | lua_Number sign = 1; | ||
193 | #endif | ||
194 | const uint8_t *p = (const uint8_t *)s; | ||
195 | while (lj_char_isspace(*p)) p++; | ||
196 | if (*p == '-') { p++; sign = -1; } else if (*p == '+') { p++; } | ||
197 | if ((uint32_t)(*p - '0') < 10) { | ||
198 | uint32_t k = (uint32_t)(*p++ - '0'); | ||
199 | if (k == 0 && ((*p & ~0x20) == 'X')) { | ||
200 | p++; | ||
201 | if (!lj_char_isxdigit(*p)) | ||
202 | return 0; /* Don't accept '0x' without hex digits. */ | ||
203 | do { | ||
204 | if (k >= 0x10000000u) goto parsedbl; | ||
205 | k = (k << 4) + (*p & 15u); | ||
206 | if (!lj_char_isdigit(*p)) k += 9; | ||
207 | p++; | ||
208 | } while (lj_char_isxdigit(*p)); | ||
209 | } else { | ||
210 | while ((uint32_t)(*p - '0') < 10) { | ||
211 | if (LJ_UNLIKELY(k >= 429496729) && (k != 429496729 || *p > '5')) | ||
212 | goto parsedbl; | ||
213 | k = k * 10u + (uint32_t)(*p++ - '0'); | ||
214 | } | ||
215 | } | ||
216 | while (LJ_UNLIKELY(lj_char_isspace(*p))) p++; | ||
217 | if (LJ_LIKELY(*p == '\0')) { | ||
218 | #if LJ_DUALNUM | ||
219 | if (sign == 1) { | ||
220 | if (k < 0x80000000u) { | ||
221 | setintV(n, (int32_t)k); | ||
222 | return 1; | ||
223 | } | ||
224 | } else if (k <= 0x80000000u) { | ||
225 | setintV(n, -(int32_t)k); | ||
226 | return 1; | ||
227 | } | ||
228 | #endif | ||
229 | setnumV(n, sign * (lua_Number)k); | ||
230 | return 1; | ||
231 | } | ||
232 | } | ||
233 | parsedbl: | ||
234 | { | ||
235 | TValue tv; | ||
236 | char *endptr; | ||
237 | setnumV(&tv, lua_str2number(s, &endptr)); | ||
238 | if (endptr == s) return 0; /* Conversion failed. */ | ||
239 | if (LJ_UNLIKELY(*endptr != '\0')) { | ||
240 | while (lj_char_isspace((uint8_t)*endptr)) endptr++; | ||
241 | if (*endptr != '\0') return 0; /* Invalid trailing characters? */ | ||
242 | } | ||
243 | if (LJ_LIKELY(!tvisnan(&tv))) | ||
244 | setnumV(n, numV(&tv)); | ||
245 | else | ||
246 | setnanV(n); /* Canonicalize injected NaNs. */ | ||
247 | return 1; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | /* Print number to buffer. Canonicalizes non-finite values. */ | ||
252 | size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o) | ||
253 | { | ||
254 | if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) { /* Finite? */ | ||
255 | lua_Number n = o->n; | ||
256 | return (size_t)lua_number2str(s, n); | ||
257 | } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) { | ||
258 | s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3; | ||
259 | } else if ((o->u32.hi & 0x80000000) == 0) { | ||
260 | s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3; | ||
261 | } else { | ||
262 | s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4; | ||
263 | } | ||
264 | } | ||
265 | |||
266 | /* Print integer to buffer. Returns pointer to start. */ | ||
267 | char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k) | ||
268 | { | ||
269 | uint32_t u = (uint32_t)(k < 0 ? -k : k); | ||
270 | p += 1+10; | ||
271 | do { *--p = (char)('0' + u % 10); } while (u /= 10); | ||
272 | if (k < 0) *--p = '-'; | ||
273 | return p; | ||
274 | } | ||
275 | |||
276 | /* Convert number to string. */ | ||
277 | GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np) | ||
278 | { | ||
279 | char buf[LJ_STR_NUMBUF]; | ||
280 | size_t len = lj_str_bufnum(buf, (TValue *)np); | ||
281 | return lj_str_new(L, buf, len); | ||
282 | } | ||
283 | |||
284 | /* Convert integer to string. */ | ||
285 | GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k) | ||
286 | { | ||
287 | char s[1+10]; | ||
288 | char *p = lj_str_bufint(s, k); | ||
289 | return lj_str_new(L, p, (size_t)(s+sizeof(s)-p)); | ||
290 | } | ||
291 | |||
292 | GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o) | ||
293 | { | ||
294 | return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n); | ||
295 | } | ||
296 | |||
297 | /* -- String formatting --------------------------------------------------- */ | ||
298 | |||
299 | static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len) | ||
300 | { | ||
301 | char *p; | ||
302 | MSize i; | ||
303 | if (sb->n + len > sb->sz) { | ||
304 | MSize sz = sb->sz * 2; | ||
305 | while (sb->n + len > sz) sz = sz * 2; | ||
306 | lj_str_resizebuf(L, sb, sz); | ||
307 | } | ||
308 | p = sb->buf + sb->n; | ||
309 | sb->n += len; | ||
310 | for (i = 0; i < len; i++) p[i] = str[i]; | ||
311 | } | ||
312 | |||
313 | static void addchar(lua_State *L, SBuf *sb, int c) | ||
314 | { | ||
315 | if (sb->n + 1 > sb->sz) { | ||
316 | MSize sz = sb->sz * 2; | ||
317 | lj_str_resizebuf(L, sb, sz); | ||
318 | } | ||
319 | sb->buf[sb->n++] = (char)c; | ||
320 | } | ||
321 | |||
322 | /* Push formatted message as a string object to Lua stack. va_list variant. */ | ||
323 | const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp) | ||
324 | { | ||
325 | SBuf *sb = &G(L)->tmpbuf; | ||
326 | lj_str_needbuf(L, sb, (MSize)strlen(fmt)); | ||
327 | lj_str_resetbuf(sb); | ||
328 | for (;;) { | ||
329 | const char *e = strchr(fmt, '%'); | ||
330 | if (e == NULL) break; | ||
331 | addstr(L, sb, fmt, (MSize)(e-fmt)); | ||
332 | /* This function only handles %s, %c, %d, %f and %p formats. */ | ||
333 | switch (e[1]) { | ||
334 | case 's': { | ||
335 | const char *s = va_arg(argp, char *); | ||
336 | if (s == NULL) s = "(null)"; | ||
337 | addstr(L, sb, s, (MSize)strlen(s)); | ||
338 | break; | ||
339 | } | ||
340 | case 'c': | ||
341 | addchar(L, sb, va_arg(argp, int)); | ||
342 | break; | ||
343 | case 'd': { | ||
344 | char buf[LJ_STR_INTBUF]; | ||
345 | char *p = lj_str_bufint(buf, va_arg(argp, int32_t)); | ||
346 | addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p)); | ||
347 | break; | ||
348 | } | ||
349 | case 'f': { | ||
350 | char buf[LJ_STR_NUMBUF]; | ||
351 | TValue tv; | ||
352 | MSize len; | ||
353 | tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER)); | ||
354 | len = (MSize)lj_str_bufnum(buf, &tv); | ||
355 | addstr(L, sb, buf, len); | ||
356 | break; | ||
357 | } | ||
358 | case 'p': { | ||
359 | #define FMTP_CHARS (2*sizeof(ptrdiff_t)) | ||
360 | char buf[2+FMTP_CHARS]; | ||
361 | ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *)); | ||
362 | ptrdiff_t i, lasti = 2+FMTP_CHARS; | ||
363 | if (p == 0) { | ||
364 | addstr(L, sb, "NULL", 4); | ||
365 | break; | ||
366 | } | ||
367 | #if LJ_64 | ||
368 | /* Shorten output for 64 bit pointers. */ | ||
369 | lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0); | ||
370 | #endif | ||
371 | buf[0] = '0'; | ||
372 | buf[1] = 'x'; | ||
373 | for (i = lasti-1; i >= 2; i--, p >>= 4) | ||
374 | buf[i] = "0123456789abcdef"[(p & 15)]; | ||
375 | addstr(L, sb, buf, (MSize)lasti); | ||
376 | break; | ||
377 | } | ||
378 | case '%': | ||
379 | addchar(L, sb, '%'); | ||
380 | break; | ||
381 | default: | ||
382 | addchar(L, sb, '%'); | ||
383 | addchar(L, sb, e[1]); | ||
384 | break; | ||
385 | } | ||
386 | fmt = e+2; | ||
387 | } | ||
388 | addstr(L, sb, fmt, (MSize)strlen(fmt)); | ||
389 | setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n)); | ||
390 | incr_top(L); | ||
391 | return strVdata(L->top - 1); | ||
392 | } | ||
393 | |||
394 | /* Push formatted message as a string object to Lua stack. Vararg variant. */ | ||
395 | const char *lj_str_pushf(lua_State *L, const char *fmt, ...) | ||
396 | { | ||
397 | const char *msg; | ||
398 | va_list argp; | ||
399 | va_start(argp, fmt); | ||
400 | msg = lj_str_pushvf(L, fmt, argp); | ||
401 | va_end(argp); | ||
402 | return msg; | ||
403 | } | ||
404 | |||
405 | /* -- Buffer handling ----------------------------------------------------- */ | ||
406 | |||
407 | char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz) | ||
408 | { | ||
409 | if (sz > sb->sz) { | ||
410 | if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF; | ||
411 | lj_str_resizebuf(L, sb, sz); | ||
412 | } | ||
413 | return sb->buf; | ||
414 | } | ||
415 | |||