aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/LuaSL/testLua/yueliang-0.4.1/orig-5.0.3/llex.lua
diff options
context:
space:
mode:
Diffstat (limited to 'LuaSL/testLua/yueliang-0.4.1/orig-5.0.3/llex.lua')
-rw-r--r--LuaSL/testLua/yueliang-0.4.1/orig-5.0.3/llex.lua531
1 file changed, 0 insertions, 531 deletions
diff --git a/LuaSL/testLua/yueliang-0.4.1/orig-5.0.3/llex.lua b/LuaSL/testLua/yueliang-0.4.1/orig-5.0.3/llex.lua
deleted file mode 100644
index 077f1aa..0000000
--- a/LuaSL/testLua/yueliang-0.4.1/orig-5.0.3/llex.lua
+++ /dev/null
@@ -1,531 +0,0 @@
1--[[--------------------------------------------------------------------
2
3 llex.lua
4 Lua 5 lexical analyzer in Lua
5 This file is part of Yueliang.
6
7 Copyright (c) 2005-2006 Kein-Hong Man <khman@users.sf.net>
8 The COPYRIGHT file describes the conditions
9 under which this software may be distributed.
10
11 See the ChangeLog for more information.
12
13----------------------------------------------------------------------]]
14
15--[[--------------------------------------------------------------------
16-- Notes:
17-- * intended to 'imitate' llex.c code; performance is not a concern
18-- * tokens are strings; code structure largely retained
19-- * deleted stuff (compared to llex.c) are noted, comments retained
20-- * Added:
21-- luaX:chunkid (from lobject.c)
22-- * To use the lexer:
23-- (1) luaX:init() to initialize the lexer
24-- (2) luaX:setinput() to set the input stream to lex, get LS
25-- (3) call luaX:lex() to get tokens, until "TK_EOS":
26-- LS.t.token = luaX:lex(LS, LS.t)
27-- * since EOZ is returned as a string, be careful when regexp testing
28----------------------------------------------------------------------]]
29
-- Module table holding the lexer's functions and shared data.
luaX = {}

-- FIRST_RESERVED is not required as tokens are manipulated as strings
-- TOKEN_LEN deleted; maximum length of a reserved word

------------------------------------------------------------------------
-- "ORDER RESERVED" deleted; enumeration in one place: luaX.RESERVED
------------------------------------------------------------------------

-- terminal symbols denoted by reserved words: TK_AND to TK_WHILE
-- other terminal symbols: TK_NAME to TK_EOS
-- One "TK_XXX text" pair per line; parsed by luaX:init into the
-- token2string / string2token lookup tables.  Entries whose text starts
-- with '*' (and "<eof>") describe token classes, not literal keywords.
luaX.RESERVED = [[
TK_AND and
TK_BREAK break
TK_DO do
TK_ELSE else
TK_ELSEIF elseif
TK_END end
TK_FALSE false
TK_FOR for
TK_FUNCTION function
TK_IF if
TK_IN in
TK_LOCAL local
TK_NIL nil
TK_NOT not
TK_OR or
TK_REPEAT repeat
TK_RETURN return
TK_THEN then
TK_TRUE true
TK_UNTIL until
TK_WHILE while
TK_NAME *name
TK_CONCAT ..
TK_DOTS ...
TK_EQ ==
TK_GE >=
TK_LE <=
TK_NE ~=
TK_NUMBER *number
TK_STRING *string
TK_EOS <eof>]]
73
74-- NUM_RESERVED is not required; number of reserved words
75
76--[[--------------------------------------------------------------------
77-- Instead of passing seminfo, the Token struct (e.g. LS.t) is passed
78-- so that lexer functions can use its table element, LS.t.seminfo
79--
80-- Token (struct of LS.t and LS.lookahead):
81-- token -- token symbol
82-- seminfo -- semantics information
83--
84-- LexState (struct of LS; LS is initialized by luaX:setinput):
85-- current -- current character
86-- linenumber -- input line counter
87-- lastline -- line of last token 'consumed'
88-- t -- current token (table: struct Token)
89-- lookahead -- look ahead token (table: struct Token)
90-- fs -- 'FuncState' is private to the parser
91-- L -- LuaState
92-- z -- input stream
93-- buff -- buffer for tokens
94-- source -- current source name
95-- nestlevel -- level of nested non-terminals
96----------------------------------------------------------------------]]
97
98-- token2string is now a hash; see luaX:init
99
------------------------------------------------------------------------
-- initialize lexer
-- Builds the two lookup tables from luaX.RESERVED: token2string maps a
-- token symbol ("TK_AND") to its text ("and"); string2token is the
-- reverse mapping, used to recognize reserved words in luaX:lex.
------------------------------------------------------------------------
function luaX:init()
  local tok2str, str2tok = {}, {}
  -- each non-empty line of RESERVED holds one "TK_XXX text" pair
  for line in string.gfind(self.RESERVED, "[^\n]+") do
    local _, _, symbol, text = string.find(line, "(%S+)%s+(%S+)")
    tok2str[symbol] = text
    str2tok[text] = symbol
  end
  self.token2string = tok2str
  self.string2token = str2tok
end
112
luaX.MAXSRC = 80  -- maximum length of a formatted chunk id

------------------------------------------------------------------------
-- returns a suitably-formatted chunk name or id
-- * from lobject.c, used in llex.c and ldebug.c
-- * the result, out, is returned (was first argument)
-- * "=name" -> name (clipped); "@file" -> tail of file name with a
--   "..." prefix when clipped; anything else -> [string "first line"]
------------------------------------------------------------------------
function luaX:chunkid(source, bufflen)
  local tag = string.sub(source, 1, 1)
  if tag == "=" then
    -- "=name": use the name verbatim, minus the '=', clipped to bufflen
    return string.sub(source, 2, bufflen)
  end
  if tag == "@" then
    -- "@filename": keep the tail of the name, prefix "..." if clipped
    local fname = string.sub(source, 2) -- skip the '@'
    local room = bufflen - string.len(" `...' ")
    local prefix = ""
    if string.len(fname) > room then
      fname = string.sub(fname, 1 + string.len(fname) - room) -- last part
      prefix = "..."
    end
    return prefix..fname
  end
  -- plain string chunk: quote up to the first newline, clipped to fit
  local len = string.find(source, "\n", 1, 1) -- stop at first newline
  len = len and (len - 1) or string.len(source)
  local room = bufflen - string.len(" [string \"...\"] ")
  if len > room then len = room end
  if len < string.len(source) then -- must truncate?
    return "[string \""..string.sub(source, 1, len).."...".."\"]"
  end
  return "[string \""..source.."\"]"
end
152
153--[[--------------------------------------------------------------------
154-- Support functions for lexer
155-- * all lexer errors eventually reaches errorline:
156 checklimit -> syntaxerror -> error -> errorline
157 lexerror -> error -> errorline
158----------------------------------------------------------------------]]
159
------------------------------------------------------------------------
-- limit check, syntax error if fails (also called by parser)
-- * val: current count; limit: maximum allowed; msg: what is counted
------------------------------------------------------------------------
function luaX:checklimit(ls, val, limit, msg)
  if val <= limit then return end
  self:syntaxerror(ls, string.format("too many %s (limit=%d)", msg, limit))
end
169
------------------------------------------------------------------------
-- formats error message and throws error (also called by parser)
-- * all lexer/parser errors funnel through here; never returns
------------------------------------------------------------------------
function luaX:errorline(ls, s, token, line)
  local chunkname = self:chunkid(ls.source, self.MAXSRC)
  error(string.format("%s:%d: %s near `%s'", chunkname, line, s, token))
end
177
------------------------------------------------------------------------
-- throws an error, adds line number
-- * convenience wrapper: reports at the current input line
------------------------------------------------------------------------
function luaX:error(ls, s, token)
  self:errorline(ls, s, token, ls.linenumber)
end
184
------------------------------------------------------------------------
-- throws a syntax error (mainly called by parser)
-- * ls.t.token has to be set by the function calling luaX:lex
--   (see next() and lookahead() in lparser.c)
-- * picks the most informative "near ..." text for the current token
------------------------------------------------------------------------
function luaX:syntaxerror(ls, msg)
  local lasttoken
  local tok = ls.t.token
  if tok == "TK_NAME" then
    lasttoken = ls.t.seminfo          -- the identifier itself
  elseif tok == "TK_STRING" or tok == "TK_NUMBER" then
    lasttoken = ls.buff               -- raw literal text
  else
    -- FIX: token2str is a method taking (ls, token); the original call
    -- passed only ls.t.token, shifting it into the ls slot and leaving
    -- token nil (string.sub(nil, ...) would raise).  Now matches the
    -- correct call in luaX:lexerror.
    lasttoken = self:token2str(ls, ls.t.token)
  end
  self:error(ls, msg, lasttoken)
end
202
------------------------------------------------------------------------
-- look up token and return keyword if found (also called by parser)
-- * ls is accepted for signature compatibility but is not used
-- * non-"TK_" tokens are single-character tokens and stand for themselves
------------------------------------------------------------------------
function luaX:token2str(ls, token)
  if string.sub(token, 1, 3) == "TK_" then
    --lua_assert(string.len(token) == 1)
    return self.token2string[token]
  end
  return token
end
214
------------------------------------------------------------------------
-- throws a lexer error
-- * at end of stream the token buffer holds nothing useful, so report
--   the "<eof>" text instead of ls.buff
------------------------------------------------------------------------
function luaX:lexerror(ls, s, token)
  local near
  if token == "TK_EOS" then
    near = self:token2str(ls, token)
  else
    near = ls.buff
  end
  self:error(ls, s, near)
end
225
------------------------------------------------------------------------
-- move on to next line
-- * consumes the '\n', bumps the line counter and enforces MAX_INT
------------------------------------------------------------------------
function luaX:inclinenumber(LS)
  self:next(LS) -- skip '\n'
  local n = LS.linenumber + 1
  LS.linenumber = n
  self:checklimit(LS, n, self.MAX_INT, "lines in a chunk")
end

luaX.MAX_INT = 2147483645 -- INT_MAX-2 for 32-bit systems (llimits.h)
236
------------------------------------------------------------------------
-- initializes an input stream for lexing
-- * if LS (the lexer state) is passed as a table, then it is filled in,
--   otherwise it has to be retrieved as a return value
-- * L: LuaState; z: input stream; source: chunk name
------------------------------------------------------------------------
function luaX:setinput(L, LS, z, source)
  LS = LS or {}                        -- create struct
  LS.lookahead = LS.lookahead or {}
  LS.t = LS.t or {}
  LS.L = L
  LS.lookahead.token = "TK_EOS"        -- no look-ahead token
  LS.z = z
  LS.fs = nil
  LS.linenumber = 1
  LS.lastline = 1
  LS.source = source
  self:next(LS)                        -- read first char
  if LS.current == "#" then
    -- leading '#' marks a Unix "shebang" line; discard up to newline
    repeat
      self:next(LS)
    until LS.current == "\n" or LS.current == "EOZ"
  end
  return LS
end
261
262--[[--------------------------------------------------------------------
263-- LEXICAL ANALYZER
264----------------------------------------------------------------------]]
265
266-- NOTE the following buffer handling stuff are no longer required:
267-- use buffer to store names, literal strings and numbers
268-- EXTRABUFF deleted; extra space to allocate when growing buffer
269-- MAXNOCHECK deleted; maximum number of chars that can be read without checking buffer size
270-- checkbuffer(LS, len) deleted
271
------------------------------------------------------------------------
-- gets the next character and returns it
-- * also stores it in LS.current (may be the string "EOZ" at end)
------------------------------------------------------------------------
function luaX:next(LS)
  LS.current = luaZ:zgetc(LS.z)
  return LS.current
end
280
------------------------------------------------------------------------
-- saves the given character into the token buffer
-- * performance is not a concern here (see file notes), so plain
--   string concatenation is used
------------------------------------------------------------------------
function luaX:save(LS, c)
  LS.buff = LS.buff..c
end
287
------------------------------------------------------------------------
-- save current character into token buffer, grabs next character
-- * returns the new current character
------------------------------------------------------------------------
function luaX:save_and_next(LS)
  self:save(LS, LS.current)
  return self:next(LS)
end
295
------------------------------------------------------------------------
-- reads a name
-- * originally returns the string length
-- * first character is consumed unconditionally (caller verified it is
--   a letter or '_'); stops at the first non-identifier character
------------------------------------------------------------------------
function luaX:readname(LS)
  LS.buff = ""
  while true do
    self:save_and_next(LS)
    local c = LS.current
    if c == "EOZ" or not string.find(c, "[_%w]") then break end
  end
  return LS.buff
end
307
------------------------------------------------------------------------
-- reads a number (LUA_NUMBER)
-- * comma == true means the caller already consumed a leading '.'
-- * on success, Token.seminfo holds the numeric value
------------------------------------------------------------------------
function luaX:read_numeral(LS, comma, Token)
  -- collect consecutive decimal digits into the token buffer
  local function scan_digits()
    while string.find(LS.current, "%d") do
      self:save_and_next(LS)
    end
  end
  LS.buff = ""
  if comma then self:save(LS, '.') end
  scan_digits()                   -- integer part
  if LS.current == "." then
    self:save_and_next(LS)
    if LS.current == "." then
      -- "1..2" could be number..concat or concat..number: reject it
      self:save_and_next(LS)
      self:lexerror(LS,
        "ambiguous syntax (decimal point x string concatenation)",
        "TK_NUMBER")
    end
  end
  scan_digits()                   -- fractional part
  if LS.current == "e" or LS.current == "E" then
    self:save_and_next(LS)        -- read 'E'
    if LS.current == "+" or LS.current == "-" then
      self:save_and_next(LS)      -- optional exponent sign
    end
    scan_digits()                 -- exponent digits
  end
  local value = tonumber(LS.buff)
  if not value then
    self:lexerror(LS, "malformed number", "TK_NUMBER")
  end
  Token.seminfo = value
end
344
------------------------------------------------------------------------
-- reads a long string or long comment
-- * caller has consumed the first '['; LS.current is the second '['
-- * Token == nil means we are skipping a long comment: the buffer is
--   periodically discarded and no seminfo is produced
-- * 'cont' tracks the nesting depth of embedded [[ ]] pairs
------------------------------------------------------------------------
function luaX:read_long_string(LS, Token)
  local cont = 0
  LS.buff = ""
  self:save(LS, "[") -- save first '['
  self:save_and_next(LS) -- pass the second '['
  if LS.current == "\n" then -- string starts with a newline?
    self:inclinenumber(LS) -- skip it
  end
  while true do
    local c = LS.current
    if c == "EOZ" then
      self:lexerror(LS, Token and "unfinished long string" or
                    "unfinished long comment", "TK_EOS")
    elseif c == "[" then
      self:save_and_next(LS)
      if LS.current == "[" then
        cont = cont + 1 -- nested [[: one level deeper
        self:save_and_next(LS)
      end
    elseif c == "]" then
      self:save_and_next(LS)
      if LS.current == "]" then
        if cont == 0 then break end -- matching outer ']]' found
        cont = cont - 1
        self:save_and_next(LS)
      end
    elseif c == "\n" then
      self:save(LS, "\n")
      self:inclinenumber(LS)
      if not Token then LS.buff = "" end -- reset buffer to avoid wasting space
    else
      self:save_and_next(LS)
    end--if c
  end--while
  self:save_and_next(LS) -- skip the second ']'
  if Token then
    -- strip the enclosing '[[' and ']]' delimiters from the buffer
    Token.seminfo = string.sub(LS.buff, 3, -3)
  end
end
387
------------------------------------------------------------------------
-- reads a string
-- * del is the delimiter character (single or double quote)
-- * on success, Token.seminfo holds the string contents with the
--   delimiters removed and escape sequences resolved
------------------------------------------------------------------------
function luaX:read_string(LS, del, Token)
  LS.buff = ""
  self:save_and_next(LS) -- save and skip the opening delimiter
  while LS.current ~= del do
    local c = LS.current
    if c == "EOZ" then
      self:lexerror(LS, "unfinished string", "TK_EOS")
    elseif c == "\n" then
      self:lexerror(LS, "unfinished string", "TK_STRING")
    elseif c == "\\" then
      c = self:next(LS) -- do not save the '\'
      if c ~= "EOZ" then -- will raise an error next loop
        -- escapes handling greatly simplified here:
        -- one of \a \b \f \n \r \t \v, or an escaped real newline (i==8)
        local i = string.find("abfnrtv\n", c, 1, 1)
        if i then
          self:save(LS, string.sub("\a\b\f\n\r\t\v\n", i, i))
          if i == 8 then self:inclinenumber(LS) else self:next(LS) end
        elseif not string.find(c, "%d") then
          self:save_and_next(LS) -- handles \\, \", \', and \?
        else -- \xxx: up to 3 decimal digits (string->number coercion)
          c, i = 0, 0
          repeat
            c = 10 * c + LS.current
            self:next(LS)
            i = i + 1
          until i >= 3 or not string.find(LS.current, "%d")
          if c > 255 then -- UCHAR_MAX
            self:lexerror(LS, "escape sequence too large", "TK_STRING")
          end
          self:save(LS, string.char(c))
        end
      end
    else
      self:save_and_next(LS)
    end--if c
  end--while
  self:save_and_next(LS) -- skip delimiter
  -- drop the enclosing quote characters from the buffer
  Token.seminfo = string.sub(LS.buff, 2, -2)
end
430
------------------------------------------------------------------------
-- main lexer function
-- * returns the next token symbol as a string: literal single-character
--   tokens stand for themselves, others are "TK_*" names
-- * Token.seminfo is filled in for names, numbers and strings
-- * loops until a whole token is available (newlines, whitespace and
--   comments produce no token)
------------------------------------------------------------------------
function luaX:lex(LS, Token)
  while true do
    local c = LS.current
    ----------------------------------------------------------------
    if c == "\n" then
      self:inclinenumber(LS) -- no token; keep scanning
    ----------------------------------------------------------------
    elseif c == "-" then
      c = self:next(LS)
      if c ~= "-" then return "-" end
      -- else is a comment
      -- NOTE: relies on short-circuit evaluation; self:next(LS) is only
      -- called (consuming a char) when the first '[' was seen
      c = self:next(LS)
      if c == "[" and self:next(LS) == "[" then
        self:read_long_string(LS) -- long comment
      else -- short comment
        c = LS.current
        while c ~= "\n" and c ~= "EOZ" do
          c = self:next(LS)
        end
      end
    ----------------------------------------------------------------
    elseif c == "[" then
      c = self:next(LS)
      if c ~= "[" then return "["
      else
        self:read_long_string(LS, Token)
        return "TK_STRING"
      end
    ----------------------------------------------------------------
    elseif c == "=" then
      c = self:next(LS)
      if c ~= "=" then return "="
      else self:next(LS); return "TK_EQ" end
    ----------------------------------------------------------------
    elseif c == "<" then
      c = self:next(LS)
      if c ~= "=" then return "<"
      else self:next(LS); return "TK_LE" end
    ----------------------------------------------------------------
    elseif c == ">" then
      c = self:next(LS)
      if c ~= "=" then return ">"
      else self:next(LS); return "TK_GE" end
    ----------------------------------------------------------------
    elseif c == "~" then
      c = self:next(LS)
      if c ~= "=" then return "~"
      else self:next(LS); return "TK_NE" end
    ----------------------------------------------------------------
    elseif c == "\"" or c == "'" then
      self:read_string(LS, c, Token)
      return "TK_STRING"
    ----------------------------------------------------------------
    elseif c == "." then
      c = self:next(LS)
      if c == "." then
        c = self:next(LS)
        if c == "." then
          self:next(LS)
          return "TK_DOTS" -- ...
        else
          return "TK_CONCAT" -- ..
        end
      elseif not string.find(c, "%d") then
        return '.'
      else
        -- ".5" style number; the consumed '.' is re-saved by read_numeral
        self:read_numeral(LS, true, Token)
        return "TK_NUMBER"
      end
    ----------------------------------------------------------------
    elseif c == "EOZ" then
      return "TK_EOS"
    ----------------------------------------------------------------
    else -- default
      if string.find(c, "%s") then
        self:next(LS) -- skip whitespace
      elseif string.find(c, "%d") then
        self:read_numeral(LS, false, Token)
        return "TK_NUMBER"
      elseif string.find(c, "[_%a]") then
        -- identifier or reserved word
        local l = self:readname(LS)
        local tok = self.string2token[l]
        if tok then return tok end -- reserved word?
        Token.seminfo = l
        return "TK_NAME"
      else
        if string.find(c, "%c") then
          self:error(LS, "invalid control char",
            string.format("char(%d)", string.byte(c)))
        end
        self:next(LS)
        return c -- single-char tokens (+ - / ...)
      end
    ----------------------------------------------------------------
    end--if c
  end--while
end