From 2d1df4714e2736dbde7855ddcd76b4c1de822fa5 Mon Sep 17 00:00:00 2001
From: David Walter Seikel
Date: Mon, 23 Jan 2012 21:58:02 +1000
Subject: Added a big bunch of example lua scripts for testing the speed of
 lua compiling.

---
 LuaSL/testLua/yueliang-0.4.1/orig-5.1.3/llex.lua | 686 +++++++++++++++++++++++
 1 file changed, 686 insertions(+)
 create mode 100644 LuaSL/testLua/yueliang-0.4.1/orig-5.1.3/llex.lua

diff --git a/LuaSL/testLua/yueliang-0.4.1/orig-5.1.3/llex.lua b/LuaSL/testLua/yueliang-0.4.1/orig-5.1.3/llex.lua
new file mode 100644
index 0000000..7949326
--- /dev/null
+++ b/LuaSL/testLua/yueliang-0.4.1/orig-5.1.3/llex.lua
@@ -0,0 +1,686 @@
+--[[--------------------------------------------------------------------
+
+  llex.lua
+  Lua lexical analyzer in Lua
+  This file is part of Yueliang.
+
+  Copyright (c) 2005-2006 Kein-Hong Man
+  The COPYRIGHT file describes the conditions
+  under which this software may be distributed.
+
+  See the ChangeLog for more information.
+
+----------------------------------------------------------------------]]
+
+--[[--------------------------------------------------------------------
+-- Notes:
+-- * intended to 'imitate' llex.c code; performance is not a concern
+-- * tokens are strings; code structure largely retained
+-- * deleted stuff (compared to llex.c) is noted, comments retained
+-- * nextc() returns the currently read character to simplify coding
+--   here; next() in llex.c does not return anything
+-- * compatibility code is marked with "--#" comments
+--
+-- Added:
+-- * luaX:chunkid (function luaO_chunkid from lobject.c)
+-- * luaX:str2d (function luaO_str2d from lobject.c)
+-- * luaX.LUA_QS used in luaX:lexerror (from luaconf.h)
+-- * luaX.LUA_COMPAT_LSTR in luaX:read_long_string (from luaconf.h)
+-- * luaX.MAX_INT used in luaX:inclinenumber (from llimits.h)
+--
+-- To use the lexer:
+-- (1) luaX:init() to initialize the lexer
+-- (2) luaX:setinput() to set the input stream to lex
+-- (3) call luaX:next() or luaX:lookahead() to get tokens,
+--     until "TK_EOS": luaX:next()
+-- * since EOZ is returned as a string, be careful when regexp testing
+--
+-- Not implemented:
+-- * luaX_newstring: not required by this Lua implementation
+-- * buffer MAX_SIZET size limit (from llimits.h) test not implemented
+--   in the interest of performance
+-- * locale-aware number handling is largely redundant as Lua's
+--   tonumber() function is already capable of this
+--
+-- Changed in 5.1.x:
+-- * TK_NAME token order moved down
+-- * string representation for TK_NAME, TK_NUMBER, TK_STRING changed
+-- * token struct renamed to lower case (LS -> ls)
+-- * LexState struct: removed nestlevel, added decpoint
+-- * error message functions have been greatly simplified
+-- * token2string renamed to luaX_tokens, exposed in llex.h
+-- * lexer now handles all kinds of newlines, including CRLF
+-- * shbang first line handling removed from luaX:setinput;
+--   it is now done in lauxlib.c (luaL_loadfile)
+-- * next(ls) macro renamed to nextc(ls) due to new luaX_next function
+-- * EXTRABUFF and MAXNOCHECK removed due to lexer changes
+-- * checkbuffer(ls, len) macro deleted
+-- * luaX:read_numeral now has 3 support functions: luaX:trydecpoint,
+--   luaX:buffreplace and (luaO_str2d from lobject.c) luaX:str2d
+-- * luaX:read_numeral is now more promiscuous in slurping characters;
+--   hexadecimal numbers were added, locale-aware decimal points too
+-- * luaX:skip_sep is new; used by luaX:read_long_string
+-- * luaX:read_long_string handles new-style long blocks, with some
+--   optional compatibility code
+-- * luaX:llex: parts changed to support new-style long blocks
+-- * luaX:llex: readname functionality has been folded in
+-- * luaX:llex: removed test for control characters
+----------------------------------------------------------------------]]
+
+luaX = {}
+
+-- FIRST_RESERVED is not required as tokens are manipulated as strings
+-- TOKEN_LEN deleted; maximum length of a reserved word not needed
+
+------------------------------------------------------------------------
+-- "ORDER RESERVED" deleted; enumeration in one place: luaX.RESERVED
+------------------------------------------------------------------------
+
+-- terminal symbols denoted by reserved words: TK_AND to TK_WHILE
+-- other terminal symbols: TK_NAME to TK_EOS
+luaX.RESERVED = [[
+TK_AND and
+TK_BREAK break
+TK_DO do
+TK_ELSE else
+TK_ELSEIF elseif
+TK_END end
+TK_FALSE false
+TK_FOR for
+TK_FUNCTION function
+TK_IF if
+TK_IN in
+TK_LOCAL local
+TK_NIL nil
+TK_NOT not
+TK_OR or
+TK_REPEAT repeat
+TK_RETURN return
+TK_THEN then
+TK_TRUE true
+TK_UNTIL until
+TK_WHILE while
+TK_CONCAT ..
+TK_DOTS ...
+TK_EQ ==
+TK_GE >=
+TK_LE <=
+TK_NE ~=
+TK_NAME <name>
+TK_NUMBER <number>
+TK_STRING <string>
+TK_EOS <eof>]]
+
+-- NUM_RESERVED is not required; number of reserved words
+
+--[[--------------------------------------------------------------------
+-- Instead of passing seminfo, the Token struct (e.g. ls.t) is passed
+-- so that lexer functions can use its table element, ls.t.seminfo
+--
+-- SemInfo (struct no longer needed, a mixed-type value is used)
+--
+-- Token (struct of ls.t and ls.lookahead):
+--   token  -- token symbol
+--   seminfo  -- semantics information
+--
+-- LexState (struct of ls; ls is initialized by luaX:setinput):
+--   current  -- current character (charint)
+--   linenumber  -- input line counter
+--   lastline  -- line of last token 'consumed'
+--   t  -- current token (table: struct Token)
+--   lookahead  -- look ahead token (table: struct Token)
+--   fs  -- 'FuncState' is private to the parser
+--   L  -- LuaState
+--   z  -- input stream
+--   buff  -- buffer for tokens
+--   source  -- current source name
+--   decpoint  -- locale decimal point
+--   nestlevel  -- level of nested non-terminals
+----------------------------------------------------------------------]]
+
+-- luaX.tokens (was luaX_tokens) is now a hash; see luaX:init
+
+luaX.MAXSRC = 80
+luaX.MAX_INT = 2147483645  -- constants from elsewhere (see above)
+luaX.LUA_QS = "'%s'"
+luaX.LUA_COMPAT_LSTR = 1
+--luaX.MAX_SIZET = 4294967293
+
+------------------------------------------------------------------------
+-- initialize lexer
+-- * original luaX_init has code to create and register token strings
+-- * luaX.tokens: TK_* -> token
+-- * luaX.enums:  token -> TK_* (used in luaX:llex)
+------------------------------------------------------------------------
+function luaX:init()
+  local tokens, enums = {}, {}
+  for v in string.gmatch(self.RESERVED, "[^\n]+") do
+    local _, _, tok, str = string.find(v, "(%S+)%s+(%S+)")
+    tokens[tok] = str
+    enums[str] = tok
+  end
+  self.tokens = tokens
+  self.enums = enums
+end
+
+------------------------------------------------------------------------
+-- returns a suitably-formatted chunk name or id
+-- * from lobject.c, used in llex.c and ldebug.c
+-- * the result, out, is returned (was first argument)
+------------------------------------------------------------------------
+function luaX:chunkid(source, bufflen)
+  local out
+  local first = string.sub(source, 1, 1)
+  if first == "=" then
"=" then + out = string.sub(source, 2, bufflen) -- remove first char + else -- out = "source", or "...source" + if first == "@" then + source = string.sub(source, 2) -- skip the '@' + bufflen = bufflen - #" '...' " + local l = #source + out = "" + if l > bufflen then + source = string.sub(source, 1 + l - bufflen) -- get last part of file name + out = out.."..." + end + out = out..source + else -- out = [string "string"] + local len = string.find(source, "[\n\r]") -- stop at first newline + len = len and (len - 1) or #source + bufflen = bufflen - #(" [string \"...\"] ") + if len > bufflen then len = bufflen end + out = "[string \"" + if len < #source then -- must truncate? + out = out..string.sub(source, 1, len).."..." + else + out = out..source + end + out = out.."\"]" + end + end + return out +end + +--[[-------------------------------------------------------------------- +-- Support functions for lexer +-- * all lexer errors eventually reaches lexerror: + syntaxerror -> lexerror +----------------------------------------------------------------------]] + +------------------------------------------------------------------------ +-- look up token and return keyword if found (also called by parser) +------------------------------------------------------------------------ +function luaX:token2str(ls, token) + if string.sub(token, 1, 3) ~= "TK_" then + if string.find(token, "%c") then + return string.format("char(%d)", string.byte(token)) + end + return token + else + return self.tokens[token] + end +end + +------------------------------------------------------------------------ +-- throws a lexer error +-- * txtToken has been made local to luaX:lexerror +-- * can't communicate LUA_ERRSYNTAX, so it is unimplemented +------------------------------------------------------------------------ +function luaX:lexerror(ls, msg, token) + local function txtToken(ls, token) + if token == "TK_NAME" or + token == "TK_STRING" or + token == "TK_NUMBER" then + return ls.buff + else + return self:token2str(ls, token) + end + end + local buff = self:chunkid(ls.source, self.MAXSRC) + local msg = string.format("%s:%d: %s", buff, ls.linenumber, msg) + if token then + msg = string.format("%s near "..self.LUA_QS, msg, txtToken(ls, token)) + end + -- luaD_throw(ls->L, LUA_ERRSYNTAX) + error(msg) +end + +------------------------------------------------------------------------ +-- throws a syntax error (mainly called by parser) +-- * ls.t.token has to be set by the function calling luaX:llex +-- (see luaX:next and luaX:lookahead elsewhere in this file) +------------------------------------------------------------------------ +function luaX:syntaxerror(ls, msg) + self:lexerror(ls, msg, ls.t.token) +end + +------------------------------------------------------------------------ +-- move on to next line +------------------------------------------------------------------------ +function luaX:currIsNewline(ls) + return ls.current == "\n" or ls.current == "\r" +end + +function luaX:inclinenumber(ls) + local old = ls.current + -- lua_assert(currIsNewline(ls)) + self:nextc(ls) -- skip '\n' or '\r' + if self:currIsNewline(ls) and ls.current ~= old then + self:nextc(ls) -- skip '\n\r' or '\r\n' + end + ls.linenumber = ls.linenumber + 1 + if ls.linenumber >= self.MAX_INT then + self:syntaxerror(ls, "chunk has too many lines") + end +end + +------------------------------------------------------------------------ +-- initializes an input stream for lexing +-- * if ls (the lexer state) is passed as a table, then it is filled in, +-- 
+-- * LUA_MINBUFFER not used; buffer handling not required any more
+------------------------------------------------------------------------
+function luaX:setinput(L, ls, z, source)
+  if not ls then ls = {} end  -- create struct
+  if not ls.lookahead then ls.lookahead = {} end
+  if not ls.t then ls.t = {} end
+  ls.decpoint = "."
+  ls.L = L
+  ls.lookahead.token = "TK_EOS"  -- no look-ahead token
+  ls.z = z
+  ls.fs = nil
+  ls.linenumber = 1
+  ls.lastline = 1
+  ls.source = source
+  self:nextc(ls)  -- read first char
+end
+
+--[[--------------------------------------------------------------------
+-- LEXICAL ANALYZER
+----------------------------------------------------------------------]]
+
+------------------------------------------------------------------------
+-- checks if current character read is found in the set 'set'
+------------------------------------------------------------------------
+function luaX:check_next(ls, set)
+  if not string.find(set, ls.current, 1, 1) then
+    return false
+  end
+  self:save_and_next(ls)
+  return true
+end
+
+------------------------------------------------------------------------
+-- retrieve next token, checking the lookahead buffer if necessary
+-- * note that the macro next(ls) in llex.c is now luaX:nextc
+-- * used in lparser.c (various places)
+------------------------------------------------------------------------
+function luaX:next(ls)
+  ls.lastline = ls.linenumber
+  if ls.lookahead.token ~= "TK_EOS" then  -- is there a look-ahead token?
+    -- this must be copy-by-value
+    ls.t.seminfo = ls.lookahead.seminfo  -- use this one
+    ls.t.token = ls.lookahead.token
+    ls.lookahead.token = "TK_EOS"  -- and discharge it
+  else
+    ls.t.token = self:llex(ls, ls.t)  -- read next token
+  end
+end
+
+------------------------------------------------------------------------
+-- fill in the lookahead buffer
+-- * used in lparser.c:constructor
+------------------------------------------------------------------------
+function luaX:lookahead(ls)
+  -- lua_assert(ls.lookahead.token == "TK_EOS")
+  ls.lookahead.token = self:llex(ls, ls.lookahead)
+end
+
+------------------------------------------------------------------------
+-- gets the next character and returns it
+-- * this is the next() macro in llex.c; see notes at the beginning
+------------------------------------------------------------------------
+function luaX:nextc(ls)
+  local c = luaZ:zgetc(ls.z)
+  ls.current = c
+  return c
+end
+
+------------------------------------------------------------------------
+-- saves the given character into the token buffer
+-- * buffer handling code removed, not used in this implementation
+-- * test for maximum token buffer length not used, makes things faster
+------------------------------------------------------------------------
+
+function luaX:save(ls, c)
+  local buff = ls.buff
+  -- if you want to use this, please uncomment luaX.MAX_SIZET further up
+  --if #buff > self.MAX_SIZET then
+  --  self:lexerror(ls, "lexical element too long")
+  --end
+  ls.buff = buff..c
+end
+
+------------------------------------------------------------------------
+-- save current character into token buffer, grabs next character
+-- * like luaX:nextc, returns the character read for convenience
+------------------------------------------------------------------------
+function luaX:save_and_next(ls)
+  self:save(ls, ls.current)
+  return self:nextc(ls)
+end
+
+------------------------------------------------------------------------
+-- LUA_NUMBER
+-- * luaX:read_numeral is the main lexer function to read a number
+-- * luaX:str2d, luaX:buffreplace, luaX:trydecpoint are support functions
+------------------------------------------------------------------------
+
+------------------------------------------------------------------------
+-- string to number converter (was luaO_str2d from lobject.c)
+-- * returns the number, nil if fails (originally returns a boolean)
+-- * conversion function originally lua_str2number(s,p), a macro which
+--   maps to the strtod() function by default (from luaconf.h)
+------------------------------------------------------------------------
+function luaX:str2d(s)
+  local result = tonumber(s)
+  if result then return result end
+  -- conversion failed
+  if string.lower(string.sub(s, 1, 2)) == "0x" then  -- maybe a hexadecimal constant?
+    result = tonumber(s, 16)
+    if result then return result end  -- most common case
+    -- Was: invalid trailing characters?
+    -- In C, this function then skips over trailing spaces.
+    -- true is returned if nothing else is found except for spaces.
+    -- If there is still something else, then it returns a false.
+    -- All this is not necessary using Lua's tonumber.
+  end
+  return nil
+end
+
+------------------------------------------------------------------------
+-- single-character replacement, for locale-aware decimal points
+------------------------------------------------------------------------
+function luaX:buffreplace(ls, from, to)
+  local result, buff = "", ls.buff
+  for p = 1, #buff do
+    local c = string.sub(buff, p, p)
+    if c == from then c = to end
+    result = result..c
+  end
+  ls.buff = result
+end
+
+------------------------------------------------------------------------
+-- Attempt to convert a number by translating '.' decimal points to
+-- the decimal point character used by the current locale. This is not
+-- needed in Yueliang as Lua's tonumber() is already locale-aware.
+-- Instead, the code is here in case the user implements localeconv().
+------------------------------------------------------------------------
+function luaX:trydecpoint(ls, Token)
+  -- format error: try to update decimal point separator
+  local old = ls.decpoint
+  -- translate the following to Lua if you implement localeconv():
+  -- struct lconv *cv = localeconv();
+  -- ls->decpoint = (cv ? cv->decimal_point[0] : '.');
+  self:buffreplace(ls, old, ls.decpoint)  -- try updated decimal separator
+  local seminfo = self:str2d(ls.buff)
+  Token.seminfo = seminfo
+  if not seminfo then
+    -- format error with correct decimal point: no more options
+    self:buffreplace(ls, ls.decpoint, ".")  -- undo change (for error message)
+    self:lexerror(ls, "malformed number", "TK_NUMBER")
+  end
+end
+
+------------------------------------------------------------------------
+-- main number conversion function
+-- * "^%w$" needed in the scan in order to detect "EOZ"
+------------------------------------------------------------------------
+function luaX:read_numeral(ls, Token)
+  -- lua_assert(string.find(ls.current, "%d"))
+  repeat
+    self:save_and_next(ls)
+  until string.find(ls.current, "%D") and ls.current ~= "."
+  if self:check_next(ls, "Ee") then  -- 'E'?
+    self:check_next(ls, "+-")  -- optional exponent sign
+  end
+  while string.find(ls.current, "^%w$") or ls.current == "_" do
+    self:save_and_next(ls)
+  end
+  self:buffreplace(ls, ".", ls.decpoint)  -- follow locale for decimal point
+  local seminfo = self:str2d(ls.buff)
+  Token.seminfo = seminfo
+  if not seminfo then  -- format error?
+    self:trydecpoint(ls, Token)  -- try to update decimal point separator
+  end
+end
+
+------------------------------------------------------------------------
+-- count separators ("=") in a long string delimiter
+-- * used by luaX:read_long_string
+------------------------------------------------------------------------
+function luaX:skip_sep(ls)
+  local count = 0
+  local s = ls.current
+  -- lua_assert(s == "[" or s == "]")
+  self:save_and_next(ls)
+  while ls.current == "=" do
+    self:save_and_next(ls)
+    count = count + 1
+  end
+  return (ls.current == s) and count or (-count) - 1
+end
+
+------------------------------------------------------------------------
+-- reads a long string or long comment
+------------------------------------------------------------------------
+function luaX:read_long_string(ls, Token, sep)
+  local cont = 0
+  self:save_and_next(ls)  -- skip 2nd '['
+  if self:currIsNewline(ls) then  -- string starts with a newline?
+    self:inclinenumber(ls)  -- skip it
+  end
+  while true do
+    local c = ls.current
+    if c == "EOZ" then
+      self:lexerror(ls, Token and "unfinished long string" or
+                    "unfinished long comment", "TK_EOS")
+    elseif c == "[" then
+      --# compatibility code start
+      if self.LUA_COMPAT_LSTR then
+        if self:skip_sep(ls) == sep then
+          self:save_and_next(ls)  -- skip 2nd '['
+          cont = cont + 1
+          --# compatibility code start
+          if self.LUA_COMPAT_LSTR == 1 then
+            if sep == 0 then
+              self:lexerror(ls, "nesting of [[...]] is deprecated", "[")
+            end
+          end
+          --# compatibility code end
+        end
+      end
+      --# compatibility code end
+    elseif c == "]" then
+      if self:skip_sep(ls) == sep then
+        self:save_and_next(ls)  -- skip 2nd ']'
+        --# compatibility code start
+        if self.LUA_COMPAT_LSTR and self.LUA_COMPAT_LSTR == 2 then
+          cont = cont - 1
+          if sep == 0 and cont >= 0 then break end
+        end
+        --# compatibility code end
+        break
+      end
+    elseif self:currIsNewline(ls) then
+      self:save(ls, "\n")
+      self:inclinenumber(ls)
+      if not Token then ls.buff = "" end  -- avoid wasting space
+    else  -- default
+      if Token then
+        self:save_and_next(ls)
+      else
+        self:nextc(ls)
+      end
+    end--if c
+  end--while
+  if Token then
+    local p = 3 + sep
+    Token.seminfo = string.sub(ls.buff, p, -p)
+  end
+end
+
+------------------------------------------------------------------------
+-- reads a string
+-- * has been restructured significantly compared to the original C code
+------------------------------------------------------------------------
+
+function luaX:read_string(ls, del, Token)
+  self:save_and_next(ls)
+  while ls.current ~= del do
+    local c = ls.current
+    if c == "EOZ" then
+      self:lexerror(ls, "unfinished string", "TK_EOS")
+    elseif self:currIsNewline(ls) then
+      self:lexerror(ls, "unfinished string", "TK_STRING")
+    elseif c == "\\" then
+      c = self:nextc(ls)  -- do not save the '\'
+      if self:currIsNewline(ls) then  -- go through
+        self:save(ls, "\n")
+        self:inclinenumber(ls)
+      elseif c ~= "EOZ" then  -- will raise an error next loop
+        -- escapes handling greatly simplified here:
+        local i = string.find("abfnrtv", c, 1, 1)
+        if i then
+          self:save(ls, string.sub("\a\b\f\n\r\t\v", i, i))
+          self:nextc(ls)
+        elseif not string.find(c, "%d") then
+          self:save_and_next(ls)  -- handles \\, \", \', and \?
+        else  -- \xxx
+          c, i = 0, 0
+          repeat
+            c = 10 * c + ls.current
+            self:nextc(ls)
+            i = i + 1
+          until i >= 3 or not string.find(ls.current, "%d")
+          if c > 255 then  -- UCHAR_MAX
+            self:lexerror(ls, "escape sequence too large", "TK_STRING")
+          end
+          self:save(ls, string.char(c))
+        end
+      end
+    else
+      self:save_and_next(ls)
+    end--if c
+  end--while
+  self:save_and_next(ls)  -- skip delimiter
+  Token.seminfo = string.sub(ls.buff, 2, -2)
+end
+
+------------------------------------------------------------------------
+-- main lexer function
+------------------------------------------------------------------------
+function luaX:llex(ls, Token)
+  ls.buff = ""
+  while true do
+    local c = ls.current
+    ----------------------------------------------------------------
+    if self:currIsNewline(ls) then
+      self:inclinenumber(ls)
+    ----------------------------------------------------------------
+    elseif c == "-" then
+      c = self:nextc(ls)
+      if c ~= "-" then return "-" end
+      -- else is a comment
+      local sep = -1
+      if self:nextc(ls) == '[' then
+        sep = self:skip_sep(ls)
+        ls.buff = ""  -- 'skip_sep' may dirty the buffer
+      end
+      if sep >= 0 then
+        self:read_long_string(ls, nil, sep)  -- long comment
+        ls.buff = ""
+      else  -- else short comment
+        while not self:currIsNewline(ls) and ls.current ~= "EOZ" do
+          self:nextc(ls)
+        end
+      end
+    ----------------------------------------------------------------
+    elseif c == "[" then
+      local sep = self:skip_sep(ls)
+      if sep >= 0 then
+        self:read_long_string(ls, Token, sep)
+        return "TK_STRING"
+      elseif sep == -1 then
+        return "["
+      else
+        self:lexerror(ls, "invalid long string delimiter", "TK_STRING")
+      end
+    ----------------------------------------------------------------
+    elseif c == "=" then
+      c = self:nextc(ls)
+      if c ~= "=" then return "="
+      else self:nextc(ls); return "TK_EQ" end
+    ----------------------------------------------------------------
+    elseif c == "<" then
+      c = self:nextc(ls)
+      if c ~= "=" then return "<"
+      else self:nextc(ls); return "TK_LE" end
+    ----------------------------------------------------------------
+    elseif c == ">" then
+      c = self:nextc(ls)
+      if c ~= "=" then return ">"
+      else self:nextc(ls); return "TK_GE" end
+    ----------------------------------------------------------------
+    elseif c == "~" then
+      c = self:nextc(ls)
+      if c ~= "=" then return "~"
+      else self:nextc(ls); return "TK_NE" end
+    ----------------------------------------------------------------
+    elseif c == "\"" or c == "'" then
+      self:read_string(ls, c, Token)
+      return "TK_STRING"
+    ----------------------------------------------------------------
+    elseif c == "." then
+      c = self:save_and_next(ls)
+      if self:check_next(ls, ".") then
+        if self:check_next(ls, ".") then
+          return "TK_DOTS"  -- ...
+        else return "TK_CONCAT"  -- ..
+        end
+      elseif not string.find(c, "%d") then
+        return "."
+      else
+        self:read_numeral(ls, Token)
+        return "TK_NUMBER"
+      end
+    ----------------------------------------------------------------
+    elseif c == "EOZ" then
+      return "TK_EOS"
+    ----------------------------------------------------------------
+    else  -- default
+      if string.find(c, "%s") then
+        -- lua_assert(self:currIsNewline(ls))
+        self:nextc(ls)
+      elseif string.find(c, "%d") then
+        self:read_numeral(ls, Token)
+        return "TK_NUMBER"
+      elseif string.find(c, "[_%a]") then
+        -- identifier or reserved word
+        repeat
+          c = self:save_and_next(ls)
+        until c == "EOZ" or not string.find(c, "[_%w]")
+        local ts = ls.buff
+        local tok = self.enums[ts]
+        if tok then return tok end  -- reserved word?
+        Token.seminfo = ts
+        return "TK_NAME"
+      else
+        self:nextc(ls)
+        return c  -- single-char tokens (+ - / ...)
+      end
+    ----------------------------------------------------------------
+    end--if c
+  end--while
+end
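
The "To use the lexer" notes near the top of the file boil down to three calls: luaX:init(), luaX:setinput(), then luaX:next() in a loop until "TK_EOS". Below is a minimal driver sketch of that flow. The luaZ table here is a hand-rolled stand-in for Yueliang's lzio.lua stream module (only the zgetc() method that luaX:nextc() actually calls is implemented); the load path, chunk name, and sample source are illustrative.

-- Stand-in for Yueliang's lzio.lua: zgetc() feeds one character at a
-- time and returns the string "EOZ" at end of input, as luaX expects.
luaZ = {}
function luaZ:zgetc(z)
  z.pos = z.pos + 1
  if z.pos > #z.data then return "EOZ" end
  return string.sub(z.data, z.pos, z.pos)
end

dofile("llex.lua")  -- illustrative path; loads the luaX table above

luaX:init()
local ls = {}
local z = { data = "local answer = 4.2e1 -- forty-two\n", pos = 0 }
luaX:setinput(nil, ls, z, "@example.lua")
repeat
  luaX:next(ls)
  -- ls.t.seminfo is only meaningful for TK_NAME/TK_NUMBER/TK_STRING
  print(ls.t.token, ls.t.seminfo)
until ls.t.token == "TK_EOS"
-- prints TK_LOCAL, TK_NAME (answer), "=", TK_NUMBER (42), TK_EOS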
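
luaX:skip_sep() folds the whole delimiter scan into one integer: the count n of "=" characters when the second bracket matches (so [==[ gives 2), and -n-1 when it does not, which is how luaX:llex() tells a plain "[" (sep == -1) apart from a malformed delimiter (sep <= -2). In read_long_string the matched count then drives the trim: Token.seminfo = string.sub(ls.buff, p, -p) with p = 3 + sep strips the 4-character [==[ and ]==] delimiters. Continuing the sketch above with an illustrative level-2 long string:

local ls2 = {}
local z2 = { data = "[==[\nhello\n]==]", pos = 0 }
luaX:setinput(nil, ls2, z2, "@longstring.lua")
luaX:next(ls2)
-- A newline straight after the opening bracket is skipped; the one
-- before the closing bracket is part of the string.
print(ls2.t.token, ls2.t.seminfo)  --> TK_STRING  hello (plus trailing newline)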
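
luaX:chunkid() reproduces luaO_chunkid's three naming formats, selected by the first character of the source string: "=" passes the name through verbatim, "@" marks a file name (truncated from the left with "..." when too long), and anything else is quoted as a [string "..."] literal. With the module loaded as above (inputs chosen arbitrarily):

print(luaX:chunkid("=stdin", luaX.MAXSRC))             --> stdin
print(luaX:chunkid("@scripts/init.lua", luaX.MAXSRC))  --> scripts/init.lua
print(luaX:chunkid("local x = 1", luaX.MAXSRC))        --> [string "local x = 1"]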