diff options
Diffstat (limited to '')
-rw-r--r-- | LuaSL/testLua/yueliang-0.4.1/nat-5.1.3/llex_mk2.lua | 314 |
1 files changed, 314 insertions, 0 deletions
diff --git a/LuaSL/testLua/yueliang-0.4.1/nat-5.1.3/llex_mk2.lua b/LuaSL/testLua/yueliang-0.4.1/nat-5.1.3/llex_mk2.lua new file mode 100644 index 0000000..dae57f1 --- /dev/null +++ b/LuaSL/testLua/yueliang-0.4.1/nat-5.1.3/llex_mk2.lua | |||
@@ -0,0 +1,314 @@ | |||
1 | --[[-------------------------------------------------------------------- | ||
2 | |||
3 | llex.lua | ||
4 | Lua 5.1 lexical analyzer in Lua | ||
5 | This file is part of Yueliang. | ||
6 | |||
7 | Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net> | ||
8 | The COPYRIGHT file describes the conditions | ||
9 | under which this software may be distributed. | ||
10 | |||
11 | See the ChangeLog for more information. | ||
12 | |||
13 | ----------------------------------------------------------------------]] | ||
14 | |||
15 | --[[-------------------------------------------------------------------- | ||
16 | -- Notes: | ||
17 | -- * takes in the entire source at once | ||
18 | -- * greatly simplified chunkid, error handling | ||
19 | -- * NO shebang handling (it's done elsewhere in Lua 5.1) | ||
20 | -- * NO localized decimal point replacement magic | ||
21 | -- * NO limit to number of lines (MAX_INT = 2147483645) | ||
22 | -- * NO support for compatible long strings (LUA_COMPAT_LSTR) | ||
23 | -- * NO next(), lookahead() because I want next() to set tok and | ||
24 | -- seminfo that are locals, and that can only be done easily in | ||
25 | -- lparser, not llex. lastline would be handled in lparser too. | ||
26 | -- | ||
27 | -- Usage example: | ||
28 | -- local llex = require("llex_mk2") | ||
29 | -- llex.init(source_code, source_code_name) | ||
30 | -- repeat | ||
31 | -- local token, seminfo = llex.llex() | ||
32 | -- until token == "<eof>" | ||
33 | -- | ||
34 | ----------------------------------------------------------------------]] | ||
35 | |||
36 | local base = _G | ||
37 | local string = require "string" | ||
38 | module "llex" | ||
39 | |||
40 | ---------------------------------------------------------------------- | ||
41 | -- initialize keyword list | ||
42 | ---------------------------------------------------------------------- | ||
-- Set of reserved words: kw[word] is true for every Lua 5.1 keyword and
-- nil for anything else. Consulted by llex() to tell keywords from names.
local kw = {}
do
  local reserved = [[
and break do else elseif end false for function if in
local nil not or repeat return then true until while]]
  for word in string.gmatch(reserved, "%S+") do
    kw[word] = true
  end
end
49 | |||
50 | ---------------------------------------------------------------------- | ||
51 | -- initialize lexer for given source _z and source name _sourceid | ||
52 | ---------------------------------------------------------------------- | ||
-- Lexer state shared by every function below:
--   z        : the complete source text being scanned
--   sourceid : the name given to that source
--   I        : index in z of the next character to examine
local z, sourceid, I
-- Local shortcuts to the string library functions used by the lexer.
local find, match, sub = string.find, string.match, string.sub
57 | |||
-- Prepares the lexer to scan a new source.
--   _z        : source text (taken in whole)
--   _sourceid : name of the source, later used by chunkid()
-- Scanning restarts at the first character, on line 1.
-- Note: ln is assigned without a local declaration, so it is stored in the
-- function environment (the module table under module "llex") rather than
-- in a file-local variable.
function init(_z, _sourceid)
  z, sourceid = _z, _sourceid
  I, ln = 1, 1
end
64 | |||
65 | ---------------------------------------------------------------------- | ||
66 | -- returns a chunk name or id, no truncation for long names | ||
67 | ---------------------------------------------------------------------- | ||
-- Returns a printable name for the current source.
-- A source name starting with "=" or "@" is returned with that first
-- character stripped (and never truncated); any other name, or no name
-- at all, yields the literal "[string]".
function chunkid()
  local id = sourceid
  if not id or not match(id, "^[=@]") then
    return "[string]"
  end
  return sub(id, 2)
end
74 | |||
75 | ---------------------------------------------------------------------- | ||
76 | -- formats error message and throws error | ||
77 | -- * a simplified version, does not report what token was responsible | ||
78 | ---------------------------------------------------------------------- | ||
-- Raises an error of the form "<chunkid>:<line>: <message>".
--   s    : the message text
--   line : line number to report; defaults to the current line (ln)
-- Does not return: control leaves through base.error.
function errorline(s, line)
  local where = line or ln
  local message = string.format("%s:%d: %s", chunkid(), where, s)
  base.error(message)
end
82 | |||
83 | ---------------------------------------------------------------------- | ||
84 | -- handles line number incrementation and end-of-line characters | ||
85 | ---------------------------------------------------------------------- | ||
86 | |||
-- Consumes one end-of-line sequence starting at index i and bumps the line
-- counter. A newline character followed by the *other* newline character
-- ("\r\n" or "\n\r") counts as a single line break; two identical
-- characters are two separate breaks.
-- Updates I and returns the index just past the line break.
local function inclinenumber(i)
  local first = sub(z, i, i)
  local second = sub(z, i + 1, i + 1)
  local after = i + 1                       -- past the first '\n' or '\r'
  if second ~= first and (second == "\n" or second == "\r") then
    after = i + 2                           -- past '\n\r' or '\r\n'
  end
  ln = ln + 1
  I = after
  return after
end
99 | |||
100 | ------------------------------------------------------------------------ | ||
101 | -- count separators ("=") in a long string delimiter | ||
102 | ------------------------------------------------------------------------ | ||
-- Measures a long-bracket delimiter that starts at index i, where z[i] is
-- "[" or "]".
-- Skips that bracket and any run of "=" after it, leaving I on the first
-- character past the "=" run.
-- Returns the number of "=" signs when that character repeats the opening
-- bracket (a well-formed delimiter); otherwise returns -(count) - 1, so a
-- lone bracket gives -1 and a bracket followed by n "=" gives -(n + 1).
local function skip_sep(i)
  local bracket = sub(z, i, i)
  local equals = match(z, "=*", i + 1)      -- may be the empty string
  local count = #equals
  local after = i + 1 + count
  I = after
  if sub(z, after, after) == bracket then
    return count
  end
  return -count - 1
end
112 | |||
113 | ---------------------------------------------------------------------- | ||
114 | -- reads a long string or long comment | ||
115 | ---------------------------------------------------------------------- | ||
116 | |||
-- Reads the body of a long string or long comment.
--   is_str : true for a long string, false for a long comment
--   sep    : number of "=" signs in the opening delimiter
-- On entry I points at the second "[" of the opening delimiter. On exit I
-- points just past the matching closing delimiter.
-- A line break immediately after the opening delimiter is dropped, and
-- every line break inside the body is stored as a single "\n".
-- Returns the string contents when is_str is true. For comments nothing is
-- collected and "" is returned (the caller discards the result).
-- Raises "unfinished long string" / "unfinished long comment" via
-- errorline() when the source ends before the closing delimiter.
local function read_long_string(is_str, sep)
  local sub = sub
  local i = I + 1                           -- skip 2nd '['
  -- Pieces are gathered in a table and joined once at the end, which avoids
  -- re-copying the whole buffer on every append.
  local parts, n = {}, 0
  local c = sub(z, i, i)
  if c == "\r" or c == "\n" then            -- string starts with a newline?
    i = inclinenumber(i)                    -- skip it
  end
  while true do
    local p, _, r = find(z, "([\r\n%]])", i)  -- next newline or ']'
    if not p then
      errorline(is_str and "unfinished long string" or
                "unfinished long comment")
    end
    if is_str then
      n = n + 1
      parts[n] = sub(z, i, p - 1)           -- text before the special char
    end
    i = p
    if r == "]" then                        -- delimiter test
      if skip_sep(i) == sep then
        i = I + 1                           -- skip 2nd ']'
        break
      end
      -- Not the closing delimiter: keep the "]" and any "=" that skip_sep
      -- consumed, then resume scanning where it stopped.
      if is_str then
        n = n + 1
        parts[n] = sub(z, i, I - 1)
      end
      i = I
    else                                    -- newline
      if is_str then
        n = n + 1
        parts[n] = "\n"
      end
      i = inclinenumber(i)
    end
  end
  I = i
  -- Globals are hidden by module(), so the table library is reached via base.
  return base.table.concat(parts)
end
151 | |||
152 | ---------------------------------------------------------------------- | ||
153 | -- reads a string | ||
154 | ---------------------------------------------------------------------- | ||
-- Reads a quoted string.
--   del : the opening quote character (" or '), which also ends the string
-- On entry I points at the first character after the opening quote. On
-- success I is left just past the closing quote and the decoded contents
-- are returned.
-- Escapes handled: \a \b \f \n \r \t \v, backslash followed by a line break
-- (stored as "\n"), \ddd with up to three decimal digits, and backslash
-- followed by any other non-digit, which stands for that character itself.
-- Raises "unfinished string" via errorline() on an unescaped line break or
-- when the source ends first, and "escape sequence too large" for \ddd
-- values above 255.
local function read_string(del)
  local find = find
  local sub = sub
  local i = I
  -- Pieces are gathered in a table and joined once at the end, which avoids
  -- re-copying the whole buffer on every append.
  local parts, n = {}, 0
  while true do
    local p, _, r = find(z, "([\n\r\\\"\'])", i)  -- next special character
    if not p then break end                 -- (error) source ended
    if r == "\n" or r == "\r" then
      errorline("unfinished string")
    end
    n = n + 1
    parts[n] = sub(z, i, p - 1)             -- normal portions
    i = p
    if r == "\\" then                       -- handle escapes
      i = i + 1
      r = sub(z, i, i)
      if r == "" then break end             -- (EOZ error)
      p = find("abfnrtv\n\r", r, 1, true)
      if p then                             -- special escapes
        if p > 7 then                       -- escaped line break
          r = "\n"
          i = inclinenumber(i)
        else
          r = sub("\a\b\f\n\r\t\v", p, p)
          i = i + 1
        end
      elseif find(r, "%D") then             -- other non-digits: keep as is
        i = i + 1
      else                                  -- \xxx sequence
        local _, q, digits = find(z, "^(%d%d?%d?)", i)
        i = q + 1
        local code = base.tonumber(digits)  -- explicit conversion
        if code > 255 then                  -- UCHAR_MAX
          errorline("escape sequence too large")
        end
        r = string.char(code)
      end
    else                                    -- a quote character
      i = i + 1
      if r == del then                      -- ending delimiter
        I = i
        -- Globals are hidden by module(), so reach the table library
        -- through base.
        return base.table.concat(parts)
      end
      -- the other kind of quote is ordinary text and falls through
    end
    n = n + 1
    parts[n] = r                            -- decoded escape or plain quote
  end
  errorline("unfinished string")
end
208 | |||
209 | ------------------------------------------------------------------------ | ||
210 | -- main lexer function | ||
211 | ------------------------------------------------------------------------ | ||
-- Scans and returns the next token from the source given to init().
-- Returns the token and, where applicable, its semantic value:
--   "<name>",   text    identifier
--   "<number>", value   numeral (converted with tonumber, so hex works too)
--   "<string>", text    quoted or long string, escapes already decoded
--   "<eof>"             end of the source
--   any other string    a keyword, operator or single character, returned
--                       as itself with no second value
-- Whitespace, line breaks and comments are skipped. Lexical errors are
-- raised through errorline().
function llex()
  local find = find
  local match = match
  while true do--outer
    local i = I
    -- the inner loop runs at most once; "break" inside it acts as a
    -- "continue" that restarts scanning from the updated position I
    while true do--inner
      ----------------------------------------------------------------
      -- identifier or reserved word
      local word = match(z, "^[_%a][_%w]*", i)
      if word then
        I = i + #word
        if kw[word] then return word end    -- reserved word (keyword)
        return "<name>", word               -- identifier
      end
      ----------------------------------------------------------------
      -- numeral: a digit, or a dot directly followed by a digit
      local num_start, _, lead_dot = find(z, "^(%.?)%d", i)
      if num_start then
        if lead_dot == "." then i = i + 1 end
        local _, mant_end, exp = find(z, "^%d*[%.%d]*([eE]?)", i)
        i = mant_end + 1
        -- an exponent marker may be followed by a sign
        if #exp == 1 and match(z, "^[%+%-]", i) then
          i = i + 1
        end
        local _, num_end = find(z, "^[_%w]*", i)
        I = num_end + 1
        local value = base.tonumber(sub(z, num_start, num_end))
        if not value then errorline("malformed number") end
        return "<number>", value
      end
      ----------------------------------------------------------------
      -- whitespace and line breaks
      local ws_start, ws_end, first = find(z, "^(%s)[ \t]*", i)
      if ws_start then
        if first == "\n" or first == "\r" then
          inclinenumber(i)                  -- newline; this updates I
        else
          I = ws_end + 1                    -- whitespace
        end
        break -- (continue)
      end
      ----------------------------------------------------------------
      -- punctuation
      local sym = match(z, "^%p", i)
      if sym then
        if sym == "-" then                  -- minus or comment
          local bracket = match(z, "^%-%-(%[?)", i)
          if bracket then
            i = i + 2
            local sep = -1
            if bracket == "[" then
              sep = skip_sep(i)
            end
            if sep >= 0 then                -- long comment
              read_long_string(false, sep)
            else                            -- short comment
              I = find(z, "[\n\r]", i) or (#z + 1)
            end
            break -- (continue)
          end
          -- a lone "-" is returned below
        elseif sym == "[" then              -- [ or long string
          local sep = skip_sep(i)           -- also advances I
          if sep >= 0 then
            return "<string>", read_long_string(true, sep)
          elseif sep == -1 then
            return "["
          else
            errorline("invalid long string delimiter")
          end
        elseif sym == "\"" or sym == "\'" then  -- strings
          I = i + 1
          return "<string>", read_string(sym)
        elseif sym == "." then              -- .|..|... dots
          sym = match(z, "^%.%.?%.?", i)
        elseif sym == "=" or sym == "<" or sym == ">" or sym == "~" then
          sym = match(z, "^%p=?", i)        -- relational, optional '='
        end
        I = i + #sym; return sym            -- every remaining symbol
      end
      ----------------------------------------------------------------
      local ch = sub(z, i, i)
      if ch ~= "" then
        I = i + 1; return ch                -- other single-char tokens
      end
      return "<eof>"                        -- end of stream
      ----------------------------------------------------------------
    end--while inner
  end--while outer
end
313 | |||
-- Return this chunk's environment as the value of the file. Level 1 (the
-- default for getfenv) is the function calling getfenv, i.e. this chunk,
-- whose environment was set by module "llex".
return base.getfenv(1)