1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
|
--[[--------------------------------------------------------------------
llex.lua
Lua 5 lexical analyzer in Lua
This file is part of Yueliang.
Copyright (c) 2005-2006 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
See the ChangeLog for more information.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- Notes:
-- * the parser is expected to implement luaX_syntaxerror itself, by
--   calling errorline with 2 parameters
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- local lex_init = require("llex.lua")
-- local llex = lex_init(z, source)
-- llex:chunkid()
-- * returns formatted name of chunk id
-- llex:errorline(s, token, line)
-- * throws an error with a formatted message
-- llex:lex()
-- * returns next lexical element (token, seminfo)
----------------------------------------------------------------------]]
return
function(z, source)
--[[--------------------------------------------------------------------
-- lexer initialization
----------------------------------------------------------------------]]
--------------------------------------------------------------------
-- lexer state, shared as upvalues by the functions defined below
--------------------------------------------------------------------
local string = string    -- local alias of the string library
local z = z              -- input stream; characters are read with z:getc()
local EOF = "<eof>"      -- token text used for end of input
local curr               -- current (look-ahead) character
local buff               -- text of the token being accumulated
local luaX = {}          -- the lexer object returned to the caller
luaX.source = source     -- chunk name exactly as passed in
luaX.lineno = 1          -- current line number, counted from 1
--------------------------------------------------------------------
-- initialize keyword list
-- * kw maps each reserved word to true
-- * string.gfind was renamed string.gmatch after Lua 5.0, so use
--   whichever one the running interpreter provides
--------------------------------------------------------------------
local kw = {}
do
  local gmatch = string.gmatch or string.gfind
  for word in gmatch([[
and break do else elseif end false for function if in
local nil not or repeat return then true until while]], "%S+") do
    kw[word] = true
  end
end
--[[--------------------------------------------------------------------
-- support functions
----------------------------------------------------------------------]]
--------------------------------------------------------------------
-- returns a chunk name or id
-- * a source beginning with "=" or "@" yields the rest of the source
--   text; anything else yields the fixed text "[string]"
--------------------------------------------------------------------
function luaX:chunkid()
  local prefix = string.sub(source, 1, 1)
  if prefix ~= "=" and prefix ~= "@" then
    return "[string]"
  end
  return string.sub(source, 2) -- drop the marker character
end
--------------------------------------------------------------------
-- formats error message and throws error
-- * s: description of the problem
-- * token: text quoted after "near" in the message
-- * line: line number to report; the current line when omitted
-- * never returns, always raises via error()
--------------------------------------------------------------------
function luaX:errorline(s, token, line)
  local where = line or self.lineno
  local msg = string.format("%s:%d: %s near '%s'", self:chunkid(), where, s, token)
  error(msg)
end
--------------------------------------------------------------------
-- throws a lexer error
-- * s: description of the problem
-- * token: text to quote; the current token buffer when omitted
--------------------------------------------------------------------
local function lexerror(s, token)
  luaX:errorline(s, token or buff)
end
--------------------------------------------------------------------
-- pulls one character from the input stream
-- * the character becomes the current one (curr) and is returned
--------------------------------------------------------------------
local function nextc()
  curr = z:getc()
  return curr
end
--------------------------------------------------------------------
-- appends the current character to the token buffer, then advances
-- * returns the character that follows the one just saved
--------------------------------------------------------------------
local function save_next()
  buff = buff .. curr
  local following = nextc()
  return following
end
--------------------------------------------------------------------
-- steps past a newline and bumps the line counter
--------------------------------------------------------------------
local function nextline()
  nextc() -- consume the '\n' itself
  luaX.lineno = luaX.lineno + 1
end
--[[--------------------------------------------------------------------
-- reads a number (LUA_NUMBER)
-- * comma: true when a leading "." belongs to the number; the buffer
--   then starts out as "." instead of empty
-- * returns the value produced by tonumber() on the collected text
-- * raises a lexer error for digits followed by ".." and for text
--   that tonumber() rejects
----------------------------------------------------------------------]]
local function read_numeral(comma)
  local find = string.find
  buff = comma and "." or ""
  ------------------------------------------------------------------
  -- leading digits, then an optional decimal point
  ------------------------------------------------------------------
  while find(curr, "%d") do save_next() end
  if curr == "." then
    if save_next() == "." then
      save_next()
      lexerror("ambiguous syntax (dots follows digits)")
    end
  end
  ------------------------------------------------------------------
  -- remaining digits, then an optional exponent
  ------------------------------------------------------------------
  while find(curr, "%d") do save_next() end
  if find(curr, "^[eE]$") then -- anchored so that "EOZ" cannot match
    save_next() -- read 'E' and optional exponent sign
    if find(curr, "^[+-]$") then save_next() end
    while find(curr, "%d") do save_next() end
  end
  -- keep the result local; assigning to an undeclared name would
  -- create a global variable as a side effect of lexing
  local value = tonumber(buff)
  if value then return value end
  lexerror("malformed number")
end
--[[--------------------------------------------------------------------
-- reads a long string or long comment
-- * is_str: true for a long string; otherwise the text is a comment
--   and the buffer is emptied at every newline
-- * expects the current character to be the second bracket of the
--   opening pair
-- * returns the collected text without the closing brackets
----------------------------------------------------------------------]]
local function read_long(is_str)
  local depth = 0 -- how many inner '[[' are still waiting for their ']]'
  buff = ""
  nextc() -- step over the second '[' of the opener
  if curr == "\n" then nextline() end -- a newline right after the opener is dropped
  while true do
    local ch = curr
    if ch == "EOZ" then
      local what = "unfinished long comment"
      if is_str then what = "unfinished long string" end
      lexerror(what, EOF)
    elseif ch == "\n" then
      buff = buff .. "\n"
      nextline()
      if not is_str then buff = "" end -- comment text is not needed
    elseif ch == "[" then
      if save_next() == "[" then -- nested opener
        depth = depth + 1
        save_next()
      end
    elseif ch == "]" then
      if save_next() == "]" then
        if depth == 0 then break end -- outermost closer reached
        depth = depth - 1
        save_next()
      end
    else
      save_next()
    end
  end
  nextc() -- skip second ']'
  -- the first ']' of the closer was saved into buff; cut it off
  return string.sub(buff, 1, -2)
end
--[[--------------------------------------------------------------------
-- reads a quoted string
-- * del: the delimiter character that opened the string
-- * returns the string contents with escape sequences translated
-- * raises a lexer error on end of input or a raw newline inside the
--   string, and on a decimal escape greater than 255
----------------------------------------------------------------------]]
local function read_string(del)
  local find, sub = string.find, string.sub
  buff = ""
  -- the opening delimiter is kept in buff (error messages quote buff)
  -- and is cut off again when the result is returned
  save_next()
  while curr ~= del do
    local ch = curr
    if ch == "EOZ" then
      lexerror("unfinished string", EOF)
    elseif ch == "\n" then
      lexerror("unfinished string")
    elseif ch ~= "\\" then
      save_next() -- ordinary character
    else
      ch = nextc() -- the backslash itself is not stored
      if ch ~= "EOZ" then -- EOZ is reported by the next loop pass
        -- plain search: position in this list selects the replacement
        local idx = find("\nabfnrtv", ch, 1, 1)
        if idx then
          buff = buff .. sub("\n\a\b\f\n\r\t\v", idx, idx)
          if idx == 1 then nextline() else nextc() end
        elseif find(ch, "%D") then
          save_next() -- any other non-digit stands for itself
        else
          -- decimal escape: up to three digits
          local code, ndigits = 0, 0
          repeat
            code = 10 * code + curr
            ndigits = ndigits + 1
            nextc()
          until ndigits >= 3 or find(curr, "%D")
          if code > 255 then -- UCHAR_MAX
            lexerror("escape sequence too large")
          end
          buff = buff .. string.char(code)
        end
      end
    end
  end
  nextc() -- skip closing delimiter
  return sub(buff, 2)
end
--[[--------------------------------------------------------------------
-- main lexer function
-- * returns the next token; "<string>", "<number>" and "<name>" are
--   followed by a second result holding the token's value
-- * reserved words, operators and other single characters are
--   returned as their own text
-- * returns the EOF token once the current character is "EOZ"
-- * comments, whitespace and newlines are skipped
----------------------------------------------------------------------]]
function luaX:lex()
  local find = string.find
  while true do
    local c = curr
    ----------------------------------------------------------------
    -- operators, strings, comments, dots, newline
    ----------------------------------------------------------------
    -- plain (non-pattern) search; d is the position of c in the list
    local d = find("=<>~\"'-[.\n", c, 1, 1)
    if d then
      ------------------------------------------------------------
      if d <= 4 then -- "=<>~" (relational operators)
        if nextc() ~= "=" then return c end
        nextc(); return c.."="
      ------------------------------------------------------------
      elseif d <= 6 then -- "\"" or "'" (string)
        return "<string>", read_string(c)
      ------------------------------------------------------------
      elseif c == "-" then -- "-" ("-", comment, or long comment)
        if nextc() ~= "-" then return "-" end
        -- otherwise it is a comment
        if nextc() == "[" and nextc() == "[" then
          read_long() -- long comment
        else -- short comment
          -- test curr, not a saved copy: after "--[" one extra
          -- character has already been read, and it may be the
          -- newline that ends this comment
          while curr ~= "\n" and curr ~= "EOZ" do nextc() end
        end
      ------------------------------------------------------------
      elseif c == "[" then -- "[" ("[" or long string)
        if nextc() ~= "[" then return c end
        return "<string>", read_long(true)
      ------------------------------------------------------------
      elseif c == "." then -- "." (".", concatenation, or dots)
        buff = ""
        c = save_next()
        if c == "." then -- interpret 2 or 3 dots
          if save_next() == "." then save_next() end
          return buff
        end
        if find(c, "%d") then
          return "<number>", read_numeral(true)
        end
        return "."
      ------------------------------------------------------------
      else -- c == "\n" (newline)
        nextline()
      ------------------------------------------------------------
      end--if d/c
    ----------------------------------------------------------------
    -- number, end-of-file, identifier or reserved word
    ----------------------------------------------------------------
    elseif find(c, "%d") then -- number
      return "<number>", read_numeral(false)
    ----------------------------------------------------------------
    elseif find(c, "[_%a]") then -- reads a name
      -- the end marker "EOZ" is made of letters, so it lands here too
      if c == "EOZ" then return EOF end -- end-of-file
      buff = ""
      repeat
        c = save_next()
      until c == "EOZ" or find(c, "[^_%w]")
      c = buff
      if kw[c] then return c end -- reserved word
      return "<name>", c
    ----------------------------------------------------------------
    -- whitespace, other characters, control characters
    ----------------------------------------------------------------
    elseif find(c, "%s") then -- whitespace
      nextc()
    ----------------------------------------------------------------
    elseif find(c, "%c") then -- control characters
      lexerror("invalid control char", "char("..string.byte(c)..")")
    ----------------------------------------------------------------
    else -- single-char tokens (+ - / etc.)
      nextc(); return c
    ----------------------------------------------------------------
    end--if d/c
  end--while
end
--[[--------------------------------------------------------------------
-- initial processing (shebang handling)
-- * reads the first character, and when it is "#" discards the rest
--   of the first line; the terminating newline stays current
-- * returns the lexer object
----------------------------------------------------------------------]]
nextc() -- read first char
if curr == "#" then -- skip first line
  repeat nextc() until curr == "\n" or curr == "EOZ"
end
return luaX
--[[------------------------------------------------------------------]]
end
|