--[[--------------------------------------------------------------------

  llex_mk2.lua
  Lua 5.1 lexical analyzer in Lua
  This file is part of Yueliang.

  Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
  The COPYRIGHT file describes the conditions
  under which this software may be distributed.

  See the ChangeLog for more information.

----------------------------------------------------------------------]]

--[[--------------------------------------------------------------------
-- Notes:
-- * takes in the entire source at once
-- * greatly simplified chunkid and error handling
-- * NO shebang handling (it's done elsewhere in Lua 5.1)
-- * NO localized decimal point replacement magic
-- * NO limit to number of lines (MAX_INT = 2147483645)
-- * NO support for compatible long strings (LUA_COMPAT_LSTR)
-- * NO next(), lookahead() because I want next() to set tok and
--   seminfo, which are locals, and that can only be done easily in
--   lparser, not in llex. lastline would be handled in lparser too.
--
-- Usage example:
--   local llex = require("llex_mk2")
--   llex.init(source_code, source_code_name)
--   repeat
--      local token, seminfo = llex.llex()
--   until token == "<eof>"
--
----------------------------------------------------------------------]]
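
--[[--------------------------------------------------------------------
-- To illustrate what the loop above yields, lexing the one-line chunk
-- "local x = 1" produces, call by call:
--
--   llex.init("local x = 1", "=demo")
--   llex.llex()   --> "local"            (keywords come back verbatim)
--   llex.llex()   --> "<name>", "x"
--   llex.llex()   --> "="
--   llex.llex()   --> "<number>", 1
--   llex.llex()   --> "<eof>"
----------------------------------------------------------------------]]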

local base = _G
local string = require "string"
module "llex"

----------------------------------------------------------------------
-- initialize keyword list
----------------------------------------------------------------------
local kw = {}
for v in string.gmatch([[
and break do else elseif end false for function if in
local nil not or repeat return then true until while]], "%S+") do
  kw[v] = true
end

----------------------------------------------------------------------
-- initialize lexer for given source _z and source name _sourceid
----------------------------------------------------------------------
local z, sourceid, I
local find = string.find
local match = string.match
local sub = string.sub

function init(_z, _sourceid)
  z = _z                        -- source
  sourceid = _sourceid          -- name of source
  I = 1                         -- lexer's position in source
  ln = 1                        -- line number
end

----------------------------------------------------------------------
-- returns a chunk name or id (no truncation for long names)
----------------------------------------------------------------------
function chunkid()
  if sourceid and match(sourceid, "^[=@]") then
    return sub(sourceid, 2)  -- remove first char
  end
  return "[string]"
end
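
-- For example, a sourceid of "@test.lua" yields "test.lua" and "=demo"
-- yields "demo"; any other sourceid (or none at all) falls back to the
-- generic "[string]".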

----------------------------------------------------------------------
-- formats error message and throws error
-- * a simplified version, does not report what token was responsible
----------------------------------------------------------------------
function errorline(s, line)
  base.error(string.format("%s:%d: %s", chunkid(), line or ln, s))
end
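
-- e.g. with sourceid "@test.lua", errorline("unfinished string", 3)
-- throws with the message formatted as "test.lua:3: unfinished string".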

----------------------------------------------------------------------
-- handles line number incrementation and end-of-line characters
----------------------------------------------------------------------

local function inclinenumber(i)
  local sub = sub
  local old = sub(z, i, i)
  i = i + 1  -- skip '\n' or '\r'
  local c = sub(z, i, i)
  if (c == "\n" or c == "\r") and (c ~= old) then
    i = i + 1  -- skip '\n\r' or '\r\n'
  end
  ln = ln + 1
  I = i
  return i
end
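
-- "\n", "\r", "\r\n" and "\n\r" are each counted as a single line break,
-- so mixed line endings bump ln only once per line.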

------------------------------------------------------------------------
-- count separators ("=") in a long string delimiter
------------------------------------------------------------------------
local function skip_sep(i)
  local sub = sub
  local s = sub(z, i, i)
  i = i + 1
  local count = #match(z, "=*", i)  -- note, take the length
  i = i + count
  I = i
  return (sub(z, i, i) == s) and count or (-count) - 1
end
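
-- The return value encodes the long-bracket level seen at I, e.g.:
--   "[["    --> 0    plain long bracket
--   "[==["  --> 2    level-2 long bracket
--   "["     --> -1   a lone '[' (ordinary token)
--   "[=="   --> -3   invalid long string delimiter
-- llex() and read_long_string() rely on exactly this encoding below.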

----------------------------------------------------------------------
-- reads a long string or long comment
----------------------------------------------------------------------

local function read_long_string(is_str, sep)
  local i = I + 1  -- skip 2nd '['
  local sub = sub
  local buff = ""
  local c = sub(z, i, i)
  if c == "\r" or c == "\n" then  -- string starts with a newline?
    i = inclinenumber(i)  -- skip it
  end
  local j = i
  while true do
    local p, q, r = find(z, "([\r\n%]])", i) -- (long range)
    if not p then
      errorline(is_str and "unfinished long string" or
                "unfinished long comment")
    end
    if is_str then
      buff = buff..sub(z, i, p - 1)     -- save string portions
    end
    i = p
    if r == "]" then                    -- delimiter test
      if skip_sep(i) == sep then
        i = I + 1  -- skip 2nd ']'
        break
      end
      buff = buff..sub(z, i, I - 1)
      i = I
    else                                -- newline
      buff = buff.."\n"
      i = inclinenumber(i)
    end
  end--while
  I = i
  return buff
end
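
-- Example: with I left on the second '[' of "[==[one\r\ntwo]==]",
-- read_long_string(true, 2) returns "one\ntwo" -- the delimiters are
-- dropped and every line ending inside is normalized to a single "\n".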

----------------------------------------------------------------------
-- reads a string
----------------------------------------------------------------------
local function read_string(del)
  local i = I
  local find = find
  local sub = sub
  local buff = ""
  while true do
    local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range)
    if p then
      if r == "\n" or r == "\r" then
        errorline("unfinished string")
      end
      buff = buff..sub(z, i, p - 1)             -- normal portions
      i = p
      if r == "\\" then                         -- handle escapes
        i = i + 1
        r = sub(z, i, i)
        if r == "" then break end -- (EOZ error)
        p = find("abfnrtv\n\r", r, 1, true)
        ------------------------------------------------------
        if p then                               -- special escapes
          if p > 7 then
            r = "\n"
            i = inclinenumber(i)
          else
            r = sub("\a\b\f\n\r\t\v", p, p)
            i = i + 1
          end
        ------------------------------------------------------
        elseif find(r, "%D") then               -- other non-digits
          i = i + 1
        ------------------------------------------------------
        else                                    -- \xxx sequence
          local p, q, s = find(z, "^(%d%d?%d?)", i)
          i = q + 1
          if s + 1 > 256 then -- UCHAR_MAX
            errorline("escape sequence too large")
          end
          r = string.char(s)
        ------------------------------------------------------
        end--if p
      else
        i = i + 1
        if r == del then                        -- ending delimiter
          I = i; return buff                    -- return string
        end
      end--if r
      buff = buff..r -- handled escapes fall through to here
    else
      break -- (error)
    end--if p
  end--while
  errorline("unfinished string")
end
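
-- Example: read_string() is entered with I just past the opening quote,
-- so for the source text  "a\tb\065c"  the call read_string('"') returns
-- the five characters  a TAB b A c  -- "\t" goes through the escape list
-- above and "\065" through the decimal \ddd branch.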

------------------------------------------------------------------------
-- main lexer function
------------------------------------------------------------------------
function llex()
  local find = find
  local match = match
  while true do--outer
    local i = I
    -- inner loop allows break to be used to nicely section tests
    while true do--inner
      ----------------------------------------------------------------
      local p, _, r = find(z, "^([_%a][_%w]*)", i)
      if p then
        I = i + #r
        if kw[r] then return r end              -- reserved word (keyword)
        return "<name>", r                      -- identifier
      end
      ----------------------------------------------------------------
      local p, _, r = find(z, "^(%.?)%d", i)
      if p then                                 -- numeral
        if r == "." then i = i + 1 end
        local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
        i = q + 1
        if #r == 1 then                         -- optional exponent
          if match(z, "^[%+%-]", i) then        -- optional sign
            i = i + 1
          end
        end
        local _, q = find(z, "^[_%w]*", i)
        I = q + 1
        local v = base.tonumber(sub(z, p, q))   -- handles hex also
        if not v then errorline("malformed number") end
        return "<number>", v
      end
      ----------------------------------------------------------------
      local p, q, r = find(z, "^(%s)[ \t]*", i)
      if p then
        if r == "\n" or r == "\r" then          -- newline
          inclinenumber(i)
        else
          I = q + 1                             -- whitespace
        end
        break -- (continue)
      end
      ----------------------------------------------------------------
      local r = match(z, "^%p", i)
      if r then
        local p = find("-[\"\'.=<>~", r, 1, true)
        if p then
          -- two-level if block for punctuation/symbols
          --------------------------------------------------------
          if p <= 2 then
            if p == 1 then                      -- minus
              local c = match(z, "^%-%-(%[?)", i)
              if c then
                i = i + 2
                local sep = -1
                if c == "[" then
                  sep = skip_sep(i)
                end
                if sep >= 0 then                -- long comment
                  read_long_string(false, sep)
                else                            -- short comment
                  I = find(z, "[\n\r]", i) or (#z + 1)
                end
                break -- (continue)
              end
              -- (fall through for "-")
            else                                -- [ or long string
              local sep = skip_sep(i)
              if sep >= 0 then
                return "<string>", read_long_string(true, sep)
              elseif sep == -1 then
                return "["
              else
                errorline("invalid long string delimiter")
              end
            end
          --------------------------------------------------------
          elseif p <= 5 then
            if p < 5 then                       -- strings
              I = i + 1
              return "<string>", read_string(r)
            end
            r = match(z, "^%.%.?%.?", i)        -- .|..|... dots
            -- (fall through)
          --------------------------------------------------------
          else                                  -- relational
            r = match(z, "^%p=?", i)
            -- (fall through)
          end
        end
        I = i + #r; return r  -- for other symbols, fall through
      end
      ----------------------------------------------------------------
      local r = sub(z, i, i)
      if r ~= "" then
        I = i + 1; return r                     -- other single-char tokens
      end
      return "<eof>"                            -- end of stream
      ----------------------------------------------------------------
    end--while inner
  end--while outer
end
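
-- Summary of the token kinds llex() returns: keywords and operator or
-- punctuation symbols come back verbatim, identifiers as "<name>" plus
-- the name, numerals as "<number>" plus their numeric value, quoted and
-- long strings as "<string>" plus the string value, and "<eof>" marks
-- the end of the stream.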

return base.getfenv()