--[[
 lua_parser_loose.lua.
 Loose parsing of Lua code.  See README.
 (c) 2013 David Manura.  MIT License.
--]]

local PARSE = {}

local LEX = require 'lua_lexer_loose'

-- Writes a warning line of the form "WARNING: <position>: <message>" to stderr.
local function warn(message, position)
  io.stderr:write('WARNING: ', tostring(position), ': ', message, '\n')
end

--[[
 Loose parser.

 lx - lexer stream of Lua tokens (must provide lx:next() and lx:peek()).
 f(event...) - callback function to send events to.
 level - optional number of entries to pre-populate the internal scope
   stack with (defaults to 1).

 Events generated:
   'Var', name, lineinfo - variable declaration that immediately comes into scope.
   'VarSelf', name, lineinfo - same as 'Var' but for implicit 'self' parameter
     in method definitions.  lineinfo is zero-width space after '('
   'VarNext', name, lineinfo - variable definition that comes into scope
     upon next statement.
   'VarInside', name, lineinfo - variable definition that comes into scope
     inside following block.  Used for control variables in 'for' statements.
   'Id', name, lineinfo - reference to variable.
   'String', name, lineinfo - string or table field.
   'Scope', opt, lineinfo - beginning of scope block.
   'EndScope', opt, lineinfo - end of scope block (opt may be nil).
   'FunctionCall', name, lineinfo - function call (in addition to other events).
   'Statement', token, lineinfo - beginning of a statement.  Also generated at
     end of file and at the '(' that opens a function parameter list.
--]]
function PARSE.parse_scope(lx, f, level)
  -- Previous non-comment token; used to detect statement boundaries.
  local cprev = {tag='Eof'}

  -- stack of scopes.
  local scopes = {{}}
  for l = 2, (level or 1) do scopes[l] = {} end

  -- Pushes a new scope and reports it.
  local function scope_begin(opt, lineinfo)
    scopes[#scopes+1] = {}
    f('Scope', opt, lineinfo)
  end
  -- Pops the current scope (warning if only the root scope is left) and
  -- reports it.  The 'EndScope' event is sent even when nothing was popped.
  local function scope_end(opt, lineinfo)
    if #scopes <= 1 then
      warn("'end' without opening block", lineinfo)
    else
      table.remove(scopes)
    end
    f('EndScope', opt, lineinfo)
  end

  -- Consumes a function parameter list starting at '(' and opens the
  -- function scope.  has_self - true for method definitions (a:b()).
  local function parse_function_list(has_self)
    local c = lx:next(); assert(c[1] == '(')
    f('Statement', c[1], c.lineinfo) -- generate Statement for function definition
    scope_begin(c[1], c.lineinfo)

    if has_self then
      local lineinfo = c.lineinfo+1 -- zero size
      f('VarSelf', 'self', lineinfo)
    end
    while lx:peek().tag == 'Id' do
      local c = lx:next()
      f('Var', c[1], c.lineinfo)
      if lx:peek()[1] == ',' then
        lx:next()
      end
    end
    if lx:peek()[1] == ')' then
      lx:next()
    end
  end

  while 1 do
    local c = lx:next()

    -- Detect end of previous statement
    if c.tag == 'Eof' -- trigger 'Statement' at the end of file
    or c.tag == 'Keyword' and (
       c[1] == 'break' or c[1] == 'goto' or c[1] == 'do' or c[1] == 'while' or
       c[1] == 'repeat' or c[1] == 'if' or c[1] == 'for' or
       c[1] == 'function' and lx:peek().tag == 'Id' or
       c[1] == 'local' or c[1] == ';' or c[1] == 'until' or c[1] == 'return' or
       c[1] == 'end')
    or c.tag == 'Id' and
           (cprev.tag == 'Id' or
            cprev.tag == 'Keyword' and
               (cprev[1] == ']' or cprev[1] == ')' or cprev[1] == '}' or
                cprev[1] == '...' or cprev[1] == 'end' or
                cprev[1] == 'true' or cprev[1] == 'false' or
                cprev[1] == 'nil') or
            cprev.tag == 'Number' or cprev.tag == 'String')
    then
      -- The scope opened by 'repeat' stays open through the 'until'
      -- expression; close it once the next statement starts.
      if scopes[#scopes].inside_until then
        scope_end(nil, c.lineinfo)
      end
      f('Statement', c[1], c.lineinfo)
    end

    if c.tag == 'Eof' then break end

    -- Process token(s)
    if c.tag == 'Keyword' then

      if c[1] == 'local' and lx:peek().tag == 'Keyword' and lx:peek()[1] == 'function' then
        -- local function
        -- note: this `c` deliberately shadows the loop's `c`, so `cprev`
        -- below is set to the 'local' keyword token.
        local c = lx:next(); assert(c[1] == 'function')
        if lx:peek().tag == 'Id' then
          c = lx:next()
          f('Var', c[1], c.lineinfo)
          if lx:peek()[1] == '(' then parse_function_list() end
        end
      elseif c[1] == 'function' then
        if lx:peek()[1] == '(' then -- inline function
          parse_function_list()
        elseif lx:peek().tag == 'Id' then -- function definition statement
          c = lx:next(); assert(c.tag == 'Id')
          f('Id', c[1], c.lineinfo)
          local has_self
          while lx:peek()[1] ~= '(' and lx:peek().tag ~= 'Eof' do
            c = lx:next()
            if c.tag == 'Id' then
              f('String', c[1], c.lineinfo)
            elseif c.tag == 'Keyword' and c[1] == ':' then
              has_self = true
            end
          end
          if lx:peek()[1] == '(' then parse_function_list(has_self) end
        end
      elseif c[1] == 'local' and lx:peek().tag == 'Id' then
        c = lx:next()
        f('VarNext', c[1], c.lineinfo)
        while lx:peek().tag == 'Keyword' and lx:peek()[1] == ',' do
          c = lx:next(); if lx:peek().tag ~= 'Id' then break end
          c = lx:next()
          f('VarNext', c[1], c.lineinfo)
        end
      elseif c[1] == 'for' and lx:peek().tag == 'Id' then
        c = lx:next()
        f('VarInside', c[1], c.lineinfo)
        while lx:peek().tag == 'Keyword' and lx:peek()[1] == ',' do
          c = lx:next(); if lx:peek().tag ~= 'Id' then break end
          c = lx:next()
          f('VarInside', c[1], c.lineinfo)
        end
      elseif c[1] == 'do' then
        scope_begin('do', c.lineinfo)
        -- note: do/while/for statement scopes all begin at 'do'.
      elseif c[1] == 'repeat' or c[1] == 'then' then
        scope_begin(c[1], c.lineinfo)
      elseif c[1] == 'end' or c[1] == 'elseif' then
        scope_end(c[1], c.lineinfo)
      elseif c[1] == 'else' then
        scope_end(nil, c.lineinfo)
        scope_begin(c[1], c.lineinfo)
      elseif c[1] == 'until' then
        scopes[#scopes].inside_until = true
      elseif c[1] == '{' then
        -- Track table-constructor nesting depth within the current scope.
        scopes[#scopes].inside_table = (scopes[#scopes].inside_table or 0) + 1
      elseif c[1] == '}' then
        local newval = (scopes[#scopes].inside_table or 0) - 1
        newval = newval >= 1 and newval or nil
        scopes[#scopes].inside_table = newval
      end
    elseif c.tag == 'Id' then
      local cnext = lx:peek()
      if cnext.tag == 'Keyword' and (cnext[1] == '(' or cnext[1] == '{')
      or cnext.tag == 'String' then
        f('FunctionCall', c[1], c.lineinfo)
      end
      if scopes[#scopes].inside_table and cnext.tag == 'Keyword' and cnext[1] == '=' then
        -- table field
        f('String', c[1], c.lineinfo)
      elseif cprev.tag == 'Keyword' and (cprev[1] == ':' or cprev[1] == '.') then
        -- field or method name following '.' or ':'
        f('String', c[1], c.lineinfo)
      else
        f('Id', c[1], c.lineinfo)
      end
    end

    if c.tag ~= 'Comment' then cprev = c end
  end
end

--[[
 This is similar to parse_scope but also tracks which variables are in scope.

 lx - lexer stream of Lua tokens.
 f(event...) - callback function to send events to.
 vars - optional initial scope table; a fresh root scope is created when
   omitted.

 Events generated:
   Every event of parse_scope, forwarded as f(op, name, lineinfo, vars), where
   `vars` is the scope table current at the time of the event.  Scope tables
   chain to their parent through the `__index` metamethod, so `vars[name]` is
   non-nil when `name` is a local variable visible at that point.
   Each variable record has the fields `fpos` (lineinfo of the definition),
   `at` (scope level), `masked` (the definition it shadows, if any) and
   `self` (true for the implicit 'self' parameter).
--]]
function PARSE.parse_scope_resolve(lx, f, vars)
  local NEXT = {}   -- unique key
  local INSIDE = {} -- unique key
  -- Creates the scope table for a block nested in `vars`.  For a 'do' block,
  -- the pending vars[INSIDE] table (if any) becomes the new scope itself.
  local function newscope(vars, opt, lineinfo)
    local newvars = opt=='do' and vars[INSIDE] or {}
    if newvars == vars[INSIDE] then vars[INSIDE] = false end
    newvars[INSIDE]=false
    newvars[NEXT]=false
    local level = (vars[0] or 0) + 1
    newvars[0] = level -- keep the current level
    newvars[-1] = lineinfo -- keep the start of the scope
    newvars[level] = newvars -- reference the current vars table
    return setmetatable(newvars, {__index=vars})
  end

  vars = vars or newscope({[0] = 0}, nil, 1)
  vars[NEXT] = false -- vars that come into scope upon next statement
  vars[INSIDE] = false -- vars that come into scope upon entering block
  PARSE.parse_scope(lx, function(op, name, lineinfo)
    -- in some (rare) cases VarNext can follow Statement event (which copies
    -- vars[NEXT]). This may cause vars[0] to be `nil`, so default to 1.
    local var = op:find("^Var") and
      {fpos = lineinfo, at = (vars[0] or 1) + (op == 'VarInside' and 1 or 0),
       masked = vars[name], self = (op == 'VarSelf') or nil } or nil
    if op == 'Var' or op == 'VarSelf' then
      vars[name] = var
    elseif op == 'VarNext' then
      vars[NEXT] = vars[NEXT] or {}
      vars[NEXT][name] = var
    elseif op == 'VarInside' then
      vars[INSIDE] = vars[INSIDE] or {}
      vars[INSIDE][name] = var
    elseif op == 'Scope' then
      vars = newscope(vars, name, lineinfo)
    elseif op == 'EndScope' then
      local mt = getmetatable(vars)
      if mt == nil then
        warn("'end' without opening block.", lineinfo)
      else
        vars = mt.__index
      end
    elseif op == 'Id' then
      -- Just make callback
    elseif op == 'String' or op == 'FunctionCall' then
      -- Just make callback
    elseif op == 'Statement' then -- beginning of statement
      -- Apply vars that come into scope upon beginning of statement.
      if vars[NEXT] then
        for k,v in pairs(vars[NEXT]) do
          vars[k] = v; vars[NEXT][k] = nil
        end
      end
    else
      assert(false)
    end
    f(op, name, lineinfo, vars)
  end, vars[0])
end

--[[
 Reports the variable references and definitions found in `code`.

 code - string of Lua code.
 f(event...) - callback function to send events to.

 Events generated:
   'Id', name, vars, lineinfo - reference to a variable; `vars` is the scope
     table passed on from parse_scope_resolve.
   'Var', name, "local", lineinfo - local variable definition (from the
     'Var', 'VarNext' and 'VarInside' events of parse_scope_resolve).
--]]
function PARSE.extract_vars(code, f)
  local lx = LEX.lexc(code)

  PARSE.parse_scope_resolve(lx, function(op, name, lineinfo, other)
    if op == 'Id' then
      f('Id', name, other, lineinfo)
    elseif op == 'Var' or op == 'VarNext' or op == 'VarInside' then
      f('Var', name, "local", lineinfo)
    end  -- ignore 'VarSelf' and others
  end)
end

--[[
 Converts 5.2 code to 5.1 style code with explicit _ENV variables.
 Example: "function f(_ENV, x) print(x, y) end" -->
            "function _ENV.f(_ENV, x) _ENV.print(x, _ENV.y) end"

 code - string of Lua code.  Assumed to be valid Lua (FIX: 5.1 or 5.2?)
 f(s) - call back function to send chunks of Lua code output to.
        Example: io.stdout.  When omitted, the converted code is returned
        as a single string.

 NOTE(review): the source text between identifiers is copied using the
 lineinfo of each event, which assumes lineinfo is the 1-based character
 offset of the token within `code` -- confirm against lua_lexer_loose.
--]]
function PARSE.replace_env(code, f)
  if not f then return PARSE.accumulate(PARSE.replace_env, code) end

  local char0 = 1  -- offset of the first character of `code` not yet written
  PARSE.extract_vars(code, function(op, name, other, lineinfo)
    if op ~= 'Id' and op ~= 'Var' then return end
    -- Write out the untouched source text preceding this identifier.
    if type(lineinfo) == 'number' then
      if lineinfo > char0 then f(code:sub(char0, lineinfo - 1)) end
      char0 = lineinfo + #name
    end
    -- For 'Id' events `other` is the scope table: the name is global when it
    -- does not resolve to a local there.  The string 'global' is accepted too.
    local is_global = op == 'Id' and
      (other == 'global' or type(other) == 'table' and other[name] == nil)
    f(is_global and '_ENV.' .. name or name)
  end)
  -- Write out whatever follows the last identifier.
  if char0 <= #code then f(code:sub(char0)) end
end

-- helper function.  Can be passed as argument `f` to functions
-- like `replace_env` above to accumulate fragments into a single string.
-- Call the returned object with a string to append it; call its `result`
-- method to get the concatenation of everything appended so far.
function PARSE.accumulator()
  local ts = {}
  local mt = {}
  mt.__index = mt
  function mt:__call(s) ts[#ts+1] = s end
  function mt:result() return table.concat(ts) end
  return setmetatable({}, mt)
end

-- helper function.  Runs g(code, accumulator) and returns the accumulated
-- string.
function PARSE.accumulate(g, code)
  local accum = PARSE.accumulator()
  g(code, accum)
  return accum:result()
end

return PARSE