-
Notifications
You must be signed in to change notification settings - Fork 0
/
bcstrict.lua
292 lines (261 loc) · 9.66 KB
/
bcstrict.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
-- Position-checked front end for string.unpack.
-- fmt: string.unpack format; s: data string; pos: 1-based start position.
-- Fails with "lost pos" when pos is nil/false (e.g. a caller dropped the
-- position returned by an earlier read); otherwise returns exactly what
-- string.unpack returns: the decoded values followed by the next position.
local function unpack (fmt, s, pos)
  local start = assert(pos, "lost pos")
  return string.unpack(fmt, s, start)
end
-- ldump.c:61:dumpSize.
-- size_t packed into a big-endian variable-length integer.
-- High bit signals final byte; lower 7 bits carry data.
-- s: dump string; x: 1-based position of the first byte of the integer.
-- Returns the decoded integer and the position just past its final byte.
local function parse_size (s, x)
  local total = 0
  while true do
    local byte
    byte, x = unpack("B", s, x)
    -- Shift in the next 7 payload bits, most significant group first.
    total = (total << 7) | (byte & 127)
    if byte & 128 ~= 0 then
      -- High bit set: this was the last byte.
      return total, x
    end
  end
end
-- ldump.c:73:dumpInt.
-- Values written by dumpInt are read with the same routine as dumpSize
-- values, so parse_int is simply another name for parse_size.
local parse_int = parse_size
-- ldump.c:88:dumpString. Two formats.
-- * [dumpSize]0. No string (*not* the same as an empty string). Occurs when
-- debug info is missing, e.g. stripped dump or nameless locals.
-- * [dumpSize]size, char[size-1]. Encoded size includes space for trailing 0,
-- which is not included in the actual dump.
-- s: dump string; y: 1-based position of the string's size prefix.
-- Returns the string (or nil when the encoded size is 0, meaning "no string")
-- and the position just past it.
local function parse_string (s, y)
  local size, pos = parse_size(s, y)
  if size ~= 0 then
    -- The encoded size counts a trailing 0 that is not actually dumped.
    return unpack("c" .. (size - 1), s, pos)
  end
  return nil, pos
end
-- ldump.c:100:dumpCode. [dumpInt]sizecode, Instruction[sizecode].
-- Instruction is a typedef for an unsigned integer (int or long) with at least
-- 32 bits; this is almost certainly 4 bytes, but theoretically doesn't have to
-- be, so we pass the format in as an argument.
-- The actual dump format is easy, the finicky bit is parsing the instructions.
-- lopcodes.h:13. On the 5.4 VM, iABC instructions are 32-bit integers packing
-- opcode:7, A:8, k:1, B:8, C:8 bits. (Now in alphabetical order!)
-- lopcodes.h:212:OP_GETTABUP,/* A B C R[A] := UpValue[B][K[C]:string]
-- lopcodes.h:217:OP_SETTABUP,/* A B C UpValue[A][K[B]:string] := RK(C)
-- A global access compiles down to a table access to the upvalue holding the
-- closed-over value of _ENV. Unfortunately, at this point, we don't actually
-- know which upvalue (if any!) is _ENV, so we have to mark down every upvalue
-- table access as suspicious.
-- Returns a sequence of {upvalue, instruction index, is write, table index}
-- tuples; of these, only the upvalue is strictly necessary:
-- * instruction index is used to look line numbers up from debug info
-- * table index can be looked up in the constants table for the name accessed
-- s: dump string; y: position of the code section; ins_fmt: string.unpack
-- format for a single Instruction.
-- Returns the list of upvalue table accesses (see the tuple layout above)
-- and the position just past the code section.
local function parse_code (s, y, ins_fmt)
  local OP_GETTABUP, OP_SETTABUP = 11, 15
  local accesses = {}
  local count, pos = parse_int(s, y)
  for pc = 1, count do
    local ins
    ins, pos = unpack(ins_fmt, s, pos)
    local opcode = ins & 127
    if opcode == OP_GETTABUP then
      -- Read: upvalue is field B, key constant is field C.
      accesses[#accesses+1] = {(ins >> 16) & 255, pc, false, ins >> 24}
    elseif opcode == OP_SETTABUP then
      -- Write: upvalue is field A, key constant is field B.
      accesses[#accesses+1] = {(ins >> 7) & 255, pc, true, (ins >> 16) & 255}
    end
  end
  return accesses, pos
end
-- ldump.c:108:dumpConstants. [dumpInt]sizek, Various[sizek].
-- This is a nasty format whose size can't be computed without parsing.
-- "Various" is allowed to be one of four formats:
-- * (char)LUA_VNUMFLT==19, lua_Number.
-- * (char)LUA_VNUMINT==3, lua_Integer.
-- * (char)LUA_VSHRSTR==4 or LUA_VLNGSTR==20, dumpString.
-- * (char)LUA_VNIL==0 or LUA_VFALSE==1 or LUA_VTRUE==17.
-- Only string constants *really* matter, since those are generated by "real"
-- global accesses; the others only matter because we need to know their size
-- to avoid desyncs. They *could* be generated by directly indexing _ENV, but
-- the compiler generally doesn't seem to do so for non-string indices.
-- s: dump string; y: position of the constants section.
-- Returns the constants as a table indexed from 1 (nil, false and true are
-- stored as the strings "nil", "false" and "true") and the position just
-- past the section. Fails on an unrecognised type tag.
local function parse_constants (s, y)
  -- Tags that carry no payload, mapped to their printable stand-ins.
  local literals = {[0] = "nil", [1] = "false", [17] = "true"}
  local consts = {}
  local count, pos = parse_size(s, y)
  for idx = 1, count do
    local tag, value
    tag, pos = unpack("B", s, pos)
    if tag == 4 or tag == 20 then -- string (shrstr/lngstr)
      value, pos = parse_string(s, pos)
    elseif tag == 3 then -- number (numint)
      value, pos = unpack("j", s, pos)
    elseif tag == 19 then -- number (numflt)
      value, pos = unpack("n", s, pos)
    else
      value = literals[tag]
      if not value then
        assert(false, "bad ttype "..tag.." at byte "..pos)
      end
    end
    consts[idx] = value
  end
  return consts, pos
end
-- ldump.c:143:dumpUpvalues.
-- [dumpInt]sizeupvalues, {(char)instack, (char)idx, (char)kind}[sizeupvalues].
-- Each upvalue is identified by whether it belongs to the enclosing function's
-- stack (i.e. local variables, instack==1) or not (i.e. upvalues, instack==0),
-- and an index into the enclosing function's upvalues/locals list.
-- The main function of a chunk gets _ENV as its single upvalue at (1,0), which
-- looks like the first local variable of its fictional enclosing scope.
-- Outside this special case, the inherited global _ENV is *never* passed down
-- from the stack: instack upvalues are locals new to the enclosing function.
-- Thus, given the (instack, idx) tuple which identified _ENV among upvalues of
-- our parent function we can find the index of any upvalue (at most 1) which
-- refers to the global _ENV. Not every function must have _ENV as an upvalue,
-- but it needs to be present to be passed down to children.
-- s: dump string; y: position of the upvalues section; env_index: the
-- encoded (instack<<8)|idx identity of _ENV to look for, or nil to skip the
-- search entirely.
-- Returns the 0-based index of the matching upvalue (nil if none) and the
-- position just past the section.
local function parse_upvalues (s, y, env_index)
  local count, pos = parse_size(s, y)
  -- Every entry is exactly 3 bytes, so the end is known before scanning.
  local finish = pos + 3 * count
  if env_index then
    for slot = 0, count - 1 do
      local packed
      -- Big-endian 2-byte read yields (instack<<8)|idx; "x" skips kind.
      packed, pos = unpack(">i2x", s, pos)
      if packed == env_index then
        return slot, finish
      end
    end
  end
  return nil, finish
end
-- ldump.c:154:dumpDebug.
-- [dumpInt]sizelineinfo, (char)lineinfo[sizelineinfo],
-- [dumpInt]sizeabslineinfo, {dumpInt, dumpInt}abslineinfo[sizeabslineinfo],
-- [dumpInt]sizelocvars, {dumpString, dumpInt, dumpInt}locvars[sizelocvars],
-- [dumpInt]sizeupvalues, [dumpString]upvalues[sizeupvalues].
-- This section is mostly zeroed out for stripped dumps.
-- Line numbers aren't essential, but useful to report if available. 5.4 uses a
-- new mixed absolute reference + increments lineinfo format, where consecutive
-- instructions within +127/-128 SLoC take up just 1 byte, while rare cases of
-- large line gaps get an absolute line number.
-- The starting line number must be passed in from the function header, which
-- isn't part of the optional debug data, it's present even in stripped dumps.
-- s: dump string; y: position of the debug section; linedefined: first line
-- of the function, used as the base for the relative line increments.
-- Returns a table mapping 1-based instruction index to line number (index 0
-- holds linedefined; there are no other entries when the dump carries no
-- line info) and the position just past the section.
local function parse_debug (s, y, linedefined)
  local lineinfo = {}
  local n, x = parse_size(s, y)
  for j = 1, n do
    lineinfo[j], x = unpack("b", s, x)
  end
  local abslineinfo = {}
  n, x = parse_size(s, x)
  for _ = 1, n do
    local pc, line
    pc, x = parse_size(s, x)
    line, x = parse_size(s, x)
    -- The dumped pc counts instructions from 0, whereas lineinfo here (like
    -- the instruction indices recorded by parse_code) counts from 1, so the
    -- entry belongs one slot further along.
    abslineinfo[pc + 1] = line
  end
  local ignored
  -- Local variable records (name, start, end) are read only to step over.
  n, x = parse_size(s, x)
  for _ = 1, n do
    ignored, x = parse_string(s, x)
    ignored, x = parse_size(s, x)
    ignored, x = parse_size(s, x)
  end
  -- Upvalue names are stepped over as well.
  n, x = parse_size(s, x)
  for _ = 1, n do
    ignored, x = parse_string(s, x)
  end
  -- Turn increments into absolute line numbers, restarting from the
  -- absolute value wherever one was recorded.
  lineinfo[0] = linedefined
  for j = 1, #lineinfo do
    lineinfo[j] = abslineinfo[j] or lineinfo[j-1] + lineinfo[j]
  end
  return lineinfo, x
end
-- ldump.c:179:dumpFunction.
-- [dumpString]source, [dumpSize]linedefined, [dumpSize]lastlinedefined,
-- (char)numparams, (char)is_vararg, (char)maxstacksize,
-- dumpCode, dumpConstants, dumpUpvalues, dumpProtos, dumpDebug.
-- cb: callback invoked as cb(name, is_write, source, line) for every table
-- access through the upvalue identified as the global _ENV.
-- s: dump string; x: position of the function; ins_fmt: string.unpack format
-- of one Instruction; env_index: encoded identity of _ENV in the parent (nil
-- if unknown); parent: source name inherited when this function has none.
-- Returns the position just past the function.
local function parse_function (cb, s, x, ins_fmt, env_index, parent)
  local source, linedefined, lastlinedefined
  source, x = parse_string(s, x)
  source = source or parent
  linedefined, x = parse_size(s, x)
  lastlinedefined, x = parse_size(s, x)
  if linedefined == 0 then
    -- chunk _ENV has (char)instack==1, (char)idx==0. See parse_upvalues.
    env_index = 256
  end
  -- skip numparams, is_vararg, maxstacksize
  x = unpack("xxx", s, x)
  local code, constants, nprotos, lineinfo
  code, x = parse_code(s, x, ins_fmt)
  constants, x = parse_constants(s, x)
  -- From here on env_index is this function's own upvalue index for _ENV.
  env_index, x = parse_upvalues(s, x, env_index)
  -- This should properly be broken out into its own function, but it's pretty
  -- small and needs to be corecursive with parse_function.
  nprotos, x = parse_size(s, x)
  for _ = 1, nprotos do
    x = parse_function(cb, s, x, ins_fmt, env_index, source)
  end
  lineinfo, x = parse_debug(s, x, linedefined)
  if not env_index then
    -- No upvalue refers to the global _ENV, so nothing to report.
    return x
  end
  for k = 1, #code do
    local access = code[k]
    if access[1] == env_index then
      local line = lineinfo[access[2]]
      if not line then
        -- No per-instruction line: fall back to the function's extent.
        if linedefined == 0 then
          line = "main"
        else
          line = linedefined .. "-" .. lastlinedefined
        end
      end
      cb(constants[access[4]+1], access[3], source or "=stripped", line)
    end
  end
  return x
end
-- ldump.c:197:dumpHeader.
-- "\x1bLua"[:4], (char)LUAC_VERSION==0x54, (char)LUAC_FORMAT==0,
-- LUAC_DATA=="\x19\x93\r\n\x1a\n"[:6],
-- (char)sizeof(Instruction), (char)sizeof(lua_Integer), (char)sizeof(lua_Number),
-- (lua_Integer)LUAC_INT==0x5678, (lua_Number)LUAC_NUM==370.5.
-- Additionally, skip an extra byte: ldump.c:221. (char)sizeupvalues.
-- s: the complete dump string.
-- Returns the string.unpack format for one Instruction ("I" followed by the
-- instruction size recorded in the header) and the position just past the
-- header. Fails (via assert) when the signature, version/format, or any of
-- the sanity-check values does not match.
local function parse_header (s)
  local sig, ver, fmt, lit, isz, int, num, x = unpack("c4BBc6Bxxjnx", s, 1)
  assert(sig == "\x1bLua", "not a dump")
  -- The version byte checked is 0x54, so the message must name 5.4.
  assert(ver == 0x54 and fmt == 0, "not a standard 5.4 dump")
  assert(lit == "\x19\x93\r\n\x1a\n", "mangled dump (conversions?)")
  assert(int == 0x5678, "mangled dump (wrong-endian?)")
  assert(num == 370.5, "mangled dump (floats broken?)")
  return "I"..isz, x
end
-- Entry point for scanning a complete dump.
-- s: dump string; cb: callback handed through to parse_function.
-- Validates the header, then parses the main function starting right after
-- it; returns whatever parse_function returns.
local function check_dump (s, cb)
  local instruction_format, body_start = parse_header(s)
  return parse_function(cb, s, body_start, instruction_format)
end
-- Check a function's bytecode for accesses to globals that are not allowed.
-- env: table whose keys are the permitted global names; defaults to the
-- current _ENV. A name is permitted when env[name] is anything but nil.
-- fun: a function (dumped here), an already-dumped string, or nil to check
-- the function that called strict_mode.
-- Raises an error, attributed to the caller, listing every unexpected global
-- read/write as "source:line: global read|write: name"; returns nothing
-- otherwise.
local function strict_mode (env, fun)
  if not fun then
    -- Level 2 is whoever called strict_mode.
    fun = string.dump(debug.getinfo(2, "f").func)
  elseif type(fun) == "function" then
    fun = string.dump(fun)
  end
  env = env or _ENV
  local accum = {}
  check_dump(fun, function (key, is_write, source, line)
    -- Compare against nil rather than testing truthiness: a global that
    -- exists but currently holds false is still a known global.
    if env[key] == nil then
      -- Drop the leading "@" / "=" marker from the source name.
      source = source:sub(2)
      local action = is_write and "write: " or "read: "
      accum[#accum+1] = source..":"..line..": global "..action..key
    end
  end)
  if #accum > 0 then
    -- Index 0 holds the headline so a single concat builds the message.
    accum[0] = "unexpected globals"
    error(table.concat(accum, "\n\t", 0), 2)
  end
end
-- We explicitly list globals we're using to avoid the failure case where the
-- check code accidentally writes new globals, which has definitely happened.
-- Self-check: called without a `fun` argument, strict_mode dumps the function
-- that called it (this chunk) and checks it against the whitelist given here.
strict_mode{assert=1, debug=1, error=1, string=1, table=1, type=1, loadfile=1}
-- NOTE(review): the absence of a stack level 3 presumably means this file is
-- being run directly as a script rather than loaded by other Lua code; confirm.
if debug and not debug.getinfo(3, "") then
-- Treat each argument passed to this chunk as a file name to check.
local files = {...}
for j=1,#files do
-- assert turns a loadfile failure into an error; env is nil, so each file is
-- checked against the current _ENV.
strict_mode(nil, assert(loadfile(files[j])))
end
end
-- The chunk's result is the checker function itself.
return strict_mode