-
Notifications
You must be signed in to change notification settings - Fork 7
/
lib.rs
445 lines (422 loc) · 19.5 KB
/
lib.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
use std::default::Default;
use std::option::Option;
use std::vec::Vec;
/**
* The structs here represent the intermediate representation of the program.
* In particular:
* * All variables are represented with the location it should be read/written to (i.e. local#1.var#1, or local#1.var#1.var#1, or global#1).
* * Note: A param is implicitly a local (with the same index space), and param indices preceed the local indices.
* * Note: Params may also be modified (just like locals and globals). They do not affect the value in the caller (because of pass-by-value).
* * All expressions have optional type annotation (which will be `any` for now).
* * All functions are top level and don't have closures (the closure is just treated as another parameter).
* * Top level code are put in the entry point function.
* *
* * There are no constant/variable declarations - semantic analyser should hoist all declarations:
* * Semantic analyser should generate a struct to put all 'local variables' into, and place this struct in Func::locals. Same for global variables, put them in a single struct that is in Program::globals.
* * If params need to be captured, semantic analyser should copy them into the struct too.
* * IR optimisation passes might pull out fields and put them in separate local variables (can create a new struct type so we don't disturb the old one, then dead code elimination can remove the old struct).
* * There is no difference between constant declarations and variable declarations in the IR.
* *
* * Pre-generated functions can be something like `+(any, any) -> any`, which will internally query the type of its arguments and then forward it to the `Add` primitive or the builtin concat(string, string) function.
* *
* * todo!: For optimisation of eliding storage locals in gc roots during a function call, we need to have information about the lifetime of each local. This IR should be modified to accomodate it.
* * * Or we could do some control flow analysis to figure out when the value in a variable is actually useful.
* * * * E.g. if the current statement never leads to any reads (before being overwritten), then this variable doesn't contain useful information.
* * * It should also figure out if a variable is read but never written between two function calls, then we can know if we need to pop then push it back to the gc roots, or just tee it from the gc roots into locals.
* * todo!: Also, functions should be annotated with a flag whether they might do heap allocations.
*/
pub mod error;
pub mod opt;
pub mod superset;
// mod primfunc;
// If it stores value `func_idx`, then it refers to imports[func_idx] if (func_idx < imports.len())
// or funcs[func_idx - imports.len()] otherwise.
pub type FuncIdx = usize;
#[derive(Debug, Default)]
pub struct Program {
pub struct_types: Vec<Box<[VarType]>>, // stores the list of fields of all structs (i.e. objects) in the program (indexed with typeidx)
pub imports: Box<[Import]>, // list of imported functions
pub funcs: Vec<Func>, // list of functions (some will be pre-generated for the pre-declared operators, e.g. + - * / % === and more)
pub globals: Vec<VarType>, // list of global variables
pub entry_point: FuncIdx, // index of function to run when the program is started
}
#[derive(Eq, PartialEq, Copy, Clone, Debug)]
pub enum VarType {
Any, // used if we don't know the type contained in the variable. Most of the time we will use this. Generates a variant in the output program unless it gets optimised away.
Unassigned, // unassigned (due to hoisting)
Undefined,
Number,
Boolean,
String, // reference type
Func, // holds a function ptr and a closure
StructT { typeidx: usize }, // reference type; typeid starts from zero and should be in range [0, object_types.len()).
}
impl Default for VarType {
fn default() -> Self {
VarType::Any
}
}
impl VarType {
pub fn tag(self) -> i32 {
match self {
VarType::Any => panic!(
"ICE: IR->Wasm: Cannot query tag of Any - tag is the i32 value encoded in an Any"
),
VarType::Unassigned => 0,
VarType::Undefined => 1,
VarType::Number => 2,
VarType::Boolean => 3,
VarType::String => 4,
VarType::Func => 5,
VarType::StructT { typeidx } => (NUM_PRIMITIVE_TAG_TYPES + typeidx) as i32,
}
}
}
pub const NUM_PRIMITIVE_TAG_TYPES: usize = 6; // does not include Any
#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Hash, Debug)]
pub struct Import {
pub module_name: String,
pub entity_name: String,
pub params: Box<[ImportValType]>, // list of function parameters
pub result: ImportValType, // return type
}
// Types that can be imported (subset of VarType)
#[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone, Hash, Debug)]
pub enum ImportValType {
Undefined, // compiles into nothing
Number, // compiles into f64 parameter
String, // compiles into i32(ptr) parameter, the host should look into our linear memory to figure out the length and the actual string content.
}
#[derive(Debug)]
pub struct Func {
pub params: Box<[VarType]>, // list of function parameters (including closure)
pub result: Option<VarType>, // if `None`, it means that this function never returns (e.g. it guarantees to trap or infinite loop, see the generated runtime error function)
pub expr: Expr, // body of the function, must either return Void or return the correct result type
pub signature_filter: Vec<(Box<[VarType]>, VarType, FuncIdx)>, // list of possibly acceptable signatures (param_types, return_type, constrained_func).
// If a signature is not in this list, then it will be guaranteed to error;
// but converse need not be true. All entries must be a subtype of `params`.
// `constrained_func` (funcidx) is a version of this function that has the specified param_types and return_type of this entry.
// (i.e. if the caller can guarantee to have the correct types,
// then it can emit code to call the constrained_func instead of the current one)
// this list should not contain the entry where all the param types and return type are identical to the current one
// (because there is no use for a self-reference)
}
#[derive(Debug, Clone)]
pub struct Expr {
pub vartype: Option<VarType>, // the type set that this Expr is guaranteed to evaluate to (if unknown, just use ValType::Any). Users of this expression will generate code that only works on this type. It may also affect the memory layout of the expr. Use `None` if this function is guaranteed to never return (aka it returns Void).
pub kind: ExprKind, // the variant kind of this expression
}
// Represents any lvalue (assignable value)
#[derive(Debug, Clone)]
pub enum TargetExpr {
// if the field is a struct, then `next` can (but not necessarily) refer to a field inside the struct
Global {
globalidx: usize,
next: Option<Box<StructField>>,
}, // for targetting a global variable
Local {
localidx: usize,
next: Option<Box<StructField>>,
}, // for targetting a local variable
}
#[derive(Debug, Clone)]
pub struct StructField {
pub typeidx: usize, // the struct type id (index into struct_types)
pub fieldidx: usize, // the index of the referred field in the struct
pub next: Option<Box<StructField>>, // if the field is a struct, then this can (but not necessarily) refer to a field inside the struct
}
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct OverloadEntry {
// pub signature: Box<[VarType]>, // this does not include the closure param
pub funcidx: FuncIdx,
pub has_closure_param: bool, // true if the closure value shall be passed into this overload
// (the first parameter of a function that has closure must be the static type of PrimFunc::closure)
}
#[derive(Debug, Clone)]
pub enum ExprKind {
PrimUndefined, // also functions as a "no-op"
PrimNumber {
val: f64,
}, // e.g. `2`
PrimBoolean {
val: bool,
}, // e.g. `true`
PrimString {
val: String,
}, // e.g. `"hello world"`, may be placed in a region of memory immune to garbage collection
PrimStructT {
typeidx: usize,
}, // a struct (Any will be set to Unassigned variant; String, Func::closure, StructT will be set to something that the GC can recognise as a "null pointer" for that VarType)
PrimFunc {
funcidxs: Box<[OverloadEntry]>, // overload set, matched in priority from back to front. Backend shall coalesce identical callstubs whenever possible.
closure: Box<Expr>, // Closure, that must be a pointer (that could be null) or undefined (encoded as a GC undefined/null, and no overload should want a closure param), and will be type-erased (the pointer requirement allows the GC to understand it)
}, // e.g. `() => {}`. Only the given funcidx will know the type of the closure.
TypeCast {
/*
Encodes a narrowing conversion; effectively does something like this:
if typeof(test) == expected {
let fresh = cast<expected>(test); // this line is only present if `create_narrow_local` is true
<true_expr>
}
else {
<false_expr>
}
We expect the TypeCast expression to be recognised and manipulated/eliminated by a type inference engine.
*/
test: Box<Expr>,
expected: VarType,
create_narrow_local: bool,
true_expr: Box<Expr>, // if `create_narrow_local` == true, will have an additional local implicitly declared and initialized to the one in `test`, but with the correct static type.
false_expr: Box<Expr>,
}, // returns whatever the branches can return, like a conditional. Generated by compiler at an if-stmt or function call. Also in is_number() etc builtins.
VarName {
source: TargetExpr,
}, // something that is located somewhere in memory (i.e. an lvalue), e.g. `x`. Note: even if `x` has type `any`, the Expr::vartype of this expr can still be more specific, if the compiler can prove its correctness. i.e. we can load from a variant without checking the tag, if we already proved that the data must be of a particular VarType.
PrimAppl {
prim_inst: PrimInst,
args: Box<[Expr]>,
}, // primitive operations (e.g. number+number) hardcoded into the compiler. Expr must have the correct VarType. Should not be added directly by semantic analyser, because parser is not type-aware.
Appl {
func: Box<Expr>,
args: Box<[Expr]>,
location: SourceLocation, // will be displayed in the error message if the signature mismatches
}, // function application (operators are functions too). Closure parameter is implicitly prepended to the argument list. Called using Source indirect calling convention (closure, length, and callerid as i32 params; others are Any and on unprotected stack). Static type of func must be func.
DirectAppl {
funcidx: FuncIdx,
args: Box<[Expr]>,
}, // direct function application (operators are functions too). No closure will be prepended.
Conditional {
cond: Box<Expr>,
true_expr: Box<Expr>,
false_expr: Box<Expr>,
}, // a ? b : c // Static type of cond must be bool.
Declaration {
local: VarType, // local variable being declared
init: Option<Box<Expr>>, // initialization expr for this declaration (if None, then it is default initialized in a way determined by the GC); the declaration itself is not accessible here
contained_expr: Box<Expr>, // expr in which the newly declared local is accessible
}, // has the type of the `expr` (returns the value of the contained expr)
Assign {
target: TargetExpr,
expr: Box<Expr>,
}, // Assigment statement (can assign from specific type to Any, but not vice versa); has Undefined type
Return {
expr: Box<Expr>,
}, // Return statmenent; has Void type
Break {
num_frames: usize, // the number of Blocks and Loops to jump out of; zero refers to the Block or Loop closest to this Break
expr: Box<Expr>, // the expr that the target Block should return, should not be noreturn, and should fit into the vartype of the target Block.
}, // jumps to the end of an enclosing Block or the beginning of an enclosing Loop; Break is noreturn
Block {
expr: Box<Expr>,
}, // Jump landing for Break; type must be at least as wide as expr.vartype and all Breaks that target this block
Sequence {
content: Vec<Expr>,
}, // returns the value of the last expression, or `undefined` if there are zero expressions
Trap {
code: u32,
location: SourceLocation, // will be displayed in the error message
}, // has Void type
}
// enum of possible primitive functions, used by pre-declared operators, or added during type-checking optimisation
// these functions expect a particular type signature
// this is subject to change
// but the code that converts IR to Wasm needs to use this to generate the appropriate bytecode.
#[derive(Eq, PartialEq, Copy, Clone, Debug)]
pub enum PrimInst {
NumberAdd,
NumberSub,
NumberMul,
NumberDiv,
NumberRem,
NumberEq,
NumberNeq,
NumberGt,
NumberLt,
NumberGe,
NumberLe,
BooleanEq,
BooleanNeq,
BooleanAnd,
BooleanOr,
BooleanNot,
NumberNegate,
StringAdd,
StringEq,
StringNeq,
StringGt,
StringLt,
StringGe,
StringLe,
}
pub const NUM_PRIM_INST: u8 = PrimInst::StringLe as u8 + 1;
// enum of pre-declared operators
#[derive(Eq, PartialEq, Copy, Clone, Debug)]
#[repr(u8)]
pub enum Builtin {
Plus,
Minus,
Times,
Divide,
Modulo,
Eq,
Neq,
Gt,
Lt,
Ge,
Le,
And,
Or,
Not,
UnaryMinus,
}
pub const NUM_BUILTINS: u8 = Builtin::UnaryMinus as u8 + 1;
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct Position {
pub line: u32,
pub column: u32,
}
/*
Represents a location in a source file.
*/
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct SourceLocation {
pub file: u32,
pub start: Position,
pub end: Position,
}
impl Program {
// Creates an empty Program, and an array of pre-declared operators that can be used.
// `funcs` will have pre-declared operators, but might also have other primitive functions (e.g. typed version of pre-declared operators).
// The caller should only use the functions that match the funcidxs specified in the returned array of pre-declared operators.
// Other things in the `funcs` array should not be used.
pub fn new_with_imports(imports: Box<[Import]>) -> Program {
let mut program = Program {
struct_types: Default::default(),
imports: imports,
funcs: Default::default(),
globals: Default::default(),
entry_point: Default::default(),
};
//primfunc::add_prim_inst(program);
program
/*let (funcs, builtin_funcidxs) = primfunc::make_pregenerated_funcs(imports.len());
let mut ir_imports: Vec<Import> = Vec::new();
let mut returned_map: HashMap<String, FuncIdx> = HashMap::new();
for (s, i) in imports {
let idx: FuncIdx = ir_imports.len();
ir_imports.push(i);
returned_map.insert(s, idx);
}
(
Program {
struct_types: Default::default(),
imports: ir_imports.into_boxed_slice(),
funcs: funcs,
globals: Default::default(),
entry_point: Default::default(),
},
builtin_funcidxs,
returned_map,
)*/
}
/**
* Insert func with proper import offset. Returns the ir::FuncIdx (with proper offset)
*/
pub fn add_func(&mut self, func: Func) -> FuncIdx {
let ret = self.imports.len() + self.funcs.len();
self.funcs.push(func);
ret
}
pub fn get_func(&self, funcidx: FuncIdx) -> &Func {
assert!(funcidx >= self.imports.len());
&self.funcs[funcidx - self.imports.len()]
}
pub fn get_func_mut(&mut self, funcidx: FuncIdx) -> &mut Func {
assert!(funcidx >= self.imports.len());
&mut self.funcs[funcidx - self.imports.len()]
}
}
impl Func {
/**
* Creates a dummy function that should be replaced later.
*/
pub fn new() -> Func {
Func {
params: Box::new([]),
result: None,
expr: Expr {
vartype: Some(VarType::Undefined),
kind: ExprKind::PrimUndefined,
},
signature_filter: Default::default(),
}
}
pub fn new_with_params_and_result(params: &[VarType], result: VarType) -> Func {
Func {
params: params.into(),
result: Some(result),
expr: Expr {
vartype: Some(VarType::Undefined),
kind: ExprKind::PrimUndefined,
},
signature_filter: Default::default(),
}
}
pub fn signature(&self) -> (&[VarType], Option<VarType>) {
(&self.params, self.result)
}
}
impl PrimInst {
// returns the (param_types, result_type)
// if result is None, then it is guaranteed to never return
pub fn signature(&self) -> (&'static [VarType], Option<VarType>) {
match self {
Self::NumberAdd
| Self::NumberSub
| Self::NumberMul
| Self::NumberDiv
| Self::NumberRem => (&[VarType::Number, VarType::Number], Some(VarType::Number)),
Self::NumberEq
| Self::NumberNeq
| Self::NumberGt
| Self::NumberLt
| Self::NumberGe
| Self::NumberLe => (&[VarType::Number, VarType::Number], Some(VarType::Boolean)),
Self::BooleanEq | Self::BooleanNeq | Self::BooleanAnd | Self::BooleanOr => (
&[VarType::Boolean, VarType::Boolean],
Some(VarType::Boolean),
),
Self::BooleanNot => (&[VarType::Boolean], Some(VarType::Boolean)),
Self::NumberNegate => (&[VarType::Number], Some(VarType::Number)),
Self::StringAdd => (&[VarType::String, VarType::String], Some(VarType::String)),
Self::StringEq
| Self::StringNeq
| Self::StringGt
| Self::StringLt
| Self::StringGe
| Self::StringLe => (&[VarType::String, VarType::String], Some(VarType::Boolean)),
}
}
}
impl From<ImportValType> for VarType {
fn from(x: ImportValType) -> Self {
match x {
ImportValType::Undefined => VarType::Undefined,
ImportValType::Number => VarType::Number,
ImportValType::String => VarType::String,
}
}
}
impl Expr {
pub fn is_prim_undefined(&self) -> bool {
if let Expr {
vartype: Some(VarType::Undefined),
kind: ExprKind::PrimUndefined,
} = self
{
true
} else {
false
}
}
}