diff --git a/Makefile b/Makefile index 678bb2f..e26bcea 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ protect: .PHONY: README README: - $(PY) readme.py + $(PY) readme.py || echo "==> Failed to create README.md: ./main does not exist" .PHONY: test test: build diff --git a/README.md b/README.md index 95c6a67..6d5b6c0 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ ![CI Status](https://github.com/ethanuppal/cs3110_compiler/actions/workflows/ci.yaml/badge.svg) > "x86 is simple trust me bro" -> Last updated: 2024-05-07 11:28:52.101596 +> Last updated: 2024-05-08 22:10:33.402786 ``` $ ./main -h diff --git a/lib/core/util.ml b/lib/core/util.ml index 156226c..a7769cb 100644 --- a/lib/core/util.ml +++ b/lib/core/util.ml @@ -54,6 +54,15 @@ let merge_paths paths = |> List.filter (( = ) "" >> not) |> String.concat "/" +(** [basename path] is largest suffix of [path] not containing the character + ['/']. *) +let basename = + let rec basename_aux = function + | [] | '/' :: _ -> "" + | head :: tail -> basename_aux tail ^ String.make 1 head + in + String.to_seq >> List.of_seq >> List.rev >> basename_aux + (** [pp_of string_of] is a pretty printer for a type with the string conversion function [string_of] that simply prints the result of [string_of] inline. *) let pp_of string_of fmt x = Format.fprintf fmt "%s" (string_of x) diff --git a/lib/frontend/analysis.ml b/lib/frontend/analysis.ml index 0d205cb..4c6a25d 100644 --- a/lib/frontend/analysis.ml +++ b/lib/frontend/analysis.ml @@ -1,195 +1,222 @@ open Ast +open Util + +type analysis_error_info = + | GeneralInfo + | NameInfo of { symbol : string } + | TypeInfo of + [ `Mismatch of Type.t * Type.t + | `InvalidSig of string * Type.t list + | `DerefRValue of Type.t + ] exception - TypeInferenceError of { - domain : string; - symbol : string option; + AnalyzerError of { + info : analysis_error_info; + msg : string option; ast : (expr, stmt) Either.t; - unify : (Type.t * Type.t) option; } +let general_error ?(msg = "") ast = + AnalyzerError + { info = GeneralInfo; msg = (if msg = "" then None else Some msg); ast } + +let name_error symbol ?(msg = "") ast = + AnalyzerError + { + info = NameInfo { symbol }; + msg = (if msg = "" then None else Some msg); + ast; + } + +let type_mismatch_error exp_ty act_ty ?(msg = "") ast = + AnalyzerError + { + info = TypeInfo (`Mismatch (exp_ty, act_ty)); + msg = (if msg = "" then None else Some msg); + ast; + } + +let type_sig_error name invalid_tys ?(msg = "") ast = + AnalyzerError + { + info = TypeInfo (`InvalidSig (name, invalid_tys)); + msg = (if msg = "" then None else Some msg); + ast; + } + +let deref_rval_error ty ?(msg = "") ast = + AnalyzerError + { + info = TypeInfo (`DerefRValue ty); + msg = (if msg = "" then None else Some msg); + ast; + } + +(** [bind_var_to_type ctx name ty ast] binds [name] to have [ty] in [ctx] as + called in analyzing node [ast]. + + @raise AnalyzerError if [name] has already been bound in the top scope. *) +let bind_name_to_type ctx name ty ast = + match Context.get_local ctx name with + | None -> Context.insert ctx name ty + | Some _ -> raise (name_error name ~msg:"invalid redeclaration" ast) + +(** [get_var_type ctx name ast] is the type of [name] in [ctx] as called in + analyzing node [ast]. + + @raise AnalyzerError if [name] is not bound in [ctx]. *) +let get_var_type ctx name ast = + match Context.get ctx name with + | Some ty -> ty + | None -> raise (name_error name ~msg:"unbound variable" ast) + +let analyzer_error_to_string info msg _ = + let msg_str = + match msg with + | None -> "" + | Some msg -> ": " ^ msg + in + match info with + | GeneralInfo -> "Analyzer error" ^ msg_str + | NameInfo { symbol } -> "Name error" ^ msg_str ^ ": '" ^ symbol ^ "'" + | TypeInfo issue -> + let start_str = "Type error" ^ msg_str in + let rest_str = + match issue with + | `Mismatch (exp_ty, act_ty) -> + Printf.sprintf "expected %s but received %s" (Type.to_string exp_ty) + (Type.to_string act_ty) + | `InvalidSig (token, invalid_tys) -> + Printf.sprintf + "no overload for '%s' exists with parameter types (%s)" token + (invalid_tys |> List.map Type.to_string |> String.concat ", ") + | `DerefRValue ty -> + Printf.sprintf "cannot dereference r-value of type %s" + (Type.to_string ty) + in + start_str ^ ": " ^ rest_str + let () = Printexc.register_printer (function - | TypeInferenceError { domain; symbol; ast = _; unify } -> - let result = Printf.sprintf "Type %s error" domain in - let result = - match symbol with - | None -> result - | Some symbol -> result ^ " (symbol='" ^ symbol ^ "')" - in - let result = - match unify with - | None -> result - | Some (lhs, rhs) -> - result - ^ Printf.sprintf ": attempt to unify %s and %s" - (Type.to_string lhs) (Type.to_string rhs) - in - Some result + | AnalyzerError { info; msg; ast } -> + Some (analyzer_error_to_string info msg ast) | _ -> None) -(** After [infer_expr ctx hint expr], [expr] will be assigned a type based on - [ctx] and [hint]. +(** [infer_expr ctx expr] is the type [expr] will be assigned a type based on + [ctx]. - @raise TypeInferenceError on failure. *) -let rec infer_expr ctx hint expr = - match expr with - | Var var -> - (match Context.get ctx var.name with - | Some _ -> () - | None -> - raise - (TypeInferenceError - { - domain = "resolution"; - symbol = Some var.name; - ast = Left expr; - unify = None; - })); - var.ty <- Context.get ctx var.name; - if var.ty = None then - raise - (TypeInferenceError - { - domain = "resolution"; - symbol = Some var.name; - ast = Left expr; - unify = None; - }) - else if hint <> None && var.ty <> hint then - raise - (TypeInferenceError - { - domain = "unification"; - symbol = Some var.name; - ast = Left expr; - unify = Some (Option.get hint, Option.get var.ty); - }) - | ConstInt _ -> ( - match hint with - | None | Some (Type.Primitive Int63) | Some Any -> () - | Some other -> - raise - (TypeInferenceError - { - domain = "unification"; - symbol = None; - ast = Left expr; - unify = Some (other, Type.Primitive Int63); - })) - | ConstBool _ -> ( - match hint with - | None | Some (Type.Primitive Bool) | Some Any -> () - | Some other -> - raise - (TypeInferenceError - { - domain = "unification"; - symbol = None; - ast = Left expr; - unify = Some (other, Type.Primitive Bool); - })) - | Infix infix -> - (match infix.op with - | Plus | Minus | Times | Divide | Mod | BitAnd -> - infer_expr ctx (Some Type.int_prim_type) infix.lhs; - infer_expr ctx (Some Type.int_prim_type) infix.rhs; - infix.ty <- Some Type.int_prim_type - | Equals -> - infer_expr ctx None infix.lhs; - infer_expr ctx None infix.rhs; - infix.ty <- Some Type.bool_prim_type); - if hint <> None && hint <> Some Type.any_type && hint <> infix.ty then - raise - (TypeInferenceError - { - domain = "unification"; - symbol = None; - ast = Left expr; - unify = Some (Option.get hint, Option.get infix.ty); - }) - | Prefix prefix -> ( - match prefix.op with - | Plus | Minus -> - infer_expr ctx (Some Type.int_prim_type) prefix.rhs; - prefix.ty <- Some Type.int_prim_type - | Times -> - infer_expr ctx (Some Type.(Pointer any_type)) prefix.rhs; - prefix.ty <- Option.map Type.deref (type_of_expr prefix.rhs) - | BitAnd -> - if expr_is_const prefix.rhs then - raise - (TypeInferenceError - { - domain = "combination"; - symbol = None; - ast = Left prefix.rhs; - unify = None; - }); - infer_expr ctx None prefix.rhs; - prefix.ty <- - Some (Type.Pointer (Option.get (type_of_expr prefix.rhs))) - | _ -> + Side Effect: [expr.ty] will be updated to reflect the resulting type. + + @raise AnalyzerError on failure. *) +let rec infer_expr (ctx : Type.t Context.t) expr = + let infer_expr_aux expr = + match expr with + | Var var -> var.ty <- Some (get_var_type ctx var.name (Left expr)) + | ConstInt _ -> () + | ConstBool _ -> () + | Infix infix -> ( + let lhs_ty = infer_expr ctx infix.lhs in + let rhs_ty = infer_expr ctx infix.rhs in + let raise_error () = raise - (TypeInferenceError - { - domain = "combination"; - symbol = None; - ast = Left expr; - unify = None; - })) - -(** @raise TypeInferenceError on failure. *) -let rec infer_stmt ctx stmt = - match stmt with + (type_sig_error (op_to_string infix.op) [ lhs_ty; rhs_ty ] + (Left expr)) + in + match infix.op with + | Plus | Minus | Times | Divide | Mod -> + if lhs_ty = Type.int_prim_type && rhs_ty = Type.int_prim_type then + infix.ty <- Some Type.int_prim_type + else raise_error () + | Equals -> + if lhs_ty = rhs_ty then infix.ty <- Some Type.bool_prim_type + else raise_error () + | _ -> raise_error ()) + | Prefix prefix -> ( + let rhs_ty = infer_expr ctx prefix.rhs in + let raise_error () = + raise (type_sig_error (op_to_string prefix.op) [ rhs_ty ] (Left expr)) + in + match prefix.op with + | Plus | Minus -> + if rhs_ty = Type.int_prim_type then + prefix.ty <- Some Type.int_prim_type + else raise_error () + | Times -> ( + match Type.deref rhs_ty with + | Some ty -> prefix.ty <- Some ty + | None -> raise_error ()) + | BitAnd -> + if expr_is_const prefix.rhs then + raise (deref_rval_error rhs_ty (Left expr)); + prefix.ty <- Some (Type.Pointer rhs_ty) + | _ -> raise_error ()) + in + infer_expr_aux expr; + Option.get (type_of_expr expr) + +(* TODO: add Terminal and Nonterminal checks *) + +(** [infer_stmt ctx stmt] is the type [stmt] will be assigned a type based on + [ctx]. + + @raise AnalyzerError on failure. *) +let rec infer_stmt (ctx : Type.t Context.t) stmt = + (match stmt with | Declaration { name; hint; expr } -> - (infer_expr ctx hint expr; - match (hint, type_of_expr expr) with - | None, None -> - raise - (TypeInferenceError - { - domain = "resolution"; - symbol = Some name; - ast = Right stmt; - unify = None; - }) - | Some _, None | None, Some _ -> () - | Some t1, Some t2 -> - if t1 <> t2 && t1 <> Type.any_type then - raise - (TypeInferenceError - { - domain = "unification"; - symbol = Some name; - ast = Right stmt; - unify = Some (t1, t2); - })); - Context.insert ctx name (type_of_expr expr |> Option.get) - | Print expr -> infer_expr ctx None expr - | Function { name = _; body } -> infer body + let expr_ty = infer_expr ctx expr in + (match hint with + | None -> () + | Some hint_ty -> + if hint_ty <> expr_ty then + raise + (type_mismatch_error hint_ty expr_ty ~msg:"in let statement" + (Right stmt))); + bind_name_to_type ctx name expr_ty (Right stmt) + | Print expr -> infer_expr ctx expr |> ignore + | Function _ -> + raise + (general_error ~msg:"functions can only be written at top level" + (Right stmt)) | If { cond; body } -> - infer_expr ctx (Some Type.bool_prim_type) cond; - infer body + let cond_ty = infer_expr ctx cond in + if cond_ty <> Type.bool_prim_type then + raise + (type_mismatch_error Type.bool_prim_type cond_ty + ~msg:"in if statement condition" (Right stmt)); + infer_body ctx body | Assignment (name, expr) -> - let hint = - match Context.get ctx name with - | None -> - raise - (TypeInferenceError - { - domain = "resolution"; - symbol = Some name; - ast = Right stmt; - unify = None; - }) - | Some hint -> hint - in - infer_expr ctx (Some hint) expr - | Call _ -> failwith "Call case not implemented" + let exp_ty = get_var_type ctx name (Right stmt) in + let expr_ty = infer_expr ctx expr in + if exp_ty <> expr_ty then + raise + (type_mismatch_error exp_ty expr_ty + ~msg:"variable types cannot be modified" (Right stmt)) + | Call _ -> failwith "not impl"); + Type.Nonterminal -and infer prog = - let ctx = Context.make () in +(* TODO: final statement always needs to be Terminal *) +and infer_body ctx stmts = Context.push ctx; - List.iter (infer_stmt ctx) prog; + List.iter (infer_stmt ctx >> ignore) stmts; Context.pop ctx + +let infer prog = + let ctx : Type.t Context.t = Context.make () in + Context.push ctx; + prog + |> List.iter (fun stmt -> + match stmt with + | Function { name; params; return; body } -> + let fun_ty = Type.FunctionType { params; return } in + bind_name_to_type ctx name fun_ty (Right stmt); + Context.push ctx; + List.iter + (fun (name, ty) -> bind_name_to_type ctx name ty (Right stmt)) + params; + infer_body ctx body; + Context.pop ctx + | _ -> + raise + (general_error ~msg:"only functions can be written at top level" + (Right stmt))) diff --git a/lib/frontend/analysis.mli b/lib/frontend/analysis.mli index db8c0f6..77456d2 100644 --- a/lib/frontend/analysis.mli +++ b/lib/frontend/analysis.mli @@ -1,13 +1,21 @@ +type analysis_error_info = + | GeneralInfo + | NameInfo of { symbol : string } + | TypeInfo of + [ `Mismatch of Type.t * Type.t + | `InvalidSig of string * Type.t list + | `DerefRValue of Type.t + ] + exception - TypeInferenceError of { - domain : string; - symbol : string option; + AnalyzerError of { + info : analysis_error_info; + msg : string option; ast : (Ast.expr, Ast.stmt) Either.t; - unify : (Type.t * Type.t) option; } (** After [infer prog], the contents of [prog] will have resolved types based on inference rules. - @raise TypeInferenceError on failure. *) + @raise AnalyzerError on failure. *) val infer : Ast.prog -> unit diff --git a/lib/frontend/ast.ml b/lib/frontend/ast.ml index a5a7f5a..8b3fa10 100644 --- a/lib/frontend/ast.ml +++ b/lib/frontend/ast.ml @@ -48,6 +48,8 @@ and stmt = | Assignment of string * expr | Function of { name : string; + params : (string * Type.t) list; + return : Type.t; body : stmt list; } | Print of expr @@ -105,8 +107,12 @@ let stmt_to_string = in "let " ^ name ^ hint_str ^ " = " ^ expr_to_string expr | Assignment (name, expr) -> name ^ " = " ^ expr_to_string expr - | Function { name; body } -> - "func " ^ name ^ "() {\n" + | Function { name; params; return; body } -> + "func " ^ name ^ "(" + ^ (params + |> List.map (fun (name, ty) -> name ^ ": " ^ Type.to_string ty) + |> String.concat ", ") + ^ ") -> " ^ Type.to_string return ^ " {\n" ^ (body |> List.map (stmt_to_string_aux (indent ^ add_indent)) |> String.concat "") @@ -128,60 +134,8 @@ let pp_op fmt = let open Util in op_to_string >> Format.pp_print_string fmt -let rec pp_expr fmt = function - | Var { name; _ } -> Format.pp_print_string fmt name - | ConstInt i -> Format.pp_print_int fmt i - | ConstBool b -> Format.pp_print_bool fmt b - | Infix { lhs; op; rhs; _ } -> - Format.pp_print_string fmt "("; - pp_expr fmt lhs; - Format.pp_print_string fmt " "; - pp_op fmt op; - Format.pp_print_string fmt " "; - pp_expr fmt rhs; - Format.pp_print_string fmt ")" - | Prefix { op; rhs; _ } -> - Format.pp_print_string fmt "("; - pp_op fmt op; - pp_expr fmt rhs; - Format.pp_print_string fmt ")" - -let rec pp_stmt fmt = function - | Call name -> Format.fprintf fmt "%s()" name - | Declaration { name; hint; expr } -> - Format.fprintf fmt "let %s%s = " name - (let expr_type = type_of_expr expr in - let display_type = if expr_type = None then hint else expr_type in - match display_type with - | Some ty -> ": " ^ Type.to_string ty - | None -> ""); - pp_expr fmt expr - | Assignment (name, expr) -> - Format.fprintf fmt "%s = " name; - pp_expr fmt expr - | Function { name; body } -> - Format.fprintf fmt "func %s() {" name; - (* Go down a line and indent by two *) - Format.pp_print_break fmt 0 2; - Format.pp_force_newline fmt (); - Format.pp_open_hvbox fmt 0; - Format.pp_print_list pp_stmt fmt body; - Format.pp_close_box fmt (); - Format.pp_print_cut fmt (); - Format.pp_print_string fmt "}" - | If { cond; body } -> - Format.fprintf fmt "if %s {" (expr_to_string cond); - (* Go down a line and indent by two *) - Format.pp_print_break fmt 0 2; - Format.pp_force_newline fmt (); - Format.pp_open_hvbox fmt 0; - Format.pp_print_list pp_stmt fmt body; - Format.pp_close_box fmt (); - Format.pp_print_cut fmt (); - Format.pp_print_string fmt "}" - | Print e -> - Format.pp_print_string fmt "print "; - pp_expr fmt e +let pp_expr = Util.pp_of expr_to_string +let pp_stmt = Util.pp_of stmt_to_string let pp_prog fmt prog = Format.pp_open_vbox fmt 0; diff --git a/lib/frontend/context.ml b/lib/frontend/context.ml index 9dfa2f0..b55b7d6 100644 --- a/lib/frontend/context.ml +++ b/lib/frontend/context.ml @@ -22,4 +22,7 @@ let get ctx key = in get_aux !ctx +let get_local ctx key = + if is_empty ctx then None else Scope.find_opt (top ctx) key + let to_list ctx = !ctx |> List.map (Scope.to_seq >> List.of_seq) diff --git a/lib/frontend/context.mli b/lib/frontend/context.mli index e2b0c8d..06326b0 100644 --- a/lib/frontend/context.mli +++ b/lib/frontend/context.mli @@ -28,6 +28,10 @@ val insert : 'a t -> string -> 'a -> unit top of the stack in which [key] appears, or [None] if [key] is not bound. *) val get : 'a t -> string -> 'a option +(** [get_local ctx key] is the value associated with [key] in the top scope in + [ctx], or [None] of [key] is not bound. *) +val get_local : 'a t -> string -> 'a option + (** [to_list ctx] is [ctx] as a list of key-value pair lists, where each list is a scope. Scopes that were pushed later are earlier in the result. *) val to_list : 'a t -> (string * 'a) list list diff --git a/lib/frontend/ir_gen.ml b/lib/frontend/ir_gen.ml index 070692a..342ba7f 100644 --- a/lib/frontend/ir_gen.ml +++ b/lib/frontend/ir_gen.ml @@ -93,7 +93,9 @@ and generate_stmt_lst ctx cfg block lst = let generate prog = match prog with - | Function { name = "main"; body } :: _ -> + | Function { name = "main"; params; return; body } :: _ -> + if not (List.is_empty params) then failwith "fix params in ir gen"; + if return <> Type.unit_prim_type then failwith "fix return in ir gen"; let ctx = Context.make () in Context.push ctx; let cfg = Cfg.make "main" in diff --git a/lib/frontend/lexer.mll b/lib/frontend/lexer.mll index c23c52f..a583157 100644 --- a/lib/frontend/lexer.mll +++ b/lib/frontend/lexer.mll @@ -1,12 +1,14 @@ { open Parser + + exception LexerError of string } rule read = parse | eof { EOF } | [' ' '\t' '\r'] { read lexbuf } -| '\n' { NEWLINE } -| "//" [^ '\n']* '\n' { NEWLINE } +| '\n' { Lexing.new_line lexbuf; NEWLINE } +| "//" [^ '\n']* { NEWLINE } | "==" { EQUALS } | '=' { ASSIGN } | '+' { PLUS } @@ -20,6 +22,7 @@ rule read = parse | '{' { LBRACE } | '}' { RBRACE } | ':' { COLON } +| ',' { COMMA } | "->" { ARROW } | "Int" { INT_TYPE } | "Bool" { BOOL_TYPE } @@ -31,5 +34,11 @@ rule read = parse | "while" { WHILE } | "true" { CONST_TRUE } | "false" { CONST_FALSE } -| ['a'-'z' 'A'-'Z' '_'] ['0'-'9' 'a'-'z' 'A'-'Z' '_']* { VAR (Lexing.lexeme lexbuf) } -| ['0'-'9']+ { CONST_INT (int_of_string (Lexing.lexeme lexbuf)) } +| ['a'-'z' 'A'-'Z' '_'] ['0'-'9' 'a'-'z' 'A'-'Z' '_']* { IDEN (Lexing.lexeme lexbuf) } +| ['0'-'9']+ { INT_LIT (int_of_string (Lexing.lexeme lexbuf)) } +| _ as c + { + let pos = Lexing.lexeme_start_p lexbuf in + let lnum, cnum = pos.pos_lnum, (pos.pos_cnum - pos.pos_bol + 1) in + raise (LexerError (Printf.sprintf "Lexer error: unrecognized character '%c' at %s:%d:%d" c pos.pos_fname lnum cnum )) + } diff --git a/lib/frontend/parse_lex.ml b/lib/frontend/parse_lex.ml index 256099a..fd2e633 100644 --- a/lib/frontend/parse_lex.ml +++ b/lib/frontend/parse_lex.ml @@ -1,13 +1,20 @@ -exception ParseError of string +exception ParserError of string -(** [lex_and_parse input] is the list of statements represented by the source - code string [input].contents +(** [lex_and_parse ~filename:filename input] is the list of statements + represented by the source code string [input]. Optionally, + [~filename:filename] can be passed to indicate that the path of the source + was [filename]; by default, it is [""]. - @raise ParseError on parsing error. *) -let lex_and_parse input = - let lexbuf = Lexing.from_string input in - try Parser.main Lexer.read lexbuf - with Parser.Error -> raise (ParseError "unknown parser error") - -(* https://baturin.org/blog/declarative-parse-error-reporting-with-menhir/ *) -(* https://stackoverflow.com/questions/38505920/get-the-input-string-that-raises-parsing-error-inside-the-parser *) + @raise ParserError on parsing error. *) +let lex_and_parse ?(filename = "") input = + let syntax_error_msg lexbuf = + let pos = Lexing.lexeme_start_p lexbuf in + let lnum, cnum = (pos.pos_lnum, pos.pos_cnum - pos.pos_bol + 1) in + Printf.sprintf "Syntax error at %s:%d:%d" pos.pos_fname lnum cnum + in + let parse lexbuf = Parser.main Lexer.read lexbuf in + let lexbuf = Lexing.from_string ~with_positions:true input in + Lexing.set_filename lexbuf filename; + try parse lexbuf with + | Parser.Error -> raise (ParserError (syntax_error_msg lexbuf)) + | Lexer.LexerError err -> raise (ParserError err) diff --git a/lib/frontend/parser.mly b/lib/frontend/parser.mly index fbb0ecd..ef303fd 100644 --- a/lib/frontend/parser.mly +++ b/lib/frontend/parser.mly @@ -2,11 +2,11 @@ open Ast %} -%token CONST_INT +%token INT_LIT %token CONST_TRUE CONST_FALSE -%token VAR +%token IDEN %token PLUS MINUS TIMES DIVIDE MOD EQUALS BITAND -%token LPAR RPAR LBRACE RBRACE COLON ARROW +%token LPAR RPAR LBRACE RBRACE COLON ARROW COMMA %token PRINT ASSIGN LET FUNC IF ELSE WHILE %token NEWLINE EOF %token INT_TYPE BOOL_TYPE @@ -28,24 +28,24 @@ main: | stmt NEWLINE main { $1 :: $3 } ty: - | INT_TYPE { Type.Primitive Int63 } - | BOOL_TYPE { Type.Primitive Bool } + | INT_TYPE { Type.Prim Int } + | BOOL_TYPE { Type.Prim Bool } | ty TIMES { Type.Pointer ($1) } - | VAR { Type.Var $1 } + | IDEN { Type.Var $1 } expr: | LPAR expr RPAR { $2 } - | CONST_INT { ConstInt $1 } + | INT_LIT { ConstInt $1 } | CONST_TRUE { ConstBool true } | CONST_FALSE { ConstBool false } - | VAR { Var {name = $1; ty = None} } + | IDEN { Var {name = $1; ty = None} } | expr PLUS expr { Infix {lhs = $1; op = Plus; rhs = $3; ty = None} } | expr MINUS expr { Infix {lhs = $1; op = Minus; rhs = $3; ty = None} } | expr TIMES expr { Infix {lhs = $1; op = Times; rhs = $3; ty = None} } | expr DIVIDE expr { Infix {lhs = $1; op = Divide; rhs = $3; ty = None} } | expr MOD expr { Infix {lhs = $1; op = Mod; rhs = $3; ty = None} } | expr EQUALS expr { Infix {lhs = $1; op = Equals; rhs = $3; ty = None} } - | expr BITAND expr { Infix {lhs = $1; op = BitAnd; rhs = $3; ty = None} } + | BITAND expr { Prefix {op = BitAnd; rhs = $2; ty = None} } | PLUS expr { Prefix {op = Plus; rhs = $2; ty = None} } | MINUS expr { Prefix {op = Minus; rhs = $2; ty = None} } | TIMES expr { Prefix {op = Times; rhs = $2; ty = None} } @@ -56,11 +56,17 @@ body_till_rbrace: | stmt RBRACE {[$1]} | stmt NEWLINE body_till_rbrace { $1 :: $3 } +param: + | IDEN COLON ty { ($1, $3) } + +return_type: + | ARROW ty { $2 } + stmt: | IF expr LBRACE body_till_rbrace { If {cond = $2; body = $4 } } - | VAR LPAR RPAR { Call $1 } - | LET VAR COLON ty ASSIGN expr { Declaration {name = $2; hint = Some ($4); expr = $6} } - | LET VAR ASSIGN expr { Declaration {name = $2; hint = None; expr = $4} } - | VAR ASSIGN expr { Assignment ($1, $3) } - | FUNC VAR LPAR RPAR LBRACE body_till_rbrace { Function {name = $2; body = $6} } + | IDEN LPAR RPAR { Call $1 } + | LET IDEN COLON ty ASSIGN expr { Declaration {name = $2; hint = Some ($4); expr = $6} } + | LET IDEN ASSIGN expr { Declaration {name = $2; hint = None; expr = $4} } + | IDEN ASSIGN expr { Assignment ($1, $3) } + | FUNC; name = IDEN; LPAR; params = separated_list(COMMA, param); RPAR; return_opt = option(return_type); LBRACE; body = body_till_rbrace { Function {name; params; return = if return_opt = None then Type.unit_prim_type else Option.get (return_opt); body} } | PRINT expr { Print $2 } diff --git a/lib/frontend/type.ml b/lib/frontend/type.ml index e5c39d6..89b842e 100644 --- a/lib/frontend/type.ml +++ b/lib/frontend/type.ml @@ -1,13 +1,17 @@ +open Util + module Primitive = struct - (** [t] represents primitive type. *) + (** [t] represents a primitive type. *) type t = - | Int63 + | Int | Bool + | Unit (** [to_string prim_type] is the string representation of [prim_type]. *) let to_string = function - | Int63 -> "Int" + | Int -> "Int" | Bool -> "Bool" + | Unit -> "Unit" end type stmt_type = @@ -16,25 +20,30 @@ type stmt_type = (** [t] represents a type. *) type t = - | Primitive of Primitive.t + | Prim of Primitive.t | Pointer of t | Var of string - | Any + | FunctionType of { + params : (string * t) list; + return : t; + } (** [to_string ty] is the string representation of [ty]. *) let rec to_string = function - | Primitive prim -> Primitive.to_string prim + | Prim prim -> Primitive.to_string prim | Pointer ty -> to_string ty ^ "*" | Var tvar -> tvar - | Any -> "?" - -let int_prim_type = Primitive Int63 -let bool_prim_type = Primitive Bool -let any_type = Any + | FunctionType { params; return } -> + "(" + ^ (params |> List.map (snd >> to_string) |> String.concat ", ") + ^ ") -> " ^ to_string return -(** [deref ty] is [ty'] where [ty = Pointer ty'] for some [ty']. +let int_prim_type = Prim Int +let bool_prim_type = Prim Bool +let unit_prim_type = Prim Unit - Requires: [ty] is of the above form. *) +(** [deref ty] is [Some ty'] if [ty = Pointer ty'] for some [ty'] and [None] + otherwise. *) let deref = function - | Pointer ty -> ty - | _ -> failwith "precondition" + | Pointer ty' -> Some ty' + | _ -> None diff --git a/lib/user/driver.ml b/lib/user/driver.ml index edd3d91..8a5c7c5 100644 --- a/lib/user/driver.ml +++ b/lib/user/driver.ml @@ -24,7 +24,7 @@ let compile paths _ = Printf.printf "assumes [paths] has one file, ignores flags\n"; let source = Util.read_file (List.hd paths) in try - let statements = Parse_lex.lex_and_parse source in + let statements = Parse_lex.lex_and_parse ~filename:(List.hd paths) source in Analysis.infer statements; let ir = Ir_gen.generate statements in let main_cfg = List.hd ir in @@ -32,9 +32,7 @@ let compile paths _ = let simulator = Ir_sim.make () in Ir_sim.run simulator main_cfg; print_string (Ir_sim.output_of simulator) - with Parse_lex.ParseError msg -> print_error (msg ^ "\n") - -(* let was_passed flag = List.mem flag flags in *) + with Parse_lex.ParserError msg -> print_error (msg ^ "\n") let main args = let parse = Cli.parse args in diff --git a/test/snapshot/snapshot.ml b/test/snapshot/snapshot.ml index 4fd60d5..01e694d 100644 --- a/test/snapshot/snapshot.ml +++ b/test/snapshot/snapshot.ml @@ -1,7 +1,7 @@ open Alcotest open X86ISTMB -type transform = (string -> string) * speed_level +type transform = (string -> string -> string) * Alcotest.speed_level let ignore_file_name = "IGNORE" @@ -33,12 +33,12 @@ let make_test_suite root suite (transform_f, speed) = let input = read_file input_path in let expected = read_file output_path in try - let actual = transform_f input in + let actual = transform_f (Util.basename input_path) input in (check string) "Using the given input transformer should yield matching output to the \ expected." expected actual - with Parse_lex.ParseError msg -> fail msg + with Parse_lex.ParserError msg -> fail msg in let suite_name = Util.merge_paths [ root; suite ] in let snapshot_tests = diff --git a/test/snapshot/snapshot.mli b/test/snapshot/snapshot.mli index 3491940..60f2c22 100644 --- a/test/snapshot/snapshot.mli +++ b/test/snapshot/snapshot.mli @@ -1,7 +1,7 @@ -(** Let [(f, _)] be of type [transform]. Then, [f contents] is the result of - transforming the string [contents] (which will usually come from a snapshot - file) *) -type transform = (string -> string) * Alcotest.speed_level +(** Let [(f, _)] be of type [transform]. Then, [f filename contents] is the + result of transforming the string [contents] as contained in the snapshot + file [filename] *) +type transform = (string -> string -> string) * Alcotest.speed_level (** [make_test_suite root suite f] is a snapshot test suit from snapshots in [root/suite] using snapshot transformation [f] (see the documentation for diff --git a/test/snapshots/basic/IGNORE b/test/snapshots/basic/IGNORE index 9931458..882d8aa 100644 --- a/test/snapshots/basic/IGNORE +++ b/test/snapshots/basic/IGNORE @@ -1 +1 @@ -shadow1 +shadow0 diff --git a/test/snapshots/basic/comment1.in b/test/snapshots/basic/comment0.in similarity index 100% rename from test/snapshots/basic/comment1.in rename to test/snapshots/basic/comment0.in diff --git a/test/snapshots/basic/comment1.out b/test/snapshots/basic/comment0.out similarity index 100% rename from test/snapshots/basic/comment1.out rename to test/snapshots/basic/comment0.out diff --git a/test/snapshots/basic/shadow1.in b/test/snapshots/basic/shadow0.in similarity index 100% rename from test/snapshots/basic/shadow1.in rename to test/snapshots/basic/shadow0.in diff --git a/test/snapshots/basic/shadow1.out b/test/snapshots/basic/shadow0.out similarity index 100% rename from test/snapshots/basic/shadow1.out rename to test/snapshots/basic/shadow0.out diff --git a/test/snapshots/parse/parse0.in b/test/snapshots/parse/parse0.in new file mode 100644 index 0000000..a527aad --- /dev/null +++ b/test/snapshots/parse/parse0.in @@ -0,0 +1,2 @@ +// this is lexically incorrect at line 2, col 1 +@ diff --git a/test/snapshots/parse/parse0.out b/test/snapshots/parse/parse0.out new file mode 100644 index 0000000..af3ad16 --- /dev/null +++ b/test/snapshots/parse/parse0.out @@ -0,0 +1 @@ +Lexer error: unrecognized character '@' at parse0.in:2:1 diff --git a/test/snapshots/parse/parse1.in b/test/snapshots/parse/parse1.in new file mode 100644 index 0000000..1b52ade --- /dev/null +++ b/test/snapshots/parse/parse1.in @@ -0,0 +1,2 @@ +// this is syntactically incorrect at line 2, col 1 +) diff --git a/test/snapshots/parse/parse1.out b/test/snapshots/parse/parse1.out new file mode 100644 index 0000000..e0f2097 --- /dev/null +++ b/test/snapshots/parse/parse1.out @@ -0,0 +1 @@ +Syntax error at parse1.in:2:1 diff --git a/test/snapshots/type/type0.in b/test/snapshots/type/type0.in index ddf6c12..5ab39b4 100644 --- a/test/snapshots/type/type0.in +++ b/test/snapshots/type/type0.in @@ -1 +1,3 @@ -print true +func main() { + print true +} diff --git a/test/snapshots/type/type0.out b/test/snapshots/type/type0.out index ddf6c12..bc050e7 100644 --- a/test/snapshots/type/type0.out +++ b/test/snapshots/type/type0.out @@ -1 +1,3 @@ -print true +func main() -> Unit { + print true +} diff --git a/test/snapshots/type/type1.in b/test/snapshots/type/type1.in index 3f2421e..093c103 100644 --- a/test/snapshots/type/type1.in +++ b/test/snapshots/type/type1.in @@ -1,3 +1,5 @@ -let x = 1 -let y = 2 -let z = x + y +func main() { + let x = 1 + let y = 2 + let z = x + y +} diff --git a/test/snapshots/type/type1.out b/test/snapshots/type/type1.out index debe7a9..fc3b724 100644 --- a/test/snapshots/type/type1.out +++ b/test/snapshots/type/type1.out @@ -1,3 +1,5 @@ -let x: Int = 1 -let y: Int = 2 -let z: Int = (x + y) +func main() -> Unit { + let x: Int = 1 + let y: Int = 2 + let z: Int = (x + y) +} diff --git a/test/snapshots/type/type10.in b/test/snapshots/type/type10.in index 2e20394..f09e6af 100644 --- a/test/snapshots/type/type10.in +++ b/test/snapshots/type/type10.in @@ -1,2 +1,4 @@ -let x = 5 -x = 5 + y +func main() { + let x = 5 + x = 5 + y +} diff --git a/test/snapshots/type/type10.out b/test/snapshots/type/type10.out index bca4237..ede9528 100644 --- a/test/snapshots/type/type10.out +++ b/test/snapshots/type/type10.out @@ -1 +1 @@ -Type resolution error (symbol='y') +Name error: unbound variable: 'y' diff --git a/test/snapshots/type/type11.in b/test/snapshots/type/type11.in new file mode 100644 index 0000000..0547b3d --- /dev/null +++ b/test/snapshots/type/type11.in @@ -0,0 +1 @@ +let x = 1 diff --git a/test/snapshots/type/type11.out b/test/snapshots/type/type11.out new file mode 100644 index 0000000..e1e89ff --- /dev/null +++ b/test/snapshots/type/type11.out @@ -0,0 +1 @@ +Analyzer error: only functions can be written at top level diff --git a/test/snapshots/type/type12.in b/test/snapshots/type/type12.in new file mode 100644 index 0000000..6ad0ec1 --- /dev/null +++ b/test/snapshots/type/type12.in @@ -0,0 +1,12 @@ +func foo(a: Int) { + print a +} +func bar(a: Int, b: Bool) { + print a + print b +} +func baz(a: Bool, b: Int, c: Int*) { + print a + print b + print c +} diff --git a/test/snapshots/type/type12.out b/test/snapshots/type/type12.out new file mode 100644 index 0000000..1943fe2 --- /dev/null +++ b/test/snapshots/type/type12.out @@ -0,0 +1,12 @@ +func foo(a: Int) -> Unit { + print a +} +func bar(a: Int, b: Bool) -> Unit { + print a + print b +} +func baz(a: Bool, b: Int, c: Int*) -> Unit { + print a + print b + print c +} diff --git a/test/snapshots/type/type13.in b/test/snapshots/type/type13.in new file mode 100644 index 0000000..5439139 --- /dev/null +++ b/test/snapshots/type/type13.in @@ -0,0 +1,5 @@ +func foo(a: Int) { +} +func bar() { + print a +} diff --git a/test/snapshots/type/type13.out b/test/snapshots/type/type13.out new file mode 100644 index 0000000..cab224c --- /dev/null +++ b/test/snapshots/type/type13.out @@ -0,0 +1 @@ +Name error: unbound variable: 'a' diff --git a/test/snapshots/type/type2.in b/test/snapshots/type/type2.in index 4721ebf..f8da65f 100644 --- a/test/snapshots/type/type2.in +++ b/test/snapshots/type/type2.in @@ -1,2 +1,4 @@ -let x = 4 -let y = *4 +func main() { + let x = 4 + let y = *4 +} diff --git a/test/snapshots/type/type2.out b/test/snapshots/type/type2.out index aaf59a8..e6ea8b3 100644 --- a/test/snapshots/type/type2.out +++ b/test/snapshots/type/type2.out @@ -1 +1 @@ -Type unification error: attempt to unify ?* and Int +Type error: no overload for '*' exists with parameter types (Int) diff --git a/test/snapshots/type/type3.in b/test/snapshots/type/type3.in index 0e6fb9a..081af11 100644 --- a/test/snapshots/type/type3.in +++ b/test/snapshots/type/type3.in @@ -1,3 +1,5 @@ -let x = 1 -let y = true -print x + y +func main() { + let x = 1 + let y = true + print x + y +} diff --git a/test/snapshots/type/type3.out b/test/snapshots/type/type3.out index 3bb0c00..9fd2fc8 100644 --- a/test/snapshots/type/type3.out +++ b/test/snapshots/type/type3.out @@ -1 +1 @@ -Type unification error (symbol='y'): attempt to unify Int and Bool +Type error: no overload for '+' exists with parameter types (Int, Bool) diff --git a/test/snapshots/type/type4.in b/test/snapshots/type/type4.in index e821fa4..0938daa 100644 --- a/test/snapshots/type/type4.in +++ b/test/snapshots/type/type4.in @@ -1 +1,3 @@ -print x +func main() { + print x +} diff --git a/test/snapshots/type/type4.out b/test/snapshots/type/type4.out index 3367484..57ce13d 100644 --- a/test/snapshots/type/type4.out +++ b/test/snapshots/type/type4.out @@ -1 +1 @@ -Type resolution error (symbol='x') +Name error: unbound variable: 'x' diff --git a/test/snapshots/type/type5.in b/test/snapshots/type/type5.in index 4ce6970..e534cfb 100644 --- a/test/snapshots/type/type5.in +++ b/test/snapshots/type/type5.in @@ -1,4 +1,6 @@ -let x = true +func bar() { + let x = true +} func foo() { let x = 4 print +x diff --git a/test/snapshots/type/type5.out b/test/snapshots/type/type5.out index 112ebb1..f3442c0 100644 --- a/test/snapshots/type/type5.out +++ b/test/snapshots/type/type5.out @@ -1,5 +1,7 @@ -let x: Bool = true -func foo() { +func bar() -> Unit { + let x: Bool = true +} +func foo() -> Unit { let x: Int = 4 print (+x) } diff --git a/test/snapshots/type/type6.in b/test/snapshots/type/type6.in index 3e02516..8539797 100644 --- a/test/snapshots/type/type6.in +++ b/test/snapshots/type/type6.in @@ -1,5 +1,7 @@ -let x = 1 func foo() { let x = 1 } -print x +func main() { + let x = 1 + print x +} diff --git a/test/snapshots/type/type6.out b/test/snapshots/type/type6.out index 6008b04..349d676 100644 --- a/test/snapshots/type/type6.out +++ b/test/snapshots/type/type6.out @@ -1,5 +1,7 @@ -let x: Int = 1 -func foo() { +func foo() -> Unit { let x: Int = 1 } -print x +func main() -> Unit { + let x: Int = 1 + print x +} diff --git a/test/snapshots/type/type7.in b/test/snapshots/type/type7.in index 89f40b6..5e4711b 100644 --- a/test/snapshots/type/type7.in +++ b/test/snapshots/type/type7.in @@ -1,4 +1,6 @@ func foo() { let x = 1 } -print x +func main() { + print x +} diff --git a/test/snapshots/type/type7.out b/test/snapshots/type/type7.out index 3367484..57ce13d 100644 --- a/test/snapshots/type/type7.out +++ b/test/snapshots/type/type7.out @@ -1 +1 @@ -Type resolution error (symbol='x') +Name error: unbound variable: 'x' diff --git a/test/snapshots/type/type8.in b/test/snapshots/type/type8.in index 9e328c3..5d4b3fa 100644 --- a/test/snapshots/type/type8.in +++ b/test/snapshots/type/type8.in @@ -1 +1,3 @@ -let x = (1 == 2) +func main() { + let x = (1 == 2) +} diff --git a/test/snapshots/type/type8.out b/test/snapshots/type/type8.out index 561d7a0..ecaa43c 100644 --- a/test/snapshots/type/type8.out +++ b/test/snapshots/type/type8.out @@ -1 +1,3 @@ -let x: Bool = (1 == 2) +func main() -> Unit { + let x: Bool = (1 == 2) +} diff --git a/test/snapshots/type/type9.in b/test/snapshots/type/type9.in index 6f74c72..d32f143 100644 --- a/test/snapshots/type/type9.in +++ b/test/snapshots/type/type9.in @@ -1,3 +1,5 @@ -if 1 + 2 { - let x = 5 +func main() { + if 1 + 2 { + let x = 5 + } } diff --git a/test/snapshots/type/type9.out b/test/snapshots/type/type9.out index 11421a5..fc3c2cc 100644 --- a/test/snapshots/type/type9.out +++ b/test/snapshots/type/type9.out @@ -1 +1 @@ -Type unification error: attempt to unify Bool and Int +Type error: in if statement condition: expected Bool but received Int diff --git a/test/test_snapshots.ml b/test/test_snapshots.ml index 9596383..3fd1b49 100644 --- a/test/test_snapshots.ml +++ b/test/test_snapshots.ml @@ -1,24 +1,24 @@ let snapshots_root = "test/snapshots" let type_suite = - let transform input = + let transform filename input = let open X86ISTMB in try - let statements = Parse_lex.lex_and_parse input in + let statements = Parse_lex.lex_and_parse ~filename input in Analysis.infer statements; statements |> List.map Ast.stmt_to_string |> String.concat "" with - | Analysis.TypeInferenceError err -> - Printexc.to_string (Analysis.TypeInferenceError err) ^ "\n" + | Analysis.AnalyzerError err -> + Printexc.to_string (Analysis.AnalyzerError err) ^ "\n" | e -> raise e in Snapshot.make_test_suite snapshots_root "type" (transform, `Quick) -(** [ir_transform] is the result of running the IR simulator on the source code - [input]. *) -let ir_transform input = +(** [ir_transform filename input] is the result of running the IR simulator on + the source code [input]. *) +let ir_transform filename input = let open X86ISTMB in - let statements = Parse_lex.lex_and_parse input in + let statements = Parse_lex.lex_and_parse ~filename input in Analysis.infer statements; let ir = Ir_gen.generate statements in let main_cfg = List.hd ir in @@ -33,3 +33,13 @@ let ir_suite = (** not sure why this is separate from [ir_suite]. *) let basic_suite = Snapshot.make_test_suite snapshots_root "basic" (ir_transform, `Quick) + +let parse_transform filename input = + let open X86ISTMB in + try + Parse_lex.lex_and_parse ~filename input |> ignore; + "" + with Parse_lex.ParserError err -> err ^ "\n" + +let parse_suite = + Snapshot.make_test_suite snapshots_root "parse" (parse_transform, `Quick) diff --git a/test/test_util.ml b/test/test_util.ml index f4b59f4..364a276 100644 --- a/test/test_util.ml +++ b/test/test_util.ml @@ -1,27 +1,37 @@ +open Alcotest +open X86ISTMB + +let test_merge_paths () = + (check string) "An empty list yields the empty path." "" (Util.merge_paths []); + (check string) "A singleton list of an empty string yields the empty path." "" + (Util.merge_paths [ "" ]); + (check string) "A list of empty strings yields the empty path." "" + (Util.merge_paths [ ""; ""; "" ]); + (check string) "Slashes are preserved at the front." "/" + (Util.merge_paths [ "/" ]); + (check string) "Slashes are preserved at the front." "/a" + (Util.merge_paths [ "/a" ]); + (check string) "Slashes are preserved at the front." "/a/b" + (Util.merge_paths [ "/a"; "b" ]); + (check string) + "Path components should be trimmed of internal slashes and a single slash \ + inserted between" + "a/b/c/d/e/f" + (Util.merge_paths [ "a"; "/b"; "c/"; "/d/"; "e"; "f" ]) + +let test_basename () = + (check string) "Empty path is preserved" "" (Util.basename ""); + (check string) "Filenames are preserved" "foo" (Util.basename "foo"); + (check string) "Last component is extracted" "foo" (Util.basename "/foo"); + (check string) "Last component is extracted" "foo" (Util.basename "bar/foo"); + (check string) "Last component is extracted" "foo.baz" + (Util.basename "bar/foo.baz"); + (check string) "Last component is extracted" "foo.baz" + (Util.basename "bop/bong/birp/bar/foo.baz") + let test_suite = - let open Alcotest in - let open X86ISTMB in - let test_merge_paths = - let test () = - (check string) "An empty list yields the empty path." "" - (Util.merge_paths []); - (check string) - "A singleton list of an empty string yields the empty path." "" - (Util.merge_paths [ "" ]); - (check string) "A list of empty strings yields the empty path." "" - (Util.merge_paths [ ""; ""; "" ]); - (check string) "Slashes are preserved at the front." "/" - (Util.merge_paths [ "/" ]); - (check string) "Slashes are preserved at the front." "/a" - (Util.merge_paths [ "/a" ]); - (check string) "Slashes are preserved at the front." "/a/b" - (Util.merge_paths [ "/a"; "b" ]); - (check string) - "Path components should be trimmed of internal slashes and a single \ - slash inserted between" - "a/b/c/d/e/f" - (Util.merge_paths [ "a"; "/b"; "c/"; "/d/"; "e"; "f" ]) - in - test_case "Util.merge_paths" `Quick test - in - ("lib/util.ml", [ test_merge_paths ]) + ( "lib/util.ml", + [ + test_case "Util.merge_paths" `Quick test_merge_paths; + test_case "Util.test_basename" `Quick test_basename; + ] ) diff --git a/test/test_x86ISTMB.ml b/test/test_x86ISTMB.ml index 65c1263..a4c3ad4 100644 --- a/test/test_x86ISTMB.ml +++ b/test/test_x86ISTMB.ml @@ -54,6 +54,7 @@ let () = Test_snapshots.ir_suite; Test_snapshots.type_suite; Test_snapshots.basic_suite; + Test_snapshots.parse_suite; Test_liveliness.test_suite; Test_passes.test_suite; Test_digraph.test_suite;