From 52611d266330d1c7ff7c5f5f5779f9b148961235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=A1s=20Ojeda=20B=C3=A4r?= Date: Thu, 3 Nov 2022 18:51:46 +0100 Subject: [PATCH] Multicore support for RISC-V (#11418) --- asmcomp/riscv/emit.mlp | 256 ++++++--- asmcomp/riscv/proc.ml | 25 +- asmcomp/riscv/selection.ml | 3 + runtime/caml/stack.h | 8 + runtime/riscv.S | 1053 ++++++++++++++++++++++++------------ 5 files changed, 915 insertions(+), 430 deletions(-) diff --git a/asmcomp/riscv/emit.mlp b/asmcomp/riscv/emit.mlp index 76b9b7ba239..2d8b5f168f8 100644 --- a/asmcomp/riscv/emit.mlp +++ b/asmcomp/riscv/emit.mlp @@ -52,9 +52,6 @@ let slot_offset env loc cls = (* Output a symbol *) -let emit_symbol s = - emit_symbol '$' s - let emit_jump op s = if !Clflags.dlcode || !Clflags.pic_code then `{emit_string op} {emit_symbol s}@plt` @@ -81,11 +78,14 @@ let rodata_space = (* Names for special regs *) -let reg_tmp = phys_reg 23 -let reg_t2 = phys_reg 16 -let reg_domain_state_ptr = phys_reg 26 -let reg_trap = phys_reg 24 -let reg_alloc_ptr = phys_reg 25 +let reg_tmp = phys_reg 23 (* t1 *) +let reg_tmp2 = phys_reg 22 (* t0 *) +let reg_t2 = phys_reg 16 (* t2 *) +let reg_domain_state_ptr = phys_reg 26 (* s11 *) +let reg_trap_ptr = phys_reg 24 (* s1 *) +let reg_alloc_ptr = phys_reg 25 (* s10 *) +let reg_stack_arg_begin = phys_reg 9 (* s3 *) +let reg_stack_arg_end = phys_reg 10 (* s4 *) (* Output a pseudo-register *) @@ -96,48 +96,58 @@ let reg_name = function let emit_reg r = emit_string (reg_name r) -(* Adjust sp by the given byte amount *) +(* Adjust sp by the given byte amount, clobbers reg_tmp *) + +let emit_stack_adjustment n = + if n <> 0 then begin + if is_immediate n then + ` addi sp, sp, {emit_int n}\n` + else begin + ` li {emit_reg reg_tmp}, {emit_int n}\n`; + ` add sp, sp, {emit_reg reg_tmp}\n` + end; + cfi_adjust_cfa_offset (-n) + end -let emit_stack_adjustment = function - | 0 -> () - | n when is_immediate n -> - ` addi sp, sp, {emit_int n}\n` - | n -> - ` li {emit_reg reg_tmp}, {emit_int n}\n`; - ` add sp, sp, {emit_reg reg_tmp}\n` +(* Output add-immediate instruction, clobbers reg_tmp2 *) -(* Adjust stack_offset and emit corresponding CFI directive *) +let emit_addimm rd rs n = + if is_immediate n then + ` addi {emit_reg rd}, {emit_reg rs}, {emit_int n}\n` + else begin + ` li {emit_reg reg_tmp2}, {emit_int n}\n`; + ` add {emit_reg rd}, {emit_reg rs}, {emit_reg reg_tmp2}\n` + end -let adjust_stack_offset env delta = - env.stack_offset <- env.stack_offset + delta; - cfi_adjust_cfa_offset delta +(* Output memory operation with a possibly non-immediate offset, + clobbers reg_tmp *) -let emit_mem_op ?(base = "sp") op src ofs = +let emit_mem_op op reg ofs addr = if is_immediate ofs then - ` {emit_string op} {emit_string src}, {emit_int ofs}({emit_string base})\n` + ` {emit_string op} {emit_string reg}, {emit_int ofs}({emit_string addr})\n` else begin ` li {emit_reg reg_tmp}, {emit_int ofs}\n`; - ` add {emit_reg reg_tmp}, {emit_string base}, {emit_reg reg_tmp}\n`; - ` {emit_string op} {emit_string src}, 0({emit_reg reg_tmp})\n` + ` add {emit_reg reg_tmp}, {emit_string addr}, {emit_reg reg_tmp}\n`; + ` {emit_string op} {emit_string reg}, 0({emit_reg reg_tmp})\n` end let reload_ra n = - emit_mem_op "ld" "ra" (n - size_addr) + emit_mem_op "ld" "ra" (n - 8) "sp" let store_ra n = - emit_mem_op "sd" "ra" (n - size_addr) + emit_mem_op "sd" "ra" (n - 8) "sp" -let emit_store ?base src ofs = - emit_mem_op ?base "sd" (reg_name src) ofs +let emit_store rs ofs rd = + emit_mem_op "sd" (reg_name rs) ofs rd -let 
emit_load ?base dst ofs = - emit_mem_op ?base "ld" (reg_name dst) ofs +let emit_load rd ofs rs = + emit_mem_op "ld" (reg_name rd) ofs rs -let emit_float_load ?base dst ofs = - emit_mem_op ?base "fld" (reg_name dst) ofs +let emit_float_load rd ofs rs = + emit_mem_op "fld" (reg_name rd) ofs rs -let emit_float_store ?base src ofs = - emit_mem_op ?base "fsd" (reg_name src) ofs +let emit_float_store rs ofs rd = + emit_mem_op "fsd" (reg_name rs) ofs rd (* Record live pointers at call points *) @@ -243,10 +253,9 @@ let emit_instr env i = assert (env.f.fun_prologue_required); let n = frame_size env in emit_stack_adjustment (-n); - if n > 0 then cfi_adjust_cfa_offset n; if env.f.fun_contains_calls then begin store_ra n; - cfi_offset ~reg:1 (* ra *) ~offset:(-size_addr) + cfi_offset ~reg:1 (* ra *) ~offset:(-8) end; | Lop(Imove | Ispill | Ireload) -> let src = i.arg.(0) and dst = i.res.(0) in @@ -260,16 +269,16 @@ let emit_instr env i = ` fmv.x.d {emit_reg dst}, {emit_reg src}\n` | {loc = Reg _; typ = (Val | Int | Addr)}, {loc = Stack s} -> let (base, ofs) = slot_offset env s (register_class dst) in - emit_store ~base src ofs + emit_store src ofs base | {loc = Reg _; typ = Float}, {loc = Stack s} -> let (base, ofs) = slot_offset env s (register_class dst) in - emit_float_store ~base src ofs + emit_float_store src ofs base | {loc = Stack s; typ = (Val | Int | Addr)}, {loc = Reg _} -> let (base, ofs) = slot_offset env s (register_class src) in - emit_load ~base dst ofs + emit_load dst ofs base | {loc = Stack s; typ = Float}, {loc = Reg _} -> let (base, ofs) = slot_offset env s (register_class src) in - emit_float_load ~base dst ofs + emit_float_load dst ofs base | {loc = Stack _}, {loc = Stack _} | {loc = Unknown}, _ | _, {loc = Unknown} -> Misc.fatal_error "Emit: Imove" @@ -302,20 +311,43 @@ let emit_instr env i = emit_stack_adjustment n; ` {emit_tail func}\n` end - | Lop(Iextcall{func; alloc = true}) -> - ` la {emit_reg reg_t2}, {emit_symbol func}\n`; - ` {emit_call "caml_c_call"}\n`; - record_frame env i.live (Dbg_other i.dbg) - | Lop(Iextcall{func; alloc = false}) -> - ` {emit_call func}\n` + | Lop(Iextcall{func; alloc; stack_ofs}) -> + if stack_ofs > 0 then begin + ` mv {emit_reg reg_stack_arg_begin}, sp\n`; + ` addi {emit_reg reg_stack_arg_end}, sp, {emit_int (Misc.align stack_ofs 16)}\n`; + ` la {emit_reg reg_t2}, {emit_symbol func}\n`; + ` {emit_call "caml_c_call_stack_args"}\n`; + record_frame env i.live (Dbg_other i.dbg) + end else if alloc then begin + ` la {emit_reg reg_t2}, {emit_symbol func}\n`; + ` {emit_call "caml_c_call"}\n`; + record_frame env i.live (Dbg_other i.dbg) + end else begin + (* store ocaml stack in s0, which is marked as being destroyed + at noalloc calls *) + ` mv s0, sp\n`; + cfi_remember_state (); + cfi_def_cfa_register ~reg:21; + let ofs = Domainstate.(idx_of_field Domain_c_stack) * 8 in + ` ld sp, {emit_int ofs}({emit_reg reg_domain_state_ptr})\n`; + ` {emit_call func}\n`; + ` mv sp, s0\n`; + cfi_restore_state () + end | Lop(Istackoffset n) -> assert (n mod 16 = 0); emit_stack_adjustment (-n); - adjust_stack_offset env n - | Lop(Iload { memory_chunk = Single; addressing_mode = Iindexed ofs; _ } ) -> + env.stack_offset <- env.stack_offset + n + | Lop(Iload { memory_chunk = Single; addressing_mode = Iindexed ofs; is_atomic } ) -> + assert (not is_atomic); ` flw {emit_reg i.res.(0)}, {emit_int ofs}({emit_reg i.arg.(0)})\n`; ` fcvt.d.s {emit_reg i.res.(0)}, {emit_reg i.res.(0)}\n` - | Lop(Iload { memory_chunk; addressing_mode = Iindexed ofs } ) -> + | Lop(Iload { 
memory_chunk = Word_int | Word_val; addressing_mode = Iindexed ofs; is_atomic } ) -> + if is_atomic then ` fence rw, rw\n`; + ` ld {emit_reg i.res.(0)}, {emit_int ofs}({emit_reg i.arg.(0)})\n`; + if is_atomic then ` fence r, rw\n` + | Lop(Iload { memory_chunk; addressing_mode = Iindexed ofs; is_atomic } ) -> + assert (not is_atomic); let instr = match memory_chunk with | Byte_unsigned -> "lbu" @@ -324,8 +356,7 @@ let emit_instr env i = | Sixteen_signed -> "lh" | Thirtytwo_unsigned -> "lwu" | Thirtytwo_signed -> "lw" - | Word_int | Word_val -> "ld" - | Single -> assert false + | Word_int | Word_val | Single -> assert false | Double -> "fld" in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_int ofs}({emit_reg i.arg.(0)})\n` @@ -333,37 +364,50 @@ let emit_instr env i = (* ft0 is marked as destroyed for this operation *) ` fcvt.s.d ft0, {emit_reg i.arg.(0)}\n`; ` fsw ft0, {emit_int ofs}({emit_reg i.arg.(1)})\n` + | Lop(Istore((Word_int | Word_val), Iindexed ofs, assignement)) -> + if assignement then begin + ` fence r, w\n`; + ` sd {emit_reg i.arg.(0)}, {emit_int ofs}({emit_reg i.arg.(1)})\n` + end else + ` sd {emit_reg i.arg.(0)}, {emit_int ofs}({emit_reg i.arg.(1)})\n`; | Lop(Istore(chunk, Iindexed ofs, _)) -> let instr = match chunk with | Byte_unsigned | Byte_signed -> "sb" | Sixteen_unsigned | Sixteen_signed -> "sh" | Thirtytwo_unsigned | Thirtytwo_signed -> "sw" - | Word_int | Word_val -> "sd" - | Single -> assert false + | Word_int | Word_val | Single -> assert false | Double -> "fsd" in ` {emit_string instr} {emit_reg i.arg.(0)}, {emit_int ofs}({emit_reg i.arg.(1)})\n` | Lop(Ialloc {bytes; dbginfo}) -> let lbl_frame_lbl = record_frame_label env i.live (Dbg_alloc dbginfo) in - let lbl_after_alloc = new_label () in - let lbl_call_gc = new_label () in - let n = -bytes in - let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in - if is_immediate n then - ` addi {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_int n}\n` - else begin - ` li {emit_reg reg_tmp}, {emit_int n}\n`; - ` add {emit_reg reg_alloc_ptr}, {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}\n` - end; - ` ld {emit_reg reg_tmp}, {emit_int offset}({emit_reg reg_domain_state_ptr})\n`; - ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`; - `{emit_label lbl_after_alloc}:\n`; - ` addi {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, {emit_int size_addr}\n`; - env.call_gc_sites <- - { gc_lbl = lbl_call_gc; - gc_return_lbl = lbl_after_alloc; - gc_frame_lbl = lbl_frame_lbl } :: env.call_gc_sites + if env.f.fun_fast then begin + let lbl_after_alloc = new_label () in + let lbl_call_gc = new_label () in + let n = -bytes in + let offset = Domainstate.(idx_of_field Domain_young_limit) * 8 in + emit_addimm reg_alloc_ptr reg_alloc_ptr n; + ` ld {emit_reg reg_tmp}, {emit_int offset}({emit_reg reg_domain_state_ptr})\n`; + ` bltu {emit_reg reg_alloc_ptr}, {emit_reg reg_tmp}, {emit_label lbl_call_gc}\n`; + `{emit_label lbl_after_alloc}:\n`; + ` addi {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, 8\n`; + env.call_gc_sites <- + { gc_lbl = lbl_call_gc; + gc_return_lbl = lbl_after_alloc; + gc_frame_lbl = lbl_frame_lbl } :: env.call_gc_sites + end else begin + begin match bytes with + | 16 -> ` {emit_call "caml_alloc1"}\n` + | 24 -> ` {emit_call "caml_alloc2"}\n` + | 32 -> ` {emit_call "caml_alloc3"}\n` + | _ -> + ` li {emit_reg reg_t2}, {emit_int bytes}\n`; + ` {emit_call "caml_allocN"}\n` + end; + `{emit_label lbl_frame_lbl}:\n`; + ` addi {emit_reg i.res.(0)}, {emit_reg reg_alloc_ptr}, 8\n` + end | 
Lop(Ipoll { return_label }) -> let lbl_frame_lbl = record_frame_label env i.live (Dbg_alloc []) in let lbl_after_poll = match return_label with @@ -438,8 +482,8 @@ let emit_instr env i = let instr = name_for_specific sop in ` {emit_string instr} {emit_reg i.res.(0)}, {emit_reg i.arg.(0)}, {emit_reg i.arg.(1)}, {emit_reg i.arg.(2)}\n` | Lop (Idls_get) -> - (* Here to maintain build *) - assert false + let ofs = Domainstate.(idx_of_field Domain_dls_root) * 8 in + ` ld {emit_reg i.res.(0)}, {emit_int ofs}({emit_reg reg_domain_state_ptr})\n` | Lreloadretaddr -> let n = frame_size env in reload_ra n @@ -505,7 +549,6 @@ let emit_instr env i = | Some lbl -> ` bgtz {emit_reg reg_tmp}, {emit_label lbl}\n` end | Lswitch jumptbl -> - (* t0 is marked as destroyed for this operation *) let lbl = new_label() in ` la {emit_reg reg_tmp}, {emit_label lbl}\n`; ` slli t0, {emit_reg i.arg.(0)}, 2\n`; @@ -520,32 +563,33 @@ let emit_instr env i = | Ladjust_trap_depth { delta_traps } -> (* each trap occupes 16 bytes on the stack *) let delta = 16 * delta_traps in - adjust_stack_offset env delta + cfi_adjust_cfa_offset delta; + env.stack_offset <- env.stack_offset + delta | Lpushtrap {lbl_handler} -> ` la {emit_reg reg_tmp}, {emit_label lbl_handler}\n`; ` addi sp, sp, -16\n`; - adjust_stack_offset env 16; - emit_store reg_tmp size_addr; - emit_store reg_trap 0; - ` mv {emit_reg reg_trap}, sp\n` + env.stack_offset <- env.stack_offset + 16; + ` sd {emit_reg reg_trap_ptr}, 0(sp)\n`; + ` sd {emit_reg reg_tmp}, 8(sp)\n`; + cfi_adjust_cfa_offset 16; + ` mv {emit_reg reg_trap_ptr}, sp\n` | Lpoptrap -> - emit_load reg_trap 0; + ` ld {emit_reg reg_trap_ptr}, 0(sp)\n`; ` addi sp, sp, 16\n`; - adjust_stack_offset env (-16) + cfi_adjust_cfa_offset (-16); + env.stack_offset <- env.stack_offset - 16 | Lraise k -> begin match k with | Lambda.Raise_regular -> - let offset = Domainstate.(idx_of_field Domain_backtrace_pos) * 8 in - ` sd zero, {emit_int offset}({emit_reg reg_domain_state_ptr})\n`; ` {emit_call "caml_raise_exn"}\n`; record_frame env Reg.Set.empty (Dbg_raise i.dbg) | Lambda.Raise_reraise -> - ` {emit_call "caml_raise_exn"}\n`; + ` {emit_call "caml_reraise_exn"}\n`; record_frame env Reg.Set.empty (Dbg_raise i.dbg) | Lambda.Raise_notrace -> - ` mv sp, {emit_reg reg_trap}\n`; - emit_load reg_tmp size_addr; - emit_load reg_trap 0; + ` mv sp, {emit_reg reg_trap_ptr}\n`; + ` ld {emit_reg reg_tmp}, 8(sp)\n`; + ` ld {emit_reg reg_trap_ptr}, 0(sp)\n`; ` addi sp, sp, 16\n`; ` jr {emit_reg reg_tmp}\n` end @@ -566,9 +610,47 @@ let fundecl fundecl = `{emit_symbol fundecl.fun_name}:\n`; emit_debug_info fundecl.fun_dbg; cfi_startproc(); + + (* Dynamic stack checking *) + let stack_threshold_size = Config.stack_threshold * 8 in (* bytes *) + let { max_frame_size; contains_nontail_calls } = + preproc_stack_check + ~fun_body:fundecl.fun_body ~frame_size:(frame_size env) ~trap_size:16 + in + let handle_overflow = ref None in + if contains_nontail_calls || max_frame_size >= stack_threshold_size then begin + let overflow = new_label () and ret = new_label () in + let threshold_offset = Domainstate.stack_ctx_words * 8 + stack_threshold_size in + let f = max_frame_size + threshold_offset in + let offset = Domainstate.(idx_of_field Domain_current_stack) * 8 in + ` ld {emit_reg reg_tmp}, {emit_int offset}({emit_reg reg_domain_state_ptr})\n`; + emit_addimm reg_tmp reg_tmp f; + ` bltu sp, {emit_reg reg_tmp}, {emit_label overflow}\n`; + `{emit_label ret}:\n`; + handle_overflow := Some (overflow, ret) + end; + emit_all env fundecl.fun_body; 
List.iter emit_call_gc env.call_gc_sites; List.iter emit_call_bound_error env.bound_error_sites; + + begin match !handle_overflow with + | None -> () + | Some (overflow, ret) -> + `{emit_label overflow}:\n`; + (* Pass the desired frame size on the stack, since all of the + argument-passing registers may be in use. *) + let s = Config.stack_threshold + max_frame_size / 8 in + ` li {emit_reg reg_tmp}, {emit_int s}\n`; + ` addi sp, sp, -16\n`; + ` sd {emit_reg reg_tmp}, 0(sp)\n`; + ` sd ra, 8(sp)\n`; + ` {emit_call "caml_call_realloc_stack"}\n`; + ` ld ra, 8(sp)\n`; + ` addi sp, sp, 16\n`; + ` j {emit_label ret}\n` + end; + cfi_endproc(); ` .size {emit_symbol fundecl.fun_name}, .-{emit_symbol fundecl.fun_name}\n`; (* Emit the float literals *) diff --git a/asmcomp/riscv/proc.ml b/asmcomp/riscv/proc.ml index 9a58eae194b..1e16f40fcf5 100644 --- a/asmcomp/riscv/proc.ml +++ b/asmcomp/riscv/proc.ml @@ -38,8 +38,7 @@ let word_addressed = false s2-s9 8-15 arguments/results (preserved by C) t2-t6 16-20 temporary s0 21 general purpose (preserved by C) - t0 22 temporary - t1 23 temporary (used by code generator) + t0, t1 22-23 temporaries (used by call veneers) s1 24 trap pointer (preserved by C) s10 25 allocation pointer (preserved by C) s11 26 domain pointer (preserved by C) @@ -87,7 +86,7 @@ let register_class r = | Val | Int | Addr -> 0 | Float -> 1 -let num_available_registers = [| 23; 32 |] +let num_available_registers = [| 22; 32 |] let first_available_register = [| 0; 100 |] @@ -241,25 +240,27 @@ let regs_are_volatile _ = false (* Registers destroyed by operations *) -let destroyed_at_c_call = - (* s0-s11 and fs0-fs11 are callee-save. However s2 needs to be in this - list since it is clobbered by caml_c_call itself. *) +let destroyed_at_c_noalloc_call = + (* s0-s11 and fs0-fs11 are callee-save, but s0 is + used to preserve OCaml sp. *) Array.of_list(List.map phys_reg - [0; 1; 2; 3; 4; 5; 6; 7; 8; 16; 17; 18; 19; 20; 22; + [0; 1; 2; 3; 4; 5; 6; 7; 16; 17; 18; 19; 20; 21 (* s0 *); 100; 101; 102; 103; 104; 105; 106; 107; 110; 111; 112; 113; 114; 115; 116; 117; 128; 129; 130; 131]) let destroyed_at_alloc = (* t0-t6 are used for PLT stubs *) - if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20; 22|] - else [| |] + if !Clflags.dlcode then Array.map phys_reg [|16; 17; 18; 19; 20|] + else [| phys_reg 16 |] (* t2 is used to pass the argument to caml_allocN *) let destroyed_at_oper = function - | Iop(Icall_ind | Icall_imm _ | Iextcall{alloc = true; _}) -> all_phys_regs - | Iop(Iextcall{alloc = false; _}) -> destroyed_at_c_call + | Iop(Icall_ind | Icall_imm _) -> all_phys_regs + | Iop(Iextcall{alloc; stack_ofs; _}) -> + assert (stack_ofs >= 0); + if alloc || stack_ofs > 0 then all_phys_regs + else destroyed_at_c_noalloc_call | Iop(Ialloc _) | Iop(Ipoll _) -> destroyed_at_alloc | Iop(Istore(Single, _, _)) -> [| phys_reg 100 |] - | Iswitch _ -> [| phys_reg 22 |] (* t0 *) | _ -> [||] let destroyed_at_raise = all_phys_regs diff --git a/asmcomp/riscv/selection.ml b/asmcomp/riscv/selection.ml index 767bdd613d1..0183cb329ff 100644 --- a/asmcomp/riscv/selection.ml +++ b/asmcomp/riscv/selection.ml @@ -57,6 +57,9 @@ method! 
select_operation op args dbg = (Ispecific (Imultsubf true), [arg1; arg2; arg3]) | (Cnegf, [Cop(Caddf, [Cop(Cmulf, [arg1; arg2], _); arg3], _)]) -> (Ispecific (Imultaddf true), [arg1; arg2; arg3]) + | (Cstore (Word_int | Word_val as memory_chunk, Assignment), [arg1; arg2]) -> + (* Use trivial addressing mode for non-initializing stores *) + (Istore (memory_chunk, Iindexed 0, true), [arg2; arg1]) | _ -> super#select_operation op args dbg diff --git a/runtime/caml/stack.h b/runtime/caml/stack.h index 0c2e0b2feec..a9cf511003f 100644 --- a/runtime/caml/stack.h +++ b/runtime/caml/stack.h @@ -67,7 +67,15 @@ #endif #ifdef TARGET_riscv +/* Size of the gc_regs structure, in words. + See riscv.S and riscv/proc.ml for the indices */ +#define Wosize_gc_regs (2 + 22 /* int regs */ + 20 /* float regs */) #define Saved_return_address(sp) *((intnat *)((sp) - 8)) +/* RISC-V does not use a frame pointer, but requires the stack to be + 16-aligned, so when pushing the return address to the stack there + is an extra word of padding after it that needs to be skipped when + walking the stack. */ +#define Pop_frame_pointer(sp) sp += sizeof(value) #endif /* Declaration of variables used in the asm code */ diff --git a/runtime/riscv.S b/runtime/riscv.S index bf806250cb6..2f1210eb59d 100644 --- a/runtime/riscv.S +++ b/runtime/riscv.S @@ -18,15 +18,21 @@ #include "caml/m.h" -#define ARG_DOMAIN_STATE_PTR t0 #define DOMAIN_STATE_PTR s11 #define TRAP_PTR s1 #define ALLOC_PTR s10 -#define TMP t1 -#define ARG t2 +#define ADDITIONAL_ARG t2 +#define STACK_ARG_BEGIN s3 +#define STACK_ARG_END s4 +#define TMP t0 +#define TMP2 t1 -#define STORE sd -#define LOAD ld +#define C_ARG_1 a0 +#define C_ARG_2 a1 +#define C_ARG_3 a2 +#define C_ARG_4 a3 + +/* Support for CFI directives */ #if defined(ASM_CFI_SUPPORTED) #define CFI_STARTPROC .cfi_startproc @@ -34,12 +40,18 @@ #define CFI_ADJUST(n) .cfi_adjust_cfa_offset n #define CFI_REGISTER(r1,r2) .cfi_register r1,r2 #define CFI_OFFSET(r,n) .cfi_offset r,n +#define CFI_DEF_CFA_REGISTER(r) .cfi_def_cfa_register r +#define CFI_REMEMBER_STATE .cfi_remember_state +#define CFI_RESTORE_STATE .cfi_restore_state #else #define CFI_STARTPROC #define CFI_ENDPROC #define CFI_ADJUST(n) #define CFI_REGISTER(r1,r2) #define CFI_OFFSET(r,n) +#define CFI_DEF_CFA_REGISTER(r) +#define CFI_REMEMBER_STATE +#define CFI_RESTORE_STATE #endif .set domain_curr_field, 0 @@ -51,10 +63,13 @@ #define Caml_state(var) (8*domain_field_caml_##var)(DOMAIN_STATE_PTR) +/* Globals and labels */ +#define L(lbl) .L##lbl + #define FUNCTION(name) \ .align 2; \ .globl name; \ - .type name, @function; \ + .type name, @function; \ name:; \ CFI_STARTPROC @@ -70,369 +85,745 @@ name:; \ #define PLT(r) r #endif +#define OBJECT(name) \ + .data; \ + .align 3; \ + .globl name; \ + .type name, @object; \ +name: +#define END_OBJECT(name) \ + .size name, .-name + +/* Stack switching operations */ + +/* struct stack_info */ +#define Stack_sp(reg) 0(reg) +#define Stack_exception(reg) 8(reg) +#define Stack_handler(reg) 16(reg) +#define Stack_handler_from_cont(reg) 15(reg) + +/* struct c_stack_link */ +#define Cstack_stack(reg) 0(reg) +#define Cstack_sp(reg) 8(reg) +#define Cstack_prev(reg) 16(reg) + +/* struct stack_handler */ +#define Handler_value(reg) 0(reg) +#define Handler_exception(reg) 8(reg) +#define Handler_effect(reg) 16(reg) +#define Handler_parent(reg) 24(reg) + +/* Switch from OCaml to C stack. 
*/ +.macro SWITCH_OCAML_TO_C + /* Fill in Caml_state->current_stack->sp */ + ld TMP, Caml_state(current_stack) + sd sp, Stack_sp(TMP) + /* Fill in Caml_state->c_stack */ + ld TMP2, Caml_state(c_stack) + sd TMP, Cstack_stack(TMP2) + sd sp, Cstack_sp(TMP2) + /* Switch to C stack */ + mv sp, TMP2 + CFI_REMEMBER_STATE +.endm + +/* Switch from C to OCaml stack. */ +.macro SWITCH_C_TO_OCAML + ld sp, Cstack_sp(sp) + CFI_RESTORE_STATE +.endm + +/* Save all of the registers that may be in use to a free gc_regs bucket + and store ALLOC_PTR and TRAP_PTR back to Caml_state + At the end the saved registers are placed in Caml_state(gc_regs) + */ +.macro SAVE_ALL_REGS + /* First, save the young_ptr & exn_handler */ + sd ALLOC_PTR, Caml_state(young_ptr) + sd TRAP_PTR, Caml_state(exn_handler) + /* Now, use TMP to point to the gc_regs bucket */ + ld TMP, Caml_state(gc_regs_buckets) + ld TMP2, 0(TMP) /* next ptr */ + sd TMP2, Caml_state(gc_regs_buckets) + /* Save allocatable integer registers */ + sd a0, (2*8)(TMP) + sd a1, (3*8)(TMP) + sd a2, (4*8)(TMP) + sd a3, (5*8)(TMP) + sd a4, (6*8)(TMP) + sd a5, (7*8)(TMP) + sd a6, (8*8)(TMP) + sd a7, (9*8)(TMP) + sd s2, (10*8)(TMP) + sd s3, (11*8)(TMP) + sd s4, (12*8)(TMP) + sd s5, (13*8)(TMP) + sd s6, (14*8)(TMP) + sd s7, (15*8)(TMP) + sd s8, (16*8)(TMP) + sd s9, (17*8)(TMP) + sd t2, (18*8)(TMP) + sd t3, (19*8)(TMP) + sd t4, (20*8)(TMP) + sd t5, (21*8)(TMP) + sd t6, (22*8)(TMP) + sd s0, (23*8)(TMP) + /* Save caller-save floating-point registers + (callee-saves are preserved by C functions) */ + fsd ft0, (24*8)(TMP) + fsd ft1, (25*8)(TMP) + fsd ft2, (26*8)(TMP) + fsd ft3, (27*8)(TMP) + fsd ft4, (28*8)(TMP) + fsd ft5, (29*8)(TMP) + fsd ft6, (30*8)(TMP) + fsd ft7, (31*8)(TMP) + fsd fa0, (32*8)(TMP) + fsd fa1, (33*8)(TMP) + fsd fa2, (34*8)(TMP) + fsd fa3, (35*8)(TMP) + fsd fa4, (36*8)(TMP) + fsd fa5, (37*8)(TMP) + fsd fa6, (38*8)(TMP) + fsd fa7, (39*8)(TMP) + fsd ft8, (40*8)(TMP) + fsd ft9, (41*8)(TMP) + fsd ft10, (42*8)(TMP) + fsd ft11, (43*8)(TMP) + addi TMP, TMP, 16 + sd TMP, Caml_state(gc_regs) +.endm + +/* Undo SAVE_ALL_REGS by loading the registers saved in Caml_state(gc_regs) + and refreshing ALLOC_PTR & TRAP_PTR from Caml_state */ +.macro RESTORE_ALL_REGS + /* Restore a0, a1, freeing up the next ptr slot */ + ld TMP, Caml_state(gc_regs) + addi TMP, TMP, -16 + /* Restore registers */ + ld a0, (2*8)(TMP) + ld a1, (3*8)(TMP) + ld a2, (4*8)(TMP) + ld a3, (5*8)(TMP) + ld a4, (6*8)(TMP) + ld a5, (7*8)(TMP) + ld a6, (8*8)(TMP) + ld a7, (9*8)(TMP) + ld s2, (10*8)(TMP) + ld s3, (11*8)(TMP) + ld s4, (12*8)(TMP) + ld s5, (13*8)(TMP) + ld s6, (14*8)(TMP) + ld s7, (15*8)(TMP) + ld s8, (16*8)(TMP) + ld s9, (17*8)(TMP) + ld t2, (18*8)(TMP) + ld t3, (19*8)(TMP) + ld t4, (20*8)(TMP) + ld t5, (21*8)(TMP) + ld t6, (22*8)(TMP) + ld s0, (23*8)(TMP) + fld ft0, (24*8)(TMP) + fld ft1, (25*8)(TMP) + fld ft2, (26*8)(TMP) + fld ft3, (27*8)(TMP) + fld ft4, (28*8)(TMP) + fld ft5, (29*8)(TMP) + fld ft6, (30*8)(TMP) + fld ft7, (31*8)(TMP) + fld fa0, (32*8)(TMP) + fld fa1, (33*8)(TMP) + fld fa2, (34*8)(TMP) + fld fa3, (35*8)(TMP) + fld fa4, (36*8)(TMP) + fld fa5, (37*8)(TMP) + fld fa6, (38*8)(TMP) + fld fa7, (39*8)(TMP) + fld ft8, (40*8)(TMP) + fld ft9, (41*8)(TMP) + fld ft10, (42*8)(TMP) + fld ft11, (43*8)(TMP) + /* Put gc_regs struct back in bucket linked list */ + ld TMP2, Caml_state(gc_regs_buckets) + sd TMP2, 0(TMP) /* next ptr */ + sd TMP, Caml_state(gc_regs_buckets) + /* Reload new allocation pointer & exn handler */ + ld ALLOC_PTR, Caml_state(young_ptr) + ld TRAP_PTR, 
Caml_state(exn_handler) +.endm + .section .text /* Invoke the garbage collector. */ .globl caml_system__code_begin caml_system__code_begin: +FUNCTION(caml_call_realloc_stack) + /* Save return address */ + CFI_OFFSET(ra, -8) + addi sp, sp, -16 + sd ra, 8(sp) + CFI_ADJUST(16) + /* Save all registers (including ALLOC_PTR & TRAP_PTR) */ + SAVE_ALL_REGS + ld C_ARG_1, 16(sp) /* argument */ + SWITCH_OCAML_TO_C + call PLT(caml_try_realloc_stack) + SWITCH_C_TO_OCAML + beqz a0, 1f + RESTORE_ALL_REGS + /* Free stack space and return to caller */ + ld ra, 8(sp) + addi sp, sp, 16 + ret +1: RESTORE_ALL_REGS + /* Raise the Stack_overflow exception */ + ld ra, 8(sp) + addi sp, sp, 16 + addi sp, sp, 16 /* pop argument */ + la a0, caml_exn_Stack_overflow + j caml_raise_exn +END_FUNCTION(caml_call_realloc_stack) + FUNCTION(caml_call_gc) -.Lcaml_call_gc: - /* Record return address */ - STORE ra, Caml_state(last_return_address) - /* Record lowest stack address */ - STORE sp, Caml_state(bottom_of_stack) - /* Set up stack space, saving return address */ - /* (1 reg for RA, 1 reg for FP, 23 allocatable int regs, - 20 caller-save float regs) * 8 */ - /* + 1 for alignment */ - addi sp, sp, -0x170 - CFI_ADJUST(0x170) - STORE ra, 0x8(sp) - CFI_OFFSET(ra, -0x170+8) - /* Save allocatable integer registers on the stack, - in the order given in proc.ml */ - STORE a0, 0x10(sp) - STORE a1, 0x18(sp) - STORE a2, 0x20(sp) - STORE a3, 0x28(sp) - STORE a4, 0x30(sp) - STORE a5, 0x38(sp) - STORE a6, 0x40(sp) - STORE a7, 0x48(sp) - STORE s2, 0x50(sp) - STORE s3, 0x58(sp) - STORE s4, 0x60(sp) - STORE s5, 0x68(sp) - STORE s6, 0x70(sp) - STORE s7, 0x78(sp) - STORE s8, 0x80(sp) - STORE s9, 0x88(sp) - STORE t2, 0x90(sp) - STORE t3, 0x98(sp) - STORE t4, 0xa0(sp) - STORE t5, 0xa8(sp) - STORE t6, 0xb0(sp) - STORE s0, 0xb8(sp) - STORE t0, 0xc0(sp) - /* Save caller-save floating-point registers on the stack - (callee-saves are preserved by caml_garbage_collection) */ - fsd ft0, 0xd0(sp) - fsd ft1, 0xd8(sp) - fsd ft2, 0xe0(sp) - fsd ft3, 0xe8(sp) - fsd ft4, 0xf0(sp) - fsd ft5, 0xf8(sp) - fsd ft6, 0x100(sp) - fsd ft7, 0x108(sp) - fsd fa0, 0x110(sp) - fsd fa1, 0x118(sp) - fsd fa2, 0x120(sp) - fsd fa3, 0x128(sp) - fsd fa4, 0x130(sp) - fsd fa5, 0x138(sp) - fsd fa6, 0x140(sp) - fsd fa7, 0x148(sp) - fsd ft8, 0x150(sp) - fsd ft9, 0x158(sp) - fsd ft10, 0x160(sp) - fsd ft11, 0x168(sp) - /* Store pointer to saved integer registers in caml_gc_regs */ - addi TMP, sp, 0x10 - STORE TMP, Caml_state(gc_regs) - /* Save current allocation pointer for debugging purposes */ - STORE ALLOC_PTR, Caml_state(young_ptr) - /* Save trap pointer in case an exception is raised during GC */ - STORE TRAP_PTR, Caml_state(exception_pointer) - /* Call the garbage collector */ +L(caml_call_gc): + /* Save return address */ + CFI_OFFSET(ra, -8) + addi sp, sp, -16 + sd ra, 8(sp) + CFI_ADJUST(16) + /* Store all registers (including ALLOC_PTR & TRAP_PTR) */ + SAVE_ALL_REGS + SWITCH_OCAML_TO_C + /* Call the garbage collector */ call PLT(caml_garbage_collection) - /* Restore registers */ - LOAD a0, 0x10(sp) - LOAD a1, 0x18(sp) - LOAD a2, 0x20(sp) - LOAD a3, 0x28(sp) - LOAD a4, 0x30(sp) - LOAD a5, 0x38(sp) - LOAD a6, 0x40(sp) - LOAD a7, 0x48(sp) - LOAD s2, 0x50(sp) - LOAD s3, 0x58(sp) - LOAD s4, 0x60(sp) - LOAD s5, 0x68(sp) - LOAD s6, 0x70(sp) - LOAD s7, 0x78(sp) - LOAD s8, 0x80(sp) - LOAD s9, 0x88(sp) - LOAD t2, 0x90(sp) - LOAD t3, 0x98(sp) - LOAD t4, 0xa0(sp) - LOAD t5, 0xa8(sp) - LOAD t6, 0xb0(sp) - LOAD s0, 0xb8(sp) - LOAD t0, 0xc0(sp) - fld ft0, 0xd0(sp) - fld ft1, 0xd8(sp) - fld 
ft2, 0xe0(sp) - fld ft3, 0xe8(sp) - fld ft4, 0xf0(sp) - fld ft5, 0xf8(sp) - fld ft6, 0x100(sp) - fld ft7, 0x108(sp) - fld fa0, 0x110(sp) - fld fa1, 0x118(sp) - fld fa2, 0x120(sp) - fld fa3, 0x128(sp) - fld fa4, 0x130(sp) - fld fa5, 0x138(sp) - fld fa6, 0x140(sp) - fld fa7, 0x148(sp) - fld ft8, 0x150(sp) - fld ft9, 0x158(sp) - fld ft10, 0x160(sp) - fld ft11, 0x168(sp) - /* Reload new allocation pointer */ - LOAD ALLOC_PTR, Caml_state(young_ptr) - /* Free stack space and return to caller */ - LOAD ra, 0x8(sp) - addi sp, sp, 0x170 - CFI_ADJUST(-0x170) + SWITCH_C_TO_OCAML + RESTORE_ALL_REGS + /* Free stack space and return to caller */ + ld ra, 8(sp) + addi sp, sp, 16 ret END_FUNCTION(caml_call_gc) -/* Call a C function from OCaml */ -/* Function to call is in ARG */ +FUNCTION(caml_alloc1) + ld TMP, Caml_state(young_limit) + addi ALLOC_PTR, ALLOC_PTR, -16 + bltu ALLOC_PTR, TMP, L(caml_call_gc) + ret +END_FUNCTION(caml_alloc1) -FUNCTION(caml_c_call) - /* Preserve return address in callee-save register s2 */ - mv s2, ra - CFI_REGISTER(ra, s2) - /* Record lowest stack address and return address */ - STORE ra, Caml_state(last_return_address) - STORE sp, Caml_state(bottom_of_stack) - /* Make the exception handler alloc ptr available to the C code */ - STORE ALLOC_PTR, Caml_state(young_ptr) - STORE TRAP_PTR, Caml_state(exception_pointer) - /* Call the function */ - jalr ARG - /* Reload alloc ptr */ - LOAD ALLOC_PTR, Caml_state(young_ptr) - /* Return */ - jr s2 -END_FUNCTION(caml_c_call) +FUNCTION(caml_alloc2) + ld TMP, Caml_state(young_limit) + addi ALLOC_PTR, ALLOC_PTR, -24 + bltu ALLOC_PTR, TMP, L(caml_call_gc) + ret +END_FUNCTION(caml_alloc2) -/* Raise an exception from OCaml */ -FUNCTION(caml_raise_exn) - /* Test if backtrace is active */ - LOAD TMP, Caml_state(backtrace_active) - bnez TMP, 2f -1: /* Cut stack at current trap handler */ - mv sp, TRAP_PTR - /* Pop previous handler and jump to it */ - LOAD TMP, 8(sp) - LOAD TRAP_PTR, 0(sp) - addi sp, sp, 16 - CFI_ADJUST(-16) - jr TMP -2: /* Preserve exception bucket in callee-save register s2 */ - mv s2, a0 - /* Stash the backtrace */ - mv a1, ra - mv a2, sp - mv a3, TRAP_PTR - call PLT(caml_stash_backtrace) - /* Restore exception bucket and raise */ - mv a0, s2 - j 1b -END_FUNCTION(caml_raise_exn) +FUNCTION(caml_alloc3) + ld TMP, Caml_state(young_limit) + addi ALLOC_PTR, ALLOC_PTR, -32 + bltu ALLOC_PTR, TMP, L(caml_call_gc) + ret +END_FUNCTION(caml_alloc3) - .globl caml_reraise_exn - .type caml_reraise_exn, @function +FUNCTION(caml_allocN) + ld TMP, Caml_state(young_limit) + sub ALLOC_PTR, ALLOC_PTR, ADDITIONAL_ARG + bltu ALLOC_PTR, TMP, L(caml_call_gc) + ret +END_FUNCTION(caml_allocN) -/* Raise an exception from C */ +/* Call a C function from OCaml */ +/* Function to call is in ADDITIONAL_ARG */ -FUNCTION(caml_raise_exception) - mv DOMAIN_STATE_PTR, a0 - mv a0, a1 - LOAD TRAP_PTR, Caml_state(exception_pointer) - LOAD ALLOC_PTR, Caml_state(young_ptr) - LOAD TMP, Caml_state(backtrace_active) - bnez TMP, 2f -1: /* Cut stack at current trap handler */ - mv sp, TRAP_PTR - LOAD TMP, 8(sp) - LOAD TRAP_PTR, 0(sp) +FUNCTION(caml_c_call) + CFI_OFFSET(ra, -8) + addi sp, sp, -16 + sd ra, 8(sp) + CFI_ADJUST(16) + /* Switch form OCaml to C */ + SWITCH_OCAML_TO_C + /* Make the exception handler alloc ptr available to the C code */ + sd ALLOC_PTR, Caml_state(young_ptr) + sd TRAP_PTR, Caml_state(exn_handler) + /* Call the function */ + jalr ADDITIONAL_ARG + /* Reload alloc ptr */ + ld ALLOC_PTR, Caml_state(young_ptr) + /* Load ocaml stack */ + SWITCH_C_TO_OCAML 
+ /* Return */ + ld ra, 8(sp) addi sp, sp, 16 - CFI_ADJUST(-16) - jr TMP -2: /* Preserve exception bucket in callee-save register s2 */ - mv s2, a0 - LOAD a1, Caml_state(last_return_address) - LOAD a2, Caml_state(bottom_of_stack) - mv a3, TRAP_PTR - call PLT(caml_stash_backtrace) - mv a0, s2 + ret +END_FUNCTION(caml_c_call) + +FUNCTION(caml_c_call_stack_args) + /* Arguments: + C arguments : a0 to a7, fa0 to fa7 + C function : ADDITIONAL_ARG + C stack args : begin=STACK_ARG_BEGIN + end=STACK_ARG_END */ + CFI_OFFSET(ra, -8) + addi sp, sp, -16 + sd ra, 8(sp) + CFI_ADJUST(16) + /* Switch from OCaml to C */ + SWITCH_OCAML_TO_C + /* Make the exception handler alloc ptr available to the C code */ + sd ALLOC_PTR, Caml_state(young_ptr) + sd TRAP_PTR, Caml_state(exn_handler) + /* Store sp to restore after call */ + mv s2, sp + /* Copy arguments from OCaml to C stack + NB: STACK_ARG_{BEGIN,END} are 16-byte aligned */ +1: addi STACK_ARG_END, STACK_ARG_END, -16 + bltu STACK_ARG_END, STACK_ARG_BEGIN, 2f + ld TMP, 0(STACK_ARG_END) + ld TMP2, 8(STACK_ARG_END) + addi sp, sp, -16 + sd TMP, 0(sp) + sd TMP2, 8(sp) + CFI_ADJUST(16) j 1b -END_FUNCTION(caml_raise_exception) +2: /* Call the function */ + jalr ADDITIONAL_ARG + /* Restore stack */ + mv sp, s2 + /* Reload alloc ptr */ + ld ALLOC_PTR, Caml_state(young_ptr) + /* Switch from C to OCaml */ + SWITCH_C_TO_OCAML + /* Return */ + ld ra, 8(sp) + addi sp, sp, 16 + ret +END_FUNCTION(caml_c_call_stack_args) /* Start the OCaml program */ FUNCTION(caml_start_program) - mv ARG_DOMAIN_STATE_PTR, a0 - la ARG, caml_program - /* Code shared with caml_callback* */ - /* Address of OCaml code to call is in ARG */ - /* Arguments to the OCaml code are in a0 ... a7 */ -.Ljump_to_caml: - /* Set up stack frame and save callee-save registers */ - addi sp, sp, -0xd0 - CFI_ADJUST(0xd0) - STORE ra, 0xc0(sp) - CFI_OFFSET(ra, -0xd0+0xc0) - STORE s0, 0x0(sp) - STORE s1, 0x8(sp) - STORE s2, 0x10(sp) - STORE s3, 0x18(sp) - STORE s4, 0x20(sp) - STORE s5, 0x28(sp) - STORE s6, 0x30(sp) - STORE s7, 0x38(sp) - STORE s8, 0x40(sp) - STORE s9, 0x48(sp) - STORE s10, 0x50(sp) - STORE s11, 0x58(sp) - fsd fs0, 0x60(sp) - fsd fs1, 0x68(sp) - fsd fs2, 0x70(sp) - fsd fs3, 0x78(sp) - fsd fs4, 0x80(sp) - fsd fs5, 0x88(sp) - fsd fs6, 0x90(sp) - fsd fs7, 0x98(sp) - fsd fs8, 0xa0(sp) - fsd fs9, 0xa8(sp) - fsd fs10, 0xb0(sp) - fsd fs11, 0xb8(sp) + /* domain state is passed as arg from C */ + mv TMP, C_ARG_1 + la TMP2, caml_program + +/* Code shared with caml_callback* */ +/* Adress of domain state is in TMP */ +/* Address of OCaml code to call is in TMP2 */ +/* Arguments to the OCaml code are in a0...a7 */ + +L(jump_to_caml): + /* Set up stack frame and save callee-save registers */ + CFI_OFFSET(ra, -200) + addi sp, sp, -208 + sd ra, 8(sp) + CFI_ADJUST(208) + sd s0, (2*8)(sp) + sd s1, (3*8)(sp) + sd s2, (4*8)(sp) + sd s3, (5*8)(sp) + sd s4, (6*8)(sp) + sd s5, (7*8)(sp) + sd s6, (8*8)(sp) + sd s7, (9*8)(sp) + sd s8, (10*8)(sp) + sd s9, (11*8)(sp) + sd s10, (12*8)(sp) + sd s11, (13*8)(sp) + fsd fs0, (14*8)(sp) + fsd fs1, (15*8)(sp) + fsd fs2, (16*8)(sp) + fsd fs3, (17*8)(sp) + fsd fs4, (18*8)(sp) + fsd fs5, (19*8)(sp) + fsd fs6, (20*8)(sp) + fsd fs7, (21*8)(sp) + fsd fs8, (22*8)(sp) + fsd fs9, (23*8)(sp) + fsd fs10, (24*8)(sp) + fsd fs11, (25*8)(sp) + /* Load domain state pointer from argument */ + mv DOMAIN_STATE_PTR, TMP + /* Reload allocation pointer */ + ld ALLOC_PTR, Caml_state(young_ptr) + /* Build (16-byte aligned) struct c_stack_link on the C stack */ + ld t2, Caml_state(c_stack) addi sp, sp, -32 + 
sd t2, Cstack_prev(sp) + sd x0, Cstack_stack(sp) + sd x0, Cstack_sp(sp) CFI_ADJUST(32) - /* Load domain state pointer from argument */ - mv DOMAIN_STATE_PTR, ARG_DOMAIN_STATE_PTR - /* Setup a callback link on the stack */ - LOAD TMP, Caml_state(bottom_of_stack) - STORE TMP, 0(sp) - LOAD TMP, Caml_state(last_return_address) - STORE TMP, 8(sp) - LOAD TMP, Caml_state(gc_regs) - STORE TMP, 16(sp) - /* set up a trap frame */ - addi sp, sp, -16 - CFI_ADJUST(16) - LOAD TMP, Caml_state(exception_pointer) - STORE TMP, 0(sp) - lla TMP, .Ltrap_handler - STORE TMP, 8(sp) - mv TRAP_PTR, sp - LOAD ALLOC_PTR, Caml_state(young_ptr) - STORE x0, Caml_state(last_return_address) - jalr ARG -.Lcaml_retaddr: /* pop trap frame, restoring caml_exception_pointer */ - LOAD TMP, 0(sp) - STORE TMP, Caml_state(exception_pointer) + sd sp, Caml_state(c_stack) + /* Load the OCaml stack */ + ld t2, Caml_state(current_stack) + ld t2, Stack_sp(t2) + /* Store the gc_regs for callbacks during a GC */ + ld t3, Caml_state(gc_regs) + addi t2, t2, -8 + sd t3, 0(t2) + /* Store the stack pointer to allow DWARF unwind */ + addi t2, t2, -8 + sd sp, 0(t2) /* C_stack_sp */ + /* Setup a trap frame to catch exceptions escaping the OCaml code */ + ld t3, Caml_state(exn_handler) + la t4, L(trap_handler) + addi t2, t2, -16 + sd t3, 0(t2) + sd t4, 8(t2) + mv TRAP_PTR, t2 + /* Switch stacks and call the OCaml code */ + mv sp, t2 + CFI_REMEMBER_STATE + /* Call the OCaml code */ + jalr TMP2 +L(caml_retaddr): + /* Pop the trap frame, restoring Caml_state->exn_handler */ + ld t2, 0(sp) + addi sp, sp, 16 + CFI_ADJUST(-16) + sd t2, Caml_state(exn_handler) +L(return_result): + /* Restore GC regs */ + ld t2, 0(sp) + ld t3, 8(sp) addi sp, sp, 16 CFI_ADJUST(-16) -.Lreturn_result: /* pop callback link, restoring global variables */ - LOAD TMP, 0(sp) - STORE TMP, Caml_state(bottom_of_stack) - LOAD TMP, 8(sp) - STORE TMP, Caml_state(last_return_address) - LOAD TMP, 16(sp) - STORE TMP, Caml_state(gc_regs) + sd t3, Caml_state(gc_regs) + /* Update allocation pointer */ + sd ALLOC_PTR, Caml_state(young_ptr) + /* Return to C stack */ + ld t2, Caml_state(current_stack) + sd sp, Stack_sp(t2) + ld t3, Caml_state(c_stack) + mv sp, t3 + CFI_RESTORE_STATE + /* Pop the struct c_stack_link */ + ld t2, Cstack_prev(sp) addi sp, sp, 32 CFI_ADJUST(-32) - /* Update allocation pointer */ - STORE ALLOC_PTR, Caml_state(young_ptr) - /* reload callee-save registers and return */ - LOAD ra, 0xc0(sp) - LOAD s0, 0x0(sp) - LOAD s1, 0x8(sp) - LOAD s2, 0x10(sp) - LOAD s3, 0x18(sp) - LOAD s4, 0x20(sp) - LOAD s5, 0x28(sp) - LOAD s6, 0x30(sp) - LOAD s7, 0x38(sp) - LOAD s8, 0x40(sp) - LOAD s9, 0x48(sp) - LOAD s10, 0x50(sp) - LOAD s11, 0x58(sp) - fld fs0, 0x60(sp) - fld fs1, 0x68(sp) - fld fs2, 0x70(sp) - fld fs3, 0x78(sp) - fld fs4, 0x80(sp) - fld fs5, 0x88(sp) - fld fs6, 0x90(sp) - fld fs7, 0x98(sp) - fld fs8, 0xa0(sp) - fld fs9, 0xa8(sp) - fld fs10, 0xb0(sp) - fld fs11, 0xb8(sp) - addi sp, sp, 0xd0 - CFI_ADJUST(-0xd0) + sd t2, Caml_state(c_stack) + /* Reload callee-save register and return address */ + ld s0, (2*8)(sp) + ld s1, (3*8)(sp) + ld s2, (4*8)(sp) + ld s3, (5*8)(sp) + ld s4, (6*8)(sp) + ld s5, (7*8)(sp) + ld s6, (8*8)(sp) + ld s7, (9*8)(sp) + ld s8, (10*8)(sp) + ld s9, (11*8)(sp) + ld s10, (12*8)(sp) + ld s11, (13*8)(sp) + fld fs0, (14*8)(sp) + fld fs1, (15*8)(sp) + fld fs2, (16*8)(sp) + fld fs3, (17*8)(sp) + fld fs4, (18*8)(sp) + fld fs5, (19*8)(sp) + fld fs6, (20*8)(sp) + fld fs7, (21*8)(sp) + fld fs8, (22*8)(sp) + fld fs9, (23*8)(sp) + fld fs10, (24*8)(sp) + fld fs11, 
(25*8)(sp) + ld ra, 8(sp) + addi sp, sp, 208 + CFI_ADJUST(-208) + /* Return to C caller */ ret - .type .Lcaml_retaddr, @function - .size .Lcaml_retaddr, .-.Lcaml_retaddr END_FUNCTION(caml_start_program) +/* The trap handler */ + .align 2 -.Ltrap_handler: +L(trap_handler): CFI_STARTPROC - STORE TRAP_PTR, Caml_state(exception_pointer) + /* Save exception pointer */ + sd TRAP_PTR, Caml_state(exn_handler) + /* Encode exception pointer */ ori a0, a0, 2 - j .Lreturn_result - .type .Ltrap_handler, @function -END_FUNCTION(.Ltrap_handler) + /* Return it */ + j L(return_result) + CFI_ENDPROC + +/* Exceptions */ + +.macro JUMP_TO_TRAP_PTR + /* Cut stack at current trap handler */ + mv sp, TRAP_PTR + /* Pop previous handler and jump to it */ + ld TMP, 8(sp) + ld TRAP_PTR, 0(sp) + addi sp, sp, 16 + jr TMP +.endm + +/* Raise an exception from OCaml */ +FUNCTION(caml_raise_exn) + /* Test if backtrace is active */ + ld TMP, Caml_state(backtrace_active) + bnez TMP, 2f +1: + JUMP_TO_TRAP_PTR +2: /* Zero backtrace_pos */ + sd x0, Caml_state(backtrace_pos) +L(caml_reraise_exn_stash): + /* Preserve exception bucket in callee-save register s2 */ + mv s2, a0 + /* Stash the backtrace */ + /* arg1: exn bucket, already in a0 */ + mv a1, ra /* arg2: pc of raise */ + mv a2, sp /* arg3: sp of raise */ + mv a3, TRAP_PTR /* arg4: sp of handler */ + /* Switch to C stack */ + ld TMP, Caml_state(c_stack) + mv sp, TMP + call PLT(caml_stash_backtrace) + /* Restore exception bucket and raise */ + mv a0, s2 + j 1b +END_FUNCTION(caml_raise_exn) + +FUNCTION(caml_reraise_exn) + ld TMP, Caml_state(backtrace_active) + bnez TMP, L(caml_reraise_exn_stash) + JUMP_TO_TRAP_PTR +END_FUNCTION(caml_reraise_exn) + +/* Raise an exception from C */ + +FUNCTION(caml_raise_exception) + /* Load the domain state ptr */ + mv DOMAIN_STATE_PTR, C_ARG_1 + /* Load the exception bucket */ + mv a0, C_ARG_2 + /* Reload trap ptr and alloc ptr */ + ld TRAP_PTR, Caml_state(exn_handler) + ld ALLOC_PTR, Caml_state(young_ptr) + /* Discard the C stack pointer and reset to ocaml stack */ + ld TMP, Caml_state(current_stack) + ld TMP, Stack_sp(TMP) + mv sp, TMP + /* Restore frame and link on return to OCaml */ + ld ra, 8(sp) + addi sp, sp, 16 + j caml_raise_exn +END_FUNCTION(caml_raise_exception) /* Callback from C to OCaml */ FUNCTION(caml_callback_asm) - /* Initial shuffling of arguments */ - /* a0 = Caml_state, a1 = closure, (a2) = args */ - mv ARG_DOMAIN_STATE_PTR, a0 - LOAD a0, 0(a2) /* a0 = first arg */ - /* a1 = closure environment */ - LOAD ARG, 0(a1) /* code pointer */ - j .Ljump_to_caml + /* Initial shuffling of arguments */ + /* (a0 = Caml_state, a1 = closure, 0(a2) = first arg) */ + mv TMP, a0 + ld a0, 0(a2) /* a0 = first arg */ + /* a1 = closure environment */ + ld TMP2, 0(a1) /* code pointer */ + j L(jump_to_caml) END_FUNCTION(caml_callback_asm) FUNCTION(caml_callback2_asm) - /* Initial shuffling of arguments */ - /* a0 = Caml_state, a1 = closure, (a2) = args */ - mv ARG_DOMAIN_STATE_PTR, a0 - mv TMP, a1 - LOAD a0, 0(a2) - LOAD a1, 8(a2) - mv a2, TMP - la ARG, caml_apply2 - j .Ljump_to_caml + /* Initial shuffling of arguments */ + /* (a0 = Caml_state, a1 = closure, 0(a2) = arg1, 8(a2) = arg2) */ + mv TMP, a0 + mv TMP2, a1 + ld a0, 0(a2) /* a0 = first arg */ + ld a1, 8(a2) /* a1 = second arg */ + mv a2, TMP2 /* a2 = closure environment */ + la TMP2, caml_apply2 + j L(jump_to_caml) END_FUNCTION(caml_callback2_asm) FUNCTION(caml_callback3_asm) - /* Initial shuffling of arguments */ - /* a0 = Caml_state, a1 = closure, (a2) = args */ - mv 
ARG_DOMAIN_STATE_PTR, a0 - mv a3, a1 - LOAD a0, 0(a2) - LOAD a1, 8(a2) - LOAD a2, 16(a2) - la ARG, caml_apply3 - j .Ljump_to_caml + /* Initial shuffling of arguments */ + /* (a0 = Caml_state, a1 = closure, 0(a2) = arg1, 8(a2) = arg2, + 16(a2) = arg3) */ + mv TMP, a0 + mv a3, a1 /* a3 = closure environment */ + ld a0, 0(a2) /* a0 = first arg */ + ld a1, 8(a2) /* a1 = second arg */ + ld a2, 16(a2) /* a2 = third arg */ + la TMP2, caml_apply3 + j L(jump_to_caml) END_FUNCTION(caml_callback3_asm) +/* Fibers */ + +/* Switch between OCaml stacks. Clobbers TMP and switches TRAP_PTR + Preserves old_stack and new_stack registers */ +.macro SWITCH_OCAML_STACKS old_stack, new_stack + /* Save frame pointer and return address for old_stack */ + addi sp, sp, -16 + sd ra, 8(sp) + CFI_ADJUST(16) + /* Save OCaml SP and exn_handler in the stack info */ + sd sp, Stack_sp(\old_stack) + sd TRAP_PTR, Stack_exception(\old_stack) + /* switch stacks */ + sd \new_stack, Caml_state(current_stack) + ld TMP, Stack_sp(\new_stack) + mv sp, TMP + /* restore exn_handler for new stack */ + ld TRAP_PTR, Stack_exception(\new_stack) + /* Restore frame pointer and return address for new_stack */ + ld ra, 8(sp) + addi sp, sp, 16 +.endm + +/* + * A continuation is a one word object that points to a fiber. A fiber [f] will + * point to its parent at Handler_parent(Stack_handler(f)). In the following, + * the [last_fiber] refers to the last fiber in the linked-list formed by the + * parent pointer. + */ + +FUNCTION(caml_perform) + /* a0: effect to perform + a1: freshly allocated continuation */ + ld a2, Caml_state(current_stack) /* a2 := old stack */ + addi a3, a2, 1 /* a3 := Val_ptr(old stack) */ + sd a3, 0(a1) /* Iniitalize continuation */ +L(do_perform): + /* a0: effect to perform + a1: continuation + a2: old_stack + a3: last_fiber */ + + ld t3, Stack_handler(a2) /* t3 := old stack -> handler */ + ld t4, Handler_parent(t3) /* t4 := parent stack */ + beqz t4, 1f + SWITCH_OCAML_STACKS a2, t4 + /* we have to null the Handler_parent after the switch because + the Handler_parent is needed to unwind the stack for backtraces */ + sd x0, Handler_parent(t3) /* Set parent of performer to NULL */ + ld TMP, Handler_effect(t3) + mv a2, a3 /* a2 := last_fiber */ + mv a3, TMP /* a3 := effect handler */ + tail PLT(caml_apply3) +1: + /* switch back to original performer before raising Unhandled + (no-op unless this is a reperform) */ + ld t4, 0(a1) /* load performer stack from continuation */ + addi t4, t4, -1 /* t4 := Ptr_val(t4) */ + ld t3, Caml_state(current_stack) + SWITCH_OCAML_STACKS t3, t4 + /* No parent stack. Raise Unhandled. 
*/ + la a0, caml_exn_Unhandled + j caml_raise_exn +END_FUNCTION(caml_perform) + +FUNCTION(caml_reperform) + /* a0: effect to perform + a1: continuation + a2: last_fiber */ + ld TMP, Stack_handler_from_cont(a2) + ld a2, Caml_state(current_stack) /* a2 := old stack */ + sd a2, Handler_parent(TMP) /* Append to last_fiber */ + addi a3, a2, 1 /* a3 (last_fiber) := Val_ptr(old stack) */ + j L(do_perform) +END_FUNCTION(caml_reperform) + +FUNCTION(caml_resume) + /* a0: new fiber + a1: fun + a2: arg */ + addi a0, a0, -1 /* a0 = Ptr_val(a0) */ + ld a3, 0(a1) /* code pointer */ + /* Check if stack null, then already used */ + beqz a0, 2f + /* Find end of list of stacks (put in t2) */ + mv TMP, a0 +1: ld t2, Stack_handler(TMP) + ld TMP, Handler_parent(t2) + bnez TMP, 1b + /* Add current stack to the end */ + ld t3, Caml_state(current_stack) + sd t3, Handler_parent(t2) + SWITCH_OCAML_STACKS t3, a0 + mv a0, a2 + jr a3 +2: la a0, caml_exn_Continuation_already_taken + j caml_raise_exn +END_FUNCTION(caml_resume) + +/* Run a function on a new stack, then either + return the value or invoke exception handler */ +FUNCTION(caml_runstack) + /* a0: fiber + a1: fun + a2: arg */ + CFI_OFFSET(ra, -8) + addi sp, sp, -16 + sd ra, 8(sp) + CFI_ADJUST(16) + addi a0, a0, -1 /* a0 := Ptr_val(a0) */ + ld a3, 0(a1) /* code pointer */ + /* save old stack pointer and exception handler */ + ld t2, Caml_state(current_stack) /* t2 := old stack */ + sd sp, Stack_sp(t2) + sd TRAP_PTR, Stack_exception(t2) + /* Load new stack pointer and set parent */ + ld TMP, Stack_handler(a0) + sd t2, Handler_parent(TMP) + sd a0, Caml_state(current_stack) + ld t3, Stack_sp(a0) /* t3 := sp of new stack */ + /* Create an exception handler on the target stack + after 16byte DWARF & gc_regs block (which is unused here) */ + addi t3, t3, -32 + la TMP, L(fiber_exn_handler) + sd TMP, 8(t3) + /* link the previous exn_handler so that copying stacks works */ + ld TMP, Stack_exception(a0) + sd TMP, 0(t3) + mv TRAP_PTR, t3 + /* Switch to the new stack */ + mv sp, t3 + CFI_REMEMBER_STATE + /* Call the function on the new stack */ + mv a0, a2 + jalr a3 +L(frame_runstack): + addi t2, sp, 32 /* t2 := stack_handler */ + ld s2, Handler_value(t2) /* saved across C call */ +1: + mv s3, a0 /* save return across C call */ + ld a0, Caml_state(current_stack) /* arg to caml_free_stack */ + /* restore parent stack and exn_handler into Caml_state */ + ld TMP, Handler_parent(t2) + sd TMP, Caml_state(current_stack) + ld TRAP_PTR, Stack_exception(TMP) + sd TRAP_PTR, Caml_state(exn_handler) + /* free old stack by switching directly to c_stack; + is a no-alloc call */ + ld s4, Stack_sp(TMP) /* saved across C call */ + CFI_RESTORE_STATE + CFI_REMEMBER_STATE + ld TMP, Caml_state(c_stack) + mv sp, TMP + call PLT(caml_free_stack) + /* switch directly to parent stack with correct return */ + mv a0, s3 + mv a1, s2 + mv sp, s4 + CFI_RESTORE_STATE + ld TMP, 0(s2) /* code pointer */ + /* Invoke handle_value (or handle_exn) */ + ld ra, 8(sp) + addi sp, sp, 16 + CFI_ADJUST(-16) + jr TMP +L(fiber_exn_handler): + addi t2, sp, 16 /* t2 := stack_handler */ + ld s2, Handler_exception(t2) + j 1b +END_FUNCTION(caml_runstack) + FUNCTION(caml_ml_array_bound_error) - /* Load address of [caml_array_bound_error_asm] in ARG */ - la ARG, caml_array_bound_error_asm - /* Call that function */ - tail caml_c_call + /* Load address of [caml_array_bound_error_asm] in ADDITIONAL_ARG */ + la ADDITIONAL_ARG, caml_array_bound_error_asm + /* Call that function */ + j caml_c_call 
END_FUNCTION(caml_ml_array_bound_error) .globl caml_system__code_end @@ -440,14 +831,14 @@ caml_system__code_end: /* GC roots for callback */ - .section .data +OBJECT(caml_system.frametable) + .quad 2 /* two descriptors */ + .quad L(caml_retaddr) /* return address into callback */ + .short -1 /* negative frame size => use callback link */ + .short 0 /* no roots */ .align 3 - .globl caml_system__frametable - .type caml_system__frametable, @object -caml_system__frametable: - .quad 1 /* one descriptor */ - .quad .Lcaml_retaddr /* return address into callback */ - .short -1 /* negative frame size => use callback link */ - .short 0 /* no roots */ + .quad L(frame_runstack) /* return address into fiber handler */ + .short -1 /* negative frame size => use callback link */ + .short 0 /* no roots */ .align 3 - .size caml_system__frametable, .-caml_system__frametable +END_OBJECT(caml_system.frametable)
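
The fiber primitives added above (caml_perform, caml_reperform, caml_resume, caml_runstack) are the runtime entry points that effect handlers compile to. As a smoke test of this code path on a riscv64 build, a minimal OCaml 5 program along the following lines exercises all of them; the Ask effect and the handler shape are purely illustrative and not part of this patch.

    (* perform lowers to caml_perform, the handled computation runs on a
       fresh fiber via caml_runstack, and continue resumes the suspended
       fiber through caml_resume. *)
    open Effect
    open Effect.Deep

    type _ Effect.t += Ask : int Effect.t        (* illustrative effect *)

    let comp () = perform Ask + perform Ask

    let () =
      let result =
        match_with comp ()
          { retc = (fun v -> v);
            exnc = raise;
            effc = (fun (type a) (eff : a Effect.t) ->
              match eff with
              | Ask -> Some (fun (k : (a, _) continuation) -> continue k 21)
              | _ -> None) }
      in
      Printf.printf "result = %d\n" result       (* prints: result = 42 *)

Compiled with ocamlopt on riscv64, the perform/continue round trip goes through the stack-switching macros defined in riscv.S above.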
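
Lop (Idls_get) now emits a single load of Domain_dls_root from the domain-state block addressed by s11 (reg_domain_state_ptr). A small example of code that hits this path, assuming nothing beyond the stdlib Domain and Domain.DLS API:

    (* Each domain gets its own copy of the value behind the key, so the
       spawned domain and the main domain bump independent counters. *)
    let counter_key = Domain.DLS.new_key (fun () -> ref 0)

    let bump () =
      let r = Domain.DLS.get counter_key in  (* reads dls_root via Idls_get *)
      incr r;
      !r

    let () =
      let d = Domain.spawn (fun () -> bump () + bump ()) in
      let here = bump () in
      Printf.printf "spawned: %d, main: %d\n" (Domain.join d) here
      (* expected output: spawned: 3, main: 1 *)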
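
The prologue stack check emitted in fundecl, together with caml_call_realloc_stack above, is what lets deep non-tail recursion grow the current fiber's heap-allocated stack instead of overflowing a fixed system stack. A rough way to exercise that path is sketched below; the depth chosen is arbitrary, and the point at which Stack_overflow is raised instead is implementation-defined.

    (* Non-tail-recursive, so every call pushes a frame; once the prologue
       check fails, caml_call_realloc_stack grows the fiber's stack, or
       raises Stack_overflow when the limit is reached. *)
    let rec depth n = if n = 0 then 0 else 1 + depth (n - 1)

    let () =
      match depth 1_000_000 with
      | d -> Printf.printf "depth = %d\n" d
      | exception Stack_overflow -> print_endline "Stack_overflow"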