From 8c5f59c0cfe5ff7e68dc6d6e74b271cf11a8df5c Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Tue, 23 Jun 2020 10:43:08 -0700 Subject: [PATCH] wasmtime: Implement `table.get` and `table.set` These instructions have fast, inline JIT paths for the common cases, and only call out to host VM functions for the slow paths. This required some changes to `cranelift-wasm`'s `FuncEnvironment`: instead of taking a `FuncCursor` to insert an instruction sequence within the current basic block, `FuncEnvironment::translate_table_{get,set}` now take a `&mut FunctionBuilder` so that they can create whole new basic blocks. This is necessary for implementing GC read/write barriers that involve branching (e.g. checking for null, or whether a store buffer is at capacity). Furthermore, it required that the `load`, `load_complex`, and `store` instructions handle loading and storing through an `r{32,64}` rather than just `i{32,64}` addresses. This involved making `r{32,64}` types acceptable instantiations of the `iAddr` type variable, plus a few new instruction encodings. Part of #929 --- Cargo.lock | 1 + build.rs | 22 +- .../codegen/meta/src/isa/x86/encodings.rs | 40 ++ .../codegen/meta/src/shared/instructions.rs | 4 +- cranelift/codegen/src/binemit/relaxation.rs | 6 +- cranelift/codegen/src/ir/valueloc.rs | 4 +- cranelift/codegen/src/postopt.rs | 6 +- .../codegen/src/redundant_reload_remover.rs | 6 +- cranelift/frontend/src/frontend.rs | 5 + cranelift/wasm/src/code_translator.rs | 9 +- cranelift/wasm/src/environ/dummy.rs | 7 +- cranelift/wasm/src/environ/spec.rs | 4 +- cranelift/wasm/src/sections_translator.rs | 4 +- crates/environ/Cargo.toml | 1 + crates/environ/src/func_environ.rs | 375 +++++++++++++++++- crates/runtime/src/externref.rs | 88 +++- crates/runtime/src/libcalls.rs | 21 + crates/runtime/src/vmcontext.rs | 4 + tests/all/gc.rs | 328 ++++++++++++++- .../many_table_gets_lead_to_gc.wast | 35 ++ 20 files changed, 894 insertions(+), 76 deletions(-) create mode 100644 tests/misc_testsuite/reference-types/many_table_gets_lead_to_gc.wast diff --git a/Cargo.lock b/Cargo.lock index 31c4c940d70b..347b12ccce7c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2423,6 +2423,7 @@ dependencies = [ "bincode", "cranelift-codegen", "cranelift-entity", + "cranelift-frontend", "cranelift-wasm", "directories", "errno", diff --git a/build.rs b/build.rs index 848420aaa8f8..0e511a5717b5 100644 --- a/build.rs +++ b/build.rs @@ -205,18 +205,20 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { ("simd", "simd_i16x8_arith2") => return true, ("simd", "simd_i8x16_arith2") => return true, - ("reference_types", "table_copy_on_imported_tables") - | ("reference_types", "externref_id_function") - | ("reference_types", "table_size") - | ("reference_types", "simple_ref_is_null") - | ("reference_types", "table_grow_with_funcref") => { - // TODO(#1886): Ignore if this isn't x64, because Cranelift only - // supports reference types on x64. - return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "x86_64"; + // Still working on implementing these. See #929. + ("reference_types", "global") + | ("reference_types", "linking") + | ("reference_types", "ref_func") + | ("reference_types", "ref_null") + | ("reference_types", "table_fill") => { + return true; } - // Still working on implementing these. See #929. - ("reference_types", _) => return true, + // TODO(#1886): Ignore reference types tests if this isn't x64, + // because Cranelift only supports reference types on x64. + ("reference_types", _) => { + return env::var("CARGO_CFG_TARGET_ARCH").unwrap() != "x86_64"; + } _ => {} }, diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 71a549647c0e..ad06f519c13f 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -231,6 +231,32 @@ impl PerCpuModeEncodings { }); } + /// Add encodings for `inst.r32` to X86_32. + /// Add encodings for `inst.r32` to X86_64 with and without REX. + /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. + fn enc_r32_r64_instp( + &mut self, + inst: &Instruction, + template: Template, + instp: InstructionPredicateNode, + ) { + self.enc32_func(inst.bind(R32), template.nonrex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + + // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise + // reg-alloc would never use r8 and up. + self.enc64_func(inst.bind(R32), template.rex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + self.enc64_func(inst.bind(R32), template.nonrex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + self.enc64_func(inst.bind(R64), template.rex().w(), |builder| { + builder.inst_predicate(instp) + }); + } + /// Add encodings for `inst.r32` to X86_32. /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. fn enc_r32_r64_rex_only(&mut self, inst: impl Into, template: Template) { @@ -810,6 +836,11 @@ fn define_memory( recipe.opcodes(&MOV_LOAD), is_load_complex_length_two.clone(), ); + e.enc_r32_r64_instp( + load_complex, + recipe.opcodes(&MOV_LOAD), + is_load_complex_length_two.clone(), + ); e.enc_x86_64_instp( uload32_complex, recipe.opcodes(&MOV_LOAD), @@ -855,6 +886,11 @@ fn define_memory( recipe.opcodes(&MOV_STORE), is_store_complex_length_three.clone(), ); + e.enc_r32_r64_instp( + store_complex, + recipe.opcodes(&MOV_STORE), + is_store_complex_length_three.clone(), + ); e.enc_x86_64_instp( istore32_complex, recipe.opcodes(&MOV_STORE), @@ -948,6 +984,10 @@ fn define_memory( e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0); e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0); } + for &ty in &[R64, R32] { + e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); + e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); + } // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above. diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index be8d65ecc270..8e0063b4598f 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -211,7 +211,7 @@ fn define_control_flow( let iAddr = &TypeVar::new( "iAddr", "An integer address type", - TypeSetBuilder::new().ints(32..64).build(), + TypeSetBuilder::new().ints(32..64).refs(32..64).build(), ); { @@ -744,7 +744,7 @@ pub(crate) fn define( let iAddr = &TypeVar::new( "iAddr", "An integer address type", - TypeSetBuilder::new().ints(32..64).build(), + TypeSetBuilder::new().ints(32..64).refs(32..64).build(), ); let Ref = &TypeVar::new( diff --git a/cranelift/codegen/src/binemit/relaxation.rs b/cranelift/codegen/src/binemit/relaxation.rs index 554d9afa0040..142e400985dc 100644 --- a/cranelift/codegen/src/binemit/relaxation.rs +++ b/cranelift/codegen/src/binemit/relaxation.rs @@ -302,7 +302,11 @@ fn fallthroughs(func: &mut Function) { Opcode::Fallthrough => { // Somebody used a fall-through instruction before the branch relaxation pass. // Make sure it is correct, i.e. the destination is the layout successor. - debug_assert_eq!(destination, succ, "Illegal fall-through in {}", block) + debug_assert_eq!( + destination, succ, + "Illegal fallthrough from {} to {}, but {}'s successor is {}", + block, destination, block, succ + ) } Opcode::Jump => { // If this is a jump to the successor block, change it to a fall-through. diff --git a/cranelift/codegen/src/ir/valueloc.rs b/cranelift/codegen/src/ir/valueloc.rs index 9d81a5538174..6fbe7e0195a1 100644 --- a/cranelift/codegen/src/ir/valueloc.rs +++ b/cranelift/codegen/src/ir/valueloc.rs @@ -41,7 +41,7 @@ impl ValueLoc { pub fn unwrap_reg(self) -> RegUnit { match self { Self::Reg(ru) => ru, - _ => panic!("Expected register: {:?}", self), + _ => panic!("unwrap_reg expected register, found {:?}", self), } } @@ -49,7 +49,7 @@ impl ValueLoc { pub fn unwrap_stack(self) -> StackSlot { match self { Self::Stack(ss) => ss, - _ => panic!("Expected stack slot: {:?}", self), + _ => panic!("unwrap_stack expected stack slot, found {:?}", self), } } diff --git a/cranelift/codegen/src/postopt.rs b/cranelift/codegen/src/postopt.rs index 426aab00b8a7..ada14e1ff8ee 100644 --- a/cranelift/codegen/src/postopt.rs +++ b/cranelift/codegen/src/postopt.rs @@ -386,7 +386,11 @@ fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetI } let ok = pos.func.update_encoding(inst, isa).is_ok(); - debug_assert!(ok); + debug_assert!( + ok, + "failed to update encoding for `{}`", + pos.func.dfg.display_inst(inst, isa) + ); } //---------------------------------------------------------------------- diff --git a/cranelift/codegen/src/redundant_reload_remover.rs b/cranelift/codegen/src/redundant_reload_remover.rs index 79b505da8620..501c67ab6bd0 100644 --- a/cranelift/codegen/src/redundant_reload_remover.rs +++ b/cranelift/codegen/src/redundant_reload_remover.rs @@ -635,7 +635,11 @@ impl RedundantReloadRemover { // Load is completely redundant. Convert it to a no-op. dfg.replace(inst).fill_nop(arg); let ok = func.update_encoding(inst, isa).is_ok(); - debug_assert!(ok, "fill_nop encoding missing for this type"); + debug_assert!( + ok, + "fill_nop encoding missing for this type: `{}`", + func.dfg.display_inst(inst, isa) + ); } Transform::ChangeToCopyToSSA(ty, reg) => { // We already have the relevant value in some other register. Convert the diff --git a/cranelift/frontend/src/frontend.rs b/cranelift/frontend/src/frontend.rs index b45166b38396..f7e5fae4d646 100644 --- a/cranelift/frontend/src/frontend.rs +++ b/cranelift/frontend/src/frontend.rs @@ -206,6 +206,11 @@ impl<'a> FunctionBuilder<'a> { } } + /// Get the block that this builder is currently at. + pub fn current_block(&self) -> Option { + self.position.expand() + } + /// Set the source location that should be assigned to all new instructions. pub fn set_srcloc(&mut self, srcloc: ir::SourceLoc) { self.srcloc = srcloc; diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 9a55e772f1cd..7344f0c0b5fc 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -1186,19 +1186,14 @@ pub fn translate_operator( let table_index = TableIndex::from_u32(*index); let table = state.get_or_create_table(builder.func, *index, environ)?; let index = state.pop1(); - state.push1(environ.translate_table_get( - builder.cursor(), - table_index, - table, - index, - )?); + state.push1(environ.translate_table_get(builder, table_index, table, index)?); } Operator::TableSet { table: index } => { let table_index = TableIndex::from_u32(*index); let table = state.get_or_create_table(builder.func, *index, environ)?; let value = state.pop1(); let index = state.pop1(); - environ.translate_table_set(builder.cursor(), table_index, table, value, index)?; + environ.translate_table_set(builder, table_index, table, value, index)?; } Operator::TableCopy { dst_table: dst_table_index, diff --git a/cranelift/wasm/src/environ/dummy.rs b/cranelift/wasm/src/environ/dummy.rs index 76e47da91d1a..25f5c3a5929e 100644 --- a/cranelift/wasm/src/environ/dummy.rs +++ b/cranelift/wasm/src/environ/dummy.rs @@ -22,6 +22,7 @@ use cranelift_codegen::ir::types::*; use cranelift_codegen::ir::{self, InstBuilder}; use cranelift_codegen::isa::TargetFrontendConfig; use cranelift_entity::{EntityRef, PrimaryMap, SecondaryMap}; +use cranelift_frontend::FunctionBuilder; use std::boxed::Box; use std::string::String; use std::vec::Vec; @@ -452,17 +453,17 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ fn translate_table_get( &mut self, - mut pos: FuncCursor, + builder: &mut FunctionBuilder, _table_index: TableIndex, _table: ir::Table, _index: ir::Value, ) -> WasmResult { - Ok(pos.ins().null(self.reference_type())) + Ok(builder.ins().null(self.reference_type())) } fn translate_table_set( &mut self, - _pos: FuncCursor, + _builder: &mut FunctionBuilder, _table_index: TableIndex, _table: ir::Table, _value: ir::Value, diff --git a/cranelift/wasm/src/environ/spec.rs b/cranelift/wasm/src/environ/spec.rs index 32f74206449d..eacc90233f33 100644 --- a/cranelift/wasm/src/environ/spec.rs +++ b/cranelift/wasm/src/environ/spec.rs @@ -368,7 +368,7 @@ pub trait FuncEnvironment: TargetEnvironment { /// Translate a `table.get` WebAssembly instruction. fn translate_table_get( &mut self, - pos: FuncCursor, + builder: &mut FunctionBuilder, table_index: TableIndex, table: ir::Table, index: ir::Value, @@ -377,7 +377,7 @@ pub trait FuncEnvironment: TargetEnvironment { /// Translate a `table.set` WebAssembly instruction. fn translate_table_set( &mut self, - pos: FuncCursor, + builder: &mut FunctionBuilder, table_index: TableIndex, table: ir::Table, value: ir::Value, diff --git a/cranelift/wasm/src/sections_translator.rs b/cranelift/wasm/src/sections_translator.rs index 5eb7272662ca..7075fbc19bf8 100644 --- a/cranelift/wasm/src/sections_translator.rs +++ b/cranelift/wasm/src/sections_translator.rs @@ -349,7 +349,9 @@ pub fn parse_element_section<'data>( let index = ElemIndex::from_u32(index as u32); environ.declare_passive_element(index, segments)?; } - ElementKind::Declared => return Err(wasm_unsupported!("element kind declared")), + ElementKind::Declared => { + // Nothing to do here. + } } } Ok(()) diff --git a/crates/environ/Cargo.toml b/crates/environ/Cargo.toml index 8d640ed1d270..cf096a4303a6 100644 --- a/crates/environ/Cargo.toml +++ b/crates/environ/Cargo.toml @@ -15,6 +15,7 @@ edition = "2018" anyhow = "1.0" cranelift-codegen = { path = "../../cranelift/codegen", version = "0.65.0", features = ["enable-serde"] } cranelift-entity = { path = "../../cranelift/entity", version = "0.65.0", features = ["enable-serde"] } +cranelift-frontend = { path = "../../cranelift/frontend", version = "0.65.0" } cranelift-wasm = { path = "../../cranelift/wasm", version = "0.65.0", features = ["enable-serde"] } wasmparser = "0.58.0" lightbeam = { path = "../lightbeam", optional = true, version = "0.18.0" } diff --git a/crates/environ/src/func_environ.rs b/crates/environ/src/func_environ.rs index 909a9cd5474f..d64718c6190d 100644 --- a/crates/environ/src/func_environ.rs +++ b/crates/environ/src/func_environ.rs @@ -9,6 +9,7 @@ use cranelift_codegen::ir::types::*; use cranelift_codegen::ir::{AbiParam, ArgumentPurpose, Function, InstBuilder, Signature}; use cranelift_codegen::isa::{self, TargetFrontendConfig}; use cranelift_entity::EntityRef; +use cranelift_frontend::FunctionBuilder; use cranelift_wasm::{ self, FuncIndex, GlobalIndex, GlobalVariable, MemoryIndex, SignatureIndex, TableIndex, TargetEnvironment, WasmError, WasmResult, WasmType, @@ -169,6 +170,11 @@ declare_builtin_functions! { table_grow_funcref(vmctx, i32, i32, pointer) -> (i32); /// Returns an index for Wasm's `table.grow` instruction for `externref`s. table_grow_externref(vmctx, i32, i32, reference) -> (i32); + /// Returns an index to drop a `VMExternRef`. + drop_externref(pointer) -> (); + /// Returns an index to do a GC and then insert a `VMExternRef` into the + /// `VMExternRefActivationsTable`. + activations_table_insert_with_gc(vmctx, reference) -> (); } impl BuiltinFunctionIndex { @@ -392,6 +398,40 @@ impl<'module_environment> FuncEnvironment<'module_environment> { (base, func_addr) } + + /// Generate code to increment or decrement the given `externref`'s + /// reference count. + /// + /// The new reference count is returned. + fn mutate_extenref_ref_count( + &mut self, + builder: &mut FunctionBuilder, + externref: ir::Value, + delta: i64, + ) -> ir::Value { + debug_assert!(delta == -1 || delta == 1); + + let pointer_type = self.pointer_type(); + let ref_count_offset = ir::immediates::Offset32::new( + i32::try_from(VMOffsets::vm_extern_data_ref_count()).unwrap(), + ); + + let old_ref_count = builder.ins().load( + pointer_type, + ir::MemFlags::trusted(), + externref, + ref_count_offset, + ); + let new_ref_count = builder.ins().iadd_imm(old_ref_count, delta); + builder.ins().store( + ir::MemFlags::trusted(), + new_ref_count, + externref, + ref_count_offset, + ); + + new_ref_count + } } // TODO: This is necessary as if Lightbeam used `FuncEnvironment` directly it would cause @@ -593,9 +633,10 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m readonly: false, }); - let element_size = match self.module.table_plans[index].style { - TableStyle::CallerChecksSignature => u64::from(self.pointer_type().bytes()), - }; + let element_size = u64::from( + self.reference_type(self.module.table_plans[index].table.wasm_ty) + .bytes(), + ); Ok(func.create_table(ir::TableData { base_gv, @@ -646,27 +687,325 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m fn translate_table_get( &mut self, - _: cranelift_codegen::cursor::FuncCursor<'_>, - _: TableIndex, - _: ir::Table, - _: ir::Value, + builder: &mut FunctionBuilder, + table_index: TableIndex, + table: ir::Table, + index: ir::Value, ) -> WasmResult { - Err(WasmError::Unsupported( - "the `table.get` instruction is not supported yet".into(), - )) + let pointer_type = self.pointer_type(); + + let plan = &self.module.table_plans[table_index]; + match plan.table.wasm_ty { + WasmType::FuncRef => match plan.style { + TableStyle::CallerChecksSignature => { + let table_entry_addr = builder.ins().table_addr(pointer_type, table, index, 0); + Ok(builder.ins().load( + pointer_type, + ir::MemFlags::trusted(), + table_entry_addr, + 0, + )) + } + }, + WasmType::ExternRef => { + // Our read barrier for `externref` tables is roughly equivalent + // to the following pseudocode: + // + // ``` + // let elem = table[index] + // if elem is not null: + // let (next, end) = VMExternRefActivationsTable bump region + // if next != end: + // elem.ref_count += 1 + // *next = elem + // next += 1 + // else: + // call activations_table_insert_with_gc(elem) + // return elem + // ``` + // + // This ensures that all `externref`s coming out of tables and + // onto the stack are safely held alive by the + // `VMExternRefActivationsTable`. + + let reference_type = self.reference_type(WasmType::ExternRef); + + let continue_block = builder.create_block(); + let non_null_elem_block = builder.create_block(); + let gc_block = builder.create_block(); + let no_gc_block = builder.create_block(); + let current_block = builder.current_block().unwrap(); + builder + .func + .layout + .insert_block_after(non_null_elem_block, current_block); + builder + .func + .layout + .insert_block_after(no_gc_block, non_null_elem_block); + builder + .func + .layout + .insert_block_after(gc_block, no_gc_block); + builder + .func + .layout + .insert_block_after(continue_block, gc_block); + + // Load the table element. + let elem_addr = builder.ins().table_addr(pointer_type, table, index, 0); + let elem = + builder + .ins() + .load(reference_type, ir::MemFlags::trusted(), elem_addr, 0); + + let elem_is_null = builder.ins().is_null(elem); + builder.ins().brnz(elem_is_null, continue_block, &[]); + builder.ins().jump(non_null_elem_block, &[]); + + // Load the `VMExternRefActivationsTable::next` bump finger and + // the `VMExternRefActivationsTable::end` bump boundary. + builder.switch_to_block(non_null_elem_block); + let vmctx = self.vmctx(&mut builder.func); + let vmctx = builder.ins().global_value(pointer_type, vmctx); + let activations_table = builder.ins().load( + pointer_type, + ir::MemFlags::trusted(), + vmctx, + i32::try_from(self.offsets.vmctx_externref_activations_table()).unwrap(), + ); + let next = builder.ins().load( + pointer_type, + ir::MemFlags::trusted(), + activations_table, + i32::try_from(self.offsets.vm_extern_ref_activation_table_next()).unwrap(), + ); + let end = builder.ins().load( + pointer_type, + ir::MemFlags::trusted(), + activations_table, + i32::try_from(self.offsets.vm_extern_ref_activation_table_end()).unwrap(), + ); + + // If `next == end`, then we are at full capacity. Call a + // builtin to do a GC and insert this reference into the + // just-swept table for us. + let at_capacity = builder.ins().icmp(ir::condcodes::IntCC::Equal, next, end); + builder.ins().brnz(at_capacity, gc_block, &[]); + builder.ins().jump(no_gc_block, &[]); + builder.switch_to_block(gc_block); + let builtin_idx = BuiltinFunctionIndex::activations_table_insert_with_gc(); + let builtin_sig = self + .builtin_function_signatures + .activations_table_insert_with_gc(builder.func); + let (vmctx, builtin_addr) = self + .translate_load_builtin_function_address(&mut builder.cursor(), builtin_idx); + builder + .ins() + .call_indirect(builtin_sig, builtin_addr, &[vmctx, elem]); + builder.ins().jump(continue_block, &[]); + + // If `next != end`, then: + // + // * increment this reference's ref count, + // * store the reference into the bump table at `*next`, + // * and finally increment the `next` bump finger. + builder.switch_to_block(no_gc_block); + self.mutate_extenref_ref_count(builder, elem, 1); + builder.ins().store(ir::MemFlags::trusted(), elem, next, 0); + + let new_next = builder + .ins() + .iadd_imm(next, i64::from(reference_type.bytes())); + builder.ins().store( + ir::MemFlags::trusted(), + new_next, + activations_table, + i32::try_from(self.offsets.vm_extern_ref_activation_table_next()).unwrap(), + ); + + builder.ins().jump(continue_block, &[]); + builder.switch_to_block(continue_block); + + builder.seal_block(non_null_elem_block); + builder.seal_block(gc_block); + builder.seal_block(no_gc_block); + builder.seal_block(continue_block); + + Ok(elem) + } + ty => Err(WasmError::Unsupported(format!( + "unsupported table type for `table.get` instruction: {:?}", + ty + ))), + } } fn translate_table_set( &mut self, - _: cranelift_codegen::cursor::FuncCursor<'_>, - _: TableIndex, - _: ir::Table, - _: ir::Value, - _: ir::Value, + builder: &mut FunctionBuilder, + table_index: TableIndex, + table: ir::Table, + value: ir::Value, + index: ir::Value, ) -> WasmResult<()> { - Err(WasmError::Unsupported( - "the `table.set` instruction is not supported yet".into(), - )) + let pointer_type = self.pointer_type(); + + let plan = &self.module.table_plans[table_index]; + match plan.table.wasm_ty { + WasmType::FuncRef => match plan.style { + TableStyle::CallerChecksSignature => { + let table_entry_addr = builder.ins().table_addr(pointer_type, table, index, 0); + builder + .ins() + .store(ir::MemFlags::trusted(), value, table_entry_addr, 0); + Ok(()) + } + }, + WasmType::ExternRef => { + // Our write barrier for `externref`s being copied out of the + // stack and into a table is roughly equivalent to the following + // pseudocode: + // + // ``` + // if value != null: + // value.ref_count += 1 + // let current_elem = table[index] + // table[index] = value + // if current_elem != null: + // current_elem.ref_count -= 1 + // if current_elem.ref_count == 0: + // call drop_externref(current_elem) + // ``` + // + // This write barrier is responsible for ensuring that: + // + // 1. The value's ref count is incremented now that the + // table is holding onto it. This is required for memory safety. + // + // 2. The old table element, if any, has its ref count + // decremented, and that the wrapped data is dropped if the + // ref count reaches zero. This is not required for memory + // safety, but is required to avoid leaks. Furthermore, the + // destructor might GC or touch this table, so we must only + // drop the old table element *after* we've replaced it with + // the new `value`! + + let current_block = builder.current_block().unwrap(); + let inc_ref_count_block = builder.create_block(); + builder + .func + .layout + .insert_block_after(inc_ref_count_block, current_block); + let check_current_elem_block = builder.create_block(); + builder + .func + .layout + .insert_block_after(check_current_elem_block, inc_ref_count_block); + let dec_ref_count_block = builder.create_block(); + builder + .func + .layout + .insert_block_after(dec_ref_count_block, check_current_elem_block); + let drop_block = builder.create_block(); + builder + .func + .layout + .insert_block_after(drop_block, dec_ref_count_block); + let continue_block = builder.create_block(); + builder + .func + .layout + .insert_block_after(continue_block, drop_block); + + // Calculate the table address of the current element and do + // bounds checks. This is the first thing we do, because we + // don't want to modify any ref counts if this `table.set` is + // going to trap. + let table_entry_addr = builder.ins().table_addr(pointer_type, table, index, 0); + + // If value is not null, increment `value`'s ref count. + // + // This has to come *before* decrementing the current table + // element's ref count, because it might reach ref count == zero, + // causing us to deallocate the current table element. However, + // if `value` *is* the current table element (and therefore this + // whole `table.set` is a no-op), then we would incorrectly + // deallocate `value` and leave it in the table, leading to use + // after free. + let value_is_null = builder.ins().is_null(value); + builder + .ins() + .brnz(value_is_null, check_current_elem_block, &[]); + builder.ins().jump(inc_ref_count_block, &[]); + builder.switch_to_block(inc_ref_count_block); + self.mutate_extenref_ref_count(builder, value, 1); + builder.ins().jump(check_current_elem_block, &[]); + + // Grab the current element from the table, and store the new + // `value` into the table. + // + // Note that we load the current element as a pointer, not a + // reference. This is so that if we call out-of-line to run its + // destructor, and its destructor triggers GC, this reference is + // not recorded in the stack map (which would lead to the GC + // saving a reference to a deallocated object, and then using it + // after its been freed). + builder.switch_to_block(check_current_elem_block); + let current_elem = + builder + .ins() + .load(pointer_type, ir::MemFlags::trusted(), table_entry_addr, 0); + builder + .ins() + .store(ir::MemFlags::trusted(), value, table_entry_addr, 0); + + // If the current element is non-null, decrement its reference + // count. And if its reference count has reached zero, then make + // an out-of-line call to deallocate it. + let current_elem_is_null = + builder + .ins() + .icmp_imm(ir::condcodes::IntCC::Equal, current_elem, 0); + builder + .ins() + .brz(current_elem_is_null, dec_ref_count_block, &[]); + builder.ins().jump(continue_block, &[]); + + builder.switch_to_block(dec_ref_count_block); + let ref_count = self.mutate_extenref_ref_count(builder, current_elem, -1); + builder.ins().brz(ref_count, drop_block, &[]); + builder.ins().jump(continue_block, &[]); + + // Call the `drop_externref` builtin to (you guessed it) drop + // the `externref`. + builder.switch_to_block(drop_block); + let builtin_idx = BuiltinFunctionIndex::drop_externref(); + let builtin_sig = self + .builtin_function_signatures + .drop_externref(builder.func); + let (_vmctx, builtin_addr) = self + .translate_load_builtin_function_address(&mut builder.cursor(), builtin_idx); + builder + .ins() + .call_indirect(builtin_sig, builtin_addr, &[current_elem]); + builder.ins().jump(continue_block, &[]); + + builder.switch_to_block(continue_block); + + builder.seal_block(inc_ref_count_block); + builder.seal_block(check_current_elem_block); + builder.seal_block(dec_ref_count_block); + builder.seal_block(drop_block); + builder.seal_block(continue_block); + + Ok(()) + } + ty => Err(WasmError::Unsupported(format!( + "unsupported table type for `table.set` instruction: {:?}", + ty + ))), + } } fn translate_table_fill( diff --git a/crates/runtime/src/externref.rs b/crates/runtime/src/externref.rs index 2272c3bcb865..48ef51f54cc3 100644 --- a/crates/runtime/src/externref.rs +++ b/crates/runtime/src/externref.rs @@ -168,7 +168,7 @@ use wasmtime_environ::{ir::Stackmap, StackMapInformation}; pub struct VMExternRef(NonNull); #[repr(C)] -struct VMExternData { +pub(crate) struct VMExternData { // Implicit, dynamically-sized member that always preceded an // `VMExternData`. // @@ -237,7 +237,7 @@ impl VMExternData { } /// Drop the inner value and then free this `VMExternData` heap allocation. - unsafe fn drop_and_dealloc(mut data: NonNull) { + pub(crate) unsafe fn drop_and_dealloc(mut data: NonNull) { // Note: we introduce a block scope so that we drop the live // reference to the data before we free the heap allocation it // resides within after this block. @@ -614,17 +614,39 @@ impl VMExternRefActivationsTable { } } - fn insert_precise_stack_root(&self, root: NonNull) { - let mut precise_stack_roots = self.precise_stack_roots.borrow_mut(); + fn insert_precise_stack_root( + precise_stack_roots: &mut HashSet, + root: NonNull, + ) { let root = unsafe { VMExternRef::clone_from_raw(root.as_ptr() as *mut _) }; precise_stack_roots.insert(VMExternRefWithTraits(root)); } /// Sweep the bump allocation table after we've discovered our precise stack /// roots. - fn sweep(&self) { + fn sweep(&self, precise_stack_roots: &mut HashSet) { + // Swap out the over-approximated set so we can distinguish between the + // over-approximation before we started sweeping, and any new elements + // we might insert into the table because of re-entering Wasm via an + // `externref`'s destructor. The new elements must be kept alive for + // memory safety, but we keep this set around because we likely want to + // reuse its allocation/capacity for the new `precise_stack_roots` in + // the next GC cycle. + let mut old_over_approximated = mem::replace( + &mut *self.over_approximated_stack_roots.borrow_mut(), + Default::default(), + ); + // Sweep our bump chunk. + // + // Just in case an `externref` destructor calls back into Wasm, passing + // more `externref`s into that Wasm, which requires the `externref`s to + // be inserted into this `VMExternRefActivationsTable`, make sure `next + // == end` so that they go into the over-approximation hash set. let num_filled = self.num_filled_in_bump_chunk(); + unsafe { + *self.next.get() = self.end; + } for slot in self.chunk.iter().take(num_filled) { unsafe { *slot.get() = None; @@ -637,22 +659,35 @@ impl VMExternRefActivationsTable { "after sweeping the bump chunk, all slots should be `None`" ); - // Reset our `next` bump allocation finger. + // Reset our `next` finger to the start of the bump allocation chunk. unsafe { let next = self.chunk.as_ptr() as *mut TableElem; debug_assert!(!next.is_null()); *self.next.get() = NonNull::new_unchecked(next); } - // The current `precise_roots` becomes our new over-appoximated set for - // the next GC cycle. - let mut precise_roots = self.precise_stack_roots.borrow_mut(); + // The current `precise_stack_roots` becomes our new over-appoximated + // set for the next GC cycle. let mut over_approximated = self.over_approximated_stack_roots.borrow_mut(); - mem::swap(&mut *precise_roots, &mut *over_approximated); + mem::swap(&mut *precise_stack_roots, &mut *over_approximated); - // And finally, the new `precise_roots` should be cleared and remain - // empty until the next GC cycle. - precise_roots.clear(); + // And finally, the new `precise_stack_roots` should be cleared and + // remain empty until the next GC cycle. + // + // However, if an `externref` destructor called re-entered Wasm with + // more `externref`s, then the temp over-approximated set we were using + // during sweeping (now `precise_stack_roots`) is not empty, and we need + // to keep its references alive in our new over-approximated set. + over_approximated.extend(precise_stack_roots.drain()); + + // If we didn't re-enter Wasm during destructors (likely), + // `precise_stack_roots` has zero capacity, and the old + // over-approximated has a bunch of capacity. Reuse whichever set has + // most capacity. + if old_over_approximated.capacity() > precise_stack_roots.capacity() { + old_over_approximated.clear(); + *precise_stack_roots = old_over_approximated; + } } /// Set the stack canary around a call into Wasm. @@ -944,6 +979,20 @@ pub unsafe fn gc( stack_maps_registry: &StackMapRegistry, externref_activations_table: &VMExternRefActivationsTable, ) { + // We borrow the precise stack roots `RefCell` for the whole duration of + // GC. Whether it is dynamically borrowed serves as a flag for detecting + // re-entrancy into GC. Re-entrancy can occur if we do a GC, drop an + // `externref`, and that `externref`'s destructor then triggers another + // GC. Whenever we detect re-entrancy, we return and give the first, + // outermost GC call priority. + let mut precise_stack_roots = match externref_activations_table + .precise_stack_roots + .try_borrow_mut() + { + Err(_) => return, + Ok(roots) => roots, + }; + log::debug!("start GC"); debug_assert!({ @@ -952,7 +1001,6 @@ pub unsafe fn gc( // into the activations table's bump-allocated space at the // end. Therefore, it should always be empty upon entering this // function. - let precise_stack_roots = externref_activations_table.precise_stack_roots.borrow(); precise_stack_roots.is_empty() }); @@ -971,7 +1019,7 @@ pub unsafe fn gc( true }); } - externref_activations_table.sweep(); + externref_activations_table.sweep(&mut precise_stack_roots); log::debug!("end GC"); return; } @@ -1029,7 +1077,10 @@ pub unsafe fn gc( have an entry in the VMExternRefActivationsTable" ); if let Some(r) = NonNull::new(r) { - externref_activations_table.insert_precise_stack_root(r); + VMExternRefActivationsTable::insert_precise_stack_root( + &mut precise_stack_roots, + r, + ); } } } @@ -1056,11 +1107,10 @@ pub unsafe fn gc( // would free those missing roots while they are still in use, leading to // use-after-free. if found_canary { - externref_activations_table.sweep(); + externref_activations_table.sweep(&mut precise_stack_roots); } else { log::warn!("did not find stack canary; skipping GC sweep"); - let mut roots = externref_activations_table.precise_stack_roots.borrow_mut(); - roots.clear(); + precise_stack_roots.clear(); } log::debug!("end GC"); diff --git a/crates/runtime/src/libcalls.rs b/crates/runtime/src/libcalls.rs index facdc8d7466f..eb62d21311cf 100644 --- a/crates/runtime/src/libcalls.rs +++ b/crates/runtime/src/libcalls.rs @@ -60,6 +60,7 @@ use crate::externref::VMExternRef; use crate::table::Table; use crate::traphandlers::raise_lib_trap; use crate::vmcontext::{VMCallerCheckedAnyfunc, VMContext}; +use std::ptr::NonNull; use wasmtime_environ::wasm::{ DataIndex, DefinedMemoryIndex, ElemIndex, MemoryIndex, TableElementType, TableIndex, }; @@ -409,3 +410,23 @@ pub unsafe extern "C" fn wasmtime_data_drop(vmctx: *mut VMContext, data_index: u let instance = (&mut *vmctx).instance(); instance.data_drop(data_index) } + +/// Drop a `VMExternRef`. +pub unsafe extern "C" fn wasmtime_drop_externref(externref: *mut u8) { + let externref = externref as *mut crate::externref::VMExternData; + let externref = NonNull::new(externref).unwrap(); + crate::externref::VMExternData::drop_and_dealloc(externref); +} + +/// Do a GC and insert the given `externref` into the +/// `VMExternRefActivationsTable`. +pub unsafe extern "C" fn wasmtime_activations_table_insert_with_gc( + vmctx: *mut VMContext, + externref: *mut u8, +) { + let externref = VMExternRef::clone_from_raw(externref); + let instance = (&mut *vmctx).instance(); + let activations_table = &**instance.externref_activations_table(); + let registry = &**instance.stack_map_registry(); + activations_table.insert_with_gc(externref, registry); +} diff --git a/crates/runtime/src/vmcontext.rs b/crates/runtime/src/vmcontext.rs index d6df7ab75c15..c98c75b362da 100644 --- a/crates/runtime/src/vmcontext.rs +++ b/crates/runtime/src/vmcontext.rs @@ -555,6 +555,10 @@ impl VMBuiltinFunctionsArray { wasmtime_imported_memory_fill as usize; ptrs[BuiltinFunctionIndex::memory_init().index() as usize] = wasmtime_memory_init as usize; ptrs[BuiltinFunctionIndex::data_drop().index() as usize] = wasmtime_data_drop as usize; + ptrs[BuiltinFunctionIndex::drop_externref().index() as usize] = + wasmtime_drop_externref as usize; + ptrs[BuiltinFunctionIndex::activations_table_insert_with_gc().index() as usize] = + wasmtime_activations_table_insert_with_gc as usize; if cfg!(debug_assertions) { for i in 0..ptrs.len() { diff --git a/tests/all/gc.rs b/tests/all/gc.rs index 4b4d8849dbf5..09eacd93e4f2 100644 --- a/tests/all/gc.rs +++ b/tests/all/gc.rs @@ -3,6 +3,26 @@ use std::cell::Cell; use std::rc::Rc; use wasmtime::*; +struct SetFlagOnDrop(Rc>); + +impl Drop for SetFlagOnDrop { + fn drop(&mut self) { + self.0.set(true); + } +} + +struct GcOnDrop { + store: Store, + gc_count: Rc>, +} + +impl Drop for GcOnDrop { + fn drop(&mut self) { + self.store.gc(); + self.gc_count.set(self.gc_count.get() + 1); + } +} + #[test] fn smoke_test_gc() -> anyhow::Result<()> { let (store, module) = ref_types_module( @@ -57,15 +77,7 @@ fn smoke_test_gc() -> anyhow::Result<()> { drop(r); assert!(inner_dropped.get()); - return Ok(()); - - struct SetFlagOnDrop(Rc>); - - impl Drop for SetFlagOnDrop { - fn drop(&mut self) { - self.0.set(true); - } - } + Ok(()) } #[test] @@ -210,3 +222,301 @@ fn many_live_refs() -> anyhow::Result<()> { } } } + +#[test] +fn drop_externref_via_table_set() -> anyhow::Result<()> { + let (store, module) = ref_types_module( + r#" + (module + (table $t 1 externref) + + (func (export "table-set") (param externref) + (table.set $t (i32.const 0) (local.get 0)) + ) + ) + "#, + )?; + + let instance = Instance::new(&store, &module, &[])?; + let table_set = instance.get_func("table-set").unwrap(); + + let foo_is_dropped = Rc::new(Cell::new(false)); + let bar_is_dropped = Rc::new(Cell::new(false)); + + let foo = ExternRef::new(SetFlagOnDrop(foo_is_dropped.clone())); + let bar = ExternRef::new(SetFlagOnDrop(bar_is_dropped.clone())); + + { + let args = vec![Val::ExternRef(Some(foo))]; + table_set.call(&args)?; + } + store.gc(); + assert!(!foo_is_dropped.get()); + assert!(!bar_is_dropped.get()); + + { + let args = vec![Val::ExternRef(Some(bar))]; + table_set.call(&args)?; + } + store.gc(); + assert!(foo_is_dropped.get()); + assert!(!bar_is_dropped.get()); + + table_set.call(&[Val::ExternRef(None)])?; + assert!(foo_is_dropped.get()); + assert!(bar_is_dropped.get()); + + Ok(()) +} + +#[test] +fn gc_in_externref_dtor() -> anyhow::Result<()> { + let (store, module) = ref_types_module( + r#" + (module + (table $t 1 externref) + + (func (export "table-set") (param externref) + (table.set $t (i32.const 0) (local.get 0)) + ) + ) + "#, + )?; + + let instance = Instance::new(&store, &module, &[])?; + let table_set = instance.get_func("table-set").unwrap(); + + let gc_count = Rc::new(Cell::new(0)); + + // Put a `GcOnDrop` into the table. + { + let args = vec![Val::ExternRef(Some(ExternRef::new(GcOnDrop { + store: store.clone(), + gc_count: gc_count.clone(), + })))]; + table_set.call(&args)?; + } + + // Remove the `GcOnDrop` from the `VMExternRefActivationsTable`. + store.gc(); + + // Overwrite the `GcOnDrop` table element, causing it to be dropped, and + // triggering a GC. + table_set.call(&[Val::ExternRef(None)])?; + assert_eq!(gc_count.get(), 1); + + Ok(()) +} + +#[test] +fn touch_own_table_element_in_externref_dtor() -> anyhow::Result<()> { + let (store, module) = ref_types_module( + r#" + (module + (table $t (export "table") 1 externref) + + (func (export "table-set") (param externref) + (table.set $t (i32.const 0) (local.get 0)) + ) + ) + "#, + )?; + + let instance = Instance::new(&store, &module, &[])?; + let table = instance.get_table("table").unwrap(); + let table_set = instance.get_func("table-set").unwrap(); + + let touched = Rc::new(Cell::new(false)); + + { + let args = vec![Val::ExternRef(Some(ExternRef::new(TouchTableOnDrop { + table, + touched: touched.clone(), + })))]; + table_set.call(&args)?; + } + + // Remove the `TouchTableOnDrop` from the `VMExternRefActivationsTable`. + store.gc(); + + table_set.call(&[Val::ExternRef(Some(ExternRef::new("hello".to_string())))])?; + assert!(touched.get()); + + return Ok(()); + + struct TouchTableOnDrop { + table: Table, + touched: Rc>, + } + + impl Drop for TouchTableOnDrop { + fn drop(&mut self) { + // From the `Drop` implementation, we see the new table element, not + // `self`. + let elem = self.table.get(0).unwrap().unwrap_externref().unwrap(); + assert!(elem.data().is::()); + assert_eq!(elem.data().downcast_ref::().unwrap(), "hello"); + self.touched.set(true); + } + } +} + +#[test] +fn gc_during_gc_when_passing_refs_into_wasm() -> anyhow::Result<()> { + let (store, module) = ref_types_module( + r#" + (module + (table $t 1 externref) + (func (export "f") (param externref) + (table.set $t (i32.const 0) (local.get 0)) + ) + ) + "#, + )?; + + let instance = Instance::new(&store, &module, &[])?; + let f = instance.get_func("f").unwrap(); + + let gc_count = Rc::new(Cell::new(0)); + + for _ in 0..1024 { + let args = vec![Val::ExternRef(Some(ExternRef::new(GcOnDrop { + store: store.clone(), + gc_count: gc_count.clone(), + })))]; + f.call(&args)?; + } + + f.call(&[Val::ExternRef(None)])?; + store.gc(); + assert_eq!(gc_count.get(), 1024); + + Ok(()) +} + +#[test] +fn gc_during_gc_from_many_table_gets() -> anyhow::Result<()> { + let (store, module) = ref_types_module( + r#" + (module + (import "" "" (func $observe_ref (param externref))) + (table $t 1 externref) + (func (export "init") (param externref) + (table.set $t (i32.const 0) (local.get 0)) + ) + (func (export "run") (param i32) + (loop $continue + (if (i32.eqz (local.get 0)) (return)) + (call $observe_ref (table.get $t (i32.const 0))) + (local.set 0 (i32.sub (local.get 0) (i32.const 1))) + (br $continue) + ) + ) + ) + "#, + )?; + + let observe_ref = Func::new( + &store, + FuncType::new( + vec![ValType::ExternRef].into_boxed_slice(), + vec![].into_boxed_slice(), + ), + |_caller, _params, _results| Ok(()), + ); + + let instance = Instance::new(&store, &module, &[observe_ref.into()])?; + let init = instance.get_func("init").unwrap(); + let run = instance.get_func("run").unwrap(); + + let gc_count = Rc::new(Cell::new(0)); + + // Initialize the table element with a `GcOnDrop`. This also puts it in the + // `VMExternRefActivationsTable`. + { + let args = vec![Val::ExternRef(Some(ExternRef::new(GcOnDrop { + store: store.clone(), + gc_count: gc_count.clone(), + })))]; + init.call(&args)?; + } + + // Overwrite the `GcOnDrop` with another reference. The `GcOnDrop` is still + // in the `VMExternRefActivationsTable`. + { + let args = vec![Val::ExternRef(Some(ExternRef::new(String::from("hello"))))]; + init.call(&args)?; + } + + // Now call `run`, which does a bunch of `table.get`s, filling up the + // `VMExternRefActivationsTable`'s bump region, and eventually triggering a + // GC that will deallocate our `GcOnDrop` which will also trigger a nested + // GC. + run.call(&[Val::I32(1024)])?; + + // We should have done our nested GC. + assert_eq!(gc_count.get(), 1); + + Ok(()) +} + +#[test] +fn pass_externref_into_wasm_during_destructor_in_gc() -> anyhow::Result<()> { + let (store, module) = ref_types_module( + r#" + (module + (table $t 1 externref) + + (func (export "f") (param externref) + nop + ) + ) + "#, + )?; + + let instance = Instance::new(&store, &module, &[])?; + let f = instance.get_func("f").unwrap(); + let r = ExternRef::new("hello"); + let did_call = Rc::new(Cell::new(false)); + + // Put a `CallOnDrop` into the `VMExternRefActivationsTable`. + { + let args = vec![Val::ExternRef(Some(ExternRef::new(CallOnDrop( + f.clone(), + r.clone(), + did_call.clone(), + ))))]; + f.call(&args)?; + } + + // One ref count for `r`, one for the `CallOnDrop`. + assert_eq!(r.strong_count(), 2); + + // Do a GC, which will see that the only reference holding the `CallOnDrop` + // is the `VMExternRefActivationsTable`, and will drop it. Dropping it will + // cause it to call into `f` again. + store.gc(); + assert!(did_call.get()); + + // The `CallOnDrop` is no longer holding onto `r`, but the + // `VMExternRefActivationsTable` is. + assert_eq!(r.strong_count(), 2); + + // GC again to empty the `VMExternRefActivationsTable`. Now `r` is the only + // thing holding its `externref` alive. + store.gc(); + assert_eq!(r.strong_count(), 1); + + return Ok(()); + + struct CallOnDrop(Func, ExternRef, Rc>); + + impl Drop for CallOnDrop { + fn drop(&mut self) { + self.0 + .call(&[Val::ExternRef(Some(self.1.clone()))]) + .unwrap(); + self.2.set(true); + } + } +} diff --git a/tests/misc_testsuite/reference-types/many_table_gets_lead_to_gc.wast b/tests/misc_testsuite/reference-types/many_table_gets_lead_to_gc.wast new file mode 100644 index 000000000000..b0827dbdabbc --- /dev/null +++ b/tests/misc_testsuite/reference-types/many_table_gets_lead_to_gc.wast @@ -0,0 +1,35 @@ +(module + (table $t 1 externref) + + (func (export "init") (param externref) + (table.set $t (i32.const 0) (local.get 0)) + ) + + (func (export "get-many-externrefs") (param $i i32) + (loop $continue + ;; Exit when our loop counter `$i` reaches zero. + (if (i32.eqz (local.get $i)) + (return) + ) + + ;; Get an `externref` out of the table. This could cause the + ;; `VMExternRefActivationsTable`'s bump region to reach full capacity, + ;; which triggers a GC. + ;; + ;; Set the table element back into the table, just so that the element is + ;; still considered live at the time of the `table.get`, it ends up in the + ;; stack map, and we poke more of our GC bits. + (table.set $t (i32.const 0) (table.get $t (i32.const 0))) + + ;; Decrement our loop counter `$i`. + (local.set $i (i32.sub (local.get $i) (i32.const 1))) + + ;; Continue to the next loop iteration. + (br $continue) + ) + unreachable + ) +) + +(invoke "init" (ref.extern 1)) +(invoke "get-many-externrefs" (i32.const 8192))