Skip to content

Commit

Permalink
Add support for funcrefs inside GC objects
Browse files Browse the repository at this point in the history
  • Loading branch information
fitzgen committed Oct 1, 2024
1 parent eb07b15 commit 71efbcf
Show file tree
Hide file tree
Showing 11 changed files with 521 additions and 22 deletions.
92 changes: 83 additions & 9 deletions crates/cranelift/src/gc/enabled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ use cranelift_codegen::{
cursor::FuncCursor,
ir::{self, condcodes::IntCC, InstBuilder},
};
use cranelift_entity::packed_option::ReservedValue;
use cranelift_frontend::FunctionBuilder;
use cranelift_wasm::{
wasm_unsupported, ModuleInternedTypeIndex, StructFieldsVec, TargetEnvironment, TypeIndex,
WasmCompositeType, WasmHeapTopType, WasmHeapType, WasmRefType, WasmResult, WasmStorageType,
WasmValType,
ModuleInternedTypeIndex, StructFieldsVec, TargetEnvironment, TypeIndex, WasmCompositeType,
WasmHeapTopType, WasmHeapType, WasmRefType, WasmResult, WasmStorageType, WasmValType,
};
use wasmtime_environ::{
GcArrayLayout, GcLayout, GcStructLayout, PtrSize, I31_DISCRIMINANT, NON_NULL_NON_I31_MASK,
Expand Down Expand Up @@ -86,9 +86,40 @@ fn read_field_at_addr(
WasmHeapTopType::Any | WasmHeapTopType::Extern => gc_compiler(func_env)?
.translate_read_gc_reference(func_env, builder, r, addr, flags)?,
WasmHeapTopType::Func => {
return Err(wasm_unsupported!(
"funcrefs inside the GC heap are not yet implemented"
));
let expected_ty = match r.heap_type {
WasmHeapType::Func => ModuleInternedTypeIndex::reserved_value(),
WasmHeapType::ConcreteFunc(ty) => ty.unwrap_module_type_index(),
WasmHeapType::NoFunc => {
let null = builder.ins().iconst(func_env.pointer_type(), 0);
if !r.nullable {
// Because `nofunc` is uninhabited, and this
// reference is non-null, this is unreachable
// code. Unconditionally trap via conditional
// trap instructions to avoid inserting block
// terminators in the middle of this block.
builder
.ins()
.trapz(null, ir::TrapCode::User(DEBUG_ASSERT_TRAP_CODE));
}
return Ok(null);
}
_ => unreachable!("not a function heap type"),
};
let expected_ty = builder
.ins()
.iconst(ir::types::I32, i64::from(expected_ty.as_bits()));

let vmctx = func_env.vmctx_val(&mut builder.cursor());

let func_ref_id = builder.ins().load(ir::types::I32, flags, addr, 0);
let get_interned_func_ref = func_env
.builtin_functions
.get_interned_func_ref(builder.func);

let call_inst = builder
.ins()
.call(get_interned_func_ref, &[vmctx, func_ref_id, expected_ty]);
builder.func.dfg.first_result(call_inst)
}
},
},
Expand All @@ -103,6 +134,51 @@ fn read_field_at_addr(
Ok(value)
}

/// Emit code that stores a function reference into a field of a GC object.
///
/// The raw `funcref` value is never written to the field itself. It is passed
/// to the `intern_func_ref_for_gc_heap` builtin, and the id returned by that
/// call is what gets stored at `field_addr` (offset 0) using `flags`.
///
/// When `ref_type` is `nofunc`, the incoming `func_ref` value is ignored and a
/// null pointer constant is interned in its place.
///
/// # Panics
///
/// Panics if the top type of `ref_type`'s heap type is not `func`.
fn write_func_ref_at_addr(
    func_env: &mut FuncEnvironment<'_>,
    builder: &mut FunctionBuilder<'_>,
    ref_type: WasmRefType,
    flags: ir::MemFlags,
    field_addr: ir::Value,
    func_ref: ir::Value,
) -> WasmResult<()> {
    assert_eq!(ref_type.heap_type.top(), WasmHeapTopType::Func);

    let vmctx = func_env.vmctx_val(&mut builder.cursor());
    let intern_builtin = func_env
        .builtin_functions
        .intern_func_ref_for_gc_heap(builder.func);

    // Pick the raw pointer value that will be handed to the builtin.
    let raw_func_ref = match ref_type.heap_type {
        WasmHeapType::NoFunc => {
            let null = builder.ins().iconst(func_env.pointer_type(), 0);
            if !ref_type.nullable {
                // `nofunc` has no inhabitants, so a non-nullable reference of
                // this type can only appear in unreachable code. A conditional
                // trap on the constant null always fires, and unlike an
                // unconditional trap it is not a block terminator, so the
                // current block can keep receiving instructions.
                builder
                    .ins()
                    .trapz(null, ir::TrapCode::User(DEBUG_ASSERT_TRAP_CODE));
            }
            null
        }
        _ => func_ref,
    };

    // Turn the raw `funcref` into the `FuncRefTableId` that lives in the GC
    // heap.
    let intern_call = builder.ins().call(intern_builtin, &[vmctx, raw_func_ref]);
    let table_id = builder.func.dfg.first_result(intern_call);

    // Write the id, not the pointer, into the field.
    builder.ins().store(flags, table_id, field_addr, 0);

    Ok(())
}

fn write_field_at_addr(
func_env: &mut FuncEnvironment<'_>,
builder: &mut FunctionBuilder<'_>,
Expand All @@ -121,9 +197,7 @@ fn write_field_at_addr(
builder.ins().istore16(flags, new_val, field_addr, 0);
}
WasmStorageType::Val(WasmValType::Ref(r)) if r.heap_type.top() == WasmHeapTopType::Func => {
return Err(wasm_unsupported!(
"funcrefs inside the GC heap are not yet implemented"
))
write_func_ref_at_addr(func_env, builder, r, flags, field_addr, new_val)?;
}
WasmStorageType::Val(WasmValType::Ref(r)) => {
gc_compiler(func_env)?
Expand Down
4 changes: 1 addition & 3 deletions crates/cranelift/src/gc/enabled/drc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,7 @@ impl DrcCompiler {
WasmStorageType::Val(WasmValType::Ref(r))
if r.heap_type.top() == WasmHeapTopType::Func =>
{
return Err(wasm_unsupported!(
"funcrefs inside the GC heap are not yet implemented"
));
write_func_ref_at_addr(func_env, builder, r, flags, field_addr, val)?;
}
WasmStorageType::Val(WasmValType::Ref(r)) => {
self.translate_init_gc_reference(func_env, builder, r, field_addr, val, flags)?;
Expand Down
37 changes: 36 additions & 1 deletion crates/environ/src/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,40 @@ macro_rules! foreach_builtin_function {
align: i32
) -> reference;

// Intern a `funcref` into the GC heap, returning its
// `FuncRefTableId`.
//
// This libcall may not GC.
#[cfg(feature = "gc")]
intern_func_ref_for_gc_heap(
vmctx: vmctx,
func_ref: pointer
) -> i32;

// Get the raw `VMFuncRef` pointer associated with a
// `FuncRefTableId` from an earlier `intern_func_ref_for_gc_heap`
// call.
//
// This libcall may not GC.
//
// Passes in the `ModuleInternedTypeIndex` of the funcref's expected
// type, or `ModuleInternedTypeIndex::reserved_value()` if we are
// getting the function reference as an untyped `funcref` rather
// than a typed `(ref $ty)`.
//
// TODO: We will want to eventually expose the table directly to
// Wasm code, so that it doesn't need to make a libcall to go from
// id to `VMFuncRef`. That will be a little tricky: it will also
// require updating the pointer to the slab in the `VMContext` (or
// `VMRuntimeLimits` or wherever we put it) when the slab is
// resized.
#[cfg(feature = "gc")]
get_interned_func_ref(
vmctx: vmctx,
func_ref_id: i32,
module_interned_type_index: i32
) -> pointer;

// Builtin implementation of the `array.new_data` instruction.
#[cfg(feature = "gc")]
array_new_data(
Expand Down Expand Up @@ -141,8 +175,9 @@ macro_rules! foreach_builtin_function {
vmctx: vmctx,
array_interned_type_index: i32,
array: reference,
dst_index: i32,
dst: i32,
elem_index: i32,
src: i32,
len: i32
);

Expand Down
7 changes: 7 additions & 0 deletions crates/wasmtime/src/runtime/vm/gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ mod disabled;
#[cfg(not(feature = "gc"))]
pub use disabled::*;

mod func_ref;
mod gc_ref;
mod gc_runtime;
mod host_data;
mod i31;

pub use func_ref::*;
pub use gc_ref::*;
pub use gc_runtime::*;
pub use host_data::*;
Expand Down Expand Up @@ -42,16 +44,21 @@ pub struct GcStore {

/// The `externref` host data table for this GC heap.
pub host_data_table: ExternRefHostDataTable,

/// The function-references table for this GC heap.
pub func_ref_table: FuncRefTable,
}

impl GcStore {
/// Create a new `GcStore`.
pub fn new(allocation_index: GcHeapAllocationIndex, gc_heap: Box<dyn GcHeap>) -> Self {
let host_data_table = ExternRefHostDataTable::default();
let func_ref_table = FuncRefTable::default();
Self {
allocation_index,
gc_heap,
host_data_table,
func_ref_table,
}
}

Expand Down
46 changes: 37 additions & 9 deletions crates/wasmtime/src/runtime/vm/gc/enabled/arrayref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ use crate::{
prelude::*,
runtime::vm::{GcHeap, GcStore, VMGcRef},
store::{AutoAssertNoGc, StoreOpaque},
AnyRef, ExternRef, HeapType, RootedGcRefImpl, StorageType, Val, ValType,
vm::{FuncRefTableId, SendSyncPtr},
AnyRef, ExternRef, Func, HeapType, RootedGcRefImpl, StorageType, Val, ValType,
};
use core::fmt;
use wasmtime_environ::{GcArrayLayout, VMGcKind};
Expand Down Expand Up @@ -163,7 +164,20 @@ impl VMArrayRef {
let raw = data.read_u32(offset);
Val::AnyRef(AnyRef::_from_raw(store, raw))
}
HeapType::Func => todo!("funcrefs inside gc objects not yet implemented"),
HeapType::Func => {
let func_ref_id = data.read_u32(offset);
let func_ref_id = FuncRefTableId::from_raw(func_ref_id);
let func_ref = store
.unwrap_gc_store()
.func_ref_table
.get_untyped(func_ref_id);
Val::FuncRef(unsafe {
Func::from_vm_func_ref(
store,
func_ref.map_or(core::ptr::null_mut(), |f| f.as_ptr()),
)
})
}
otherwise => unreachable!("not a top type: {otherwise:?}"),
},
}
Expand Down Expand Up @@ -236,7 +250,17 @@ impl VMArrayRef {
data.write_u32(offset, gc_ref.map_or(0, |r| r.as_raw_u32()));
}

Val::FuncRef(_) => todo!("funcrefs inside gc objects not yet implemented"),
Val::FuncRef(f) => {
let func_ref = match f {
Some(f) => Some(SendSyncPtr::new(f.vm_func_ref(store))),
None => None,
};
let id = unsafe { store.gc_store_mut()?.func_ref_table.intern(func_ref) };
store
.gc_store_mut()?
.gc_object_data(self.as_gc_ref())
.write_u32(offset, id.into_raw());
}
}
Ok(())
}
Expand Down Expand Up @@ -329,12 +353,16 @@ impl VMArrayRef {
.write_u32(offset, x);
}

Val::FuncRef(_) => {
// TODO: we can't trust the GC heap, which means we can't read
// native VMFuncRef pointers out of it and trust them. That
// means we need to do the same side table kind of thing we do
// with `externref` host data here. This isn't implemented yet.
bail!("funcrefs in GC objects are not yet implemented")
Val::FuncRef(f) => {
let func_ref = match f {
Some(f) => Some(SendSyncPtr::new(f.vm_func_ref(store))),
None => None,
};
let id = unsafe { store.gc_store_mut()?.func_ref_table.intern(func_ref) };
store
.gc_store_mut()?
.gc_object_data(self.as_gc_ref())
.write_u32(offset, id.into_raw());
}
}
Ok(())
Expand Down
96 changes: 96 additions & 0 deletions crates/wasmtime/src/runtime/vm/gc/func_ref.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
//! Implementation of the side table for `funcref`s in the GC heap.
//!
//! The actual `VMFuncRef`s are kept in a side table, rather than inside the GC
//! heap, for the same reasons that an `externref`'s host data is kept in a side
//! table. We cannot trust any data coming from the GC heap, but `VMFuncRef`s
//! contain raw pointers, so if we stored `VMFuncRef`s inside the GC heap, we
//! wouldn't be able to use the raw pointers from any `VMFuncRef` we got out of
//! the heap. And that means we wouldn't be able to, for example, call a
//! `funcref` we got from inside the GC heap.

use crate::{
hash_map::HashMap,
type_registry::TypeRegistry,
vm::{SendSyncPtr, VMFuncRef},
};
use wasmtime_environ::VMSharedTypeIndex;
use wasmtime_slab::{Id, Slab};

/// An identifier into the `FuncRefTable`.
///
/// This is a thin wrapper around a slab `Id`. It converts to and from a raw
/// `u32` via `into_raw` and `from_raw`, and that raw `u32` is the form that is
/// written into GC object fields in place of a `VMFuncRef` pointer.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(transparent)]
pub struct FuncRefTableId(Id);

impl FuncRefTableId {
/// Convert this `FuncRefTableId` into its raw `u32` ID.
pub fn into_raw(self) -> u32 {
self.0.into_raw()
}

/// Create a `FuncRefTableId` from a raw `u32` ID.
pub fn from_raw(raw: u32) -> Self {
Self(Id::from_raw(raw))
}
}

/// Side table mapping `FuncRefTableId`s that can be stored in the GC heap to
/// raw `VMFuncRef`s.
///
/// A `None` entry stands for a null function reference; it is interned and
/// assigned an id like any other value.
#[derive(Default)]
pub struct FuncRefTable {
// Reverse index from an already-interned function reference to its id, so
// that interning the same reference again returns the same id.
interned: HashMap<Option<SendSyncPtr<VMFuncRef>>, FuncRefTableId>,
// Backing storage: the slab slot named by a `FuncRefTableId` holds the
// function reference that was interned under that id.
slab: Slab<Option<SendSyncPtr<VMFuncRef>>>,
}

impl FuncRefTable {
/// Intern a `VMFuncRef` in the side table, returning an ID that can be
/// stored in the GC heap.
///
/// # Safety
///
/// The given `func_ref` must point to a valid `VMFuncRef` and must remain
/// valid for the duration of this table's lifetime.
pub unsafe fn intern(&mut self, func_ref: Option<SendSyncPtr<VMFuncRef>>) -> FuncRefTableId {
*self
.interned
.entry(func_ref)
.or_insert_with(|| FuncRefTableId(self.slab.alloc(func_ref)))
}

/// Get the `VMFuncRef` associated with the given ID.
///
/// Checks that the `VMFuncRef` is a subtype of the expected type.
pub fn get_typed(
&self,
types: &TypeRegistry,
id: FuncRefTableId,
expected_ty: VMSharedTypeIndex,
) -> Option<SendSyncPtr<VMFuncRef>> {
let f = self.slab.get(id.0).copied().expect("bad FuncRefTableId");

if let Some(f) = f {
// The safety contract for `intern` ensures that deref'ing `f` is safe.
let actual_ty = unsafe { f.as_ref().type_index };

// Ensure that the funcref actually is a subtype of the expected
// type. This protects against GC heap corruption being leveraged in
// attacks: if the attacker has a write gadget inside the GC heap, they
// can overwrite a funcref ID to point to a different funcref, but this
// assertion ensures that any calls to that wrong funcref at least
// remain well-typed, which reduces the attack surface and maintains
// memory safety.
assert!(types.is_subtype(actual_ty, expected_ty));
}

f
}

/// Get the `VMFuncRef` associated with the given ID, without checking the
/// type.
///
/// Prefer `get_typed`. This method is only suitable for getting a
/// `VMFuncRef` as an untyped `funcref` function reference, and never as a
/// typed `(ref $some_func_type)` function reference.
pub fn get_untyped(&self, id: FuncRefTableId) -> Option<SendSyncPtr<VMFuncRef>> {
self.slab.get(id.0).copied().expect("bad FuncRefTableId")
}
}
Loading

0 comments on commit 71efbcf

Please sign in to comment.