diff --git a/src/core/error.rs b/src/core/error.rs index 42ba09155..927ca2933 100644 --- a/src/core/error.rs +++ b/src/core/error.rs @@ -1,5 +1,5 @@ use crate::core::indices::GlobalIdx; -use crate::validation_stack::LabelKind; +use crate::validation_stack::LabelInfo; use core::fmt::{Display, Formatter}; use core::str::Utf8Error; @@ -28,6 +28,7 @@ pub enum Error { InvalidNumType, InvalidVecType, InvalidFuncType, + InvalidFuncTypeIdx, InvalidRefType, InvalidValType, InvalidExportDesc(u8), @@ -44,7 +45,8 @@ pub enum Error { InvalidGlobalIdx(GlobalIdx), GlobalIsConst, RuntimeError(RuntimeError), - FoundLabel(LabelKind), + FoundLabel(LabelInfo), + InvalidLabelIdx, } impl Display for Error { @@ -75,6 +77,9 @@ impl Display for Error { Error::InvalidFuncType => { f.write_str("An invalid byte was read where a functype was expected") } + Error::InvalidFuncTypeIdx => { + f.write_str("An invalid index to the fuctypes table was read") + } Error::InvalidRefType => { f.write_str("An invalid byte was read where a reftype was expected") } @@ -108,16 +113,17 @@ impl Display for Error { "An invalid mut/const byte was found: {byte:#x?}" )), Error::MoreThanOneMemory => { - f.write_str("As of not only one memory is allowed per module.") + f.write_str("As of now only one memory is allowed per module.") } Error::InvalidGlobalIdx(idx) => f.write_fmt(format_args!( "An invalid global index `{idx}` was specified" )), Error::GlobalIsConst => f.write_str("A const global cannot be written to"), Error::RuntimeError(err) => err.fmt(f), - Error::FoundLabel(lk) => f.write_fmt(format_args!( - "Expecting a ValType, a Label was found: {lk:?}" + Error::FoundLabel(label_info) => f.write_fmt(format_args!( + "Expecting a ValType, a Label was found: {label_info:?}" )), + Error::InvalidLabelIdx => f.write_str("An invalid index to a label was read"), } } } diff --git a/src/core/mod.rs b/src/core/mod.rs index 92d3b7010..906317509 100644 --- a/src/core/mod.rs +++ b/src/core/mod.rs @@ -2,3 +2,4 @@ pub mod error; 
pub mod indices; pub mod reader; +pub mod sidetable; diff --git a/src/core/reader/mod.rs b/src/core/reader/mod.rs index f263efbd3..c7d263383 100644 --- a/src/core/reader/mod.rs +++ b/src/core/reader/mod.rs @@ -197,8 +197,8 @@ pub mod span { /// indexing unknown slices, as a [Span] does not validate the length of the indexed slice. #[derive(Copy, Clone, Debug, Hash)] pub struct Span { - pub(super) from: usize, - pub(super) len: usize, + pub(crate) from: usize, + pub(crate) len: usize, } impl Span { diff --git a/src/core/reader/types/mod.rs b/src/core/reader/types/mod.rs index 7a94699de..b28c15a89 100644 --- a/src/core/reader/types/mod.rs +++ b/src/core/reader/types/mod.rs @@ -200,6 +200,86 @@ impl WasmReadable for FuncType { } } +/// +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum BlockType { + Empty, + Returns(ValType), + Type(u32), +} + +impl WasmReadable for BlockType { + fn read(wasm: &mut WasmReader) -> Result { + // FIXME: Use transactions for ValType::read + if wasm.peek_u8()? as i8 == 0x40 { + // Empty block type + let _ = wasm.read_u8().expect("read to succeed, as we just peeked"); + Ok(BlockType::Empty) + } else if let Ok(val_ty) = ValType::read(wasm) { + // No parameters and given valtype as the result + Ok(BlockType::Returns(val_ty)) + } else { + // An index to a function type + wasm.read_var_i33() + .and_then(|idx| idx.try_into().map_err(|_| Error::InvalidFuncTypeIdx)) + .map(BlockType::Type) + } + } + + fn read_unvalidated(wasm: &mut WasmReader) -> Self { + if wasm.peek_u8().unwrap_validated() as i8 == 0x40 { + // Empty block type + let _ = wasm.read_u8(); + + BlockType::Empty + } else if let Ok(val_ty) = ValType::read(wasm) { + // No parameters and given valtype as the result + BlockType::Returns(val_ty) + } else { + // An index to a function type + BlockType::Type( + wasm.read_var_i33() + .unwrap_validated() + .try_into() + .unwrap_validated(), + ) + } + } +} + +impl BlockType { + pub fn as_func_type(&self, func_types: &[FuncType]) -> Result { + 
match self { + BlockType::Empty => Ok(FuncType { + params: ResultType { + valtypes: Vec::new(), + }, + returns: ResultType { + valtypes: Vec::new(), + }, + }), + BlockType::Returns(val_type) => Ok(FuncType { + params: ResultType { + valtypes: Vec::new(), + }, + returns: ResultType { + valtypes: [val_type.clone()].into(), + }, + }), + BlockType::Type(type_idx) => { + let type_idx: usize = (*type_idx) + .try_into() + .map_err(|_| Error::InvalidFuncTypeIdx)?; + + func_types + .get(type_idx) + .cloned() + .ok_or_else(|| Error::InvalidFuncTypeIdx) + } + } + } +} + #[derive(Copy, Clone, PartialEq, Eq)] pub struct Limits { pub min: u32, diff --git a/src/core/reader/types/opcode.rs b/src/core/reader/types/opcode.rs index ab9dbbf01..cdeeff0cd 100644 --- a/src/core/reader/types/opcode.rs +++ b/src/core/reader/types/opcode.rs @@ -1,8 +1,17 @@ +//! All opcodes, in alphanumerical order by their numeric (hex-)value pub const UNREACHABLE: u8 = 0x00; pub const NOP: u8 = 0x01; +pub const BLOCK: u8 = 0x02; +pub const LOOP: u8 = 0x03; +pub const IF: u8 = 0x04; +pub const ELSE: u8 = 0x05; +pub const CALL: u8 = 0x10; pub const END: u8 = 0x0B; +pub const BR: u8 = 0x0C; +pub const BR_IF: u8 = 0x0D; +pub const BR_TABLE: u8 = 0x0E; pub const RETURN: u8 = 0x0F; -pub const CALL: u8 = 0x10; +pub const DROP: u8 = 0x1A; pub const LOCAL_GET: u8 = 0x20; pub const LOCAL_SET: u8 = 0x21; pub const LOCAL_TEE: u8 = 0x22; diff --git a/src/core/reader/types/values.rs b/src/core/reader/types/values.rs index 28780bf52..686ae546b 100644 --- a/src/core/reader/types/values.rs +++ b/src/core/reader/types/values.rs @@ -67,6 +67,10 @@ impl WasmReader<'_> { Ok(result) } + pub fn read_var_i33(&mut self) -> Result { + todo!("read 33-bit signed integer D:") + } + pub fn read_var_f32(&mut self) -> Result { if self.full_wasm_binary.len() - self.pc < 4 { return Err(Error::Eof); diff --git a/src/core/sidetable.rs b/src/core/sidetable.rs new file mode 100644 index 000000000..6163e8056 --- /dev/null +++ 
b/src/core/sidetable.rs @@ -0,0 +1,117 @@ +//! This module contains a data structure to allow in-place interpretation +//! +//! Control-flow in WASM is denoted in labels. To avoid linear search through the WASM binary or +//! stack for the respective label of a branch, a sidetable is generated during validation, which +//! stores the offset on the current instruction pointer for the branch. A sidetable entry hence +//! allows translating the implicit control flow information ("jump to the next `else`") to +//! explicit modifications of the instruction pointer (`instruction_pointer += 13`). +//! +//! Branches in WASM can only go outwards, they either `break` out of a block or `continue` to the +//! head of a loop block. Put differently, a label can only be referenced from within its +//! associated structured control instruction. +//! +//! Notably, branching can also have side-effects on the operand stack: +//! +//! - Taking a branch unwinds the operand stack, down to where the targeted structured control flow +//! instruction was entered. [`SidetableEntry::pop_count`] holds information on how many values to +//! pop from the operand stack when a branch is taken. +//! - When a branch is taken, it may consume arguments from the operand stack. These are pushed +//! back on the operand stack after unwinding. This behavior can be emulated by copying the +//! uppermost [`SidetableEntry::val_count`] operands on the operand stack before taking a branch +//! into a structured control instruction. +//! +//! # Relevant instructions +//! **Sidetable jump origins (and how many ST entries they require)** +//! - br (1) +//! - br_if (1) +//! - br_table (num_labels + 1 for default label) +//! - if (2, maybe 1??) + +//! **Sidetable jump targets** +//! - end of block +//! - loop +//! - else +//! - end of else block +//! +//! # Reference +//! +//! - Core / Syntax / Instructions / Control Instructions, WASM Spec, +//! +//! - "A fast in-place interpreter for WebAssembly", Ben L. 
Titzer, +//! + +use alloc::vec::Vec; + +use crate::{Error, Result}; + +/// A sidetable +pub type Sidetable = Vec; + +/// Entry to translate the current branches implicit target into an explicit offset to the instruction pointer, as well as the side table pointer +/// +/// Each of the following constructs requires a [`SidetableEntry`]: +/// +/// - br +/// - br_if +/// - br_table +/// - else +#[derive(Copy, Clone)] +pub struct SidetableEntry { + /// Δpc: the amount to adjust the instruction pointer by if the branch is taken + pub delta_pc: isize, + + /// Δstp: the amount to adjust the side-table index by if the branch is taken + pub delta_stp: isize, + + /// valcnt: the number of values that will be copied if the branch is taken + /// + /// Branches may additionally consume operands themselves, which they push back on the operand + /// stack after unwinding. + pub val_count: usize, + + /// popcnt: the number of values that will be popped if the branch is taken + /// + /// Taking a branch unwinds the operand stack down to the height where the targeted structured + /// control instruction was entered. + pub pop_count: usize, +} + +impl core::fmt::Debug for SidetableEntry { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.debug_struct("SidetableEntry") + .field("Δpc", &self.delta_pc) + .field("Δstp", &self.delta_stp) + .field("valcnt", &self.val_count) + .field("popcnt", &self.pop_count) + .finish() + } +} + +pub struct IncompleteSidetableEntry { + pub ip: usize, + pub delta_ip: Option, + pub delta_stp: Option, + pub val_count: usize, + pub pop_count: usize, +} + +pub struct SidetableBuilder(pub Vec); + +impl SidetableBuilder { + pub fn new() -> Self { + Self(Vec::new()) + } + + // This panics if some sidetable entries still contain any None fields. 
+ pub fn into_sidetable(self) -> Sidetable { + self.0 + .into_iter() + .map(|entry| SidetableEntry { + delta_pc: entry.delta_ip.expect("Failed to generate sidetable"), + delta_stp: entry.delta_stp.expect("Failed to generate sidetable"), + val_count: entry.val_count, + pop_count: entry.pop_count, + }) + .collect() + } +} diff --git a/src/execution/interpreter_loop.rs b/src/execution/interpreter_loop.rs index 6892b52e4..96af1f856 100644 --- a/src/execution/interpreter_loop.rs +++ b/src/execution/interpreter_loop.rs @@ -17,13 +17,14 @@ use crate::{ core::{ indices::{FuncIdx, GlobalIdx, LocalIdx}, reader::{ - types::{memarg::MemArg, FuncType}, + types::{memarg::MemArg, BlockType, FuncType}, WasmReadable, WasmReader, }, + sidetable::{self, Sidetable, SidetableEntry}, }, locals::Locals, store::Store, - value, + unreachable_validated, value, value_stack::Stack, NumType, RuntimeError, ValType, Value, }; @@ -105,18 +106,70 @@ pub(super) fn run( let params = stack.pop_tail_iter(func_to_call_ty.params.valtypes.len()); let remaining_locals = func_to_call_inst.locals.iter().cloned(); + let sidetable = func_to_call_inst.sidetable.clone(); + trace!("Instruction: call [{func_to_call_idx:?}]"); let locals = Locals::new(params, remaining_locals); - stack.push_stackframe(func_to_call_idx, func_to_call_ty, locals, wasm.pc); + stack.push_stackframe(func_to_call_idx, func_to_call_ty, locals, wasm.pc, sidetable, 0); + wasm.move_start_to(func_to_call_inst.code_expr) .unwrap_validated(); } + BLOCK => { + let _block_type = BlockType::read_unvalidated(&mut wasm); + trace!("reached block, ignoring because of sidetable"); + + // Nothing else to do. The sidetable is responsible for control flow. 
+ } + IF => { + todo!("execute if instruction, low priority as if can be simulated with br_if and blocks") + } + ELSE => { + let sidetable = stack.current_stackframe().sidetable.clone(); + let mut sidetable_pointer = stack.current_stackframe().sidetable_pointer; + do_sidetable_control_transfer(&sidetable, &mut sidetable_pointer, &mut wasm, stack); + stack.current_stackframe_mut().sidetable_pointer = sidetable_pointer; + } + BR => { + let _target_label = wasm.read_var_u32().unwrap_validated(); + + let sidetable = stack.current_stackframe().sidetable.clone(); + let mut sidetable_pointer = stack.current_stackframe().sidetable_pointer; + do_sidetable_control_transfer(&sidetable, &mut sidetable_pointer, &mut wasm, stack); + stack.current_stackframe_mut().sidetable_pointer = sidetable_pointer; + } + BR_IF => { + let _target_label = wasm.read_var_u32().unwrap_validated(); + + trace!("Reached br_if"); + let condition: i32 = stack.pop_value(ValType::NumType(NumType::I32)).into(); + trace!("br_if condition is {condition}"); + + if condition != 0 { + let sidetable = stack.current_stackframe().sidetable.clone(); + let mut sidetable_pointer = stack.current_stackframe().sidetable_pointer; + do_sidetable_control_transfer(&sidetable, &mut sidetable_pointer, &mut wasm, stack); + stack.current_stackframe_mut().sidetable_pointer = sidetable_pointer; + } else { + stack.current_stackframe_mut().sidetable_pointer += 1; + } + } + BR_TABLE => { + todo!("execute BR_TABLE, Titzer stores multiple entries in the sidetable here, one for each label. 
See https://arxiv.org/pdf/2205.01183#lstlisting.1"); + } LOCAL_GET => { + trace!("executing local.get"); stack.get_local(wasm.read_var_u32().unwrap_validated() as LocalIdx); } - LOCAL_SET => stack.set_local(wasm.read_var_u32().unwrap_validated() as LocalIdx), - LOCAL_TEE => stack.tee_local(wasm.read_var_u32().unwrap_validated() as LocalIdx), + LOCAL_SET => { + trace!("executing local.set"); + stack.set_local(wasm.read_var_u32().unwrap_validated() as LocalIdx); + } + LOCAL_TEE => { + trace!("executing local.tee"); + stack.tee_local(wasm.read_var_u32().unwrap_validated() as LocalIdx); + } GLOBAL_GET => { let global_idx = wasm.read_var_u32().unwrap_validated() as GlobalIdx; let global = store.globals.get(global_idx).unwrap_validated(); @@ -129,6 +182,9 @@ pub(super) fn run( global.value = stack.pop_value(global.global.ty.ty) } + DROP => { + stack.pop_tail_iter(1); // pop_value takes a type. this works for now + } I32_LOAD => { let memarg = MemArg::read_unvalidated(&mut wasm); let relative_address: u32 = stack.pop_value(ValType::NumType(NumType::I32)).into(); @@ -1410,3 +1466,29 @@ pub(super) fn run( } Ok(()) } + +fn do_sidetable_control_transfer( + sidetable: &Sidetable, + sidetable_pointer: &mut usize, + wasm: &mut WasmReader, + stack: &mut Stack, +) { + trace!("Fetching sidetable entry at {}", sidetable_pointer); + let entry = *sidetable + .get(*sidetable_pointer) + .expect("sidetable entry to exist"); + + wasm.skip( + entry + .delta_pc + .try_into() + .expect("delta_pc to not be negative for branches"), + ) + .unwrap_validated(); + + *sidetable_pointer += + usize::try_from(entry.delta_stp).expect("delta_stp to be negative for branches"); + usize::try_from(entry.delta_stp).expect("delta_stp to be negative for branches"); + + stack.unwind(entry.val_count, entry.pop_count); +} diff --git a/src/execution/mod.rs b/src/execution/mod.rs index 3480690f1..2628c3c7e 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -174,7 +174,7 @@ where // setting `usize::MAX` 
as return address for the outermost function ensures that we // observably fail upon errornoeusly continuing execution after that function returns. - stack.push_stackframe(func_idx, func_ty, locals, usize::MAX); + stack.push_stackframe(func_idx, func_ty, locals, usize::MAX, func_inst.sidetable.clone(), 0); // Run the interpreter run( @@ -227,7 +227,7 @@ where // Prepare a new stack with the locals for the entry function let mut stack = Stack::new(); let locals = Locals::new(params.into_iter(), func_inst.locals.iter().cloned()); - stack.push_stackframe(func_idx, func_ty, locals, 0); + stack.push_stackframe(func_idx, func_ty, locals, 0, func_inst.sidetable.clone(), 0); // Run the interpreter run( @@ -319,7 +319,7 @@ where functions .zip(func_blocks) - .map(|(ty, func)| { + .map(|(ty, (func, sidetable))| { wasm_reader .move_start_to(*func) .expect("function index to be in the bounds of the WASM binary"); @@ -336,6 +336,7 @@ where ty: *ty, locals, code_expr, + sidetable: sidetable.clone(), } }) .collect() diff --git a/src/execution/store.rs b/src/execution/store.rs index 6d897b8f4..5f3a13044 100644 --- a/src/execution/store.rs +++ b/src/execution/store.rs @@ -6,6 +6,7 @@ use crate::core::indices::TypeIdx; use crate::core::reader::span::Span; use crate::core::reader::types::global::Global; use crate::core::reader::types::{MemType, TableType, ValType}; +use crate::core::sidetable::Sidetable; use crate::execution::value::{Ref, Value}; /// The store represents all global state that can be manipulated by WebAssembly programs. 
It @@ -24,6 +25,7 @@ pub struct FuncInst { pub ty: TypeIdx, pub locals: Vec, pub code_expr: Span, + pub sidetable: Sidetable, } #[allow(dead_code)] diff --git a/src/execution/value_stack.rs b/src/execution/value_stack.rs index 27cf4ddfb..d9d2cfd27 100644 --- a/src/execution/value_stack.rs +++ b/src/execution/value_stack.rs @@ -2,15 +2,13 @@ use alloc::vec::{Drain, Vec}; use crate::core::indices::{FuncIdx, LocalIdx}; use crate::core::reader::types::{FuncType, ValType}; +use crate::core::sidetable::Sidetable; use crate::execution::assert_validated::UnwrapValidatedExt; use crate::execution::value::Value; use crate::locals::Locals; use crate::unreachable_validated; -/// The stack at runtime containing -/// 1. Values -/// 2. Labels -/// 3. Activations +/// The stack at runtime containing values /// /// See #[derive(Default)] @@ -52,6 +50,54 @@ impl Stack { } } + /// This unwinds the stack by popping the topmost `num_values_to_keep` values and storing them temporarily. + /// Then the next topmost `num_values_to_remove` values are discarded before the previously popped values are pushed back to the stack. + /// + /// Example: + /// ``` + /// BOTTOM TOP + /// ----------------------------------------------------------- + /// | ... | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 1 | 2 | 3 | 4 | + /// --------------------------------------------------------- + /// | num_values_to_remove | num_values_to_keep | + /// ``` + /// becomes + /// + /// ``` + /// BOTTOM TOP + /// |---------------------------------- + /// | ... | 1 | 8 | 9 | 1 | 2 | 3 | 4 | + /// |---------------------------------- + /// | num_values_to_keep | + /// ``` + /// + // TODO Eventually this value stack should store raw bytes instead of enums on the stack. Then both `num_values` parameters should instead work with bytes. 
+ pub fn unwind(&mut self, num_values_to_keep: usize, num_values_to_remove: usize) { + // FIXME: This is inefficient + let mut temporary_values = Vec::new(); + + for _ in 0..num_values_to_keep { + temporary_values.push(self.values.pop().unwrap_validated()); + } + + for _ in 0..num_values_to_remove { + self.values.pop().unwrap_validated(); + } + + // We should not have crossed a callframe boundary + debug_assert!( + self.frames + .last() + .map_or(true, |last_frame| self.values.len() + >= last_frame.value_stack_base_idx), + "can not pop values past the current stackframe" + ); + + for value in temporary_values.into_iter().rev() { + self.values.push(value); + } + } + /// Copy a value of the given [ValType] from the value stack without removing it pub fn peek_value(&self, ty: ValType) -> Value { let value = self.values.last().unwrap_validated(); @@ -79,7 +125,7 @@ impl Stack { let stack_value = self.pop_value(local_ty); debug_assert!( - self.values.len() > self.current_stackframe().value_stack_base_idx, + self.values.len() >= self.current_stackframe().value_stack_base_idx, "can not pop values past the current stackframe" ); @@ -135,6 +181,8 @@ impl Stack { func_ty: &FuncType, locals: Locals, return_addr: usize, + sidetable: Sidetable, + sidetable_pointer: usize, ) { self.frames.push(CallFrame { func_idx, @@ -142,6 +190,8 @@ impl Stack { return_addr, value_stack_base_idx: self.values.len(), return_value_count: func_ty.returns.valtypes.len(), + sidetable, + sidetable_pointer, }) } @@ -183,4 +233,39 @@ pub(crate) struct CallFrame { /// Number of return values to retain on [`Stack::values`] when unwinding/popping a [`CallFrame`] pub return_value_count: usize, + + /// The sidetable used for control flow in the current [`CallFrame`] + /// FIXME: This is currently cloned for every callframe, which is pretty inefficient. 
+ pub sidetable: Sidetable, + + /// An index that points to the current sidetable entry + pub sidetable_pointer: usize, +} + +#[test] +fn test_stack_unwind() { + fn test_with_ten_example_numbers(num_keep: usize, num_pop: usize, expected: &[u32]) { + let mut stack = Stack::new(); + for i in 0..10 { + stack.push_value(Value::I32(i)); + } + + stack.unwind(num_keep, num_pop); + + let expected_values: Vec = expected.into_iter().copied().map(Value::I32).collect(); + + assert_eq!(&stack.values, &expected_values); + } + + test_with_ten_example_numbers(2, 3, &[0, 1, 2, 3, 4, 8, 9]); + + test_with_ten_example_numbers(0, 2, &[0, 1, 2, 3, 4, 5, 6, 7]); + + test_with_ten_example_numbers(0, 0, &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + + test_with_ten_example_numbers(4, 0, &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + + test_with_ten_example_numbers(10, 0, &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + + test_with_ten_example_numbers(1, 9, &[9]); } diff --git a/src/validation/code.rs b/src/validation/code.rs index 52400d735..f01347816 100644 --- a/src/validation/code.rs +++ b/src/validation/code.rs @@ -1,14 +1,15 @@ use alloc::vec::Vec; use core::iter; -use crate::core::indices::{FuncIdx, GlobalIdx, LocalIdx}; +use crate::core::indices::{FuncIdx, GlobalIdx, LabelIdx, LocalIdx}; use crate::core::reader::section_header::{SectionHeader, SectionTy}; use crate::core::reader::span::Span; use crate::core::reader::types::global::Global; use crate::core::reader::types::memarg::MemArg; -use crate::core::reader::types::{FuncType, NumType, ValType}; +use crate::core::reader::types::{BlockType, FuncType, NumType, ResultType, ValType}; use crate::core::reader::{WasmReadable, WasmReader}; -use crate::validation_stack::ValidationStack; +use crate::core::sidetable::{self, IncompleteSidetableEntry, Sidetable, SidetableBuilder}; +use crate::validation_stack::{LabelInfo, ValidationStack}; use crate::{Error, Result}; pub fn validate_code_section( @@ -17,7 +18,7 @@ pub fn validate_code_section( fn_types: &[FuncType], 
type_idx_of_fn: &[usize], globals: &[Global], -) -> Result> { +) -> Result> { assert_eq!(section_header.ty, SectionTy::Code); let code_block_spans = wasm.read_vec_enumerated(|wasm, idx| { @@ -38,6 +39,8 @@ pub fn validate_code_section( let mut stack = ValidationStack::new(); + let mut sidetable_builder = SidetableBuilder::new(); + read_instructions( idx, wasm, @@ -46,8 +49,13 @@ pub fn validate_code_section( globals, fn_types, type_idx_of_fn, + &mut sidetable_builder, )?; + let sidetable = sidetable_builder.into_sidetable(); + + debug!("{:?}", &sidetable); + // Check if there were unread trailing instructions after the last END if previous_pc + func_size as usize != wasm.pc { todo!( @@ -55,7 +63,7 @@ pub fn validate_code_section( ) } - Ok(func_block) + Ok((func_block, sidetable)) })?; trace!( @@ -91,6 +99,7 @@ fn read_instructions( globals: &[Global], fn_types: &[FuncType], type_idx_of_fn: &[usize], + sidetable_builder: &mut SidetableBuilder, ) -> Result<()> { // TODO we must terminate only if both we saw the final `end` and when we consumed all of the code span loop { @@ -102,42 +111,197 @@ fn read_instructions( use crate::core::reader::types::opcode::*; match first_instr_byte { - // nop + // unreachable: [t1*] -> [t2*] + UNREACHABLE => { + stack.make_unspecified(); + } + // nop: [] -> [] NOP => {} - // end - END => { - // TODO check if there are labels on the stack. - // If there are none (i.e. this is the implicit end of the function and not a jump to the end of a function), the stack must only contain the valid return values, no other junk. - // - // Else, anything may remain on the stack, as long as the top of the stack matche the current blocks return value. - - if stack.has_remaining_label() { - // This is the END of a block. 
+ // block: [] -> [t*2] + BLOCK => { + let func_type: FuncType = BlockType::read(wasm)?.as_func_type(&fn_types)?; + + stack.push_label(LabelInfo::Block { + func_type, + sidetable_branch_indices: Vec::new(), + num_values_on_stack_before: stack.len_without_labels(), + }); + } + BR => { + let label_idx = wasm.read_var_u32()? as LabelIdx; + + let label_info: &LabelInfo = stack + .find_nth_label_from_top(label_idx) + .ok_or(Error::InvalidLabelIdx)?; + + match label_info { + LabelInfo::Block { + func_type, + num_values_on_stack_before, + .. + } => { + let block_return_types = &func_type.returns.valtypes; + // todo!("do branches require the top of the stack or the entire stack to be correct?"); + stack.assert_val_types_on_top(block_return_types)?; + + let Some(stack_len) = stack.len_without_labels() else { + // The stack contains an `Unspecified` element, so this instruction is practically unreachable. + // It does not matter if the Unspecified was before this Block/If/Loop started or after. + // + // We don't need to generate a sidetable entry and the validation of the top-most stack values has already be done, so we can just continue with the next instruction. + continue; + }; + + let num_values_on_stack_before = num_values_on_stack_before.expect("this to always be Some. We already checked through `stack.len_without_label` whether the stack contains Unspecified. If so, we ignored this instruction. Because we are in a block right now, the stack values below the block label cannot have changed, so any Unspecified entries below the block label are impossible."); + + let val_count = block_return_types.len(); + let pop_count = stack_len - num_values_on_stack_before - val_count; + + error!("val: {}, pop: {}", val_count, pop_count); + + // FIXME Now we actually need to modifiy the label info, so we have to borrow it again + let Some(LabelInfo::Block { + sidetable_branch_indices, + .. 
+ }) = stack.find_nth_label_from_top_mut(label_idx) + else { + unreachable!("We just found this block") + }; + + sidetable_builder.0.push(IncompleteSidetableEntry { + ip: wasm.pc, // TODO maybe - 1? + delta_ip: None, + delta_stp: None, + val_count, + pop_count, + }); + + // Store index of new sidetable entry so it can be completed, when the end of this block is reached. + sidetable_branch_indices.push(sidetable_builder.0.len() - 1); + } + _ => todo!("handle branches for loops and ifs/elses"), + } + } + BR_IF => { + let label_idx = wasm.read_var_u32()? as LabelIdx; - // We check the valtypes on top of the stack + // condition for if + stack.assert_pop_val_type(ValType::NumType(NumType::I32))?; + trace!("read br_if condition"); + + let label_info: &LabelInfo = stack + .find_nth_label_from_top(label_idx) + .ok_or(Error::InvalidLabelIdx)?; + + match label_info { + LabelInfo::Block { + func_type, + num_values_on_stack_before, + .. + } => { + let block_return_types = &func_type.returns.valtypes; + // todo!("do branches require the top of the stack or the entire stack to be correct?"); + stack.assert_val_types_on_top(block_return_types)?; + + let Some(stack_len) = stack.len_without_labels() else { + // The stack contains an `Unspecified` element, so this instruction is practically unreachable. + // It does not matter if the Unspecified was before this Block/If/Loop started or after. + // + // We don't need to generate a sidetable entry and the validation of the top-most stack values has already be done, so we can just continue with the next instruction. + continue; + }; + + let num_values_on_stack_before = num_values_on_stack_before.expect("this to always be Some. We already checked through `stack.len_without_label` whether the stack contains Unspecified. If so, we ignored this instruction. 
Because we are in a block right now, the stack values below the block label cannot have changed, so any Unspecified entries below the block label are impossible."); + + let val_count = block_return_types.len(); + let pop_count = stack_len - num_values_on_stack_before - val_count; + + error!("val: {}, pop: {}", val_count, pop_count); + + // FIXME Now we actually need to modifiy the label info, so we have to borrow it again + let Some(LabelInfo::Block { + sidetable_branch_indices, + .. + }) = stack.find_nth_label_from_top_mut(label_idx) + else { + unreachable!("We just found this block") + }; + + sidetable_builder.0.push(IncompleteSidetableEntry { + ip: wasm.pc, // TODO maybe - 1? + delta_ip: None, + delta_stp: None, + val_count, + pop_count, + }); + + // Store index of new sidetable entry so it can be completed, when the end of this block is reached. + sidetable_branch_indices.push(sidetable_builder.0.len() - 1); + } + _ => todo!("handle branches for loops and ifs/elses"), + } + } + LOOP | IF => { + let _block_type: FuncType = BlockType::read(wasm)?.as_func_type(&fn_types)?; - // TODO remove the ugly hack for the todo!(..)! - #[allow(clippy::diverging_sub_expression)] - { - let _block_return_ty: &[ValType] = - todo!("get return types for current block"); + todo!("handle loop and if instructions") + // todo!( + // "{}, {}", + // "add incomplete entry to sidetable", + // "verify from now on that only the top-most block_ty.params are accessed" + // ); + } + // end + END => { + // This is the END of a block, loop or if + match stack.find_nth_label_from_top(0) { + Some(LabelInfo::Block { func_type, .. 
}) => { + // Before we can actually pop the label and valtypes from the stack, we need to validate the valtypes on top of the stack + stack.assert_val_types_on_top(&func_type.returns.valtypes)?; + + // FIXME It's not very pretty to shadow `func_type` with a newer variable that should be exactly the same + // Clear the stack until the next label + let LabelInfo::Block { + func_type, + sidetable_branch_indices, + num_values_on_stack_before, + } = stack + .pop_label_and_above() + .expect("this to find find the topmost label, which we just found") + else { + unreachable!( + "We just checked that the topmost label has to be a Block" + ); + }; + + // And push the blocks return types onto the stack again + for valtype in &func_type.returns.valtypes { + stack.push_valtype(valtype.clone()); + } + + let sidetable_len = sidetable_builder.0.len(); + + for idx in sidetable_branch_indices { + let incomplete_entry = sidetable_builder + .0 + .get_mut(idx) + .expect("index into sidetable to always be valid"); + + incomplete_entry.delta_ip = + Some(wasm.pc as isize - incomplete_entry.ip as isize); + incomplete_entry.delta_stp = + Some(sidetable_len as isize - idx as isize); + } + } + Some(_) => todo!("handle end for loops and ifs/elses"), + None => { + // This is the last end of a function + + // The stack must only contain the function's return valtypes + let this_func_ty = &fn_types[type_idx_of_fn[this_function_idx]]; + stack.assert_val_types(&this_func_ty.returns.valtypes)?; + return Ok(()); } - // stack.assert_val_types_on_top(block_return_ty)?; - - // Clear the stack until the next label - // stack.clear_until_next_label(); - - // And push the blocks return types onto the stack again - // for valtype in block_return_ty { - // stack.push_valtype(*valtype); - // } - } else { - // This is the last end of a function - - // The stack must only contain the function's return valtypes - let this_func_ty = &fn_types[type_idx_of_fn[this_function_idx]]; - 
stack.assert_val_types(&this_func_ty.returns.valtypes)?; - return Ok(()); } } RETURN => { @@ -175,10 +339,12 @@ fn read_instructions( for typ in func_ty.returns.valtypes.iter() { stack.push_valtype(*typ); } - } - // unreachable: [t1*] -> [t2*] - UNREACHABLE => { - stack.make_unspecified(); + // if let Some(popped_control_entry) = control_stack.pop() { + // todo!("Complete sidetable entries that jumped to this entry's label"); + // } else { + // return Ok(()); + // } + // todo!("check if there is a pending sidetable entry to be completed") } // local.get: [] -> [t] LOCAL_GET => { @@ -192,11 +358,12 @@ fn read_instructions( let local_ty = locals.get(local_idx).ok_or(Error::InvalidLocalIdx)?; stack.assert_pop_val_type(*local_ty)?; } - // local.set [t] -> [t] + // local.tee [t] -> [t] LOCAL_TEE => { let local_idx = wasm.read_var_u32()? as LocalIdx; let local_ty = locals.get(local_idx).ok_or(Error::InvalidLocalIdx)?; stack.assert_pop_val_type(*local_ty)?; + stack.push_valtype(*local_ty); } // global.get [] -> [t] GLOBAL_GET => { @@ -220,6 +387,9 @@ fn read_instructions( stack.assert_pop_val_type(global.ty.ty)?; } + DROP => { + stack.pop_valtype()?; + } // i32.load [i32] -> [i32] I32_LOAD => { let _memarg = MemArg::read_unvalidated(wasm); diff --git a/src/validation/mod.rs b/src/validation/mod.rs index d6a52fa9b..bdb6740e0 100644 --- a/src/validation/mod.rs +++ b/src/validation/mod.rs @@ -8,6 +8,7 @@ use crate::core::reader::types::global::Global; use crate::core::reader::types::import::Import; use crate::core::reader::types::{FuncType, MemType, TableType}; use crate::core::reader::{WasmReadable, WasmReader}; +use crate::core::sidetable::Sidetable; use crate::{Error, Result}; pub(crate) mod code; @@ -28,7 +29,7 @@ pub struct ValidationInfo<'bytecode> { pub(crate) globals: Vec, #[allow(dead_code)] pub(crate) exports: Vec, - pub(crate) func_blocks: Vec, + pub(crate) func_blocks: Vec<(Span, Sidetable)>, /// The start function which is automatically executed during 
instantiation pub(crate) start: Option, } diff --git a/src/validation/validation_stack.rs b/src/validation/validation_stack.rs index a692ae402..b3e9e1699 100644 --- a/src/validation/validation_stack.rs +++ b/src/validation/validation_stack.rs @@ -7,7 +7,7 @@ use super::Result; use alloc::vec::Vec; -use crate::{Error, ValType}; +use crate::{core::reader::types::FuncType, Error, ValType}; #[derive(Debug, PartialEq, Eq)] pub(super) struct ValidationStack { @@ -24,6 +24,20 @@ impl ValidationStack { self.stack.len() } + /// Returns None if there is atleast one Unspecified entry. + /// This function is only used during sidetable generation and there those instructions, where atleast one unspecified is present, are unreachable during execution. + pub(super) fn len_without_labels(&self) -> Option { + let mut num_values = 0; + for entry in &self.stack { + match entry { + ValidationStackEntry::UnspecifiedValTypes => return None, + ValidationStackEntry::Label(_) => {} + ValidationStackEntry::Val(_) => num_values += 1, + } + } + Some(num_values) + } + pub(super) fn push_valtype(&mut self, valtype: ValType) { self.stack.push(ValidationStackEntry::Val(valtype)); } @@ -65,6 +79,19 @@ impl ValidationStack { .ok_or(Error::InvalidValidationStackValType(None)) } + pub fn pop_valtype(&mut self) -> Result<()> { + match self.stack.last() { + Some(ValidationStackEntry::Val(_)) => { + self.stack.pop(); + } + Some(ValidationStackEntry::UnspecifiedValTypes) => {} + Some(ValidationStackEntry::Label(li)) => return Err(Error::FoundLabel(li.clone())), + None => return Err(Error::InvalidValType), + }; + + Ok(()) + } + /// Assert the top-most [`ValidationStackEntry`] is a specific [`ValType`], after popping it from the [`ValidationStack`] /// /// # Returns @@ -82,7 +109,7 @@ impl ValidationStack { ValidationStackEntry::Val(ty) => (ty == expected_ty) .then_some(()) .ok_or(Error::InvalidValidationStackValType(Some(ty))), - ValidationStackEntry::Label(li) => Err(Error::FoundLabel(li.kind)), + 
ValidationStackEntry::Label(li) => Err(Error::FoundLabel(li)), ValidationStackEntry::UnspecifiedValTypes => { unreachable!("we just checked if the topmost entry is of this type") } @@ -185,7 +212,7 @@ impl ValidationStack { /// /// - `Ok(LabelInfo)` if a label has been found and popped /// - `None` if no label was found on the stack - fn pop_label_and_above(&mut self) -> Option { + pub fn pop_label_and_above(&mut self) -> Option { /// Delete all the values until the topmost label or until the stack is empty match self.find_topmost_label_idx() { Some(idx) => { @@ -212,6 +239,28 @@ impl ValidationStack { .iter() .any(|e| matches!(e, ValidationStackEntry::Label(_))) } + + pub fn find_nth_label_from_top(&self, n: usize) -> Option<&LabelInfo> { + self.stack + .iter() + .rev() + .filter_map(|entry| match entry { + ValidationStackEntry::Label(label_info) => Some(label_info), + _ => None, + }) + .nth(n) + } + + pub fn find_nth_label_from_top_mut(&mut self, n: usize) -> Option<&mut LabelInfo> { + self.stack + .iter_mut() + .rev() + .filter_map(|entry| match entry { + ValidationStackEntry::Label(label_info) => Some(label_info), + _ => None, + }) + .nth(n) + } } #[derive(Clone, Debug, PartialEq, Eq)] @@ -232,22 +281,43 @@ enum ValidationStackEntry { } #[derive(Clone, Debug, PartialEq, Eq)] -pub(crate) struct LabelInfo { - pub(crate) kind: LabelKind, -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum LabelKind { - Block, - Loop, - If, +pub(crate) enum LabelInfo { + Block { + func_type: FuncType, + sidetable_branch_indices: Vec, + num_values_on_stack_before: Option, // Is None if the block is pracically unreachable. We still have to validate everything, so this LabelInfo is still necessary. + }, + Loop { + func_type: FuncType, + num_values_on_stack_before: Option, // Is None if the block is pracically unreachable. We still have to validate everything, so this LabelInfo is still necessary. 
+ first_sidetable_entry_index: usize, + }, + If, // TODO } #[cfg(test)] mod tests { - use crate::{NumType, RefType, ValType}; - - use super::{LabelInfo, LabelKind, ValidationStack}; + use super::{LabelInfo, ValidationStack}; + use crate::{ + core::reader::types::{FuncType, ResultType}, + NumType, RefType, ValType, + }; + use alloc::vec::Vec; + + fn make_dummy_block_label() -> LabelInfo { + LabelInfo::Block { + func_type: FuncType { + params: ResultType { + valtypes: Vec::new(), + }, + returns: ResultType { + valtypes: Vec::new(), + }, + }, + sidetable_branch_indices: Vec::new(), + num_values_on_stack_before: Some(0), + } + } #[test] fn push_then_pop() { @@ -275,32 +345,13 @@ mod tests { let mut stack = ValidationStack::new(); stack.push_valtype(ValType::NumType(NumType::I64)); - stack.push_label(LabelInfo { - kind: LabelKind::Block, - }); - - stack.push_label(LabelInfo { - kind: LabelKind::Loop, - }); - + stack.push_label(make_dummy_block_label()); + stack.push_label(make_dummy_block_label()); stack.push_valtype(ValType::VecType); - // This removes the `ValType::VecType` and the `LabelKind::Loop` label - let popped_label = stack.pop_label_and_above().unwrap(); - assert_eq!( - popped_label, - LabelInfo { - kind: LabelKind::Loop, - } - ); - - let popped_label = stack.pop_label_and_above().unwrap(); - assert_eq!( - popped_label, - LabelInfo { - kind: LabelKind::Block, - } - ); + // This removes the `ValType::VecType` and the `LabelInfo::Block` label + let _popped_label = stack.pop_label_and_above().unwrap(); + let _popped_label = stack.pop_label_and_above().unwrap(); // The first valtype should still be there stack.assert_pop_val_type(ValType::NumType(NumType::I64)); @@ -322,9 +373,7 @@ mod tests { ]) .unwrap(); - stack.push_label(LabelInfo { - kind: LabelKind::Block, - }); + stack.push_label(make_dummy_block_label()); stack.push_valtype(ValType::NumType(NumType::I32)); stack @@ -339,9 +388,7 @@ mod tests { stack.assert_val_types(&[]).unwrap(); 
stack.push_valtype(ValType::NumType(NumType::I32)); - stack.push_label(LabelInfo { - kind: LabelKind::Block, - }); + stack.push_label(make_dummy_block_label()); // Valtypes separated by a label should also not be detected stack.assert_val_types(&[]).unwrap(); @@ -383,9 +430,7 @@ mod tests { #[test] fn unspecified() { let mut stack = ValidationStack::new(); - stack.push_label(LabelInfo { - kind: LabelKind::Block, - }); + stack.push_label(make_dummy_block_label()); stack.make_unspecified(); @@ -399,13 +444,7 @@ mod tests { .unwrap(); // Let's remove the unspecified entry and the first label - let popped_label = stack.pop_label_and_above().unwrap(); - assert_eq!( - popped_label, - LabelInfo { - kind: LabelKind::Block, - } - ); + let _popped_label = stack.pop_label_and_above().unwrap(); // Now there are no values left on the stack assert_eq!(stack.assert_val_types(&[]), Ok(())); diff --git a/tests/lib.rs b/tests/lib.rs index bbbb713a2..0aec2d618 100644 --- a/tests/lib.rs +++ b/tests/lib.rs @@ -1 +1,2 @@ mod arithmetic; +mod structured_control_flow; diff --git a/tests/structured_control_flow/block.rs b/tests/structured_control_flow/block.rs new file mode 100644 index 000000000..61abb0d68 --- /dev/null +++ b/tests/structured_control_flow/block.rs @@ -0,0 +1,209 @@ +use wasm::{validate, RuntimeInstance, DEFAULT_MODULE}; + +/// Runs a function that does nothing and contains only a single empty block +#[test_log::test] +fn empty() { + let wasm_bytes = wat::parse_str( + r#" + (module + (func (export "do_nothing") (block) + ) + ) + "#, + ) + .unwrap(); + + let validation_info = validate(&wasm_bytes).expect("validation failed"); + let mut instance = RuntimeInstance::new(&validation_info).expect("instantiation failed"); + + assert_eq!( + (), + instance + .invoke(&instance.get_function_by_index(0, 0).unwrap(), ()) + .unwrap() + ); +} + +#[test_log::test] +fn branch() { + let wasm_bytes = wat::parse_str( + r#" + (module + (func (export "with_branch") (result i32) + (block 
$outer_block (result i32) + (block $my_block (result i32) + i32.const 5 + br $my_block + i32.const 3 + ) + i32.const 3 + i32.add + ) + ) + ) + "#, + ) + .unwrap(); + + let validation_info = validate(&wasm_bytes).expect("validation failed"); + let mut instance = RuntimeInstance::new(&validation_info).expect("instantiation failed"); + + assert_eq!( + 8, + instance + .invoke(&instance.get_function_by_index(0, 0).unwrap(), ()) + .unwrap() + ); +} + +#[test_log::test] +fn param_and_result() { + let wasm_bytes = wat::parse_str( + r#" + (module + (func (export "add_one") (param $x i32) (result) + local.get $x + (block $my_block (param i32) (result i32) + i32.const 1 + i32.add + br $my_block + ) + ) + ) + "#, + ) + .unwrap(); + + let validation_info = validate(&wasm_bytes).expect("validation failed"); + let mut instance = RuntimeInstance::new(&validation_info).expect("instantiation failed"); + + assert_eq!( + 7, + instance + .invoke(&instance.get_function_by_index(0, 0).unwrap(), 6) + .unwrap() + ); +} + +#[test_log::test] +fn return_out_of_block() { + let wasm_bytes = wat::parse_str( + r#" + (module + (func (export "get_three") (result i32) + (block + i32.const 5 + i32.const 3 + return + ) + unreachable + ) + ) + "#, + ) + .unwrap(); + + let validation_info = validate(&wasm_bytes).expect("validation failed"); + let mut instance = RuntimeInstance::new(&validation_info).expect("instantiation failed"); + + assert_eq!( + 3, + instance + .invoke(&instance.get_function_by_index(0, 0).unwrap(), ()) + .unwrap() + ); +} + +#[test_log::test] +fn branch_if() { + let wasm_bytes = wat::parse_str( + r#" + (module + (func (export "abs") (param $x i32) (result i32) + (block $my_block + local.get $x + i32.const 0 + i32.ge_s + br_if $my_block + local.get $x + i32.const -1 + i32.mul + return + ) + local.get $x + ) + ) + "#, + ) + .unwrap(); + + let validation_info = validate(&wasm_bytes).expect("validation failed"); + let mut instance = 
RuntimeInstance::new(&validation_info).expect("instantiation failed"); + + let abs_fn = instance.get_function_by_index(0, 0).unwrap(); + + assert_eq!(6, instance.invoke(&abs_fn, 6).unwrap()); + assert_eq!(123, instance.invoke(&abs_fn, -123).unwrap()); + assert_eq!(0, instance.invoke(&abs_fn, 0).unwrap()); +} + +#[test_log::test] +fn recursive_fibonacci() { + let wasm_bytes = wat::parse_str( + r#" + (module + (func (export "fibonacci") (param $x i32) (result i32) + (call $fib_internal + (i32.const 0) + (i32.const 1) + (local.get $x) + ) + ) + + (func $fib_internal (param $x0 i32) (param $x1 i32) (param $n_left i32) (result i32) + (block $zero_check + ;; if n_left reached 0, we return + local.get $n_left + br_if $zero_check + local.get $x0 + return + ) + + ;; otherwise decrement n_left + local.get $n_left + i32.const -1 + i32.add + local.set $n_left + + ;; store x1 temporarily + local.get $x1 + + ;; calculate new x1 + local.get $x0 + local.get $x1 + i32.add + local.set $x1 + + ;; set x0 to the previous x1 + local.set $x0 + + + (call $fib_internal + (local.get $x0) + (local.get $x1) + (local.get $n_left) + ) + ) + ) + "#, + ) + .unwrap(); + + let validation_info = validate(&wasm_bytes).expect("validation failed"); + let mut instance = RuntimeInstance::new(&validation_info).expect("instantiation failed"); + + let fib_fn = instance.get_function_by_index(0, 0).unwrap(); + + let first_ten = (0..10).map(|n| instance.invoke(&fib_fn, n).unwrap()).collect::>(); + assert_eq!(&first_ten, &[0, 1, 1, 2, 3, 5, 8, 13, 21, 34]); +} \ No newline at end of file diff --git a/tests/structured_control_flow/if.rs b/tests/structured_control_flow/if.rs new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/tests/structured_control_flow/if.rs @@ -0,0 +1 @@ + diff --git a/tests/structured_control_flow/loop.rs b/tests/structured_control_flow/loop.rs new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/tests/structured_control_flow/loop.rs @@ -0,0 +1 @@ + diff --git 
a/tests/structured_control_flow/mod.rs b/tests/structured_control_flow/mod.rs new file mode 100644 index 000000000..59fb5ec3c --- /dev/null +++ b/tests/structured_control_flow/mod.rs @@ -0,0 +1,3 @@ +mod block; +mod r#if; +mod r#loop;