diff --git a/cranelift/codegen/src/ir/memtype.rs b/cranelift/codegen/src/ir/memtype.rs index 6d9a2b999241..a78cf06ed916 100644 --- a/cranelift/codegen/src/ir/memtype.rs +++ b/cranelift/codegen/src/ir/memtype.rs @@ -4,8 +4,65 @@ //! each field having a type and possibly an attached fact -- that we //! can use in proof-carrying code to validate accesses to structs and //! propagate facts onto the loaded values as well. +//! +//! Memory types are meant to be rich enough to describe the *layout* +//! of values in memory, but do not necessarily need to embody +//! higher-level features such as subtyping directly. Rather, they +//! should encode an implementation of a type or object system. +//! +//! Note also that it is a non-goal for now for this type system to be +//! "complete" or fully orthogonal: we have some restrictions now +//! (e.g., struct fields are only primitives) because this is all we +//! need for existing PCC applications, and it keeps the +//! implementation simpler. +//! +//! There are a few basic kinds of types: +//! +//! - A struct is an aggregate of fields and an overall size. Each +//! field has a *primitive Cranelift type*. This is for simplicity's +//! sake: we do not allow nested memory types because to do so +//! invites cycles, requires recursive computation of sizes, creates +//! complicated questions when field types are dynamically-sized, +//! and in general is more complexity than we need. +//! +//! The expectation (validated by PCC) is that when a checked load +//! or store accesses memory typed by a memory type, accesses will +//! only be to fields at offsets named in the type, and will be via +//! the given Cranelift type -- i.e., no type-punning occurs in +//! memory. +//! +//! The overall size of the struct may be larger than that implied +//! by the fields because (i) we may not want or need to name all +//! the actually-existing fields in the memory type, and (ii) there +//! may be alignment padding that we also don't want or need to +//! represent explicitly. +//! +//! - A static memory is an untyped blob of storage with a static +//! size. This is memory that can be accessed with any type of load +//! or store at any valid offset. +//! +//! Note that this is *distinct* from an "array of u8" kind of +//! representation of memory, if/when we can represent such a thing, +//! because the expectation with memory types' fields (including +//! array elements) is that they are strongly typed, only accessed +//! via that type, and not type-punned. We don't want to imply any +//! restriction on load/store size, or any actual structure, with +//! untyped memory; it's just a blob. +//! +//! Eventually we plan to also have: +//! +//! - A dynamic memory is an untyped blob of storage with a size given +//! by a global value (GV). This is otherwise just like the "static +//! memory" variant described above. +//! +//! - A dynamic array is a sequence of struct memory types, with a +//! length given by a global value (GV). This is useful to model, +//! e.g., tables. +//! +//! - A discriminated union is a union of several memory types +//! together with a tag field. This will be useful to model and +//! verify subtyping/downcasting for Wasm GC, among other uses. -use crate::ir::entities::MemoryType; use crate::ir::pcc::Fact; use crate::ir::Type; use alloc::vec::Vec; @@ -14,10 +71,17 @@ use alloc::vec::Vec; use serde_derive::{Deserialize, Serialize}; /// Data defining a memory type. +/// +/// A memory type corresponds to a layout of data in memory. It may +/// have a statically-known or dynamically-known size. #[derive(Clone, PartialEq, Hash)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum MemoryTypeData { /// An aggregate consisting of certain fields at certain offsets. + /// + /// Fields must be sorted by offset, must be within the struct's + /// overall size, and must not overlap. These conditions are + /// checked by the CLIF verifier. Struct { /// Size of this type. size: u64, @@ -26,25 +90,10 @@ pub enum MemoryTypeData { fields: Vec, }, - /// An aggregate consisting of a single element repeated at a - /// certain stride, with a statically-known length (element - /// count). Layout is assumed to be contiguous, with a stride - /// equal to the element type's size (so, if the stride is greater - /// than the struct's packed size, be sure to include padding in - /// the memory-type definition). - StaticArray { - /// The element type. May be another array, a struct, etc. - element: MemoryType, - - /// Number of elements. - length: u64, - }, - - /// A single Cranelift primitive of the given type stored in - /// memory. - Primitive { - /// The primitive type. - ty: Type, + /// A statically-sized untyped blob of memory. + Memory { + /// Accessible size. + size: u64, }, /// A type with no size. @@ -80,11 +129,8 @@ impl std::fmt::Display for MemoryTypeData { write!(f, " }}")?; Ok(()) } - Self::StaticArray { element, length } => { - write!(f, "static_array {element} * {length:#x}") - } - Self::Primitive { ty } => { - write!(f, "primitive {ty}") + Self::Memory { size } => { + write!(f, "memory {size:#x}") } Self::Empty => { write!(f, "empty") @@ -99,12 +145,27 @@ impl std::fmt::Display for MemoryTypeData { pub struct MemoryTypeField { /// The offset of this field in the memory type. pub offset: u64, - /// The type of the value in this field. Accesses to the field - /// must use this type (i.e., cannot bitcast/type-pun in memory). - pub ty: MemoryType, + /// The primitive type of the value in this field. Accesses to the + /// field must use this type (i.e., cannot bitcast/type-pun in + /// memory). + pub ty: Type, /// A proof-carrying-code fact about this value, if any. pub fact: Option, /// Whether this field is read-only, i.e., stores should be /// disallowed. pub readonly: bool, } + +impl MemoryTypeData { + /// Provide the static size of this type, if known. + /// + /// (The size may not be known for dynamically-sized arrays or + /// memories, when those memtype kinds are added.) + pub fn static_size(&self) -> Option { + match self { + Self::Struct { size, .. } => Some(*size), + Self::Memory { size } => Some(*size), + Self::Empty => Some(0), + } + } +} diff --git a/cranelift/codegen/src/ir/pcc.rs b/cranelift/codegen/src/ir/pcc.rs index e50134d91c52..fc98bab18456 100644 --- a/cranelift/codegen/src/ir/pcc.rs +++ b/cranelift/codegen/src/ir/pcc.rs @@ -59,6 +59,7 @@ use crate::ir; use crate::isa::TargetIsa; use crate::machinst::{InsnIndex, LowerBackend, MachInst, VCode}; +use cranelift_entity::PrimaryMap; use std::fmt; #[cfg(feature = "enable-serde")] @@ -111,13 +112,6 @@ pub enum Fact { max: u64, }, - /// A pointer value to a memory region that can be accessed. - PointsTo { - /// A description of the memory region this pointer is allowed - /// to access (size, etc). - region: MemoryRegion, - }, - /// A pointer to a memory type. Mem { /// The memory type. @@ -127,24 +121,10 @@ pub enum Fact { }, } -/// A memory region that can be accessed. This description is attached -/// to a particular base pointer. -#[derive(Clone, Debug, Hash, PartialEq, Eq)] -#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub struct MemoryRegion { - /// Includes both the actual memory bound as well as any guard - /// pages. Inclusive, so we can represent the full range of a - /// `u64`. The range is unsigned. - pub max: u64, -} - impl fmt::Display for Fact { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Fact::ValueMax { bit_width, max } => write!(f, "max({}, {:#x})", bit_width, max), - Fact::PointsTo { - region: MemoryRegion { max }, - } => write!(f, "points_to(0x{:x})", max), Fact::Mem { ty, offset } => write!(f, "mem({}, {:#x})", ty, offset), } } @@ -167,20 +147,29 @@ macro_rules! bail { /// A "context" in which we can evaluate and derive facts. This /// context carries environment/global properties, such as the machine /// pointer width. -#[derive(Debug)] -pub struct FactContext { +pub struct FactContext<'a> { + memory_types: &'a PrimaryMap, pointer_width: u16, } -impl FactContext { +impl<'a> FactContext<'a> { /// Create a new "fact context" in which to evaluate facts. - pub fn new(pointer_width: u16) -> Self { - FactContext { pointer_width } + pub fn new( + memory_types: &'a PrimaryMap, + pointer_width: u16, + ) -> Self { + FactContext { + memory_types, + pointer_width, + } } /// Computes whether `lhs` "subsumes" (implies) `rhs`. pub fn subsumes(&self, lhs: &Fact, rhs: &Fact) -> bool { match (lhs, rhs) { + // Reflexivity. + (l, r) if l == r => true, + ( Fact::ValueMax { bit_width: bw_lhs, @@ -201,22 +190,7 @@ impl FactContext { // possible value range. bw_lhs == bw_rhs && max_lhs <= max_rhs } - ( - Fact::PointsTo { - region: MemoryRegion { max: max_lhs }, - }, - Fact::PointsTo { - region: MemoryRegion { max: max_rhs }, - }, - ) => { - // If the pointer is valid up to `max_lhs`, and - // `max_rhs` is less than or equal to `max_lhs`, then - // it is certainly valid up to `max_rhs`. - // - // In other words, we can always shrink the valid - // addressable region. - max_rhs <= max_lhs - } + _ => false, } } @@ -250,19 +224,17 @@ impl FactContext { bit_width: bw_max, max, }, - Fact::PointsTo { region }, + Fact::Mem { ty, offset }, ) | ( - Fact::PointsTo { region }, + Fact::Mem { ty, offset }, Fact::ValueMax { bit_width: bw_max, max, }, ) if *bw_max >= self.pointer_width && add_width >= *bw_max => { - let region = MemoryRegion { - max: region.max.checked_sub(*max)?, - }; - Some(Fact::PointsTo { region }) + let offset = offset.checked_add(i64::try_from(*max).ok()?)?; + Some(Fact::Mem { ty: *ty, offset }) } _ => None, @@ -343,15 +315,15 @@ impl FactContext { /// Offsets a value with a fact by a known amount. pub fn offset(&self, fact: &Fact, width: u16, offset: i64) -> Option { - // If we eventually support two-sided ranges, we can - // represent (0..n) + m -> ((0+m)..(n+m)). However, - // right now, all ranges start with zero, so any - // negative offset could underflow, and removes all - // claims of constrained range. - let offset = u64::try_from(offset).ok()?; - match fact { Fact::ValueMax { bit_width, max } if *bit_width == width => { + // If we eventually support two-sided ranges, we can + // represent (0..n) + m -> ((0+m)..(n+m)). However, + // right now, all ranges start with zero, so any + // negative offset could underflow, and removes all + // claims of constrained range. + let offset = u64::try_from(offset).ok()?; + let max = match max.checked_add(offset) { Some(max) => max, None => { @@ -367,13 +339,12 @@ impl FactContext { max, }) } - Fact::PointsTo { - region: MemoryRegion { max }, + Fact::Mem { + ty, + offset: mem_offset, } => { - let max = max.checked_sub(offset)?; - Some(Fact::PointsTo { - region: MemoryRegion { max }, - }) + let offset = mem_offset.checked_sub(offset)?; + Some(Fact::Mem { ty: *ty, offset }) } _ => None, } @@ -383,9 +354,20 @@ impl FactContext { /// a memory access of the given size, is valid. pub fn check_address(&self, fact: &Fact, size: u32) -> PccResult<()> { match fact { - Fact::PointsTo { - region: MemoryRegion { max }, - } => ensure!(u64::from(size) <= *max, OutOfBounds), + Fact::Mem { ty, offset } => { + let end_offset: i64 = offset + .checked_add(i64::from(size)) + .ok_or(PccError::Overflow)?; + let end_offset: u64 = + u64::try_from(end_offset).map_err(|_| PccError::OutOfBounds)?; + match &self.memory_types[*ty] { + ir::MemoryTypeData::Struct { size, .. } + | ir::MemoryTypeData::Memory { size } => { + ensure!(end_offset <= *size, OutOfBounds) + } + ir::MemoryTypeData::Empty => bail!(OutOfBounds), + } + } _ => bail!(OutOfBounds), } @@ -405,7 +387,7 @@ fn max_value_for_width(bits: u16) -> u64 { /// Top-level entry point after compilation: this checks the facts in /// VCode. pub fn check_vcode_facts( - _f: &ir::Function, + f: &ir::Function, vcode: &VCode, backend: &B, ) -> PccResult<()> { @@ -417,7 +399,7 @@ pub fn check_vcode_facts( // verify. We'll call into the backend to validate this // fact with respect to the instruction and the input // facts. - backend.check_fact(&vcode[inst], vcode)?; + backend.check_fact(&vcode[inst], f, vcode)?; } } Ok(()) diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 0ab57ec664b2..f3bbfd1e1256 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -10,7 +10,7 @@ use crate::ir::condcodes::{FloatCC, IntCC}; use crate::ir::pcc::PccResult; use crate::ir::Inst as IRInst; -use crate::ir::{Opcode, Value}; +use crate::ir::{Function, Opcode, Value}; use crate::isa::aarch64::inst::*; use crate::isa::aarch64::pcc; use crate::isa::aarch64::AArch64Backend; @@ -131,7 +131,12 @@ impl LowerBackend for AArch64Backend { Some(regs::pinned_reg()) } - fn check_fact(&self, inst: &Self::MInst, vcode: &VCode) -> PccResult<()> { - pcc::check(inst, vcode) + fn check_fact( + &self, + inst: &Self::MInst, + function: &Function, + vcode: &VCode, + ) -> PccResult<()> { + pcc::check(inst, function, vcode) } } diff --git a/cranelift/codegen/src/isa/aarch64/pcc.rs b/cranelift/codegen/src/isa/aarch64/pcc.rs index 07f52e92dbbe..81d330884417 100644 --- a/cranelift/codegen/src/isa/aarch64/pcc.rs +++ b/cranelift/codegen/src/isa/aarch64/pcc.rs @@ -1,7 +1,7 @@ //! Proof-carrying code checking for AArch64 VCode. use crate::ir::pcc::*; -use crate::ir::MemFlags; +use crate::ir::{Function, MemFlags}; use crate::isa::aarch64::inst::args::{PairAMode, ShiftOp}; use crate::isa::aarch64::inst::ALUOp; use crate::isa::aarch64::inst::Inst; @@ -68,9 +68,9 @@ fn check_output PccResult>( } } -pub(crate) fn check(inst: &Inst, vcode: &VCode) -> PccResult<()> { +pub(crate) fn check(inst: &Inst, function: &Function, vcode: &VCode) -> PccResult<()> { // Create a new fact context with the machine's pointer width. - let ctx = FactContext::new(64); + let ctx = FactContext::new(&function.memory_types, 64); trace!("Checking facts on inst: {:?}", inst); diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index 0df5e7e73666..4b360fd966ff 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -149,7 +149,12 @@ pub trait LowerBackend { /// Check any facts about an instruction, given VCode with facts /// on VRegs. - fn check_fact(&self, _inst: &Self::MInst, _vcode: &VCode) -> PccResult<()> { + fn check_fact( + &self, + _inst: &Self::MInst, + _function: &Function, + _vcode: &VCode, + ) -> PccResult<()> { Err(PccError::UnimplementedBackend) } } diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index 5eaecaa190fa..608d0ac050ed 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -47,6 +47,12 @@ //! - Detect cycles in global values. //! - Detect use of 'vmctx' global value when no corresponding parameter is defined. //! +//! Memory types +//! +//! - Ensure that struct fields are in offset order. +//! - Ensure that struct fields are completely within the overall +//! struct size, and do not overlap. +//! //! TODO: //! Ad hoc checking //! @@ -66,7 +72,8 @@ use crate::ir::instructions::{CallInfo, InstructionFormat, ResolvedConstraint}; use crate::ir::{self, ArgumentExtension}; use crate::ir::{ types, ArgumentPurpose, Block, Constant, DynamicStackSlot, FuncRef, Function, GlobalValue, - Inst, JumpTable, MemFlags, Opcode, SigRef, StackSlot, Type, Value, ValueDef, ValueList, + Inst, JumpTable, MemFlags, MemoryTypeData, Opcode, SigRef, StackSlot, Type, Value, ValueDef, + ValueList, }; use crate::isa::TargetIsa; use crate::iterators::IteratorExtras; @@ -403,6 +410,53 @@ impl<'a> Verifier<'a> { Ok(()) } + fn verify_memory_types(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { + // Verify that all fields are statically-sized and lie within + // the struct, do not overlap, and are in offset order + for (mt, mt_data) in &self.func.memory_types { + match mt_data { + MemoryTypeData::Struct { size, fields } => { + let mut last_offset = 0; + for field in fields { + if field.offset < last_offset { + errors.report(( + mt, + format!( + "memory type {} has a field at offset {}, which is out-of-order", + mt, field.offset + ), + )); + } + last_offset = match field.offset.checked_add(u64::from(field.ty.bytes())) { + Some(o) => o, + None => { + errors.report(( + mt, + format!( + "memory type {} has a field at offset {} of size {}; offset plus size overflows a u64", + mt, field.offset, field.ty.bytes()), + )); + break; + } + }; + + if last_offset > *size { + errors.report(( + mt, + format!( + "memory type {} has a field at offset {} of size {} that overflows the struct size {}", + mt, field.offset, field.ty.bytes(), *size), + )); + } + } + } + _ => {} + } + } + + Ok(()) + } + fn verify_tables(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { if let Some(isa) = self.isa { for (table, table_data) in &self.func.tables { @@ -1754,6 +1808,7 @@ impl<'a> Verifier<'a> { pub fn run(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { self.verify_global_values(errors)?; + self.verify_memory_types(errors)?; self.verify_tables(errors)?; self.typecheck_entry_block_params(errors)?; self.check_entry_not_cold(errors)?; diff --git a/cranelift/filetests/filetests/pcc/fail/load.clif b/cranelift/filetests/filetests/pcc/fail/load.clif index 17a189fa05fc..a7b916addbf0 100644 --- a/cranelift/filetests/filetests/pcc/fail/load.clif +++ b/cranelift/filetests/filetests/pcc/fail/load.clif @@ -3,57 +3,62 @@ set enable_pcc=true target aarch64 function %f0(i64, i32) -> i64 { -block0(v0 ! points_to(0x1000): i64, v1 ! max(32, 0x1000): i32): + mt0 = memory 0x1000 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(32, 0x1000): i32): v2 ! max(64, 0x100) = uextend.i64 v1 - v3 ! points_to(0x8) = iadd.i64 v0, v2 + v3 ! mem(mt0, 0x100) = iadd.i64 v0, v2 v4 = load.i64 checked v3 return v4 } ;; Insufficient guard region: the 8-byte load could go off the end. function %f1(i64, i32) -> i64 { -block0(v0 ! points_to(0x1_0000_0000): i64, v1 ! max(32, 0xffff_ffff): i32): + mt0 = memory 0x1_0000_0000 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(32, 0xffff_ffff): i32): v2 ! max(64, 0xffff_ffff) = uextend.i64 v1 - ;; 8 bytes of valid range is all we actually need, so let's only claim that. - v3 ! points_to(0x8) = iadd.i64 v0, v2 + v3 ! mem(mt0, 0xffff_ffff) = iadd.i64 v0, v2 v4 = load.i64 checked v3 return v4 } ;; RegRegExtend mode on aarch64. function %f2(i64, i32) -> i8 { -block0(v0 ! points_to(0x1000): i64, v1 ! max(32, 0x1000): i32): + mt0 = memory 0x1000 +block0(v0 ! mem(mt0, 0x1000): i64, v1 ! max(32, 0x1000): i32): v2 ! max(64, 0x100) = uextend.i64 v1 - v3 ! points_to(0x1) = iadd.i64 v0, v2 + v3 ! mem(mt0, 0x100) = iadd.i64 v0, v2 v4 = load.i8 checked v3 return v4 } ;; RegReg mode on aarch64. function %f3(i64, i64) -> i8 { -block0(v0 ! points_to(0x100): i64, v1 ! max(64, 0xfff): i64): - v2 ! points_to(0x1) = iadd.i64 v0, v1 + mt0 = memory 0x100 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(64, 0xfff): i64): + v2 ! mem(mt0, 0xfff) = iadd.i64 v0, v1 v3 = load.i8 checked v2 return v3 } ;; RegScaledExtended mode on aarch64. function %f4(i64, i32) -> i64 { -block0(v0 ! points_to(0x7000): i64, v1 ! max(32, 0xfff): i32): + mt0 = memory 0x7000 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(32, 0xfff): i32): v2 ! max(64, 0xfff) = uextend.i64 v1 v3 = iconst.i32 3 v4 ! max(64, 0x7ff8) = ishl.i64 v2, v3 - v5 ! points_to(0x8) = iadd.i64 v0, v4 + v5 ! mem(mt0, 0x7ff8) = iadd.i64 v0, v4 v6 = load.i64 checked v5 return v6 } ;; RegScaled mode on aarch64. function %f5(i64, i64) -> i64 { -block0(v0 ! points_to(0x7000): i64, v1 ! max(64, 0xfff): i64): + mt0 = memory 0x7000 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(64, 0xfff): i64): v2 = iconst.i32 3 v3 ! max(64, 0x7ff8) = ishl.i64 v1, v2 - v4 ! points_to(0x8) = iadd.i64 v0, v3 + v4 ! mem(mt0, 0x7ff8) = iadd.i64 v0, v3 v5 = load.i64 checked v4 return v5 } diff --git a/cranelift/filetests/filetests/pcc/fail/memtypes.clif b/cranelift/filetests/filetests/pcc/fail/memtypes.clif new file mode 100644 index 000000000000..2a9c87f1ba27 --- /dev/null +++ b/cranelift/filetests/filetests/pcc/fail/memtypes.clif @@ -0,0 +1,29 @@ +test verifier +set enable_pcc=true +target aarch64 + +function %f0(i64) -> i32 { + mt0 = struct 8 { 4: i32, 0: i32 } ; error: out-of-order + +block0(v0 ! mem(mt0, 0): i64): + v1 = load.i32 v0+0 + return v1 +} + +function %f0(i64) -> i32 { + ;; out-of-bounds field: + mt0 = struct 8 { 0: i32, 6: i32 } ; error: field at offset 6 of size 4 that overflows + +block0(v0 ! mem(mt0, 0): i64): + v1 = load.i32 v0+0 + return v1 +} + +function %f0(i64) -> i32 { + ;; overflowing offset + field size: + mt0 = struct 8 { 0: i32, 0xffff_ffff_ffff_ffff: i32 } ; error: field at offset 18446744073709551615 of size 4; offset plus size overflows a u64 + +block0(v0 ! mem(mt0, 0): i64): + v1 = load.i32 v0+0 + return v1 +} diff --git a/cranelift/filetests/filetests/pcc/fail/simple.clif b/cranelift/filetests/filetests/pcc/fail/simple.clif index 5aac4f8601f3..8ba0d6ee9a17 100644 --- a/cranelift/filetests/filetests/pcc/fail/simple.clif +++ b/cranelift/filetests/filetests/pcc/fail/simple.clif @@ -2,14 +2,25 @@ test compile expect-fail set enable_pcc=true target aarch64 -;; The `points_to` annotation is not large enough here -- the -;; 4GiB-range 32-bit offset could go out of range. PCC should catch -;; this. +;; The `memory` memtype is not large enough here -- the 4GiB-range +;; 32-bit offset could go out of range. PCC should catch this. function %simple1(i64 vmctx, i32) -> i8 { -block0(v0 ! points_to(0x8000_0000): i64, v1 ! max(32, 0xffff_ffff): i32): + mt0 = memory 0x8000_0000 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(32, 0xffff_ffff): i32): v2 ! max(64, 0xffff_ffff) = uextend.i64 v1 - v3 ! points_to(0x1) = iadd.i64 v0, v2 + v3 ! mem(mt0, 0xffff_ffff) = iadd.i64 v0, v2 + v4 = load.i8 checked v3 + return v4 +} + +;; Check that the offset in the `mem` is validated too. + +function %simple1(i64 vmctx, i32) -> i8 { + mt0 = memory 0x8000_0000 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(32, 0xffff_ffff): i32): + v2 ! max(64, 0xffff_ffff) = uextend.i64 v1 + v3 ! mem(mt0, 0) = iadd.i64 v0, v2 v4 = load.i8 checked v3 return v4 } diff --git a/cranelift/filetests/filetests/pcc/succeed/load.clif b/cranelift/filetests/filetests/pcc/succeed/load.clif index e863cb059ffa..6766c7a28b8c 100644 --- a/cranelift/filetests/filetests/pcc/succeed/load.clif +++ b/cranelift/filetests/filetests/pcc/succeed/load.clif @@ -3,58 +3,62 @@ set enable_pcc=true target aarch64 function %f0(i64, i32) -> i64 { -block0(v0 ! points_to(0x1_0000_0000): i64, v1 ! max(32, 0x100): i32): + mt0 = memory 0x1_0000_0000 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(32, 0x100): i32): v2 ! max(64, 0x100) = uextend.i64 v1 - ;; 8 bytes of valid range is all we actually need, so let's only claim that. - v3 ! points_to(0x8) = iadd.i64 v0, v2 + v3 ! mem(mt0, 8) = iadd.i64 v0, v2 v4 = load.i64 checked v3 return v4 } -;; Note the guard region of 8 bytes -- just enough for the below! function %f1(i64, i32) -> i64 { -block0(v0 ! points_to(0x1_0000_0008): i64, v1 ! max(32, 0xffff_ffff): i32): + ;; Note the guard region of 8 bytes -- just enough for the below! + mt0 = memory 0x1_0000_0008 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(32, 0xffff_ffff): i32): v2 ! max(64, 0xffff_ffff) = uextend.i64 v1 - ;; 8 bytes of valid range is all we actually need, so let's only claim that. - v3 ! points_to(0x8) = iadd.i64 v0, v2 + v3 ! mem(mt0, 0xffff_ffff) = iadd.i64 v0, v2 v4 = load.i64 checked v3 return v4 } ;; RegRegExtend mode on aarch64. function %f2(i64, i32) -> i8 { -block0(v0 ! points_to(0x1000): i64, v1 ! max(32, 0xfff): i32): + mt0 = memory 0x1000 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(32, 0xfff): i32): v2 ! max(64, 0x100) = uextend.i64 v1 - v3 ! points_to(0x1) = iadd.i64 v0, v2 + v3 ! mem(mt0, 0x100) = iadd.i64 v0, v2 v4 = load.i8 checked v3 return v4 } ;; RegReg mode on aarch64. function %f3(i64, i64) -> i8 { -block0(v0 ! points_to(0x1000): i64, v1 ! max(64, 0xfff): i64): - v2 ! points_to(0x1) = iadd.i64 v0, v1 + mt0 = memory 0x1000 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(64, 0xfff): i64): + v2 ! mem(mt0, 0xfff) = iadd.i64 v0, v1 v3 = load.i8 checked v2 return v3 } ;; RegScaledExtended mode on aarch64. function %f4(i64, i32) -> i64 { -block0(v0 ! points_to(0x8000): i64, v1 ! max(32, 0xfff): i32): + mt0 = memory 0x8000 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(32, 0xfff): i32): v2 ! max(64, 0xfff) = uextend.i64 v1 v3 = iconst.i32 3 v4 ! max(64, 0x7ff8) = ishl.i64 v2, v3 - v5 ! points_to(0x8) = iadd.i64 v0, v4 + v5 ! mem(mt0, 0x7ff8) = iadd.i64 v0, v4 v6 = load.i64 checked v5 return v6 } ;; RegScaled mode on aarch64. function %f5(i64, i64) -> i64 { -block0(v0 ! points_to(0x8000): i64, v1 ! max(64, 0xfff): i64): + mt0 = memory 0x8000 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(64, 0xfff): i64): v2 = iconst.i32 3 v3 ! max(64, 0x7ff8) = ishl.i64 v1, v2 - v4 ! points_to(0x8) = iadd.i64 v0, v3 + v4 ! mem(mt0, 0x7ff8) = iadd.i64 v0, v3 v5 = load.i64 checked v4 return v5 } diff --git a/cranelift/filetests/filetests/pcc/succeed/memtypes.clif b/cranelift/filetests/filetests/pcc/succeed/memtypes.clif index f64b54e9935b..9849ea00eb7c 100644 --- a/cranelift/filetests/filetests/pcc/succeed/memtypes.clif +++ b/cranelift/filetests/filetests/pcc/succeed/memtypes.clif @@ -3,8 +3,7 @@ set enable_pcc=true target aarch64 function %f0(i64) -> i32 { - mt0 = struct 8 { 0: mt1, 4: mt1 readonly } - mt1 = primitive i32 + mt0 = struct 8 { 0: i32, 4: i32 readonly } block0(v0 ! mem(mt0, 0): i64): ;; v0 points to an instance of mt0, at offset 0 v1 = load.i32 v0+0 @@ -14,14 +13,8 @@ block0(v0 ! mem(mt0, 0): i64): ;; v0 points to an instance of mt0, at offset 0 } function %f1(i64) -> i32 { - ;; TODO: validate that fields don't overlap, and are within the - ;; overall size. Make note of future thoughts for enums, tag - ;; discriminants, etc. - mt0 = struct 8 { 0: mt3 readonly ! mem(mt1, 0) } - ;; `u8` here could be a CLIF type or a memtype - mt1 = static_array mt2 * 0x1_0000_0000 - mt2 = primitive i8 - mt3 = primitive i64 + mt0 = struct 8 { 0: i64 readonly ! mem(mt1, 0) } + mt1 = memory 0x1_0000_0000 block0(v0 ! mem(mt0, 0): i64): v1 ! mem(mt1, 0) = load.i64 v0 diff --git a/cranelift/filetests/filetests/pcc/succeed/simple.clif b/cranelift/filetests/filetests/pcc/succeed/simple.clif index cf1793e5a583..241124e678a5 100644 --- a/cranelift/filetests/filetests/pcc/succeed/simple.clif +++ b/cranelift/filetests/filetests/pcc/succeed/simple.clif @@ -3,9 +3,10 @@ set enable_pcc=true target aarch64 function %simple1(i64 vmctx, i32) -> i8 { -block0(v0 ! points_to(0x1_0000_0000): i64, v1 ! max(32, 0xffff_ffff): i32): + mt0 = memory 0x1_0000_0000 +block0(v0 ! mem(mt0, 0): i64, v1 ! max(32, 0xffff_ffff): i32): v2 ! max(64, 0xffff_ffff) = uextend.i64 v1 - v3 ! points_to(0x1) = iadd.i64 v0, v2 + v3 ! mem(mt0, 0xffff_ffff) = iadd.i64 v0, v2 v4 = load.i8 checked v3 return v4 } diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index ee0271800902..eabb7dff4027 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -12,7 +12,7 @@ use cranelift_codegen::entity::{EntityRef, PrimaryMap}; use cranelift_codegen::ir::entities::{AnyEntity, DynamicType, MemoryType}; use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64}; use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, VariableArgs}; -use cranelift_codegen::ir::pcc::{Fact, MemoryRegion}; +use cranelift_codegen::ir::pcc::Fact; use cranelift_codegen::ir::types; use cranelift_codegen::ir::types::INVALID; use cranelift_codegen::ir::types::*; @@ -1659,7 +1659,7 @@ impl<'a> Parser<'a> { // Parse one field definition in a memory-type struct decl. // - // memory-type-field ::= offset ":" memory-type ["readonly"] [ "!" fact ] + // memory-type-field ::= offset ":" type ["readonly"] [ "!" fact ] // offset ::= uimm64 fn parse_memory_type_field(&mut self) -> ParseResult { let offset: u64 = self @@ -1671,8 +1671,7 @@ impl<'a> Parser<'a> { Token::Colon, "expected colon after field offset in struct memory-type declaration", )?; - let ty = - self.match_mt("expected memory type for field in struct memory-type declaration")?; + let ty = self.match_type("expected type for field in struct memory-type declaration")?; let readonly = if self.token() == Some(Token::Identifier("readonly")) { self.consume(); true @@ -1698,10 +1697,9 @@ impl<'a> Parser<'a> { // // memory-type-decl ::= MemoryType(mt) "=" memory-type-desc // memory-type-desc ::= "struct" size "{" memory-type-field,* "}" - // | "static_array" memory-type "*" element-count - // | "primitive" type + // | "memory" size // | "empty" - // element-count ::= uimm64 + // size ::= uimm64 fn parse_memory_type_decl(&mut self) -> ParseResult<(MemoryType, MemoryTypeData)> { let mt = self.match_mt("expected memory type number: mt«n»")?; self.match_token(Token::Equal, "expected '=' in memory type declaration")?; @@ -1725,26 +1723,12 @@ impl<'a> Parser<'a> { Token::RBrace, "expected closing brace after struct fields in struct memory-type declaration", )?; - // Ensure fields are sorted ascending by offset. - // TODO: check for field overlap and sorted field - // order in the CLIF validator. - fields.sort_by_key(|f| f.offset); MemoryTypeData::Struct { size, fields } } - Some(Token::Identifier("static_array")) => { + Some(Token::Identifier("memory")) => { self.consume(); - let element = self.match_mt( - "expected memory type for element in array memory-type declaration", - )?; - self.match_token(Token::Multiply, "expected `*` to separate element type and count in static array memory-type declaration")?; - let length: u64 = self.match_uimm64("expected u64 constant value for element count in array memory-type declaration")?.into(); - MemoryTypeData::StaticArray { element, length } - } - Some(Token::Identifier("primitive")) => { - self.consume(); - let ty = self - .match_type("expected primitive type in primitive memory-type declaration")?; - MemoryTypeData::Primitive { ty } + let size: u64 = self.match_uimm64("expected u64 constant value for size in static-memory memory-type declaration")?.into(); + MemoryTypeData::Memory { size } } Some(Token::Identifier("empty")) => { self.consume(); @@ -2164,7 +2148,6 @@ impl<'a> Parser<'a> { // Parse a "fact" for proof-carrying code, attached to a value. // // fact ::= "max" "(" bit-width "," max-value ")" - // | "points_to" "(" valid-range ")" // | "mem" "(" memory-type "," mt-offset ")" // bit-width ::= uimm64 // max-value ::= uimm64 @@ -2200,15 +2183,6 @@ impl<'a> Parser<'a> { max: max.into(), }) } - Some(Token::Identifier("points_to")) => { - self.consume(); - self.match_token(Token::LPar, "expected a `(`")?; - let max = self.match_uimm64("expected a max offset for `points_to` fact")?; - self.match_token(Token::RPar, "expected a `)`")?; - Ok(Fact::PointsTo { - region: MemoryRegion { max: max.into() }, - }) - } Some(Token::Identifier("mem")) => { self.consume(); self.match_token(Token::LPar, "expected a `(`")?; @@ -2223,7 +2197,7 @@ impl<'a> Parser<'a> { self.match_token(Token::RPar, "expected a `)`")?; Ok(Fact::Mem { ty, offset }) } - _ => Err(self.error("expected a `max` or `points_to` fact")), + _ => Err(self.error("expected a `max` or `mem` fact")), } }