diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs index e4e511b5e8cb..e977088731a8 100644 --- a/cranelift/codegen/meta/src/shared/settings.rs +++ b/cranelift/codegen/meta/src/shared/settings.rs @@ -63,6 +63,15 @@ pub(crate) fn define() -> SettingGroup { true, ); + settings.add_bool( + "enable_pcc", + "Enable proof-carrying code translation validation.", + r#" + This adds a proof-carrying code mode. TODO ADD MORE + "#, + false, + ); + // Note that Cranelift doesn't currently need an is_pie flag, because PIE is // just PIC where symbols can't be pre-empted, which can be expressed with the // `colocated` flag on external functions and global values. diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 2dc62ee77fd5..ca3d5fb73628 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -237,6 +237,8 @@ impl Context { /// Run the verifier on the function. /// /// Also check that the dominator tree and control flow graph are consistent with the function. + /// + /// TODO: rename to "CLIF validate" or similar. 
pub fn verify<'a, FOI: Into>>(&self, fisa: FOI) -> VerifierResult<()> { let mut errors = VerifierErrors::default(); let _ = verify_context(&self.func, &self.cfg, &self.domtree, fisa, &mut errors); diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs index 76b1b08cbeea..ca02aee99dad 100644 --- a/cranelift/codegen/src/ir/dfg.rs +++ b/cranelift/codegen/src/ir/dfg.rs @@ -5,6 +5,7 @@ use crate::ir; use crate::ir::builder::ReplaceBuilder; use crate::ir::dynamic_type::{DynamicTypeData, DynamicTypes}; use crate::ir::instructions::{CallInfo, InstructionData}; +use crate::ir::pcc::Fact; use crate::ir::{ types, Block, BlockCall, ConstantData, ConstantPool, DynamicType, ExtFuncData, FuncRef, Immediate, Inst, JumpTables, RelSourceLoc, SigRef, Signature, Type, Value, @@ -125,6 +126,9 @@ pub struct DataFlowGraph { /// Primary value table with entries for all values. values: PrimaryMap, + /// Facts: proof-carrying-code assertions about values. + pub facts: SecondaryMap>, + /// Function signature table. These signatures are referenced by indirect call instructions as /// well as the external function references. pub signatures: PrimaryMap, @@ -158,6 +162,7 @@ impl DataFlowGraph { dynamic_types: DynamicTypes::new(), value_lists: ValueListPool::new(), values: PrimaryMap::new(), + facts: SecondaryMap::new(), signatures: PrimaryMap::new(), old_signatures: SecondaryMap::new(), ext_funcs: PrimaryMap::new(), diff --git a/cranelift/codegen/src/ir/memflags.rs b/cranelift/codegen/src/ir/memflags.rs index 87165ba69117..546e11fa3703 100644 --- a/cranelift/codegen/src/ir/memflags.rs +++ b/cranelift/codegen/src/ir/memflags.rs @@ -6,10 +6,19 @@ use core::fmt; use serde_derive::{Deserialize, Serialize}; enum FlagBit { + /// Guaranteed not to trap. This may enable additional + /// optimizations to be performed. Notrap, + /// Guaranteed to use "natural alignment" for the given type. This + /// may enable better instruction selection. 
Aligned, + /// A load that reads data in memory that does not change for the + /// duration of the function's execution. This may enable + /// additional optimizations to be performed. Readonly, + /// Load multi-byte values from memory in a little-endian format. LittleEndian, + /// Load multi-byte values from memory in a big-endian format. BigEndian, /// Accesses only the "heap" part of abstract state. Used for /// alias analysis. Mutually exclusive with "table" and "vmctx". @@ -20,10 +29,15 @@ enum FlagBit { /// Accesses only the "vmctx" part of abstract state. Used for /// alias analysis. Mutually exclusive with "heap" and "table". Vmctx, + /// Check this load or store for safety when using the + /// proof-carrying-code framework. The address must have a + /// `PointsTo` fact attached with a sufficiently large valid range + /// for the accessed size. + Checked, } -const NAMES: [&str; 8] = [ - "notrap", "aligned", "readonly", "little", "big", "heap", "table", "vmctx", +const NAMES: [&str; 9] = [ + "notrap", "aligned", "readonly", "little", "big", "heap", "table", "vmctx", "checked", ]; /// Endianness of a memory access. @@ -48,7 +62,7 @@ pub enum Endianness { #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub struct MemFlags { - bits: u8, + bits: u16, } impl MemFlags { @@ -265,6 +279,32 @@ impl MemFlags { self.set_vmctx(); self } + + /// Test if the `checked` bit is set. + /// + /// Loads and stores with this flag are verified to access + /// pointers only with a validated `PointsTo` fact attached, and + /// with that fact validated, when using the proof-carrying-code + /// framework. If initial facts on program inputs are correct + /// (i.e., correctly denote the shape and types of data structures + /// in memory), and if PCC validates the compiled output, then all + /// `checked`-marked memory accesses are guaranteed (up to the + /// checker's correctness) to access valid memory. 
This can be + /// used to ensure memory safety and sandboxing. + pub fn checked(self) -> bool { + self.read(FlagBit::Checked) + } + + /// Set the `checked` bit. + pub fn set_checked(&mut self) { + self.set(FlagBit::Checked); + } + + /// Set the `checked` bit, returning new flags. + pub fn with_checked(mut self) -> Self { + self.set_checked(); + self + } } impl fmt::Display for MemFlags { diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index 986f8003722c..c5cd502200f7 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -18,6 +18,7 @@ pub(crate) mod known_symbol; pub mod layout; pub(crate) mod libcall; mod memflags; +pub mod pcc; mod progpoint; mod sourceloc; pub mod stackslot; diff --git a/cranelift/codegen/src/ir/pcc.rs b/cranelift/codegen/src/ir/pcc.rs new file mode 100644 index 000000000000..3fc2662a87b1 --- /dev/null +++ b/cranelift/codegen/src/ir/pcc.rs @@ -0,0 +1,399 @@ +//! Proof-carrying code. We attach "facts" to values and then check +//! that they remain true after compilation. +//! +//! A few key design principle of this approach are: +//! +//! - The producer of the IR provides the axioms. All "ground truth", +//! such as what memory is accessible -- is meant to come by way of +//! facts on the function arguments and global values. In some +//! sense, all we are doing here is validating the "internal +//! consistency" of the facts that are provided on values, and the +//! actions performed on those values. +//! +//! - We do not derive and forward-propagate facts eagerly. Rather, +//! the producer needs to provide breadcrumbs -- a "proof witness" +//! of sorts -- to allow the checking to complete. That means that +//! as an address is computed, or pointer chains are dereferenced, +//! each intermediate value will likely have some fact attached. +//! +//! This does create more verbose IR, but a significant positive +//! benefit is that it avoids unnecessary work: we do not build up a +//! 
knowledge base that effectively encodes the integer ranges of +//! many or most values in the program. Rather, we only check +//! specifically the memory-access sequences. In practice, each such +//! sequence is likely to be a carefully-controlled sequence of IR +//! operations from, e.g., a sandboxing compiler (such as +//! `cranelift-wasm`) so adding annotations here to communicate +//! intent (ranges, bounds-checks, and the like) is no problem. +//! +//! Facts are attached to SSA values in CLIF, and are maintained +//! through optimizations and through lowering. They are thus also +//! present on VRegs in the VCode. In theory, facts could be checked +//! at either level, though in practice it is most useful to check +//! them at the VCode level if the goal is an end-to-end verification +//! of certain properties (e.g., memory sandboxing). +//! +//! Checking facts entails visiting each instruction that defines a +//! value with a fact, and checking the result's fact against the +//! facts on arguments and the operand. For VCode, this is +//! fundamentally a question of the target ISA's semantics, so we call +//! into the `LowerBackend` for this. Note that during checking there +//! is also limited forward propagation / inference, but only within +//! an instruction: for example, an addressing mode commonly can +//! include an addition, multiplication/shift, or extend operation, +//! and there is no way to attach facts to the intermediate values +//! "inside" the instruction, so instead the backend can use +//! `FactContext::add()` and friends to forward-propagate facts. +//! +//! TODO: +//! - Propagate facts through optimization (egraph layer). +//! - Generate facts in cranelift-wasm frontend when lowering memory ops. +//! - Implement richer "points-to" facts that describe the pointed-to +//! memory, so the loaded values can also have facts. +//! - Support bounds-checking-type operations for dynamic memories and +//! tables. +//! 
- Implement checking at the CLIF level as well. +//! - Check instructions that can trap as well? + +use crate::ir; +use crate::isa::TargetIsa; +use crate::machinst::{InsnIndex, LowerBackend, MachInst, VCode}; +use std::fmt; + +#[cfg(feature = "enable-serde")] +use serde_derive::{Deserialize, Serialize}; + +/// The result of checking proof-carrying-code facts. +pub type PccResult = std::result::Result; + +/// An error or inconsistency discovered when checking proof-carrying +/// code. +#[derive(Debug, Clone)] +pub enum PccError { + /// An operation wraps around, invalidating the stated value + /// range. + Overflow, + /// An input to an operator that produces a fact-annotated value + /// does not have a fact describing it, and one is needed. + MissingFact, + /// A memory access is out of bounds. + OutOfBounds, + /// Proof-carrying-code checking is not implemented for a + /// particular compiler backend. + UnimplementedBackend, + /// Proof-carrying-code checking is not implemented for a + /// particular instruction that instruction-selection chose. This + /// is an internal compiler error. + UnimplementedInst, +} + +/// A fact on a value. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum Fact { + /// A bitslice of a value (up to a bitwidth) is less than or equal + /// to a given maximum value. + /// + /// The slicing behavior is needed because this fact can describe + /// both an SSA `Value`, whose entire value is well-defined, and a + /// `VReg` in VCode, whose bits beyond the type stored in that + /// register are don't-care (undefined). + ValueMax { + /// The bitwidth of bits we care about, from the LSB upward. + bit_width: u16, + /// The maximum value that the bitslice can take + /// (inclusive). The range is unsigned: the bits of the value + /// will be within the range `0..=max`. + max: u64, + }, + + /// A pointer value to a memory region that can be accessed. 
+ PointsTo { + /// A description of the memory region this pointer is allowed + /// to access (size, etc). + region: MemoryRegion, + }, +} + +/// A memory region that can be accessed. This description is attached +/// to a particular base pointer. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct MemoryRegion { + /// Includes both the actual memory bound as well as any guard + /// pages. Inclusive, so we can represent the full range of a + /// `u64`. The range is unsigned. + pub max: u64, +} + +impl fmt::Display for Fact { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Fact::ValueMax { bit_width, max } => write!(f, "max({}, 0x{:x})", bit_width, max), + Fact::PointsTo { + region: MemoryRegion { max }, + } => write!(f, "points_to(0x{:x})", max), + } + } +} + +macro_rules! ensure { + ( $condition:expr, $err:tt $(,)? ) => { + if !$condition { + return Err(PccError::$err); + } + }; +} + +macro_rules! bail { + ( $err:tt ) => {{ + return Err(PccError::$err); + }}; +} + +/// A "context" in which we can evaluate and derive facts. This +/// context carries environment/global properties, such as the machine +/// pointer width. +#[derive(Debug)] +pub struct FactContext { + pointer_width: u16, +} + +impl FactContext { + /// Create a new "fact context" in which to evaluate facts. + pub fn new(pointer_width: u16) -> Self { + FactContext { pointer_width } + } + + /// Computes whether `lhs` "subsumes" (implies) `rhs`. + pub fn subsumes(&self, lhs: &Fact, rhs: &Fact) -> bool { + match (lhs, rhs) { + ( + Fact::ValueMax { + bit_width: bw_lhs, + max: max_lhs, + }, + Fact::ValueMax { + bit_width: bw_rhs, + max: max_rhs, + }, + ) => { + // If the bitwidths we're claiming facts about are the + // same, and if the value is less than or equal to + // `max_lhs`, and if `max_rhs` is less than `max_lhs`, + // then it is certainly less than or equal to + // `max_rhs`. 
+ // + // In other words, we can always expand the claimed + // possible value range. + bw_lhs == bw_rhs && max_lhs <= max_rhs + } + ( + Fact::PointsTo { + region: MemoryRegion { max: max_lhs }, + }, + Fact::PointsTo { + region: MemoryRegion { max: max_rhs }, + }, + ) => { + // If the pointer is valid up to `max_lhs`, and + // `max_rhs` is less than or equal to `max_lhs`, then + // it is certainly valid up to `max_rhs`. + // + // In other words, we can always shrink the valid + // addressable region. + max_rhs <= max_lhs + } + _ => false, + } + } + + /// Computes whatever fact can be known about the sum of two + /// values with attached facts. The add is performed to the given + /// bit-width. Note that this is distinct from the machine or + /// pointer width: e.g., many 64-bit machines can still do 32-bit + /// adds that wrap at 2^32. + pub fn add(&self, lhs: &Fact, rhs: &Fact, add_width: u16) -> Option { + match (lhs, rhs) { + ( + Fact::ValueMax { + bit_width: bw_lhs, + max: lhs, + }, + Fact::ValueMax { + bit_width: bw_rhs, + max: rhs, + }, + ) if bw_lhs == bw_rhs && add_width >= *bw_lhs => { + let computed_max = lhs.checked_add(*rhs)?; + Some(Fact::ValueMax { + bit_width: *bw_lhs, + max: computed_max, + }) + } + + ( + Fact::ValueMax { + bit_width: bw_max, + max, + }, + Fact::PointsTo { region }, + ) + | ( + Fact::PointsTo { region }, + Fact::ValueMax { + bit_width: bw_max, + max, + }, + ) if *bw_max >= self.pointer_width && add_width >= *bw_max => { + let region = MemoryRegion { + max: region.max.checked_sub(*max)?, + }; + Some(Fact::PointsTo { region }) + } + + _ => None, + } + } + + /// Computes the `uextend` of a value with the given facts. 
+ pub fn uextend(&self, fact: &Fact, from_width: u16, to_width: u16) -> Option { + match fact { + // If we have a defined value in bits 0..bit_width, and we + // are filling zeroes into from_bits..to_bits, and + // bit_width and from_bits are exactly contiguous, then we + // have defined values in 0..to_bits (and because this is + // a zero-extend, the max value is the same). + Fact::ValueMax { bit_width, max } if *bit_width == from_width => Some(Fact::ValueMax { + bit_width: to_width, + max: *max, + }), + // Otherwise, we can at least claim that the value is + // within the range of `to_width`. + Fact::ValueMax { .. } => Some(Fact::ValueMax { + bit_width: to_width, + max: max_value_for_width(to_width), + }), + _ => None, + } + } + + /// Computes the `sextend` of a value with the given facts. + pub fn sextend(&self, fact: &Fact, from_width: u16, to_width: u16) -> Option { + match fact { + // If we have a defined value in bits 0..bit_width, and + // the MSB w.r.t. `from_width` is *not* set, then we can + // do the same as `uextend`. + Fact::ValueMax { bit_width, max } + if *bit_width == from_width && (*max & (1 << (*bit_width - 1)) == 0) => + { + self.uextend(fact, from_width, to_width) + } + _ => None, + } + } + + /// Scales a value with a fact by a known constant. + pub fn scale(&self, fact: &Fact, width: u16, factor: u32) -> Option { + // The minimal (loosest) fact we can claim: the value will be + // within the range implied by its bitwidth. + let minimal_fact = Fact::ValueMax { + bit_width: width, + max: max_value_for_width(width), + }; + match fact { + Fact::ValueMax { bit_width, max } if *bit_width == width => { + let max = match max.checked_mul(u64::from(factor)) { + Some(max) => max, + None => return Some(minimal_fact), + }; + if *bit_width < 64 && max > max_value_for_width(width) { + return Some(minimal_fact); + } + Some(Fact::ValueMax { bit_width: *bit_width, max }) + } + _ => None, + } + } + + /// Offsets a value with a fact by a known amount. 
+ pub fn offset(&self, fact: &Fact, width: u16, offset: i64) -> Option { + // If we eventually support two-sided ranges, we can + // represent (0..n) + m -> ((0+m)..(n+m)). However, + // right now, all ranges start with zero, so any + // negative offset could underflow, and removes all + // claims of constrained range. + let offset = u64::try_from(offset).ok()?; + + match fact { + Fact::ValueMax { bit_width, max } if *bit_width == width => { + let max = match max.checked_add(offset) { + Some(max) => max, + None => { + return Some(Fact::ValueMax { + bit_width: width, + max: max_value_for_width(width), + }) + } + }; + + Some(Fact::ValueMax { + bit_width: *bit_width, + max, + }) + } + Fact::PointsTo { + region: MemoryRegion { max }, + } => { + let max = max.checked_sub(offset)?; + Some(Fact::PointsTo { + region: MemoryRegion { max }, + }) + } + _ => None, + } + } + + /// Check that accessing memory via a pointer with this fact, with + /// a memory access of the given size, is valid. + pub fn check_address(&self, fact: &Fact, size: u32) -> PccResult<()> { + match fact { + Fact::PointsTo { + region: MemoryRegion { max }, + } => ensure!(u64::from(size) <= *max, OutOfBounds), + _ => bail!(OutOfBounds), + } + + Ok(()) + } +} + +fn max_value_for_width(bits: u16) -> u64 { + assert!(bits <= 64); + if bits == 64 { + u64::MAX + } else { + (1u64 << bits) - 1 + } +} + +/// Top-level entry point after compilation: this checks the facts in +/// VCode. +pub fn check_vcode_facts( + _f: &ir::Function, + vcode: &VCode, + backend: &B, +) -> PccResult<()> { + for inst in 0..vcode.num_insts() { + let inst = InsnIndex::new(inst); + if vcode.inst_defines_facts(inst) || vcode[inst].is_mem_access() { + // This instruction defines a register with a new fact, or + // has some side-effect we want to be careful to + // verify. We'll call into the backend to validate this + // fact with respect to the instruction and the input + // facts. 
+ backend.check_fact(&vcode[inst], vcode)?; + } + } + Ok(()) +} diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index e916d9e72359..aaed2245500e 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1045,6 +1045,34 @@ impl MachInst for Inst { } } + fn is_mem_access(&self) -> bool { + match self { + &Inst::ULoad8 { .. } + | &Inst::SLoad8 { .. } + | &Inst::ULoad16 { .. } + | &Inst::SLoad16 { .. } + | &Inst::ULoad32 { .. } + | &Inst::SLoad32 { .. } + | &Inst::ULoad64 { .. } + | &Inst::LoadP64 { .. } + | &Inst::FpuLoad32 { .. } + | &Inst::FpuLoad64 { .. } + | &Inst::FpuLoad128 { .. } + | &Inst::FpuLoadP64 { .. } + | &Inst::FpuLoadP128 { .. } + | &Inst::Store8 { .. } + | &Inst::Store16 { .. } + | &Inst::Store32 { .. } + | &Inst::Store64 { .. } + | &Inst::StoreP64 { .. } + | &Inst::FpuStore32 { .. } + | &Inst::FpuStore64 { .. } + | &Inst::FpuStore128 { .. } => true, + // TODO: verify this carefully + _ => false, + } + } + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { let bits = ty.bits(); diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 02c841d1b2f9..0ab57ec664b2 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -8,12 +8,13 @@ //! - Floating-point immediates (FIMM instruction). 
use crate::ir::condcodes::{FloatCC, IntCC}; +use crate::ir::pcc::PccResult; use crate::ir::Inst as IRInst; use crate::ir::{Opcode, Value}; use crate::isa::aarch64::inst::*; +use crate::isa::aarch64::pcc; use crate::isa::aarch64::AArch64Backend; use crate::machinst::lower::*; -use crate::machinst::Reg; use crate::machinst::*; pub mod isle; @@ -129,4 +130,8 @@ impl LowerBackend for AArch64Backend { fn maybe_pinned_reg(&self) -> Option { Some(regs::pinned_reg()) } + + fn check_fact(&self, inst: &Self::MInst, vcode: &VCode) -> PccResult<()> { + pcc::check(inst, vcode) + } } diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index ec724abbf444..59541d9d1517 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -21,6 +21,7 @@ use target_lexicon::{Aarch64Architecture, Architecture, OperatingSystem, Triple} mod abi; pub mod inst; mod lower; +mod pcc; pub mod settings; use self::inst::EmitInfo; diff --git a/cranelift/codegen/src/isa/aarch64/pcc.rs b/cranelift/codegen/src/isa/aarch64/pcc.rs new file mode 100644 index 000000000000..d6427a8c1787 --- /dev/null +++ b/cranelift/codegen/src/isa/aarch64/pcc.rs @@ -0,0 +1,193 @@ +//! Proof-carrying code checking for AArch64 VCode. + +use crate::ir::pcc::*; +use crate::ir::MemFlags; +use crate::isa::aarch64::inst::args::PairAMode; +use crate::isa::aarch64::inst::Inst; +use crate::isa::aarch64::inst::{AMode, ExtendOp}; +use crate::machinst::Reg; +use crate::machinst::VCode; +use crate::trace; + +pub(crate) fn check(inst: &Inst, vcode: &VCode) -> PccResult<()> { + // Create a new fact context with the machine's pointer width. + let ctx = FactContext::new(64); + + trace!("Checking facts on inst: {:?}", inst); + + match inst { + Inst::Args { .. } => { + // Defs on the args have "axiomatic facts": we trust the + // ABI code to pass through the values unharmed, so the + // facts given to us in the CLIF should still be true. 
+ Ok(()) + } + Inst::ULoad8 { mem, flags, .. } | Inst::SLoad8 { mem, flags, .. } => { + check_addr(&ctx, *flags, mem, vcode, 1) + } + Inst::ULoad16 { mem, flags, .. } | Inst::SLoad16 { mem, flags, .. } => { + check_addr(&ctx, *flags, mem, vcode, 2) + } + Inst::ULoad32 { mem, flags, .. } | Inst::SLoad32 { mem, flags, .. } => { + check_addr(&ctx, *flags, mem, vcode, 4) + } + Inst::ULoad64 { mem, flags, .. } => check_addr(&ctx, *flags, mem, vcode, 8), + Inst::FpuLoad32 { mem, flags, .. } => check_addr(&ctx, *flags, mem, vcode, 4), + Inst::FpuLoad64 { mem, flags, .. } => check_addr(&ctx, *flags, mem, vcode, 8), + Inst::FpuLoad128 { mem, flags, .. } => check_addr(&ctx, *flags, mem, vcode, 16), + Inst::LoadP64 { mem, flags, .. } => check_addr_pair(&ctx, *flags, mem, vcode, 16), + Inst::FpuLoadP64 { mem, flags, .. } => check_addr_pair(&ctx, *flags, mem, vcode, 16), + Inst::FpuLoadP128 { mem, flags, .. } => check_addr_pair(&ctx, *flags, mem, vcode, 32), + Inst::VecLoadReplicate { rn, flags, .. } => check_scalar_addr(&ctx, *flags, *rn, vcode, 16), + Inst::LoadAcquire { + access_ty, + rn, + flags, + .. + } => check_scalar_addr(&ctx, *flags, *rn, vcode, access_ty.bytes() as u8), + + Inst::Store8 { mem, flags, .. } => check_addr(&ctx, *flags, mem, vcode, 1), + Inst::Store16 { mem, flags, .. } => check_addr(&ctx, *flags, mem, vcode, 2), + Inst::Store32 { mem, flags, .. } => check_addr(&ctx, *flags, mem, vcode, 4), + Inst::Store64 { mem, flags, .. } => check_addr(&ctx, *flags, mem, vcode, 8), + Inst::FpuStore32 { mem, flags, .. } => check_addr(&ctx, *flags, mem, vcode, 4), + Inst::FpuStore64 { mem, flags, .. } => check_addr(&ctx, *flags, mem, vcode, 8), + Inst::FpuStore128 { mem, flags, .. } => check_addr(&ctx, *flags, mem, vcode, 16), + Inst::StoreP64 { mem, flags, .. } => check_addr_pair(&ctx, *flags, mem, vcode, 16), + Inst::FpuStoreP64 { mem, flags, .. } => check_addr_pair(&ctx, *flags, mem, vcode, 16), + Inst::FpuStoreP128 { mem, flags, .. 
} => check_addr_pair(&ctx, *flags, mem, vcode, 32), + Inst::StoreRelease { + access_ty, + rn, + flags, + .. + } => check_scalar_addr(&ctx, *flags, *rn, vcode, access_ty.bytes() as u8), + + i => { + panic!("Fact on unknown inst: {:?}", i); + } + } +} + +fn amode_extend(ctx: &FactContext, value: &Fact, mode: ExtendOp) -> Option { + match mode { + ExtendOp::UXTB => ctx.uextend(value, 8, 64), + ExtendOp::UXTH => ctx.uextend(value, 16, 64), + ExtendOp::UXTW => ctx.uextend(value, 32, 64), + ExtendOp::UXTX => Some(value.clone()), + ExtendOp::SXTB => ctx.sextend(value, 8, 64), + ExtendOp::SXTH => ctx.sextend(value, 16, 64), + ExtendOp::SXTW => ctx.sextend(value, 32, 64), + ExtendOp::SXTX => None, + } +} + +fn check_addr( + ctx: &FactContext, + flags: MemFlags, + addr: &AMode, + vcode: &VCode, + size: u8, +) -> PccResult<()> { + if !flags.checked() { + return Ok(()); + } + + match addr { + &AMode::RegReg { rn, rm } => { + let rn = vcode.vreg_fact(rn.into()).ok_or(PccError::MissingFact)?; + let rm = vcode.vreg_fact(rm.into()).ok_or(PccError::MissingFact)?; + let sum = ctx.add(&rn, &rm, 64).ok_or(PccError::MissingFact)?; + ctx.check_address(&sum, size as u32) + } + &AMode::RegScaled { rn, rm, ty } => { + let rn = vcode.vreg_fact(rn.into()).ok_or(PccError::MissingFact)?; + let rm = vcode.vreg_fact(rm.into()).ok_or(PccError::MissingFact)?; + let rm_scaled = ctx.scale(&rm, 64, ty.bytes()).ok_or(PccError::Overflow)?; + let sum = ctx.add(&rn, &rm_scaled, 64).ok_or(PccError::MissingFact)?; + ctx.check_address(&sum, size as u32) + } + &AMode::RegScaledExtended { + rn, + rm, + ty, + extendop, + } => { + let rn = vcode.vreg_fact(rn.into()).ok_or(PccError::MissingFact)?; + let rm = vcode.vreg_fact(rm.into()).ok_or(PccError::MissingFact)?; + let rm_extended = amode_extend(ctx, rm, extendop).ok_or(PccError::MissingFact)?; + let rm_scaled = ctx + .scale(&rm_extended, 64, ty.bytes()) + .ok_or(PccError::Overflow)?; + let sum = ctx.add(&rn, &rm_scaled, 64).ok_or(PccError::MissingFact)?; 
ctx.check_address(&sum, size as u32) + } + &AMode::RegExtended { rn, rm, extendop } => { + let rn = vcode.vreg_fact(rn.into()).ok_or(PccError::MissingFact)?; + let rm = vcode.vreg_fact(rm.into()).ok_or(PccError::MissingFact)?; + let rm_extended = amode_extend(ctx, rm, extendop).ok_or(PccError::MissingFact)?; + let sum = ctx + .add(&rn, &rm_extended, 64) + .ok_or(PccError::MissingFact)?; + ctx.check_address(&sum, size as u32) + } + &AMode::Unscaled { rn, simm9 } => { + let rn = vcode.vreg_fact(rn.into()).ok_or(PccError::MissingFact)?; + let sum = ctx + .offset(&rn, 64, simm9.value as i64) + .ok_or(PccError::MissingFact)?; + ctx.check_address(&sum, size as u32) + } + &AMode::UnsignedOffset { rn, uimm12 } => { + let rn = vcode.vreg_fact(rn.into()).ok_or(PccError::MissingFact)?; + let offset = (uimm12.value as u64) * (size as u64); + let sum = ctx + .offset(&rn, 64, offset as i64) + .ok_or(PccError::MissingFact)?; + ctx.check_address(&sum, size as u32) + } + &AMode::Label { .. } | &AMode::Const { .. } => { + // Always accept: labels and constants must be within the + // generated code (else they won't be resolved). + Ok(()) + } + &AMode::RegOffset { rn, off, .. } => { + let rn = vcode.vreg_fact(rn.into()).ok_or(PccError::MissingFact)?; + let sum = ctx.offset(&rn, 64, off).ok_or(PccError::MissingFact)?; + ctx.check_address(&sum, size as u32) + } + &AMode::SPOffset { .. } + | &AMode::FPOffset { .. } + | &AMode::NominalSPOffset { .. } + | &AMode::SPPostIndexed { .. } + | &AMode::SPPreIndexed { .. } => { + // We trust ABI code (for now!) and no lowering rules + // lower input value accesses directly to these. 
+ Ok(()) + } + } +} + +fn check_addr_pair( + _ctx: &FactContext, + _flags: MemFlags, + _addr: &PairAMode, + _vcode: &VCode, + _size: u8, +) -> PccResult<()> { + Err(PccError::UnimplementedInst) +} + +fn check_scalar_addr( + ctx: &FactContext, + flags: MemFlags, + reg: Reg, + vcode: &VCode, + size: u8, +) -> PccResult<()> { + if !flags.checked() { + return Ok(()); + } + let fact = vcode.vreg_fact(reg.into()).ok_or(PccError::MissingFact)?; + ctx.check_address(&fact, size as u32) +} diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 4c10191eccfc..e484195e7565 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -889,6 +889,10 @@ impl MachInst for Inst { } } + fn is_mem_access(&self) -> bool { + panic!("TODO FILL ME OUT") + } + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Inst { let x = Inst::Mov { rd: to_reg, diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs index 0ece48c7b664..bec240f8b960 100644 --- a/cranelift/codegen/src/isa/s390x/inst/mod.rs +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -1064,6 +1064,10 @@ impl MachInst for Inst { } } + fn is_mem_access(&self) -> bool { + panic!("TODO FILL ME OUT") + } + fn is_safepoint(&self) -> bool { match self { &Inst::Call { .. 
} diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 2a321c7f44f6..19bda166c19f 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -2584,6 +2584,10 @@ impl MachInst for Inst { } } + fn is_mem_access(&self) -> bool { + panic!("TODO FILL ME OUT") + } + fn gen_move(dst_reg: Writable, src_reg: Reg, ty: Type) -> Inst { trace!( "Inst::gen_move {:?} -> {:?} (type: {:?})", diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index 07440489a91c..87481218246c 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -1,11 +1,13 @@ //! Compilation backend pipeline: optimized IR to VCode / binemit. use crate::dominator_tree::DominatorTree; +use crate::ir::pcc; use crate::ir::Function; use crate::isa::TargetIsa; use crate::machinst::*; use crate::timing; use crate::trace; +use crate::CodegenError; use regalloc2::RegallocOptions; @@ -45,6 +47,11 @@ pub fn compile( log::debug!("Number of lowered vcode blocks: {}", vcode.num_blocks()); trace!("vcode from lowering: \n{:?}", vcode); + // Perform validation of proof-carrying-code facts, if requested. + if b.flags().enable_pcc() { + pcc::check_vcode_facts(f, &vcode, b).map_err(CodegenError::Pcc)?; + } + // Perform register allocation. 
let regalloc_result = { let _tt = timing::regalloc(); diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index f4f02890e7de..38fac43b4ee7 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -8,6 +8,7 @@ use crate::entity::SecondaryMap; use crate::fx::{FxHashMap, FxHashSet}; use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit}; +use crate::ir::pcc::{PccError, PccResult}; use crate::ir::{ ArgumentPurpose, Block, Constant, ConstantData, DataFlowGraph, ExternalName, Function, GlobalValue, GlobalValueData, Immediate, Inst, InstructionData, MemFlags, RelSourceLoc, Type, @@ -145,6 +146,12 @@ pub trait LowerBackend { fn maybe_pinned_reg(&self) -> Option { None } + + /// Check any facts about an instruction, given VCode with facts + /// on VRegs. + fn check_fact(&self, _inst: &Self::MInst, _vcode: &VCode) -> PccResult<()> { + Err(PccError::UnimplementedBackend) + } } /// Machine-independent lowering driver / machine-instruction container. 
Maintains a correspondence @@ -349,7 +356,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { for ¶m in f.dfg.block_params(bb) { let ty = f.dfg.value_type(param); if value_regs[param].is_invalid() { - let regs = vregs.alloc(ty)?; + let regs = vregs.alloc_with_maybe_fact(ty, f.dfg.facts[param].clone())?; value_regs[param] = regs; trace!("bb {} param {}: regs {:?}", bb, param, regs); } @@ -358,7 +365,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { for &result in f.dfg.inst_results(inst) { let ty = f.dfg.value_type(result); if value_regs[result].is_invalid() && !ty.is_invalid() { - let regs = vregs.alloc(ty)?; + let regs = vregs.alloc_with_maybe_fact(ty, f.dfg.facts[result].clone())?; value_regs[result] = regs; trace!( "bb {} inst {} ({:?}): result {} regs {:?}", diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index a8d3ce376d12..c587d64adfb6 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -112,6 +112,9 @@ pub trait MachInst: Clone + Debug { /// Should this instruction be included in the clobber-set? fn is_included_in_clobbers(&self) -> bool; + /// Does this instruction access memory? + fn is_mem_access(&self) -> bool; + /// Generate a move. fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self; diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 7e88902634d2..1a09bc59f97c 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -19,6 +19,7 @@ use crate::fx::FxHashMap; use crate::fx::FxHashSet; +use crate::ir::pcc::*; use crate::ir::RelSourceLoc; use crate::ir::{self, types, Constant, ConstantData, DynamicStackSlot, ValueLabel}; use crate::machinst::*; @@ -172,6 +173,9 @@ pub struct VCode { debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, pub(crate) sigs: SigSet, + + /// Facts on VRegs, for proof-carrying code verification. 
+ facts: Vec>, } /// The result of `VCode::emit`. Contains all information computed @@ -586,6 +590,7 @@ impl VCodeBuilder { /// Build the final VCode. pub fn build(mut self, vregs: VRegAllocator) -> VCode { self.vcode.vreg_types = vregs.vreg_types; + self.vcode.facts = vregs.facts; self.vcode.reftyped_vregs = vregs.reftyped_vregs; if self.direction == VCodeBuildDirection::Backward { @@ -654,6 +659,7 @@ impl VCode { constants, debug_value_labels: vec![], vreg_aliases: FxHashMap::with_capacity_and_hasher(10 * n_blocks, Default::default()), + facts: vec![], } } @@ -1277,6 +1283,27 @@ impl VCode { self.assert_not_vreg_alias(op.vreg()); op } + + /// Get the fact, if any, for a given VReg. + pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> { + self.facts[vreg.vreg()].as_ref() + } + + /// Does a given instruction define any facts? + pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool { + self.inst_operands(inst) + .iter() + .filter(|o| o.kind() == OperandKind::Def) + .map(|o| o.vreg()) + .any(|vreg| self.vreg_fact(vreg).is_some()) + } +} + +impl std::ops::Index for VCode { + type Output = I; + fn index(&self, idx: InsnIndex) -> &Self::Output { + &self.insts[idx.index()] + } } impl RegallocFunction for VCode { @@ -1431,6 +1458,13 @@ impl fmt::Debug for VCode { inst, self.insts[inst].pretty_print_inst(&[], &mut state) )?; + for operand in self.inst_operands(InsnIndex::new(inst)) { + if operand.kind() == OperandKind::Def { + if let Some(fact) = &self.facts[operand.vreg().vreg()] { + writeln!(f, " v{} ! {:?}", operand.vreg().vreg(), fact)?; + } + } + } } } @@ -1462,6 +1496,9 @@ pub struct VRegAllocator { /// lowering rules) or some ABI code. deferred_error: Option, + /// Facts on VRegs, for proof-carrying code. + facts: Vec>, + /// The type of instruction that this allocator makes registers for. 
_inst: core::marker::PhantomData, } @@ -1472,6 +1509,7 @@ impl VRegAllocator { Self { next_vreg: first_user_vreg_index(), vreg_types: vec![], + facts: vec![], reftyped_vregs_set: FxHashSet::default(), reftyped_vregs: vec![], deferred_error: None, @@ -1502,6 +1540,11 @@ impl VRegAllocator { for (®_ty, ®) in tys.iter().zip(regs.regs().iter()) { self.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty); } + + // Create empty facts for each allocated vreg. + self.facts + .resize(usize::try_from(self.next_vreg).unwrap(), None); + Ok(regs) } @@ -1550,6 +1593,32 @@ impl VRegAllocator { } } } + + /// Set the proof-carrying code fact on a given virtual register. + /// + /// Returns the old fact, if any (only one fact can be stored). + pub fn set_fact(&mut self, vreg: VirtualReg, fact: Fact) -> Option { + self.facts[vreg.index()].replace(fact) + } + + /// Allocate a fresh ValueRegs, with a given fact to apply if + /// the value fits in one VReg. + pub fn alloc_with_maybe_fact( + &mut self, + ty: Type, + fact: Option, + ) -> CodegenResult> { + let result = self.alloc(ty)?; + + // Ensure that we don't lose a fact on a value that splits + // into multiple VRegs. + assert!(result.len() == 1 || fact.is_none()); + if let Some(fact) = fact { + self.set_fact(result.regs()[0].to_virtual_reg().unwrap(), fact); + } + + Ok(result) + } } /// This structure tracks the large constants used in VCode that will be emitted separately by the diff --git a/cranelift/codegen/src/result.rs b/cranelift/codegen/src/result.rs index d14049c0b6b9..9cbef7979551 100644 --- a/cranelift/codegen/src/result.rs +++ b/cranelift/codegen/src/result.rs @@ -2,6 +2,7 @@ use regalloc2::checker::CheckerErrors; +use crate::ir::pcc::PccError; use crate::{ir::Function, verifier::VerifierErrors}; use std::string::String; @@ -41,6 +42,9 @@ pub enum CodegenError { /// Register allocator internal error discovered by the symbolic checker. Regalloc(CheckerErrors), + + /// Proof-carrying-code validation error. 
+ Pcc(PccError), } /// A convenient alias for a `Result` that uses `CodegenError` as the error type. @@ -58,6 +62,7 @@ impl std::error::Error for CodegenError { #[cfg(feature = "unwind")] CodegenError::RegisterMappingError { .. } => None, CodegenError::Regalloc(..) => None, + CodegenError::Pcc(..) => None, } } } @@ -72,6 +77,7 @@ impl std::fmt::Display for CodegenError { #[cfg(feature = "unwind")] CodegenError::RegisterMappingError(_0) => write!(f, "Register mapping error"), CodegenError::Regalloc(errors) => write!(f, "Regalloc validation errors: {:?}", errors), + CodegenError::Pcc(e) => write!(f, "Proof-carrying-code validation error: {:?}", e), } } } diff --git a/cranelift/codegen/src/settings.rs b/cranelift/codegen/src/settings.rs index bb4af16d1fb6..9c52203365cf 100644 --- a/cranelift/codegen/src/settings.rs +++ b/cranelift/codegen/src/settings.rs @@ -530,6 +530,7 @@ regalloc_checker = false regalloc_verbose_logs = false enable_alias_analysis = true enable_verifier = true +enable_pcc = false is_pic = false use_colocated_libcalls = false enable_float = true diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index 39025fd7addc..20239d1355de 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -199,7 +199,12 @@ fn write_spec(w: &mut dyn Write, func: &Function) -> fmt::Result { // Basic blocks fn write_arg(w: &mut dyn Write, func: &Function, arg: Value) -> fmt::Result { - write!(w, "{}: {}", arg, func.dfg.value_type(arg)) + let ty = func.dfg.value_type(arg); + if let Some(f) = &func.dfg.facts[arg] { + write!(w, "{} ! {}: {}", arg, f, ty) + } else { + write!(w, "{}: {}", arg, ty) + } } /// Write out the basic block header, outdented: @@ -346,6 +351,9 @@ fn write_instruction( } else { write!(w, ", {}", r)?; } + if let Some(f) = &func.dfg.facts[*r] { + write!(w, " ! 
{}", f)?; + } } if has_results { write!(w, " = ")?; diff --git a/cranelift/filetests/filetests/pcc/fail/simple.clif b/cranelift/filetests/filetests/pcc/fail/simple.clif new file mode 100644 index 000000000000..5aac4f8601f3 --- /dev/null +++ b/cranelift/filetests/filetests/pcc/fail/simple.clif @@ -0,0 +1,15 @@ +test compile expect-fail +set enable_pcc=true +target aarch64 + +;; The `points_to` annotation is not large enough here -- the +;; 4GiB-range 32-bit offset could go out of range. PCC should catch +;; this. + +function %simple1(i64 vmctx, i32) -> i8 { +block0(v0 ! points_to(0x8000_0000): i64, v1 ! max(32, 0xffff_ffff): i32): + v2 ! max(64, 0xffff_ffff) = uextend.i64 v1 + v3 ! points_to(0x1) = iadd.i64 v0, v2 + v4 = load.i8 checked v3 + return v4 +} diff --git a/cranelift/filetests/filetests/pcc/succeed/simple.clif b/cranelift/filetests/filetests/pcc/succeed/simple.clif new file mode 100644 index 000000000000..cf1793e5a583 --- /dev/null +++ b/cranelift/filetests/filetests/pcc/succeed/simple.clif @@ -0,0 +1,11 @@ +test compile +set enable_pcc=true +target aarch64 + +function %simple1(i64 vmctx, i32) -> i8 { +block0(v0 ! points_to(0x1_0000_0000): i64, v1 ! max(32, 0xffff_ffff): i32): + v2 ! max(64, 0xffff_ffff) = uextend.i64 v1 + v3 ! points_to(0x1) = iadd.i64 v0, v2 + v4 = load.i8 checked v3 + return v4 +} diff --git a/cranelift/reader/src/lexer.rs b/cranelift/reader/src/lexer.rs index e2c80cf93db9..0367f4b7b9b5 100644 --- a/cranelift/reader/src/lexer.rs +++ b/cranelift/reader/src/lexer.rs @@ -28,7 +28,7 @@ pub enum Token<'a> { Dot, // '.' Colon, // ':' Equal, // '=' - Not, // '!' + Bang, // '!' 
Arrow, // '->' Float(&'a str), // Floating point immediate Integer(&'a str), // Integer immediate @@ -474,7 +474,7 @@ impl<'a> Lexer<'a> { Some('.') => Some(self.scan_char(Token::Dot)), Some(':') => Some(self.scan_char(Token::Colon)), Some('=') => Some(self.scan_char(Token::Equal)), - Some('!') => Some(self.scan_char(Token::Not)), + Some('!') => Some(self.scan_char(Token::Bang)), Some('+') => Some(self.scan_number()), Some('*') => Some(self.scan_char(Token::Multiply)), Some('-') => { diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 28b6ae18600c..100ef20ec332 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -12,6 +12,7 @@ use cranelift_codegen::entity::{EntityRef, PrimaryMap}; use cranelift_codegen::ir::entities::{AnyEntity, DynamicType}; use cranelift_codegen::ir::immediates::{Ieee32, Ieee64, Imm64, Offset32, Uimm32, Uimm64}; use cranelift_codegen::ir::instructions::{InstructionData, InstructionFormat, VariableArgs}; +use cranelift_codegen::ir::pcc::{Fact, MemoryRegion}; use cranelift_codegen::ir::types; use cranelift_codegen::ir::types::INVALID; use cranelift_codegen::ir::types::*; @@ -1135,7 +1136,7 @@ impl<'a> Parser<'a> { let mut list = Vec::new(); while self.token() == Some(Token::Identifier("feature")) { self.consume(); - let has = !self.optional(Token::Not); + let has = !self.optional(Token::Bang); match (self.token(), has) { (Some(Token::String(flag)), true) => list.push(Feature::With(flag)), (Some(Token::String(flag)), false) => list.push(Feature::Without(flag)), @@ -1943,7 +1944,7 @@ impl<'a> Parser<'a> { // between the parsing of value aliases and the parsing of instructions. 
// // inst-results ::= Value(v) { "," Value(v) } - let results = self.parse_inst_results()?; + let results = self.parse_inst_results(ctx)?; for result in &results { while ctx.function.dfg.num_values() <= result.index() { @@ -1993,16 +1994,23 @@ impl<'a> Parser<'a> { // Parse a single block parameter declaration, and append it to `block`. // - // block-param ::= * Value(v) ":" Type(t) arg-loc? + // block-param ::= * Value(v) [ "!" fact ] ":" Type(t) arg-loc? // arg-loc ::= "[" value-location "]" // fn parse_block_param(&mut self, ctx: &mut Context, block: Block) -> ParseResult<()> { - // block-param ::= * Value(v) ":" Type(t) arg-loc? + // block-param ::= * Value(v) [ "!" fact ] ":" Type(t) arg-loc? let v = self.match_value("block argument must be a value")?; let v_location = self.loc; - // block-param ::= Value(v) * ":" Type(t) arg-loc? + // block-param ::= Value(v) * [ "!" fact ] ":" Type(t) arg-loc? + let fact = if self.token() == Some(Token::Bang) { + self.consume(); + // block-param ::= Value(v) [ "!" * fact ] ":" Type(t) arg-loc? + Some(self.parse_fact()?) + } else { + None + }; self.match_token(Token::Colon, "expected ':' after block argument")?; - // block-param ::= Value(v) ":" * Type(t) arg-loc? + // block-param ::= Value(v) [ "!" fact ] ":" * Type(t) arg-loc? while ctx.function.dfg.num_values() <= v.index() { ctx.function.dfg.make_invalid_value_for_parser(); @@ -2012,15 +2020,66 @@ impl<'a> Parser<'a> { // Allocate the block argument. ctx.function.dfg.append_block_param_for_parser(block, t, v); ctx.map.def_value(v, v_location)?; + ctx.function.dfg.facts[v] = fact; Ok(()) } + // Parse a "fact" for proof-carrying code, attached to a value. 
+ // + // fact ::= "max" "(" bit-width "," max-value ")" + // | "points_to" "(" valid-range ")" + // bit-width ::= uimm64 + // max-value ::= uimm64 + // valid-range ::= uimm64 + fn parse_fact(&mut self) -> ParseResult { + match self.token() { + Some(Token::Identifier("max")) => { + self.consume(); + self.match_token(Token::LPar, "`max` fact needs an opening `(`")?; + let bit_width: u64 = self + .match_uimm64("expected a bit-width value for `max` fact")? + .into(); + self.match_token(Token::Comma, "expected a comma")?; + let max: u64 = self + .match_uimm64("expected a max value for `max` fact")? + .into(); + self.match_token(Token::RPar, "`max` fact needs a closing `)`")?; + let bit_width_max = match bit_width { + x if x > 64 => { + return Err(self.error("bitwidth must be <= 64 bits on a `max` fact")); + } + 64 => u64::MAX, + x => (1u64 << x) - 1, + }; + if max > bit_width_max { + return Err( + self.error("max value is out of range for bitwidth on a `max` fact") + ); + } + Ok(Fact::ValueMax { + bit_width: u16::try_from(bit_width).unwrap(), + max: max.into(), + }) + } + Some(Token::Identifier("points_to")) => { + self.consume(); + self.match_token(Token::LPar, "expected a `(`")?; + let max = self.match_uimm64("expected a max offset for `points_to` fact")?; + self.match_token(Token::RPar, "expected a `)`")?; + Ok(Fact::PointsTo { + region: MemoryRegion { max: max.into() }, + }) + } + _ => Err(self.error("expected a `max` or `points_to` fact")), + } + } + // Parse instruction results and return them. // // inst-results ::= Value(v) { "," Value(v) } // - fn parse_inst_results(&mut self) -> ParseResult> { + fn parse_inst_results(&mut self, ctx: &mut Context) -> ParseResult> { // Result value numbers. let mut results = SmallVec::new(); @@ -2031,10 +2090,29 @@ impl<'a> Parser<'a> { results.push(v); + let fact = if self.token() == Some(Token::Bang) { + self.consume(); + // block-param ::= Value(v) [ "!" * fact ] ":" Type(t) arg-loc? + Some(self.parse_fact()?) 
+ } else { + None + }; + ctx.function.dfg.facts[v] = fact; + // inst-results ::= Value(v) * { "," Value(v) } while self.optional(Token::Comma) { // inst-results ::= Value(v) { "," * Value(v) } - results.push(self.match_value("expected result value")?); + let v = self.match_value("expected result value")?; + results.push(v); + + let fact = if self.token() == Some(Token::Bang) { + self.consume(); + // block-param ::= Value(v) [ "!" * fact ] ":" Type(t) arg-loc? + Some(self.parse_fact()?) + } else { + None + }; + ctx.function.dfg.facts[v] = fact; } } @@ -2368,7 +2446,7 @@ impl<'a> Parser<'a> { if self.optional(Token::Equal) { self.match_token(Token::Equal, "expected another =")?; Ok(Comparison::Equals) - } else if self.optional(Token::Not) { + } else if self.optional(Token::Bang) { self.match_token(Token::Equal, "expected a =")?; Ok(Comparison::NotEquals) } else { diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index 82dd651e093d..336048e79f4a 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -416,6 +416,7 @@ impl Engine { | "enable_jump_tables" | "enable_float" | "enable_verifier" + | "enable_pcc" | "regalloc_checker" | "regalloc_verbose_logs" | "is_pic"