diff --git a/c2rust-analyze/src/rewrite/apply.rs b/c2rust-analyze/src/rewrite/apply.rs index 5a1f4c7b8..52ab69a50 100644 --- a/c2rust-analyze/src/rewrite/apply.rs +++ b/c2rust-analyze/src/rewrite/apply.rs @@ -419,6 +419,20 @@ impl Emitter<'_, S> { self.emit(rw, 0) } + Rewrite::Match(ref expr, ref cases) => { + self.emit_str("match ")?; + self.emit(expr, 0)?; + self.emit_str(" {\n")?; + for &(ref pat, ref body) in cases { + self.emit_str(" ")?; + self.emit_str(pat)?; + self.emit_str(" => ")?; + self.emit(body, 0)?; + self.emit_str(",\n")?; + } + self.emit_str("}") + } + Rewrite::TyPtr(ref rw, mutbl) => { match mutbl { Mutability::Not => self.emit_str("*const ")?, diff --git a/c2rust-analyze/src/rewrite/expr/convert.rs b/c2rust-analyze/src/rewrite/expr/convert.rs index 6091d9c17..8a730c02a 100644 --- a/c2rust-analyze/src/rewrite/expr/convert.rs +++ b/c2rust-analyze/src/rewrite/expr/convert.rs @@ -224,38 +224,96 @@ impl<'tcx> ConvertVisitor<'tcx> { } mir_op::RewriteKind::MemcpySafe { - elem_size, + ref elem_ty, dest_single, + dest_option, src_single, + src_option, } => { - // `memcpy(dest, src, n)` to a `copy_from_slice` call + // `memcpy(dest, src, byte_len)` to a `copy_from_slice` call assert!(matches!(hir_rw, Rewrite::Identity)); - assert!(!dest_single, "&T -> &[T] conversion for memcpy dest NYI"); - assert!(!src_single, "&T -> &[T] conversion for memcpy src NYI"); - Rewrite::Block( - vec![ - Rewrite::Let(vec![ - ("dest".into(), self.get_subexpr(ex, 0)), - ("src".into(), self.get_subexpr(ex, 1)), - ("byte_len".into(), self.get_subexpr(ex, 2)), - ]), - Rewrite::Let(vec![( - "n".into(), - format_rewrite!("byte_len as usize / {elem_size}"), - )]), - Rewrite::MethodCall( - "copy_from_slice".into(), - Box::new(format_rewrite!("dest[..n]")), - vec![format_rewrite!("&src[..n]")], - ), - ], - Some(Box::new(format_rewrite!("dest"))), - ) + let mut stmts = Vec::with_capacity(6); + + stmts.push(Rewrite::Let(vec![ + ("dest".into(), self.get_subexpr(ex, 0)), + ("src".into(), self.get_subexpr(ex, 1)), + ("byte_len".into(), self.get_subexpr(ex, 2)), + ])); + // Best-effort check to detect size mismatches. This can happen if we infer the + // wrong pointee type, or if the C code used a hardcoded size for `elem_ty` but we + // changed its size during rewriting, or possibly other cases. These errors could + // potentially cause too few items to be copied, introducing a subtle logic error; + // this assertion tries to catch this situation early so it's easier to diagnose. + stmts.push(format_rewrite!( + "assert_eq!(byte_len as usize % std::mem::size_of::<{elem_ty}>(), 0)" + )); + stmts.push(Rewrite::Let(vec![( + "n".into(), + format_rewrite!("byte_len as usize / std::mem::size_of::<{elem_ty}>()"), + )])); + let mut convert = |var: &str, is_mut, is_single, is_option| { + let single_to_slice = if is_single { + if is_mut { + format_rewrite!("std::slice::from_mut({var})") + } else { + format_rewrite!("std::slice::from_ref({var})") + } + } else { + Rewrite::Text(var.into()) + }; + let rhs = if is_option { + // ``` + // match var { + // Some(x) => x, // or slice::from_ref(x), etc + // None => { assert_eq!(n, 0); &[] }, + // } + // ``` + let empty_slice = if is_mut { + format_rewrite!("&mut []") + } else { + format_rewrite!("&[]") + }; + Rewrite::Match( + Box::new(Rewrite::Text(var.into())), + vec![ + (format!("Some({var})"), single_to_slice), + ( + "None".into(), + Rewrite::Block( + vec![format_rewrite!("assert_eq!(n, 0)")], + Some(Box::new(empty_slice)), + ), + ), + ], + ) + } else { + single_to_slice + }; + stmts.push(Rewrite::Let1(var.into(), Box::new(rhs))); + }; + convert("dest", true, dest_single, dest_option); + convert("src", false, src_single, src_option); + // `dest[..n].copy_from_slice(&src[..n]);` + stmts.push(Rewrite::MethodCall( + "copy_from_slice".into(), + Box::new(format_rewrite!("dest[..n]")), + vec![format_rewrite!("&src[..n]")], + )); + + // TODO: `memcpy` cases that actually use the return value are only partially + // supported. Currently we always return `&mut [T]`, which may not match the + // permissions on the output. Doing this correctly would require saving the + // original `dest` and then applying `slice::from_mut`, `OptionDowngrade`, and/or + // `DynOwnedDowngrade` to get `&mut [T]` for the call to `copy_from_slice`. This + // would allow ownership to flow from `p` to `q` in `q = memcpy(p, ...)`, for + // example. Fortunately, most code just uses `memcpy` for its side effect and + // ignores the return value. + Rewrite::Block(stmts, Some(Box::new(format_rewrite!("dest")))) } mir_op::RewriteKind::MemsetZeroize { ref zero_ty, - elem_size, + ref elem_ty, dest_single, } => { // `memset(dest, 0, n)` to assignments that zero out each field of `*dest` @@ -275,9 +333,13 @@ impl<'tcx> ConvertVisitor<'tcx> { ("val".into(), self.get_subexpr(ex, 1)), ("byte_len".into(), self.get_subexpr(ex, 2)), ]), + // Best-effort check to detect size mismatches, as in `MemcpySafe`. + format_rewrite!( + "assert_eq!(byte_len as usize % std::mem::size_of::<{elem_ty}>(), 0)" + ), Rewrite::Let(vec![( "n".into(), - format_rewrite!("byte_len as usize / {elem_size}"), + format_rewrite!("byte_len as usize / std::mem::size_of::<{elem_ty}>()"), )]), format_rewrite!("assert_eq!(val, 0, \"non-zero memset NYI\")"), zeroize_body, @@ -288,12 +350,12 @@ impl<'tcx> ConvertVisitor<'tcx> { mir_op::RewriteKind::MallocSafe { ref zero_ty, - elem_size, + ref elem_ty, single, } | mir_op::RewriteKind::CallocSafe { ref zero_ty, - elem_size, + ref elem_ty, single, } => { // `malloc(n)` -> `Box::new(z)` or similar @@ -302,9 +364,15 @@ impl<'tcx> ConvertVisitor<'tcx> { let mut stmts = match *rw { mir_op::RewriteKind::MallocSafe { .. } => vec![ Rewrite::Let(vec![("byte_len".into(), self.get_subexpr(ex, 0))]), + // Best-effort check to detect size mismatches, as in `MemcpySafe`. + format_rewrite!( + "assert_eq!(byte_len as usize % std::mem::size_of::<{elem_ty}>(), 0)" + ), Rewrite::Let1( "n".into(), - Box::new(format_rewrite!("byte_len as usize / {elem_size}")), + Box::new(format_rewrite!( + "byte_len as usize / std::mem::size_of::<{elem_ty}>()" + )), ), ], mir_op::RewriteKind::CallocSafe { .. } => vec![ @@ -312,7 +380,9 @@ impl<'tcx> ConvertVisitor<'tcx> { ("count".into(), self.get_subexpr(ex, 0)), ("size".into(), self.get_subexpr(ex, 1)), ]), - format_rewrite!("assert_eq!(size, {elem_size})"), + format_rewrite!( + "assert_eq!(size as usize, std::mem::size_of::<{elem_ty}>())" + ), Rewrite::Let1("n".into(), Box::new(format_rewrite!("count as usize"))), ], _ => unreachable!(), @@ -342,7 +412,7 @@ impl<'tcx> ConvertVisitor<'tcx> { mir_op::RewriteKind::ReallocSafe { ref zero_ty, - elem_size, + ref elem_ty, src_single, dest_single, option, @@ -355,9 +425,15 @@ impl<'tcx> ConvertVisitor<'tcx> { ("src_ptr".into(), self.get_subexpr(ex, 0)), ("dest_byte_len".into(), self.get_subexpr(ex, 1)), ]), + // Best-effort check to detect size mismatches, as in `MemcpySafe`. + format_rewrite!( + "assert_eq!(dest_byte_len as usize % std::mem::size_of::<{elem_ty}>(), 0)" + ), Rewrite::Let1( "dest_n".into(), - Box::new(format_rewrite!("dest_byte_len as usize / {elem_size}")), + Box::new(format_rewrite!( + "dest_byte_len as usize / std::mem::size_of::<{elem_ty}>()" + )), ), ]; if dest_single { diff --git a/c2rust-analyze/src/rewrite/expr/mir_op.rs b/c2rust-analyze/src/rewrite/expr/mir_op.rs index a82d91195..d4a1a915b 100644 --- a/c2rust-analyze/src/rewrite/expr/mir_op.rs +++ b/c2rust-analyze/src/rewrite/expr/mir_op.rs @@ -11,6 +11,7 @@ use crate::context::{AnalysisCtxt, Assignment, DontRewriteFnReason, FlagSet, LTy use crate::panic_detail; use crate::pointee_type::PointeeTypes; use crate::pointer_id::{GlobalPointerTable, PointerId, PointerTable}; +use crate::rewrite; use crate::type_desc::{self, Ownership, Quantity, TypeDesc}; use crate::util::{self, ty_callee, Callee}; use log::{debug, error, trace}; @@ -20,7 +21,7 @@ use rustc_middle::mir::{ StatementKind, Terminator, TerminatorKind, }; use rustc_middle::ty::print::{FmtPrinter, PrettyPrinter, Print}; -use rustc_middle::ty::{ParamEnv, Ty, TyCtxt, TyKind}; +use rustc_middle::ty::{Ty, TyCtxt, TyKind}; use std::collections::HashMap; use std::ops::Index; @@ -80,20 +81,22 @@ pub enum RewriteKind { ZeroAsPtrToNone, /// Replace a call to `memcpy(dest, src, n)` with a safe copy operation that works on slices - /// instead of raw pointers. `elem_size` is the size of the original, unrewritten pointee - /// type, which is used to convert the byte length `n` to an element count. `dest_single` and - /// `src_single` are set when `dest`/`src` is a pointer to a single item rather than a slice. + /// instead of raw pointers. `elem_ty` is the pointee type type, whose size is used to convert + /// the byte length `n` to an element count. `dest_single` and `src_single` are set when + /// `dest`/`src` is a pointer to a single item rather than a slice. MemcpySafe { - elem_size: u64, + elem_ty: String, dest_single: bool, + dest_option: bool, src_single: bool, + src_option: bool, }, - /// Replace a call to `memset(ptr, 0, n)` with a safe zeroize operation. `elem_size` is the - /// size of the type being zeroized, which is used to convert the byte length `n` to an element - /// count. `dest_single` is set when `dest` is a pointer to a single item rather than a slice. + /// Replace a call to `memset(ptr, 0, n)` with a safe zeroize operation. `elem_ty` is the type + /// being zeroized, whose size is used to convert the byte length `n` to an element count. + /// `dest_single` is set when `dest` is a pointer to a single item rather than a slice. MemsetZeroize { zero_ty: ZeroizeType, - elem_size: u64, + elem_ty: String, dest_single: bool, }, @@ -101,21 +104,21 @@ pub enum RewriteKind { /// zero-initialized. MallocSafe { zero_ty: ZeroizeType, - elem_size: u64, + elem_ty: String, single: bool, }, /// Replace a call to `free(p)` with a safe `drop` operation. FreeSafe { single: bool }, ReallocSafe { zero_ty: ZeroizeType, - elem_size: u64, + elem_ty: String, src_single: bool, dest_single: bool, option: bool, }, CallocSafe { zero_ty: ZeroizeType, - elem_size: u64, + elem_ty: String, single: bool, }, @@ -345,6 +348,26 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { desc.dyn_owned } + fn rewrite_lty(&self, lty: LTy<'tcx>) -> Ty<'tcx> { + let tcx = self.acx.tcx(); + rewrite::ty::rewrite_lty( + tcx, + lty, + &self.perms, + &self.flags, + &self.pointee_types, + &self.acx.gacx.adt_metadata, + ) + } + + fn lty_to_rewritten_str(&self, lty: LTy<'tcx>) -> (Ty<'tcx>, String) { + let rewritten_ty = self.rewrite_lty(lty); + let tcx = self.acx.tcx(); + let printer = FmtPrinter::new(tcx, Namespace::TypeNS); + let s = rewritten_ty.print(printer).unwrap().into_buffer(); + (rewritten_ty, s) + } + fn visit_statement(&mut self, stmt: &Statement<'tcx>, loc: Location) { let _g = panic_detail::set_current_span(stmt.source_info.span); debug!( @@ -570,25 +593,30 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { None => return, }; - let orig_pointee_ty = pointee_lty.ty; - let ty_layout = tcx - .layout_of(ParamEnv::reveal_all().and(orig_pointee_ty)) - .unwrap(); - let elem_size = ty_layout.layout.size().bytes(); + let (_, elem_ty) = v.lty_to_rewritten_str(pointee_lty); let dest_single = !v.perms[dest_lty.label] .intersects(PermissionSet::OFFSET_ADD | PermissionSet::OFFSET_SUB); let src_single = !v.perms[src_lty.label] .intersects(PermissionSet::OFFSET_ADD | PermissionSet::OFFSET_SUB); + let dest_option = + !v.perms[dest_lty.label].contains(PermissionSet::NON_NULL); + let src_option = + !v.perms[src_lty.label].contains(PermissionSet::NON_NULL); v.emit(RewriteKind::MemcpySafe { - elem_size, + elem_ty, src_single, + src_option, dest_single, + dest_option, }); if !pl_ty.label.is_none() && v.perms[pl_ty.label].intersects(PermissionSet::USED) { let dest_lty = v.acx.type_of(&args[0]); + // TODO: The result of `MemcpySafe` is always a slice, so this cast + // may be using an incorrect input type. See the comment on the + // `MemcpySafe` case of `rewrite::expr::convert` for details. v.emit_cast_lty_lty(dest_lty, pl_ty); } }); @@ -609,17 +637,11 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { None => return, }; - let orig_pointee_ty = pointee_lty.ty; - let ty_layout = tcx - .layout_of(ParamEnv::reveal_all().and(orig_pointee_ty)) - .unwrap(); - let elem_size = ty_layout.layout.size().bytes(); + let (elem_ty, elem_ty_str) = v.lty_to_rewritten_str(pointee_lty); let dest_single = !v.perms[dest_lty.label] .intersects(PermissionSet::OFFSET_ADD | PermissionSet::OFFSET_SUB); - // TODO: use rewritten types here, so that the `ZeroizeType` will - // reflect the actual types and fields after rewriting. - let zero_ty = match ZeroizeType::from_ty(tcx, orig_pointee_ty) { + let zero_ty = match ZeroizeType::from_ty(tcx, elem_ty) { Some(x) => x, // TODO: emit void* cast before bailing out, as described above None => return, @@ -627,7 +649,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { v.emit(RewriteKind::MemsetZeroize { zero_ty, - elem_size, + elem_ty: elem_ty_str, dest_single, }); @@ -680,11 +702,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { } }; - let orig_pointee_ty = pointee_lty.ty; - let ty_layout = tcx - .layout_of(ParamEnv::reveal_all().and(orig_pointee_ty)) - .unwrap(); - let elem_size = ty_layout.layout.size().bytes(); + let (_, elem_ty) = v.lty_to_rewritten_str(pointee_lty); let single = !v.perms[dest_lty.label] .intersects(PermissionSet::OFFSET_ADD | PermissionSet::OFFSET_SUB); @@ -705,12 +723,12 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { let rw = match *callee { Callee::Malloc => RewriteKind::MallocSafe { zero_ty, - elem_size, + elem_ty, single, }, Callee::Calloc => RewriteKind::CallocSafe { zero_ty, - elem_size, + elem_ty, single, }, _ => unreachable!(), @@ -788,25 +806,19 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { } }; - let orig_pointee_ty = pointee_lty.ty; - let ty_layout = tcx - .layout_of(ParamEnv::reveal_all().and(orig_pointee_ty)) - .unwrap(); - let elem_size = ty_layout.layout.size().bytes(); + let (elem_ty, elem_ty_str) = v.lty_to_rewritten_str(pointee_lty); let dest_single = !v.perms[dest_lty.label] .intersects(PermissionSet::OFFSET_ADD | PermissionSet::OFFSET_SUB); let src_single = !v.perms[src_lty.label] .intersects(PermissionSet::OFFSET_ADD | PermissionSet::OFFSET_SUB); - // TODO: use rewritten types here, so that the `ZeroizeType` will - // reflect the actual types and fields after rewriting. - let zero_ty = match ZeroizeType::from_ty(tcx, orig_pointee_ty) { + let zero_ty = match ZeroizeType::from_ty(tcx, elem_ty) { Some(x) => x, // TODO: emit void* cast before bailing out None => { trace!( "{callee:?}: failed to compute ZeroizeType \ - for {orig_pointee_ty:?}" + for {elem_ty:?}" ); return; } @@ -837,7 +849,7 @@ impl<'a, 'tcx> ExprRewriteVisitor<'a, 'tcx> { v.emit(RewriteKind::ReallocSafe { zero_ty, - elem_size, + elem_ty: elem_ty_str, src_single, dest_single, option, diff --git a/c2rust-analyze/src/rewrite/mod.rs b/c2rust-analyze/src/rewrite/mod.rs index 8801ea0d2..ac455a9d9 100644 --- a/c2rust-analyze/src/rewrite/mod.rs +++ b/c2rust-analyze/src/rewrite/mod.rs @@ -102,6 +102,9 @@ pub enum Rewrite { /// Single-argument closure. As with `Let` and `Let1`, the body must be carefully constructed /// to avoid potential shadowing. Closure1(String, Box), + /// Match expression. The `Vec` is a list of cases, each consisting of a pattern and an + /// expression. + Match(Box, Vec<(String, Rewrite)>), // Type builders /// Emit a complete pretty-printed type, discarding the original annotation. @@ -201,6 +204,13 @@ impl Rewrite { } Let1(ref name, ref rw) => Let1(String::clone(name), try_subst(rw)?), Closure1(ref name, ref rw) => Closure1(String::clone(name), try_subst(rw)?), + Match(ref expr, ref cases) => { + let mut new_cases = Vec::with_capacity(cases.len()); + for &(ref pat, ref body) in cases { + new_cases.push((String::clone(pat), body.try_subst(subst)?)); + } + Match(try_subst(expr)?, new_cases) + } Print(ref s) => Print(String::clone(s)), TyPtr(ref rw, mutbl) => TyPtr(try_subst(rw)?, mutbl), diff --git a/c2rust-analyze/src/rewrite/ty.rs b/c2rust-analyze/src/rewrite/ty.rs index cab9c8d3f..3f29e7ed4 100644 --- a/c2rust-analyze/src/rewrite/ty.rs +++ b/c2rust-analyze/src/rewrite/ty.rs @@ -140,7 +140,7 @@ fn relabel_rewrites<'tcx, P, F, PT>( pointee_types: &PT, lcx: LabeledTyCtxt<'tcx, RewriteLabel<'tcx>>, lty: LTy<'tcx>, - gacx: &GlobalAnalysisCtxt<'tcx>, + adt_metadata: &AdtMetadataTable, ) -> RwLTy<'tcx> where P: Index, @@ -156,7 +156,7 @@ where flags, pointee_types, &[], - &gacx.adt_metadata, + adt_metadata, ) }) } @@ -673,7 +673,7 @@ impl<'tcx, 'a> intravisit::Visitor<'tcx> for HirTyVisitor<'a, 'tcx> { &self.pointee_types, self.rw_lcx, lty, - self.acx.gacx, + &self.acx.gacx.adt_metadata, ); let hir_ty = hir_local.ty.unwrap(); self.handle_ty(rw_lty, hir_ty); @@ -923,6 +923,20 @@ pub fn gen_adt_ty_rewrites<'tcx>( hir_rewrites } +/// Compute the new, rewritten version of `lty`. +pub fn rewrite_lty<'tcx>( + tcx: TyCtxt<'tcx>, + lty: LTy<'tcx>, + perms: &GlobalPointerTable, + flags: &GlobalPointerTable, + pointee_types: &PointerTable>, + adt_metadata: &AdtMetadataTable, +) -> Ty<'tcx> { + let rw_lcx = LabeledTyCtxt::::new(tcx); + let rw_lty = relabel_rewrites(perms, flags, pointee_types, rw_lcx, lty, adt_metadata); + mk_rewritten_ty(rw_lcx, rw_lty) +} + /// Print the rewritten types for all locals in `mir`. This is used for tests and debugging, as it /// reveals the inference results even for temporaries and other locals with no type annotation in /// the HIR. @@ -942,7 +956,7 @@ pub fn dump_rewritten_local_tys<'tcx>( &pointee_types, rw_lcx, acx.local_tys[local], - acx.gacx, + &acx.gacx.adt_metadata, ); let ty = mk_rewritten_ty(rw_lcx, rw_lty); debug!( diff --git a/c2rust-analyze/tests/filecheck/alloc.rs b/c2rust-analyze/tests/filecheck/alloc.rs index 58f4058a2..fa03fde55 100644 --- a/c2rust-analyze/tests/filecheck/alloc.rs +++ b/c2rust-analyze/tests/filecheck/alloc.rs @@ -95,3 +95,7 @@ unsafe extern "C" fn realloc1(n: libc::c_ulong) { free(buf as *mut libc::c_void); } + +// Rewrites of malloc/calloc/realloc/memset should use `mem::size_of` to convert byte counts to +// element counts. +// CHECK: let n = byte_len as usize / std::mem::size_of::();