Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New lint [ptr_to_temporary] #10962

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5163,6 +5163,7 @@ Released 2018-09-13
[`ptr_cast_constness`]: https://rust-lang.github.io/rust-clippy/master/index.html#ptr_cast_constness
[`ptr_eq`]: https://rust-lang.github.io/rust-clippy/master/index.html#ptr_eq
[`ptr_offset_with_cast`]: https://rust-lang.github.io/rust-clippy/master/index.html#ptr_offset_with_cast
[`ptr_to_temporary`]: https://rust-lang.github.io/rust-clippy/master/index.html#ptr_to_temporary
[`pub_enum_variant_names`]: https://rust-lang.github.io/rust-clippy/master/index.html#pub_enum_variant_names
[`pub_use`]: https://rust-lang.github.io/rust-clippy/master/index.html#pub_use
[`pub_with_shorthand`]: https://rust-lang.github.io/rust-clippy/master/index.html#pub_with_shorthand
Expand Down
1 change: 1 addition & 0 deletions clippy_lints/src/declared_lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,7 @@ pub(crate) static LINTS: &[&crate::LintInfo] = &[
crate::ptr::MUT_FROM_REF_INFO,
crate::ptr::PTR_ARG_INFO,
crate::ptr_offset_with_cast::PTR_OFFSET_WITH_CAST_INFO,
crate::ptr_to_temporary::PTR_TO_TEMPORARY_INFO,
crate::pub_use::PUB_USE_INFO,
crate::question_mark::QUESTION_MARK_INFO,
crate::question_mark_used::QUESTION_MARK_USED_INFO,
Expand Down
3 changes: 3 additions & 0 deletions clippy_lints/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#![feature(array_windows)]
#![feature(binary_heap_into_iter_sorted)]
#![feature(box_patterns)]
#![feature(exclusive_range_pattern)]
#![feature(if_let_guard)]
#![feature(iter_intersperse)]
#![feature(let_chains)]
Expand Down Expand Up @@ -262,6 +263,7 @@ mod permissions_set_readonly_false;
mod precedence;
mod ptr;
mod ptr_offset_with_cast;
mod ptr_to_temporary;
mod pub_use;
mod question_mark;
mod question_mark_used;
Expand Down Expand Up @@ -1082,6 +1084,7 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
store.register_late_pass(|_| Box::new(manual_float_methods::ManualFloatMethods));
store.register_late_pass(|_| Box::new(four_forward_slashes::FourForwardSlashes));
store.register_late_pass(|_| Box::new(error_impl_error::ErrorImplError));
store.register_late_pass(move |_| Box::new(ptr_to_temporary::PtrToTemporary));
// add lints here, do not remove this comment, it's used in `new_lint`
}

Expand Down
355 changes: 355 additions & 0 deletions clippy_lints/src/ptr_to_temporary.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,355 @@
use clippy_utils::consts::is_promotable;
use clippy_utils::diagnostics::{span_lint_and_note, span_lint_hir_and_then};
use clippy_utils::mir::{location_to_node, StatementOrTerminator};
use rustc_data_structures::fx::FxHashMap;
use rustc_hir::def_id::{DefId, LocalDefId};
use rustc_hir::intravisit::FnKind;
use rustc_hir::{Body, BorrowKind, Expr, ExprKind, FnDecl, HirId, ItemKind, OwnerNode};
use rustc_lint::{LateContext, LateLintPass};
use rustc_middle::mir::visit::{MutatingUseContext, PlaceContext, Visitor};
use rustc_middle::mir::{
self, BasicBlock, BasicBlockData, CallSource, Local, Location, Place, PlaceRef, ProjectionElem, Rvalue, SourceInfo,
StatementKind, TerminatorKind,
};
use rustc_session::{declare_lint_pass, declare_tool_lint};
use rustc_span::{sym, Span, Symbol};

declare_clippy_lint! {
/// ### What it does
/// Checks for raw pointers pointing to temporary values that will **not** be promoted to a
/// constant through
/// [constant promotion](https://doc.rust-lang.org/stable/reference/destructors.html#constant-promotion).
///
/// ### Why is this bad?
/// Usage of such a pointer will result in Undefined Behavior, as the pointer will stop
/// pointing to valid stack memory once the temporary is dropped.
///
/// ### Known problems
/// Expects any call to methods named `as_ptr` or `as_mut_ptr` returning a raw pointer to have
/// that raw pointer point to data owned by self. Essentially, it will lint all temporary
/// `as_ptr` calls even if the pointer doesn't point to the temporary.
///
/// ### Example
/// ```rust,ignore
/// fn returning_temp() -> *const i32 {
/// let x = 0;
/// &x as *const i32
/// }
///
/// let px = returning_temp();
/// unsafe { *px }; // ⚠️
/// let pv = vec![].as_ptr();
/// unsafe { *pv }; // ⚠️
/// ```
#[clippy::version = "1.72.0"]
pub PTR_TO_TEMPORARY,
// TODO: Let's make it warn-by-default for now, and change this to deny-by-default once we know
// there are no major FPs
suspicious,
"disallows obtaining raw pointers to temporary values"
}
declare_lint_pass!(PtrToTemporary => [PTR_TO_TEMPORARY]);

impl<'tcx> LateLintPass<'tcx> for PtrToTemporary {
fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
check_for_returning_raw_ptr(cx, expr);
}

fn check_fn(
&mut self,
cx: &LateContext<'tcx>,
_: FnKind<'_>,
_: &FnDecl<'_>,
_: &Body<'_>,
_: Span,
def_id: LocalDefId,
) {
let mir = cx.tcx.optimized_mir(def_id);

// Collect all local assignments in this body. This is faster than continuously passing over the
// body every time we want to get the assignments.
let mut assignments = LocalAssignmentsVisitor {
results: FxHashMap::default(),
};
assignments.visit_body(mir);

let mut v = DanglingPtrVisitor {
cx,
body: mir,
results: vec![],
local_assignments: assignments.results,
};
v.visit_body(mir);

for (span, hir_id, ident) in v.results {
// TODO: We need to lint on the call in question instead, so lint attributes work fine. I'm not sure
// how to though
span_lint_hir_and_then(
cx,
PTR_TO_TEMPORARY,
hir_id,
span,
&format!("calling `{ident}` on a temporary value"),
|diag| {
diag.note(
"usage of this pointer will cause Undefined Behavior as the temporary will be deallocated at \
the end of the statement, yet the pointer will continue pointing to it, resulting in a \
dangling pointer",
);
},
);
}
}
}

/// Check for returning raw pointers to temporaries that are not promoted to a constant
fn check_for_returning_raw_ptr<'tcx>(cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) -> bool {
// Get the final return statement if this is a return statement, or don't lint
let expr = if let ExprKind::Ret(Some(expr)) = expr.kind {
expr
} else if let OwnerNode::Item(parent) = cx.tcx.hir().owner(cx.tcx.hir().get_parent_item(expr.hir_id))
&& let ItemKind::Fn(_, _, body) = parent.kind
&& let block = cx.tcx.hir().body(body).value
&& let ExprKind::Block(block, _) = block.kind
&& let Some(final_block_expr) = block.expr
&& final_block_expr.hir_id == expr.hir_id
{
expr
} else {
return false;
};
Comment on lines +107 to +120
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could be a use for find_all_ret_expressions


if let ExprKind::Cast(cast_expr, _) = expr.kind
&& let ExprKind::AddrOf(BorrowKind::Ref, _, e) = cast_expr.kind
&& !is_promotable(cx, e)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should operate on the MIR here, as after a few reborrows(? stuff like _ = &(*_)) there should always be a const _ if it's promoted. Otherwise, it's temporary. This should be a good starting point.

{
span_lint_and_note(
cx,
PTR_TO_TEMPORARY,
expr.span,
"returning a raw pointer to a temporary value that cannot be promoted to a constant",
None,
"usage of this pointer by callers will cause Undefined Behavior as the temporary will be deallocated at \
the end of the statement, yet the pointer will continue pointing to it, resulting in a dangling pointer",
);

return true;
}

false
}

struct LocalAssignmentsVisitor {
results: FxHashMap<Local, Vec<Location>>,
}

impl Visitor<'_> for LocalAssignmentsVisitor {
fn visit_place(&mut self, place: &Place<'_>, ctxt: PlaceContext, loc: Location) {
if matches!(
ctxt,
PlaceContext::MutatingUse(
MutatingUseContext::Store | MutatingUseContext::Call | MutatingUseContext::Borrow
)
) {
self.results.entry(place.local).or_insert(vec![]).push(loc);
}
}
}

struct DanglingPtrVisitor<'a, 'tcx> {
cx: &'a LateContext<'tcx>,
body: &'tcx mir::Body<'tcx>,
local_assignments: FxHashMap<Local, Vec<Location>>,
results: Vec<(Span, HirId, Symbol)>,
}

impl<'tcx> Visitor<'tcx> for DanglingPtrVisitor<'_, 'tcx> {
fn visit_basic_block_data(&mut self, _: BasicBlock, data: &BasicBlockData<'tcx>) {
let Self {
cx,
body,
local_assignments,
results,
} = self;

if let Some(term) = &data.terminator
&& let TerminatorKind::Call {
func,
args,
destination,
target: Some(target),
call_source: CallSource::Normal,
..
} = &term.kind
&& destination.ty(&body.local_decls, cx.tcx).ty.is_unsafe_ptr()
&& let [recv] = args.as_slice()
&& let Some(recv) = recv.place()
&& let Some((def_id, _)) = func.const_fn_def()
&& let Some(ident) = returns_ptr_to_self(cx, def_id)
&& let Ok(recv) = traverse_up_until_owned(body, local_assignments, recv)
{

check_for_dangling(
body,
*target,
recv,
destination.as_ref(),
term.source_info,
ident,
results,
);
}
}
}

fn check_for_dangling<'tcx>(
body: &mir::Body<'tcx>,
bb: BasicBlock,
mut recv: PlaceRef<'tcx>,
ptr: PlaceRef<'_>,
source_info: SourceInfo,
ident: Symbol,
results: &mut Vec<(Span, HirId, Symbol)>,
) {
let data = &body.basic_blocks[bb];
let mut recv_dead = false;

// If there's a `Drop`, we must include the statements in its target so we don't miss any
// potentially important `StorageDead`s.
let rest = vec![];
let rest = if let Some(term) = &data.terminator && let TerminatorKind::Drop { place, target, .. } = term.kind {
// This indicates a bug in our heuristic. It's normally fine if the `Drop` is present, but
// if it isn't (i.e., no drop glue) then we may have FNs, or worse. Let's catch this early
// if there are upstream MIR changes.
debug_assert_eq!(place.as_ref(), recv, "dropped place is not receiver");
// In release mode, let's prevent a few FPs where `Drop` is present.
recv = place.as_ref();

&body.basic_blocks[target].statements
} else {
&rest
};

for dead_local in data.statements.iter().chain(rest).filter_map(|stmt| {
if let StatementKind::StorageDead(local) = stmt.kind {
return Some(local);
}

None
}) {
match (dead_local == recv.local, dead_local == ptr.local) {
(true, false) => recv_dead = true,
(false, true) if recv_dead => {
results.push((
source_info.span,
body.source_scopes[source_info.scope]
.local_data
.clone()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
.clone()
.as_ref()

.assert_crate_local()
.lint_root,
ident,
));
},
_ => continue,
}
}
}

/// Traverses the MIR backwards until it finds owned data. This can be assumed to be the dropped
/// data in the next `Drop` terminator, if not this indicates a bug in our heuristic.
fn traverse_up_until_owned<'tcx>(
body: &'tcx mir::Body<'tcx>,
local_assignments: &FxHashMap<Local, Vec<Location>>,
start: Place<'tcx>,
) -> Result<PlaceRef<'tcx>, TraverseError> {
traverse_up_until_owned_inner(body, local_assignments, start.as_ref(), 0)
}

fn traverse_up_until_owned_inner<'tcx>(
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should use an analysis pass instead, and track where this owned data goes.

cc @Jarcho, do you have any ideas of what "owned" should be defined as? Basically, for Drop types, the place that is dropped. We could just use the place that is dropped for these types but a temporary that doesn't implement Drop won't have this, despite being freed. Checking the order of StorageDeads works, as the temporary cannot be dead before the pointer, but there may be some nuances to it I'm missing.

body: &'tcx mir::Body<'tcx>,
local_assignments: &FxHashMap<Local, Vec<Location>>,
current_place: PlaceRef<'tcx>,
depth: usize,
) -> Result<PlaceRef<'tcx>, TraverseError> {
if depth > 100 {
return Err(TraverseError::MaxDepthReached);
}
let Some(current) = local_assignments.get(&current_place.local) else {
return Err(TraverseError::NoAssignments);
};
if current.is_empty() {
return Err(TraverseError::NoAssignments);
}
let [current] = current.as_slice() else {
return Err(TraverseError::TooManyAssignments);
};
let current = location_to_node(body, *current);
let next = match current {
StatementOrTerminator::Statement(stmt) if let StatementKind::Assign(box (_, rvalue)) = &stmt.kind => {
match rvalue {
Rvalue::Use(op) | Rvalue::Cast(_, op, _) => {
let Some(place) = op.place() else {
return Err(TraverseError::LikelyPromoted);
};
// If there's a field access, this is likely to be accessing `.0` on a
// `Unique`. We need a better heuristic for this though, as this may lead to
// some FPs.
if let Some(place) = place.iter_projections().find_map(|proj| {
if matches!(proj.1, ProjectionElem::Field(_, _)) {
return Some(proj.0);
}
None
}) {
return Ok(place);
}
place.as_ref()
}
Rvalue::Ref(_, _, place) => {
if !place.has_deref() {
return Ok(place.as_ref());
}
place.as_ref()
}
// Give up if we can't determine it's dangling with near 100% accuracy
_ => return Err(TraverseError::InvalidOp),
}
}
StatementOrTerminator::Terminator(term) if let TerminatorKind::Call { args, .. } = &term.kind
&& let [arg] = args.as_slice() =>
{
let Some(place) = arg.place() else {
return Err(TraverseError::LikelyPromoted);
};
place.as_ref()
}
// Give up if we can't determine it's dangling with near 100% accuracy
_ => return Err(TraverseError::InvalidOp),
};

traverse_up_until_owned_inner(body, local_assignments, next, depth + 1)
}

enum TraverseError {
NoAssignments,
TooManyAssignments,
MaxDepthReached,
LikelyPromoted,
InvalidOp,
}

/// Whether the call returns a raw pointer to data owned by self, i.e., `as_ptr` and friends. If so,
/// if it's temporary it will be dangling and we should lint it. Returns the name of the call if so.
fn returns_ptr_to_self(cx: &LateContext<'_>, def_id: DefId) ->Option<Symbol> {
let path = cx.tcx.def_path(def_id).data;

if let [.., last] = &*path
&& let Some(ident) = last.data.get_opt_name()
&& (ident == sym::as_ptr || ident == sym!(as_mut_ptr))
{
return Some(ident);
}

// TODO: More checks here. We want to lint most libstd functions that return a pointer that
// aren't named `as_ptr`.
None
}
Loading