Skip to content

Commit 8c2c9a9

Browse files
committed
Auto merge of #130401 - matthiaskrgr:rollup-fri2j58, r=matthiaskrgr
Rollup of 5 pull requests

Successful merges:

- #129439 (Implement feature `string_from_utf8_lossy_owned` for lossy conversion from `Vec<u8>` to `String` methods)
- #129828 (miri: treat non-memory local variables properly for data race detection)
- #130110 (make dist vendoring configurable)
- #130293 (Fix lint levels not getting overridden by attrs on `Stmt` nodes)
- #130342 (interpret, miri: fix dealing with overflow during slice indexing and allocation)

Failed merges:

- #130394 (const: don't ICE when encountering a mutable ref to immutable memory)

r? `@ghost`
`@rustbot` modify labels: rollup
2 parents dde7d66 + 96195a5 commit 8c2c9a9

37 files changed

+805
-183
lines changed

compiler/rustc_const_eval/src/interpret/intrinsics.rs

+8-8
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
216216
self.copy_intrinsic(&args[0], &args[1], &args[2], /*nonoverlapping*/ false)?;
217217
}
218218
sym::write_bytes => {
219-
self.write_bytes_intrinsic(&args[0], &args[1], &args[2])?;
219+
self.write_bytes_intrinsic(&args[0], &args[1], &args[2], "write_bytes")?;
220220
}
221221
sym::compare_bytes => {
222222
let result = self.compare_bytes_intrinsic(&args[0], &args[1], &args[2])?;
@@ -599,9 +599,8 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
599599
let count = self.read_target_usize(count)?;
600600
let layout = self.layout_of(src.layout.ty.builtin_deref(true).unwrap())?;
601601
let (size, align) = (layout.size, layout.align.abi);
602-
// `checked_mul` enforces a too small bound (the correct one would probably be target_isize_max),
603-
// but no actual allocation can be big enough for the difference to be noticeable.
604-
let size = size.checked_mul(count, self).ok_or_else(|| {
602+
603+
let size = self.compute_size_in_bytes(size, count).ok_or_else(|| {
605604
err_ub_custom!(
606605
fluent::const_eval_size_overflow,
607606
name = if nonoverlapping { "copy_nonoverlapping" } else { "copy" }
@@ -635,11 +634,12 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
635634
Ok(())
636635
}
637636

638-
pub(crate) fn write_bytes_intrinsic(
637+
pub fn write_bytes_intrinsic(
639638
&mut self,
640639
dst: &OpTy<'tcx, <M as Machine<'tcx>>::Provenance>,
641640
byte: &OpTy<'tcx, <M as Machine<'tcx>>::Provenance>,
642641
count: &OpTy<'tcx, <M as Machine<'tcx>>::Provenance>,
642+
name: &'static str,
643643
) -> InterpResult<'tcx> {
644644
let layout = self.layout_of(dst.layout.ty.builtin_deref(true).unwrap())?;
645645

@@ -649,9 +649,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
649649

650650
// `checked_mul` enforces a too small bound (the correct one would probably be target_isize_max),
651651
// but no actual allocation can be big enough for the difference to be noticeable.
652-
let len = layout.size.checked_mul(count, self).ok_or_else(|| {
653-
err_ub_custom!(fluent::const_eval_size_overflow, name = "write_bytes")
654-
})?;
652+
let len = self
653+
.compute_size_in_bytes(layout.size, count)
654+
.ok_or_else(|| err_ub_custom!(fluent::const_eval_size_overflow, name = name))?;
655655

656656
let bytes = std::iter::repeat(byte).take(len.bytes_usize());
657657
self.write_bytes_ptr(dst, bytes)

compiler/rustc_const_eval/src/interpret/machine.rs

+20-1
Original file line numberDiff line numberDiff line change
@@ -540,10 +540,29 @@ pub trait Machine<'tcx>: Sized {
540540
Ok(ReturnAction::Normal)
541541
}
542542

543+
/// Called immediately after an "immediate" local variable is read
544+
/// (i.e., this is called for reads that do not end up accessing addressable memory).
545+
#[inline(always)]
546+
fn after_local_read(_ecx: &InterpCx<'tcx, Self>, _local: mir::Local) -> InterpResult<'tcx> {
547+
Ok(())
548+
}
549+
550+
/// Called immediately after an "immediate" local variable is assigned a new value
551+
/// (i.e., this is called for writes that do not end up in memory).
552+
/// `storage_live` indicates whether this is the initial write upon `StorageLive`.
553+
#[inline(always)]
554+
fn after_local_write(
555+
_ecx: &mut InterpCx<'tcx, Self>,
556+
_local: mir::Local,
557+
_storage_live: bool,
558+
) -> InterpResult<'tcx> {
559+
Ok(())
560+
}
561+
543562
/// Called immediately after actual memory was allocated for a local
544563
/// but before the local's stack frame is updated to point to that memory.
545564
#[inline(always)]
546-
fn after_local_allocated(
565+
fn after_local_moved_to_memory(
547566
_ecx: &mut InterpCx<'tcx, Self>,
548567
_local: mir::Local,
549568
_mplace: &MPlaceTy<'tcx, Self::Provenance>,

compiler/rustc_const_eval/src/interpret/memory.rs

+8-3
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
222222
} else {
223223
Allocation::try_uninit(size, align)?
224224
};
225-
self.allocate_raw_ptr(alloc, kind)
225+
self.insert_allocation(alloc, kind)
226226
}
227227

228228
pub fn allocate_bytes_ptr(
@@ -233,14 +233,15 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
233233
mutability: Mutability,
234234
) -> InterpResult<'tcx, Pointer<M::Provenance>> {
235235
let alloc = Allocation::from_bytes(bytes, align, mutability);
236-
self.allocate_raw_ptr(alloc, kind)
236+
self.insert_allocation(alloc, kind)
237237
}
238238

239-
pub fn allocate_raw_ptr(
239+
pub fn insert_allocation(
240240
&mut self,
241241
alloc: Allocation<M::Provenance, (), M::Bytes>,
242242
kind: MemoryKind<M::MemoryKind>,
243243
) -> InterpResult<'tcx, Pointer<M::Provenance>> {
244+
assert!(alloc.size() <= self.max_size_of_val());
244245
let id = self.tcx.reserve_alloc_id();
245246
debug_assert_ne!(
246247
Some(kind),
@@ -1046,6 +1047,10 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
10461047
);
10471048
res
10481049
}
1050+
1051+
pub(super) fn validation_in_progress(&self) -> bool {
1052+
self.memory.validation_in_progress
1053+
}
10491054
}
10501055

10511056
#[doc(hidden)]

compiler/rustc_const_eval/src/interpret/operand.rs

+1
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
697697
if matches!(op, Operand::Immediate(_)) {
698698
assert!(!layout.is_unsized());
699699
}
700+
M::after_local_read(self, local)?;
700701
Ok(OpTy { op, layout })
701702
}
702703

compiler/rustc_const_eval/src/interpret/operator.rs

+16-1
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
use either::Either;
22
use rustc_apfloat::{Float, FloatConvert};
3-
use rustc_middle::mir::interpret::{InterpResult, Scalar};
3+
use rustc_middle::mir::interpret::{InterpResult, PointerArithmetic, Scalar};
44
use rustc_middle::mir::NullOp;
55
use rustc_middle::ty::layout::{LayoutOf, TyAndLayout};
66
use rustc_middle::ty::{self, FloatTy, ScalarInt, Ty};
77
use rustc_middle::{bug, mir, span_bug};
88
use rustc_span::symbol::sym;
9+
use rustc_target::abi::Size;
910
use tracing::trace;
1011

1112
use super::{throw_ub, ImmTy, InterpCx, Machine, MemPlaceMeta};
@@ -287,6 +288,20 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
287288
})
288289
}
289290

291+
/// Computes the total size of this access, `count * elem_size`,
292+
/// checking for overflow beyond isize::MAX.
293+
pub fn compute_size_in_bytes(&self, elem_size: Size, count: u64) -> Option<Size> {
294+
// `checked_mul` applies `u64` limits independent of the target pointer size... but the
295+
// subsequent check for `max_size_of_val` means we also handle 32bit targets correctly.
296+
// (We cannot use `Size::checked_mul` as that enforces `obj_size_bound` as the limit, which
297+
// would be wrong here.)
298+
elem_size
299+
.bytes()
300+
.checked_mul(count)
301+
.map(Size::from_bytes)
302+
.filter(|&total| total <= self.max_size_of_val())
303+
}
304+
290305
fn binary_ptr_op(
291306
&self,
292307
bin_op: mir::BinOp,

compiler/rustc_const_eval/src/interpret/place.rs

+22-9
Original file line numberDiff line numberDiff line change
@@ -500,15 +500,13 @@ where
500500
&self,
501501
local: mir::Local,
502502
) -> InterpResult<'tcx, PlaceTy<'tcx, M::Provenance>> {
503-
// Other parts of the system rely on `Place::Local` never being unsized.
504-
// So we eagerly check here if this local has an MPlace, and if yes we use it.
505503
let frame = self.frame();
506504
let layout = self.layout_of_local(frame, local, None)?;
507505
let place = if layout.is_sized() {
508506
// We can just always use the `Local` for sized values.
509507
Place::Local { local, offset: None, locals_addr: frame.locals_addr() }
510508
} else {
511-
// Unsized `Local` isn't okay (we cannot store the metadata).
509+
// Other parts of the system rely on `Place::Local` never being unsized.
512510
match frame.locals[local].access()? {
513511
Operand::Immediate(_) => bug!(),
514512
Operand::Indirect(mplace) => Place::Ptr(*mplace),
@@ -562,7 +560,10 @@ where
562560
place: &PlaceTy<'tcx, M::Provenance>,
563561
) -> InterpResult<
564562
'tcx,
565-
Either<MPlaceTy<'tcx, M::Provenance>, (&mut Immediate<M::Provenance>, TyAndLayout<'tcx>)>,
563+
Either<
564+
MPlaceTy<'tcx, M::Provenance>,
565+
(&mut Immediate<M::Provenance>, TyAndLayout<'tcx>, mir::Local),
566+
>,
566567
> {
567568
Ok(match place.to_place().as_mplace_or_local() {
568569
Left(mplace) => Left(mplace),
@@ -581,7 +582,7 @@ where
581582
}
582583
Operand::Immediate(local_val) => {
583584
// The local still has the optimized representation.
584-
Right((local_val, layout))
585+
Right((local_val, layout, local))
585586
}
586587
}
587588
}
@@ -643,9 +644,13 @@ where
643644
assert!(dest.layout().is_sized(), "Cannot write unsized immediate data");
644645

645646
match self.as_mplace_or_mutable_local(&dest.to_place())? {
646-
Right((local_val, local_layout)) => {
647+
Right((local_val, local_layout, local)) => {
647648
// Local can be updated in-place.
648649
*local_val = src;
650+
// Call the machine hook (the data race detector needs to know about this write).
651+
if !self.validation_in_progress() {
652+
M::after_local_write(self, local, /*storage_live*/ false)?;
653+
}
649654
// Double-check that the value we are storing and the local fit to each other.
650655
if cfg!(debug_assertions) {
651656
src.assert_matches_abi(local_layout.abi, self);
@@ -714,8 +719,12 @@ where
714719
dest: &impl Writeable<'tcx, M::Provenance>,
715720
) -> InterpResult<'tcx> {
716721
match self.as_mplace_or_mutable_local(&dest.to_place())? {
717-
Right((local_val, _local_layout)) => {
722+
Right((local_val, _local_layout, local)) => {
718723
*local_val = Immediate::Uninit;
724+
// Call the machine hook (the data race detector needs to know about this write).
725+
if !self.validation_in_progress() {
726+
M::after_local_write(self, local, /*storage_live*/ false)?;
727+
}
719728
}
720729
Left(mplace) => {
721730
let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else {
@@ -734,8 +743,12 @@ where
734743
dest: &impl Writeable<'tcx, M::Provenance>,
735744
) -> InterpResult<'tcx> {
736745
match self.as_mplace_or_mutable_local(&dest.to_place())? {
737-
Right((local_val, _local_layout)) => {
746+
Right((local_val, _local_layout, local)) => {
738747
local_val.clear_provenance()?;
748+
// Call the machine hook (the data race detector needs to know about this write).
749+
if !self.validation_in_progress() {
750+
M::after_local_write(self, local, /*storage_live*/ false)?;
751+
}
739752
}
740753
Left(mplace) => {
741754
let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else {
@@ -941,7 +954,7 @@ where
941954
mplace.mplace,
942955
)?;
943956
}
944-
M::after_local_allocated(self, local, &mplace)?;
957+
M::after_local_moved_to_memory(self, local, &mplace)?;
945958
// Now we can call `access_mut` again, asserting it goes well, and actually
946959
// overwrite things. This points to the entire allocation, not just the part
947960
// the place refers to, i.e. we do this before we apply `offset`.

compiler/rustc_const_eval/src/interpret/projection.rs

+6-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use rustc_target::abi::{self, Size, VariantIdx};
1717
use tracing::{debug, instrument};
1818

1919
use super::{
20-
throw_ub, throw_unsup, InterpCx, InterpResult, MPlaceTy, Machine, MemPlaceMeta, OpTy,
20+
err_ub, throw_ub, throw_unsup, InterpCx, InterpResult, MPlaceTy, Machine, MemPlaceMeta, OpTy,
2121
Provenance, Scalar,
2222
};
2323

@@ -229,7 +229,11 @@ where
229229
// This can only be reached in ConstProp and non-rustc-MIR.
230230
throw_ub!(BoundsCheckFailed { len, index });
231231
}
232-
let offset = stride * index; // `Size` multiplication
232+
// With raw slices, `len` can be so big that this *can* overflow.
233+
let offset = self
234+
.compute_size_in_bytes(stride, index)
235+
.ok_or_else(|| err_ub!(PointerArithOverflow))?;
236+
233237
// All fields have the same layout.
234238
let field_layout = base.layout().field(self, 0);
235239
(offset, field_layout)

compiler/rustc_const_eval/src/interpret/stack.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -534,8 +534,11 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
534534
let dest_place = self.allocate_dyn(layout, MemoryKind::Stack, meta)?;
535535
Operand::Indirect(*dest_place.mplace())
536536
} else {
537-
assert!(!meta.has_meta()); // we're dropping the metadata
538537
// Just make this an efficient immediate.
538+
assert!(!meta.has_meta()); // we're dropping the metadata
539+
// Make sure the machine knows this "write" is happening. (This is important so that
540+
// races involving local variable allocation can be detected by Miri.)
541+
M::after_local_write(self, local, /*storage_live*/ true)?;
539542
// Note that not calling `layout_of` here does have one real consequence:
540543
// if the type is too big, we'll only notice this when the local is actually initialized,
541544
// which is a bit too late -- we should ideally notice this already here, when the memory

compiler/rustc_lint/src/levels.rs

+3-5
Original file line numberDiff line numberDiff line change
@@ -255,11 +255,9 @@ impl<'tcx> Visitor<'tcx> for LintLevelsBuilder<'_, LintLevelQueryMap<'tcx>> {
255255
intravisit::walk_foreign_item(self, it);
256256
}
257257

258-
fn visit_stmt(&mut self, e: &'tcx hir::Stmt<'tcx>) {
259-
// We will call `add_id` when we walk
260-
// the `StmtKind`. The outer statement itself doesn't
261-
// define the lint levels.
262-
intravisit::walk_stmt(self, e);
258+
fn visit_stmt(&mut self, s: &'tcx hir::Stmt<'tcx>) {
259+
self.add_id(s.hir_id);
260+
intravisit::walk_stmt(self, s);
263261
}
264262

265263
fn visit_expr(&mut self, e: &'tcx hir::Expr<'tcx>) {

config.example.toml

+3
Original file line numberDiff line numberDiff line change
@@ -942,3 +942,6 @@
942942
# Copy the linker, DLLs, and various libraries from MinGW into the Rust toolchain.
943943
# Only applies when the host or target is pc-windows-gnu.
944944
#include-mingw-linker = true
945+
946+
# Whether to vendor dependencies for the dist tarball.
947+
#vendor = if "is a tarball source" || "is a git repository" { true } else { false }

library/alloc/src/string.rs

+74
Original file line numberDiff line numberDiff line change
@@ -660,6 +660,56 @@ impl String {
660660
Cow::Owned(res)
661661
}
662662

663+
/// Converts a [`Vec<u8>`] to a `String`, substituting invalid UTF-8
664+
/// sequences with replacement characters.
665+
///
666+
/// See [`from_utf8_lossy`] for more details.
667+
///
668+
/// [`from_utf8_lossy`]: String::from_utf8_lossy
669+
///
670+
/// Note that this function does not guarantee reuse of the original `Vec`
671+
/// allocation.
672+
///
673+
/// # Examples
674+
///
675+
/// Basic usage:
676+
///
677+
/// ```
678+
/// #![feature(string_from_utf8_lossy_owned)]
679+
/// // some bytes, in a vector
680+
/// let sparkle_heart = vec![240, 159, 146, 150];
681+
///
682+
/// let sparkle_heart = String::from_utf8_lossy_owned(sparkle_heart);
683+
///
684+
/// assert_eq!(String::from("💖"), sparkle_heart);
685+
/// ```
686+
///
687+
/// Incorrect bytes:
688+
///
689+
/// ```
690+
/// #![feature(string_from_utf8_lossy_owned)]
691+
/// // some invalid bytes
692+
/// let input: Vec<u8> = b"Hello \xF0\x90\x80World".into();
693+
/// let output = String::from_utf8_lossy_owned(input);
694+
///
695+
/// assert_eq!(String::from("Hello �World"), output);
696+
/// ```
697+
#[must_use]
698+
#[cfg(not(no_global_oom_handling))]
699+
#[unstable(feature = "string_from_utf8_lossy_owned", issue = "129436")]
700+
pub fn from_utf8_lossy_owned(v: Vec<u8>) -> String {
701+
if let Cow::Owned(string) = String::from_utf8_lossy(&v) {
702+
string
703+
} else {
704+
// SAFETY: `String::from_utf8_lossy`'s contract ensures that if
705+
// it returns a `Cow::Borrowed`, it is a valid UTF-8 string.
706+
// Otherwise, it returns a new allocation of an owned `String`, with
707+
// replacement characters for invalid sequences, which is returned
708+
// above.
709+
unsafe { String::from_utf8_unchecked(v) }
710+
}
711+
}
712+
663713
/// Decode a UTF-16–encoded vector `v` into a `String`, returning [`Err`]
664714
/// if `v` contains any invalid data.
665715
///
@@ -2010,6 +2060,30 @@ impl FromUtf8Error {
20102060
&self.bytes[..]
20112061
}
20122062

2063+
/// Converts the bytes into a `String` lossily, substituting invalid UTF-8
2064+
/// sequences with replacement characters.
2065+
///
2066+
/// See [`String::from_utf8_lossy`] for more details on replacement of
2067+
/// invalid sequences, and [`String::from_utf8_lossy_owned`] for the
2068+
/// `String` function which corresponds to this function.
2069+
///
2070+
/// # Examples
2071+
///
2072+
/// ```
2073+
/// #![feature(string_from_utf8_lossy_owned)]
2074+
/// // some invalid bytes
2075+
/// let input: Vec<u8> = b"Hello \xF0\x90\x80World".into();
2076+
/// let output = String::from_utf8(input).unwrap_or_else(|e| e.into_utf8_lossy());
2077+
///
2078+
/// assert_eq!(String::from("Hello �World"), output);
2079+
/// ```
2080+
#[must_use]
2081+
#[cfg(not(no_global_oom_handling))]
2082+
#[unstable(feature = "string_from_utf8_lossy_owned", issue = "129436")]
2083+
pub fn into_utf8_lossy(self) -> String {
2084+
String::from_utf8_lossy_owned(self.bytes)
2085+
}
2086+
20132087
/// Returns the bytes that were attempted to convert to a `String`.
20142088
///
20152089
/// This method is carefully constructed to avoid allocation. It will

0 commit comments

Comments
 (0)