Skip to content

Commit 250a636

Browse files
committed
Avoid copying some undef memory in MIR
During MIR interpretation it may happen that a place containing uninitialized bytes is copied. This would read the current representation of these bytes and write it to the destination, even though those bytes must, by definition, not matter to the execution. This change elides that representation copy when no bytes are defined in such a copy, saving some CPU cycles. In that case the memory of the target allocation is not touched at all, which also means the OS sometimes never needs to provide a physical page backing the representation's memory allocation, reducing memory pressure on the system.
1 parent bd93b77 commit 250a636

File tree

2 files changed

+36
-24
lines changed

2 files changed

+36
-24
lines changed

src/librustc/mir/interpret/allocation.rs

+8
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,14 @@ pub struct AllocationDefinedness {
594594
ranges: smallvec::SmallVec<[u64; 1]>,
595595
}
596596

597+
impl AllocationDefinedness {
598+
pub fn all_bytes_undef(&self) -> bool {
599+
// The `ranges` are run-length encoded and of alternating definedness.
600+
// So if `ranges.len() > 1` then the second block is a range of defined.
601+
self.initial == false && self.ranges.len() == 1
602+
}
603+
}
604+
597605
/// Transferring the definedness mask to other allocations.
598606
impl<Tag, Extra> Allocation<Tag, Extra> {
599607
/// Creates a run-length encoding of the undef mask.

src/librustc_mir/interpret/memory.rs

+28-24
Original file line numberDiff line numberDiff line change
@@ -841,6 +841,9 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
841841

842842
let tcx = self.tcx.tcx;
843843

844+
// The bits have to be saved locally before writing to dest in case src and dest overlap.
845+
assert_eq!(size.bytes() as usize as u64, size.bytes());
846+
844847
// This checks relocation edges on the src.
845848
let src_bytes =
846849
self.get_raw(src.alloc_id)?.get_bytes_with_undef_and_ptr(&tcx, src, size)?.as_ptr();
@@ -855,6 +858,22 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
855858

856859
let dest_bytes = dest_bytes.as_mut_ptr();
857860

861+
// Prepare a copy of the undef mask.
862+
let compressed = self.get_raw(src.alloc_id)?.compress_undef_range(src, size);
863+
864+
if compressed.all_bytes_undef() {
865+
// Fast path: If all bytes are `undef` then there is nothing to copy. The target range
866+
// is marked as undef but we otherwise omit changing the byte representation which may
867+
// be arbitrary for undef bytes.
868+
// This also avoids writing to the target bytes so that the backing allocation is never
869+
// touched if the bytes stay undef for the whole interpreter execution. On contemporary
870+
// operating systems this can avoid physically allocating the page.
871+
let dest_alloc = self.get_raw_mut(dest.alloc_id)?;
872+
dest_alloc.mark_definedness(dest, size * length, false);
873+
dest_alloc.mark_relocation_range(relocations);
874+
return Ok(());
875+
}
876+
858877
// SAFE: The above indexing would have panicked if there weren't at least `size` bytes
859878
// behind `src` and `dest`. Also, we use the overlapping-safe `ptr::copy` if `src` and
860879
// `dest` could possibly overlap.
@@ -889,38 +908,23 @@ impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
889908
}
890909
}
891910

892-
// copy definedness to the destination
893-
self.copy_undef_mask(src, dest, size, length)?;
911+
// now fill in all the data
912+
self.get_raw_mut(dest.alloc_id)?.mark_compressed_undef_range(
913+
&compressed,
914+
dest,
915+
size,
916+
length,
917+
);
918+
894919
// copy the relocations to the destination
895920
self.get_raw_mut(dest.alloc_id)?.mark_relocation_range(relocations);
896921

897922
Ok(())
898923
}
899924
}
900925

901-
/// Undefined bytes
926+
/// Machine pointer introspection.
902927
impl<'mir, 'tcx, M: Machine<'mir, 'tcx>> Memory<'mir, 'tcx, M> {
903-
// FIXME: Add a fast version for the common, nonoverlapping case
904-
fn copy_undef_mask(
905-
&mut self,
906-
src: Pointer<M::PointerTag>,
907-
dest: Pointer<M::PointerTag>,
908-
size: Size,
909-
repeat: u64,
910-
) -> InterpResult<'tcx> {
911-
// The bits have to be saved locally before writing to dest in case src and dest overlap.
912-
assert_eq!(size.bytes() as usize as u64, size.bytes());
913-
914-
let src_alloc = self.get_raw(src.alloc_id)?;
915-
let compressed = src_alloc.compress_undef_range(src, size);
916-
917-
// now fill in all the data
918-
let dest_allocation = self.get_raw_mut(dest.alloc_id)?;
919-
dest_allocation.mark_compressed_undef_range(&compressed, dest, size, repeat);
920-
921-
Ok(())
922-
}
923-
924928
pub fn force_ptr(
925929
&self,
926930
scalar: Scalar<M::PointerTag>,

0 commit comments

Comments
 (0)