Closed
Description
Consider the following code:
// Minimal reproduction: returns a boxed, zero-filled 4096-byte array.
pub fn foo() -> Box<[u8; 4096]> {
// The array literal is written as a by-value argument to Box::new.
Box::new([0; 4096])
}
(The array is deliberately large so that the memset and memcpy calls are easier to spot in the resulting asm.)
It generates the following assembly:
example::foo:
push rbx
mov eax, 4096
call __rust_probestack
sub rsp, rax
mov rdi, rsp
xor esi, esi
mov edx, 4096
call memset@PLT
mov edi, 4096
mov esi, 1
call __rust_alloc@PLT
mov rbx, rax
test rbx, rbx
je .LBB1_1
mov rsi, rsp
mov edx, 4096
mov rdi, rbx
call memcpy@PLT
mov rax, rbx
add rsp, 4096
pop rbx
ret
.LBB1_1:
call <alloc::alloc::Global as core::alloc::GlobalAlloc>::oom
ud2
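which does a memset, alloc, memcpy dance: the array is first zeroed on the stack, then the heap block is allocated, and finally the stack copy is memcpy'd into the allocation.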
I had accepted this as a fact of life, but today I was looking at a random old version of rustc on godbolt, and it turns out that before 1.12 the memset, alloc, memcpy dance wasn't happening; the allocation was simply zeroed in place with a single memset:
example::foo:
push rbx
mov edi, 4096
mov esi, 1
call __rust_allocate@PLT
mov rbx, rax
test rbx, rbx
je .LBB0_2
xor esi, esi
mov edx, 4096
mov rdi, rbx
call memset@PLT
mov rax, rbx
pop rbx
ret
.LBB0_2:
call alloc::oom::oom@PLT
The LLVM IR back then looked like the following:
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define noalias dereferenceable(4096) [4096 x i8]* @example::foo() unnamed_addr #0 {
entry-block:
%0 = tail call i8* @__rust_allocate(i64 4096, i64 1) #1, !noalias !0
%1 = icmp eq i8* %0, null
br i1 %1, label %then-block-57-.i.i, label %"_ZN5alloc5boxed30_$LT$impl$u20$Box$LT$T$GT$$GT$3new17ha7ffa7dfb1e725d2E.exit"
then-block-57-.i.i: ; preds = %entry-block
tail call void @alloc::oom::oom(), !noalias !0
unreachable
"_ZN5alloc5boxed30_$LT$impl$u20$Box$LT$T$GT$$GT$3new17ha7ffa7dfb1e725d2E.exit": ; preds = %entry-block
%2 = bitcast i8* %0 to [4096 x i8]*
call void @llvm.memset.p0i8.i64(i8* nonnull %0, i8 0, i64 4096, i32 1, i1 false)
ret [4096 x i8]* %2
}
declare noalias i8* @__rust_allocate(i64, i64) unnamed_addr #1
declare void @alloc::oom::oom() unnamed_addr #2
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3
attributes #0 = { uwtable }
attributes #1 = { nounwind }
attributes #2 = { cold noinline noreturn }
attributes #3 = { argmemonly nounwind }
!0 = !{!1}
!1 = distinct !{!1, !2, !"alloc::boxed::<impl Box<T>>::new: %x"}
!2 = distinct !{!2, !"alloc::boxed::<impl Box<T>>::new"}
whereas on nightly it looks like this (note the alloca, the memset into it, and the memcpy from it into the allocation):
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define internal fastcc void @"<alloc::alloc::Global as core::alloc::GlobalAlloc>::oom"() unnamed_addr #0 {
tail call void @__rust_oom()
unreachable
}
define noalias align 1 dereferenceable(4096) [4096 x i8]* @example::foo() unnamed_addr #1 {
%_1 = alloca [4096 x i8], align 1
%_1.0.sroa_idx2 = getelementptr inbounds [4096 x i8], [4096 x i8]* %_1, i64 0, i64 0
call void @llvm.lifetime.start.p0i8(i64 4096, i8* nonnull %_1.0.sroa_idx2)
call void @llvm.memset.p0i8.i64(i8* nonnull %_1.0.sroa_idx2, i8 0, i64 4096, i32 1, i1 false)
%0 = tail call i8* @__rust_alloc(i64 4096, i64 1) #5, !noalias !0
%1 = icmp eq i8* %0, null
br i1 %1, label %bb7.i.i, label %"_ZN35_$LT$alloc..boxed..Box$LT$T$GT$$GT$3new17hbb8214c4d412a6d3E.exit"
bb7.i.i: ; preds = %start
tail call fastcc void @"<alloc::alloc::Global as core::alloc::GlobalAlloc>::oom"() #5, !noalias !0
unreachable
"_ZN35_$LT$alloc..boxed..Box$LT$T$GT$$GT$3new17hbb8214c4d412a6d3E.exit": ; preds = %start
%2 = bitcast i8* %0 to [4096 x i8]*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %0, i8* nonnull %_1.0.sroa_idx2, i64 4096, i32 1, i1 false) #5
call void @llvm.lifetime.end.p0i8(i64 4096, i8* nonnull %_1.0.sroa_idx2)
ret [4096 x i8]* %2
}
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #2
declare void @__rust_oom() unnamed_addr #3
declare noalias i8* @__rust_alloc(i64, i64) unnamed_addr #4
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #2
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2
attributes #0 = { inlinehint noreturn nounwind uwtable "probe-stack"="__rust_probestack" }
attributes #1 = { nounwind uwtable "probe-stack"="__rust_probestack" }
attributes #2 = { argmemonly nounwind }
attributes #3 = { cold noreturn nounwind "probe-stack"="__rust_probestack" }
attributes #4 = { nounwind "probe-stack"="__rust_probestack" }
attributes #5 = { nounwind }
!0 = !{!1}
!1 = distinct !{!1, !2, !"<alloc::boxed::Box<T>>::new: %x"}
!2 = distinct !{!2, !"<alloc::boxed::Box<T>>::new"}