Skip to content

Box::new(expr) first puts expr on the stack, then copies. #50047

Closed
@glandium

Description

@glandium

Consider the following code:

/// Returns a boxed 4096-byte array with every element set to zero.
///
/// Minimal reproduction: the array value is written as an expression and
/// passed by value to `Box::new`.
pub fn foo() -> Box<[u8; 4096]> {
    // `[0; 4096]` is the argument expression handed to `Box::new`.
    Box::new([0; 4096])
}

(The array is deliberately large so that the memset and memcpy calls are easy to spot in the resulting assembly.)

It generates the following assembly:

example::foo:
  push rbx
  mov eax, 4096
  call __rust_probestack
  sub rsp, rax
  mov rdi, rsp
  xor esi, esi
  mov edx, 4096
  call memset@PLT
  mov edi, 4096
  mov esi, 1
  call __rust_alloc@PLT
  mov rbx, rax
  test rbx, rbx
  je .LBB1_1
  mov rsi, rsp
  mov edx, 4096
  mov rdi, rbx
  call memcpy@PLT
  mov rax, rbx
  add rsp, 4096
  pop rbx
  ret
.LBB1_1:
  call <alloc::alloc::Global as core::alloc::GlobalAlloc>::oom
  ud2

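which does a memset, alloc, memcpy dance: it zero-fills a 4096-byte buffer on the stack, allocates the heap block, and then copies the stack buffer into the new allocation.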

I had been accepting this as a fact of life, but today I was looking at a random old version of rustc on godbolt, and it turns out that before Rust 1.12 the memset, alloc, memcpy dance wasn't happening:

example::foo:
  push rbx
  mov edi, 4096
  mov esi, 1
  call __rust_allocate@PLT
  mov rbx, rax
  test rbx, rbx
  je .LBB0_2
  xor esi, esi
  mov edx, 4096
  mov rdi, rbx
  call memset@PLT
  mov rax, rbx
  pop rbx
  ret
.LBB0_2:
  call alloc::oom::oom@PLT

https://godbolt.org/g/J3cy5E

The LLVM IR back then looked like the following:

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define noalias dereferenceable(4096) [4096 x i8]* @example::foo() unnamed_addr #0 {
entry-block:
  %0 = tail call i8* @__rust_allocate(i64 4096, i64 1) #1, !noalias !0
  %1 = icmp eq i8* %0, null
  br i1 %1, label %then-block-57-.i.i, label %"_ZN5alloc5boxed30_$LT$impl$u20$Box$LT$T$GT$$GT$3new17ha7ffa7dfb1e725d2E.exit"

then-block-57-.i.i: ; preds = %entry-block
  tail call void @alloc::oom::oom(), !noalias !0
  unreachable

"_ZN5alloc5boxed30_$LT$impl$u20$Box$LT$T$GT$$GT$3new17ha7ffa7dfb1e725d2E.exit": ; preds = %entry-block
  %2 = bitcast i8* %0 to [4096 x i8]*
  call void @llvm.memset.p0i8.i64(i8* nonnull %0, i8 0, i64 4096, i32 1, i1 false)
  ret [4096 x i8]* %2
}

declare noalias i8* @__rust_allocate(i64, i64) unnamed_addr #1

declare void @alloc::oom::oom() unnamed_addr #2

declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) #3

attributes #0 = { uwtable }
attributes #1 = { nounwind }
attributes #2 = { cold noinline noreturn }
attributes #3 = { argmemonly nounwind }

!0 = !{!1}
!1 = distinct !{!1, !2, !"alloc::boxed::<impl Box<T>>::new: %x"}
!2 = distinct !{!2, !"alloc::boxed::<impl Box<T>>::new"}

while on nightly, it looks like:

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define internal fastcc void @"<alloc::alloc::Global as core::alloc::GlobalAlloc>::oom"() unnamed_addr #0 {
  tail call void @__rust_oom()
  unreachable
}

define noalias align 1 dereferenceable(4096) [4096 x i8]* @example::foo() unnamed_addr #1 {
  %_1 = alloca [4096 x i8], align 1
  %_1.0.sroa_idx2 = getelementptr inbounds [4096 x i8], [4096 x i8]* %_1, i64 0, i64 0
  call void @llvm.lifetime.start.p0i8(i64 4096, i8* nonnull %_1.0.sroa_idx2)
  call void @llvm.memset.p0i8.i64(i8* nonnull %_1.0.sroa_idx2, i8 0, i64 4096, i32 1, i1 false)
  %0 = tail call i8* @__rust_alloc(i64 4096, i64 1) #5, !noalias !0
  %1 = icmp eq i8* %0, null
  br i1 %1, label %bb7.i.i, label %"_ZN35_$LT$alloc..boxed..Box$LT$T$GT$$GT$3new17hbb8214c4d412a6d3E.exit"

bb7.i.i: ; preds = %start
  tail call fastcc void @"<alloc::alloc::Global as core::alloc::GlobalAlloc>::oom"() #5, !noalias !0
  unreachable

"_ZN35_$LT$alloc..boxed..Box$LT$T$GT$$GT$3new17hbb8214c4d412a6d3E.exit": ; preds = %start
  %2 = bitcast i8* %0 to [4096 x i8]*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull %0, i8* nonnull %_1.0.sroa_idx2, i64 4096, i32 1, i1 false) #5
  call void @llvm.lifetime.end.p0i8(i64 4096, i8* nonnull %_1.0.sroa_idx2)
  ret [4096 x i8]* %2
}

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #2

declare void @__rust_oom() unnamed_addr #3

declare noalias i8* @__rust_alloc(i64, i64) unnamed_addr #4

declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #2

declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #2

declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #2

attributes #0 = { inlinehint noreturn nounwind uwtable "probe-stack"="__rust_probestack" }
attributes #1 = { nounwind uwtable "probe-stack"="__rust_probestack" }
attributes #2 = { argmemonly nounwind }
attributes #3 = { cold noreturn nounwind "probe-stack"="__rust_probestack" }
attributes #4 = { nounwind "probe-stack"="__rust_probestack" }
attributes #5 = { nounwind }

!0 = !{!1}
!1 = distinct !{!1, !2, !"<alloc::boxed::Box<T>>::new: %x"}
!2 = distinct !{!2, !"<alloc::boxed::Box<T>>::new"}

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-MIR — Area: Mid-level IR (MIR) - https://blog.rust-lang.org/2016/04/19/MIR.html
    A-box — Area: Our favorite opsem complication
    C-enhancement — Category: An issue proposing an enhancement or a PR with one.
    I-slow — Issue: Problems and improvements with respect to performance of generated code.
    WG-llvm — Working group: LLVM backend code generation

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions