IR generation quality litmus test #6923

Closed
thestinger opened this issue Jun 4, 2013 · 4 comments
Labels: A-codegen (Area: Code generation), I-slow (Issue: Problems and improvements with respect to performance of generated code)

Comments

@thestinger (Contributor)

This compiles to the same code post-optimization, so I think it's a good example of how we're turning simple code into very complex IR that LLVM then has to cut down.
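
As a rough, hand-written sketch (not actual compiler output; the @swap name is just illustrative), the post-optimization form both versions should converge on is nothing more than two loads and two stores:

define void @swap(i64* %x, i64* %y) {
  %a = load i64* %x        ; read both values
  %b = load i64* %y
  store i64 %b, i64* %x    ; write them back swapped
  store i64 %a, i64* %y
  ret void
}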

Here's a simple C++ program:

#include <cstddef>
#include <utility>

__attribute__((noinline)) void foo(size_t &x, size_t &y) {
    std::swap(x, y);
}

int main() {
    size_t x = 5;
    size_t y = 10;
    foo(x, y);
}

Clang generates this IR at -O0:

; ModuleID = 'foo.cc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define void @_Z3fooRmS_(i64* %x, i64* %y) uwtable noinline {
  %1 = alloca i64*, align 8
  %2 = alloca i64*, align 8
  store i64* %x, i64** %1, align 8
  store i64* %y, i64** %2, align 8
  %3 = load i64** %1, align 8
  %4 = load i64** %2, align 8
  call void @_ZSt4swapImEvRT_S1_(i64* %3, i64* %4)
  ret void
}

define linkonce_odr void @_ZSt4swapImEvRT_S1_(i64* %__a, i64* %__b) nounwind uwtable inlinehint {
  %1 = alloca i64*, align 8
  %2 = alloca i64*, align 8
  %__tmp = alloca i64, align 8
  store i64* %__a, i64** %1, align 8
  store i64* %__b, i64** %2, align 8
  %3 = load i64** %1, align 8
  %4 = load i64* %3, align 8
  store i64 %4, i64* %__tmp, align 8
  %5 = load i64** %2, align 8
  %6 = load i64* %5, align 8
  %7 = load i64** %1, align 8
  store i64 %6, i64* %7, align 8
  %8 = load i64* %__tmp, align 8
  %9 = load i64** %2, align 8
  store i64 %8, i64* %9, align 8
  ret void
}

define i32 @main() uwtable {
  %x = alloca i64, align 8
  %y = alloca i64, align 8
  store i64 5, i64* %x, align 8
  store i64 10, i64* %y, align 8
  call void @_Z3fooRmS_(i64* %x, i64* %y)
  ret i32 0
}

The same thing in Rust:

use std::util;

#[inline(never)]
fn swap(x: &mut int, y: &mut int) {
    util::swap(x, y);
}

fn main() {
    let mut x = 5;
    let mut y = 10;
    swap(&mut x, &mut y);
}

The horror we generate:

; ModuleID = 'foo.rc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%tydesc = type { i64, i64, void ({}*, %tydesc**, i8*)*, void ({}*, %tydesc**, i8*)*, void ({}*, %tydesc**, i8*)*, void ({}*, %tydesc**, i8*)*, i8*, i8* }

@_rust_crate_map_toplevel = global { i32, i8*, i64, [2 x i64] } { i32 1, i8* bitcast ({} ({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*)* @_ZN7cleanup10annihilate17_c17425bfbdabf8296_07preE to i8*), i64 ptrtoint ([1 x { i64, i64 }]* @_rust_mod_map to i64), [2 x i64] [i64 ptrtoint (i64* @_rust_crate_map_std_0.7-pre_c3ca5d77d81b46c1 to i64), i64 0] }
@_rust_crate_map_std_0.7-pre_c3ca5d77d81b46c1 = external global i64
@_rust_mod_map = internal global [1 x { i64, i64 }] zeroinitializer
@rust_abi_version = constant i64 1

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #0

define internal {} @_ZN4swap17_a71830ca3ed2d65d3_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*, i64*, i64*) #1 {
static_allocas:
  %3 = alloca {}
  %4 = alloca i64
  %5 = alloca i64
  %6 = alloca {}
  %7 = alloca i64*
  %8 = alloca i64*
  %9 = alloca i64
  %10 = alloca i64*
  %11 = alloca i64*
  %12 = alloca i64
  %13 = alloca {}
  %14 = alloca i64*
  %15 = alloca i64*
  %16 = alloca i64
  %17 = alloca i64*
  %18 = alloca i64*
  %19 = alloca i64
  %20 = alloca {}
  %21 = alloca i64*
  %22 = alloca i64*
  %23 = alloca i64
  %24 = alloca i64*
  %25 = alloca i64*
  %26 = alloca i64
  %27 = alloca {}
  %28 = alloca i64*
  %29 = alloca i64*
  %30 = alloca i64
  %31 = alloca i64*
  %32 = alloca i64*
  %33 = alloca i64*
  %34 = alloca i64
  %35 = alloca i64*
  %36 = alloca i64*
  %37 = alloca i64
  %38 = alloca i64*
  %39 = alloca i64*
  %40 = alloca i64
  %41 = alloca i64
  %42 = alloca {}
  %43 = alloca i64*
  %44 = alloca i64*
  %45 = alloca i64*
  %46 = alloca i64*
  br label %48

return:                                           ; preds = %179
  %47 = load {}* %42
  ret {} %47

; <label>:48                                      ; preds = %static_allocas
  store i64* %1, i64** %43
  store i64* %2, i64** %44
  br label %49

; <label>:49                                      ; preds = %48
  %50 = load i64** %43
  store i64* %50, i64** %45
  %51 = load i64** %45
  %52 = load i64** %44
  store i64* %52, i64** %46
  %53 = load i64** %46
  %54 = bitcast {}* %27 to i8*
  call void @llvm.lifetime.start(i64 0, i8* %54)
  %55 = bitcast i64** %28 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %55)
  %56 = bitcast i64** %29 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %56)
  %57 = bitcast i64* %30 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %57)
  %58 = bitcast i64** %31 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %58)
  %59 = bitcast i64** %32 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %59)
  %60 = bitcast i64** %33 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %60)
  %61 = bitcast i64* %34 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %61)
  %62 = bitcast i64** %35 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %62)
  %63 = bitcast i64** %36 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %63)
  %64 = bitcast i64* %37 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %64)
  %65 = bitcast i64** %38 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %65)
  %66 = bitcast i64** %39 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %66)
  %67 = bitcast i64* %40 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %67)
  %68 = bitcast i64* %41 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %68)
  store i64* %51, i64** %28
  store i64* %53, i64** %29
  %69 = call i64 @_ZN8unstable10intrinsics11uninit_287516_b650e1ca1ea7f553_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef)
  store i64 %69, i64* %30
  store i64* %30, i64** %31
  %70 = load i64** %31
  store i64* %70, i64** %32
  %71 = load i64** %32
  %72 = load i64** %28
  store i64* %72, i64** %33
  %73 = load i64** %33
  store i64 1, i64* %34
  %74 = load i64* %34
  %75 = bitcast {}* %20 to i8*
  call void @llvm.lifetime.start(i64 0, i8* %75)
  %76 = bitcast i64** %21 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %76)
  %77 = bitcast i64** %22 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %77)
  %78 = bitcast i64* %23 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %78)
  %79 = bitcast i64** %24 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %79)
  %80 = bitcast i64** %25 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %80)
  %81 = bitcast i64* %26 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %81)
  store i64* %71, i64** %21
  store i64* %73, i64** %22
  store i64 %74, i64* %23
  %82 = load i64** %21
  store i64* %82, i64** %24
  %83 = load i64** %24
  %84 = load i64** %22
  store i64* %84, i64** %25
  %85 = load i64** %25
  %86 = load i64* %23
  store i64 %86, i64* %26
  %87 = load i64* %26
  %88 = call {} @_ZN8unstable10intrinsics13memcpy64_288117_9dac8e5426d6e7233_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64* %83, i64* %85, i64 %87)
  %89 = load {}* %20
  %90 = bitcast {}* %20 to i8*
  call void @llvm.lifetime.end(i64 0, i8* %90)
  %91 = bitcast i64** %21 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %91)
  %92 = bitcast i64** %22 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %92)
  %93 = bitcast i64* %23 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %93)
  %94 = bitcast i64** %24 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %94)
  %95 = bitcast i64** %25 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %95)
  %96 = bitcast i64* %26 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %96)
  %97 = load i64** %28
  store i64* %97, i64** %35
  %98 = load i64** %35
  %99 = load i64** %29
  store i64* %99, i64** %36
  %100 = load i64** %36
  store i64 1, i64* %37
  %101 = load i64* %37
  %102 = bitcast {}* %13 to i8*
  call void @llvm.lifetime.start(i64 0, i8* %102)
  %103 = bitcast i64** %14 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %103)
  %104 = bitcast i64** %15 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %104)
  %105 = bitcast i64* %16 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %105)
  %106 = bitcast i64** %17 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %106)
  %107 = bitcast i64** %18 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %107)
  %108 = bitcast i64* %19 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %108)
  store i64* %98, i64** %14
  store i64* %100, i64** %15
  store i64 %101, i64* %16
  %109 = load i64** %14
  store i64* %109, i64** %17
  %110 = load i64** %17
  %111 = load i64** %15
  store i64* %111, i64** %18
  %112 = load i64** %18
  %113 = load i64* %16
  store i64 %113, i64* %19
  %114 = load i64* %19
  %115 = call {} @_ZN8unstable10intrinsics13memcpy64_288117_9dac8e5426d6e7233_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64* %110, i64* %112, i64 %114)
  %116 = load {}* %13
  %117 = bitcast {}* %13 to i8*
  call void @llvm.lifetime.end(i64 0, i8* %117)
  %118 = bitcast i64** %14 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %118)
  %119 = bitcast i64** %15 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %119)
  %120 = bitcast i64* %16 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %120)
  %121 = bitcast i64** %17 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %121)
  %122 = bitcast i64** %18 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %122)
  %123 = bitcast i64* %19 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %123)
  %124 = load i64** %29
  store i64* %124, i64** %38
  %125 = load i64** %38
  %126 = load i64** %31
  store i64* %126, i64** %39
  %127 = load i64** %39
  store i64 1, i64* %40
  %128 = load i64* %40
  %129 = bitcast {}* %6 to i8*
  call void @llvm.lifetime.start(i64 0, i8* %129)
  %130 = bitcast i64** %7 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %130)
  %131 = bitcast i64** %8 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %131)
  %132 = bitcast i64* %9 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %132)
  %133 = bitcast i64** %10 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %133)
  %134 = bitcast i64** %11 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %134)
  %135 = bitcast i64* %12 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %135)
  store i64* %125, i64** %7
  store i64* %127, i64** %8
  store i64 %128, i64* %9
  %136 = load i64** %7
  store i64* %136, i64** %10
  %137 = load i64** %10
  %138 = load i64** %8
  store i64* %138, i64** %11
  %139 = load i64** %11
  %140 = load i64* %9
  store i64 %140, i64* %12
  %141 = load i64* %12
  %142 = call {} @_ZN8unstable10intrinsics13memcpy64_288117_9dac8e5426d6e7233_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64* %137, i64* %139, i64 %141)
  %143 = load {}* %6
  %144 = bitcast {}* %6 to i8*
  call void @llvm.lifetime.end(i64 0, i8* %144)
  %145 = bitcast i64** %7 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %145)
  %146 = bitcast i64** %8 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %146)
  %147 = bitcast i64* %9 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %147)
  %148 = bitcast i64** %10 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %148)
  %149 = bitcast i64** %11 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %149)
  %150 = bitcast i64* %12 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %150)
  %151 = load i64* %30
  store i64 %151, i64* %41
  %152 = load i64* %41
  %153 = bitcast {}* %3 to i8*
  call void @llvm.lifetime.start(i64 0, i8* %153)
  %154 = bitcast i64* %4 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %154)
  %155 = bitcast i64* %5 to i8*
  call void @llvm.lifetime.start(i64 8, i8* %155)
  store i64 %152, i64* %4
  %156 = load i64* %4
  store i64 %156, i64* %5
  %157 = load i64* %5
  %158 = call {} @_ZN8unstable10intrinsics11forget_288717_9475c65f257b4c1e3_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64 %157)
  %159 = load {}* %3
  %160 = bitcast {}* %3 to i8*
  call void @llvm.lifetime.end(i64 0, i8* %160)
  %161 = bitcast i64* %4 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %161)
  %162 = bitcast i64* %5 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %162)
  %163 = load {}* %27
  %164 = bitcast {}* %27 to i8*
  call void @llvm.lifetime.end(i64 0, i8* %164)
  %165 = bitcast i64** %28 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %165)
  %166 = bitcast i64** %29 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %166)
  %167 = bitcast i64* %30 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %167)
  %168 = bitcast i64** %31 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %168)
  %169 = bitcast i64** %32 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %169)
  %170 = bitcast i64** %33 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %170)
  %171 = bitcast i64* %34 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %171)
  %172 = bitcast i64** %35 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %172)
  %173 = bitcast i64** %36 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %173)
  %174 = bitcast i64* %37 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %174)
  %175 = bitcast i64** %38 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %175)
  %176 = bitcast i64** %39 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %176)
  %177 = bitcast i64* %40 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %177)
  %178 = bitcast i64* %41 to i8*
  call void @llvm.lifetime.end(i64 8, i8* %178)
  br label %179

; <label>:179                                     ; preds = %49
  br label %return
}

define internal i64 @_ZN8unstable10intrinsics11uninit_287516_b650e1ca1ea7f553_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*) {
static_allocas:
  %1 = alloca i64
  br label %3

return:                                           ; preds = %3
  %2 = load i64* %1
  ret i64 %2

; <label>:3                                       ; preds = %static_allocas
  br label %return
}

define internal {} @_ZN8unstable10intrinsics13memcpy64_288117_9dac8e5426d6e7233_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*, i64*, i64*, i64) {
static_allocas:
  %4 = alloca {}
  br label %6

return:                                           ; preds = %6
  %5 = load {}* %4
  ret {} %5

; <label>:6                                       ; preds = %static_allocas
  %7 = bitcast i64* %1 to i8*
  %8 = bitcast i64* %2 to i8*
  %9 = mul i64 8, %3
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 %9, i32 8, i1 false)
  br label %return
}

define internal {} @_ZN8unstable10intrinsics11forget_288717_9475c65f257b4c1e3_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*, i64) {
static_allocas:
  %2 = alloca {}
  br label %4

return:                                           ; preds = %4
  %3 = load {}* %2
  ret {} %3

; <label>:4                                       ; preds = %static_allocas
  br label %return
}

define internal {} @_ZN4main17_3ab48cd538faebd53_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*) #2 {
static_allocas:
  %1 = alloca {}
  %2 = alloca i64
  %3 = alloca i64
  %4 = alloca i64*
  %5 = alloca i64*
  br label %7

return:                                           ; preds = %12
  %6 = load {}* %1
  ret {} %6

; <label>:7                                       ; preds = %static_allocas
  store i64 5, i64* %2
  store i64 10, i64* %3
  br label %8

; <label>:8                                       ; preds = %7
  store i64* %2, i64** %4
  %9 = load i64** %4
  store i64* %3, i64** %5
  %10 = load i64** %5
  %11 = call {} @_ZN4swap17_a71830ca3ed2d65d3_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64* %9, i64* %10)
  br label %12

; <label>:12                                      ; preds = %8
  br label %return
}

define {} @_rust_main({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*) {
static_allocas:
  %1 = alloca {}
  br label %3

return:                                           ; preds = %3
  %2 = load {}* %1
  ret {} %2

; <label>:3                                       ; preds = %static_allocas
  %4 = call {} @_ZN4main17_3ab48cd538faebd53_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* %0)
  store {} %4, {}* %1
  br label %return
}

define i64 @main(i64, i8**) {
top:
  %2 = call i64 @_ZN8unstable4lang5start17_80798dc0c0e380dc6_07preE({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* null, i8* bitcast ({} ({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*)* @_rust_main to i8*), i64 %0, i8** %1, i8* bitcast ({ i32, i8*, i64, [2 x i64] }* @_rust_crate_map_toplevel to i8*))
  ret i64 %2
}

declare i64 @_ZN8unstable4lang5start17_80798dc0c0e380dc6_07preE({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*, i8*, i64, i8**, i8*)

declare {} @_ZN7cleanup10annihilate17_c17425bfbdabf8296_07preE({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*)

declare void @llvm.lifetime.start(i64, i8* nocapture) #0

declare void @llvm.lifetime.end(i64, i8* nocapture) #0

attributes #0 = { nounwind }
attributes #1 = { noinline uwtable }
attributes #2 = { uwtable }
@Aatch (Contributor) commented Jun 4, 2013

Ok, I did a bit more experimenting, and it's not /quite/ as bad as it seems. The same code, with inlining disabled completely, produces this:

define {} @_ZN4swap16_d0923be40fc71903_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*, i64*, i64*) #1 {
static_allocas:
  %3 = alloca {}
  %4 = alloca i64*
  %5 = alloca i64*
  %6 = alloca i64*
  %7 = alloca i64*
  br label %9

return:                                           ; preds = %16
  %8 = load {}* %3
  ret {} %8

; <label>:9                                       ; preds = %static_allocas
  store i64* %1, i64** %4
  store i64* %2, i64** %5
  br label %10

; <label>:10                                      ; preds = %9
  %11 = load i64** %4
  store i64* %11, i64** %6
  %12 = load i64** %6
  %13 = load i64** %5
  store i64* %13, i64** %7
  %14 = load i64** %7
  %15 = call {} @_ZN4util9swap_287416_d0923be40fc71903_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64* %12, i64* %14)
  br label %16

; <label>:16                                      ; preds = %10
  br label %return
}

define internal {} @_ZN4util9swap_287416_d0923be40fc71903_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*, i64*, i64*) #2 {
static_allocas:
  %3 = alloca {}
  %4 = alloca i64*
  %5 = alloca i64*
  %6 = alloca i64
  %7 = alloca i64*
  %8 = alloca i64*
  %9 = alloca i64*
  %10 = alloca i64
  %11 = alloca i64*
  %12 = alloca i64*
  %13 = alloca i64
  %14 = alloca i64*
  %15 = alloca i64*
  %16 = alloca i64
  %17 = alloca i64
  br label %19

return:                                           ; preds = %53
  %18 = load {}* %3
  ret {} %18

; <label>:19                                      ; preds = %static_allocas
  store i64* %1, i64** %4
  store i64* %2, i64** %5
  br label %20

; <label>:20                                      ; preds = %19
  br label %21

; <label>:21                                      ; preds = %20
  %22 = call i64 @_ZN8unstable10intrinsics11uninit_287617_9d40a93ccd5df7ef3_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef)
  store i64 %22, i64* %6
  br label %23

; <label>:23                                      ; preds = %21
  store i64* %6, i64** %7
  br label %24

; <label>:24                                      ; preds = %23
  %25 = load i64** %7
  store i64* %25, i64** %8
  %26 = load i64** %8
  %27 = load i64** %4
  store i64* %27, i64** %9
  %28 = load i64** %9
  store i64 1, i64* %10
  %29 = load i64* %10
  %30 = call {} @_ZN3ptr31copy_nonoverlapping_memory_287917_cf9a2fc5e9ba32573_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64* %26, i64* %28, i64 %29)
  br label %31

; <label>:31                                      ; preds = %24
  br label %32

; <label>:32                                      ; preds = %31
  %33 = load i64** %4
  store i64* %33, i64** %11
  %34 = load i64** %11
  %35 = load i64** %5
  store i64* %35, i64** %12
  %36 = load i64** %12
  store i64 1, i64* %13
  %37 = load i64* %13
  %38 = call {} @_ZN3ptr31copy_nonoverlapping_memory_287917_cf9a2fc5e9ba32573_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64* %34, i64* %36, i64 %37)
  br label %39

; <label>:39                                      ; preds = %32
  br label %40

; <label>:40                                      ; preds = %39
  %41 = load i64** %5
  store i64* %41, i64** %14
  %42 = load i64** %14
  %43 = load i64** %7
  store i64* %43, i64** %15
  %44 = load i64** %15
  store i64 1, i64* %16
  %45 = load i64* %16
  %46 = call {} @_ZN3ptr31copy_nonoverlapping_memory_287917_cf9a2fc5e9ba32573_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64* %42, i64* %44, i64 %45)
  br label %47

; <label>:47                                      ; preds = %40
  br label %48

; <label>:48                                      ; preds = %47
  %49 = load i64* %6
  store i64 %49, i64* %17
  %50 = load i64* %17
  %51 = call {} @_ZN4cast11forget_288516_b11419b9f83337d3_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64 %50)
  br label %52

; <label>:52                                      ; preds = %48
  br label %53

; <label>:53                                      ; preds = %52
  br label %return
}

define internal i64 @_ZN8unstable10intrinsics11uninit_287617_9d40a93ccd5df7ef3_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*) {
static_allocas:
  %1 = alloca i64
  br label %3

return:                                           ; preds = %3
  %2 = load i64* %1
  ret i64 %2

; <label>:3                                       ; preds = %static_allocas
  br label %return
}

define internal {} @_ZN3ptr31copy_nonoverlapping_memory_287917_cf9a2fc5e9ba32573_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*, i64*, i64*, i64) #2 {
static_allocas:
  %4 = alloca {}
  %5 = alloca i64*
  %6 = alloca i64*
  %7 = alloca i64
  %8 = alloca i64*
  %9 = alloca i64*
  %10 = alloca i64
  br label %12

return:                                           ; preds = %21
  %11 = load {}* %4
  ret {} %11

; <label>:12                                      ; preds = %static_allocas
  store i64* %1, i64** %5
  store i64* %2, i64** %6
  store i64 %3, i64* %7
  br label %13

; <label>:13                                      ; preds = %12
  %14 = load i64** %5
  store i64* %14, i64** %8
  %15 = load i64** %8
  %16 = load i64** %6
  store i64* %16, i64** %9
  %17 = load i64** %9
  %18 = load i64* %7
  store i64 %18, i64* %10
  %19 = load i64* %10
  %20 = call {} @_ZN8unstable10intrinsics13memcpy64_288216_e3b16c8f2d61a2b3_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64* %15, i64* %17, i64 %19)
  br label %21

; <label>:21                                      ; preds = %13
  br label %return
}

define internal {} @_ZN8unstable10intrinsics13memcpy64_288216_e3b16c8f2d61a2b3_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*, i64*, i64*, i64) {
static_allocas:
  %4 = alloca {}
  br label %6

return:                                           ; preds = %6
  %5 = load {}* %4
  ret {} %5

; <label>:6                                       ; preds = %static_allocas
  %7 = bitcast i64* %1 to i8*
  %8 = bitcast i64* %2 to i8*
  %9 = mul i64 8, %3
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 %9, i32 8, i1 false)
  br label %return
}

define internal {} @_ZN4cast11forget_288516_b11419b9f83337d3_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*, i64) #2 {
static_allocas:
  %2 = alloca {}
  %3 = alloca i64
  %4 = alloca i64
  br label %6

return:                                           ; preds = %11
  %5 = load {}* %2
  ret {} %5

; <label>:6                                       ; preds = %static_allocas
  store i64 %1, i64* %3
  br label %7

; <label>:7                                       ; preds = %6
  %8 = load i64* %3
  store i64 %8, i64* %4
  %9 = load i64* %4
  %10 = call {} @_ZN8unstable10intrinsics11forget_288816_dd7eac2f264cb903_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64 %9)
  br label %11

; <label>:11                                      ; preds = %7
  br label %return
}

define internal {} @_ZN8unstable10intrinsics11forget_288816_dd7eac2f264cb903_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*, i64) {
static_allocas:
  %2 = alloca {}
  br label %4

return:                                           ; preds = %4
  %3 = load {}* %2
  ret {} %3

; <label>:4                                       ; preds = %static_allocas
  br label %return
}

define internal {} @_ZN4main17_8030a1726da236673_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)*) #1 {
static_allocas:
  %1 = alloca {}
  %2 = alloca i64
  %3 = alloca i64
  %4 = alloca i64*
  %5 = alloca i64*
  br label %7

return:                                           ; preds = %12
  %6 = load {}* %1
  ret {} %6

; <label>:7                                       ; preds = %static_allocas
  store i64 5, i64* %2
  store i64 10, i64* %3
  br label %8

; <label>:8                                       ; preds = %7
  store i64* %2, i64** %4
  %9 = load i64** %4
  store i64* %3, i64** %5
  %10 = load i64** %5
  %11 = call {} @_ZN4swap16_d0923be40fc71903_00E({ i64, %tydesc*, i8*, i8*, i8 } addrspace(1)* undef, i64* %9, i64* %10)
  br label %12

; <label>:12                                      ; preds = %8
  br label %return
}

That still isn't amazing, but it's not as bad as before. Many of the casts and the llvm.lifetime.{start,end} calls are generated by the inliner, not by us.
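
For context, here is a minimal hand-written illustration (not taken from the dump above) of the pattern the inliner introduces: when a callee with its own allocas is inlined, its stack slots are hoisted into the caller and bracketed with lifetime markers, roughly like this:

  %__tmp = alloca i64                            ; callee slot hoisted into the caller
  %p = bitcast i64* %__tmp to i8*
  call void @llvm.lifetime.start(i64 8, i8* %p)
  ; ... inlined callee body uses %__tmp ...
  call void @llvm.lifetime.end(i64 8, i8* %p)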

@glinscott (Contributor)

This appears to be much, much better on current master, although there are still calls to copy_nonoverlapping_memory where it looks like a plain load and store would suffice.
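
As a hand-written sketch of that (an assumption about what could be emitted, not compiler output): each of the copies in the util::swap IR below has a constant count of 1 and i64 operands, so instead of a call chain through copy_nonoverlapping_memory and memcpy64, the frontend could emit a single load and store per copy, with %src and %dst standing in for the actual operands:

  %v = load i64* %src
  store i64 %v, i64* %dst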

define void @"_ZN4swap17_8abdbe47da3a37657_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }*, i64*, i64*) #1 {
static_allocas:
  %__arg = alloca i64*
  %__arg1 = alloca i64*
  br label %"function top level"

"function top level":                             ; preds = %static_allocas
  store i64* %1, i64** %__arg
  store i64* %2, i64** %__arg1
  %3 = load i64** %__arg
  %4 = load i64** %__arg1
  call void @"_ZN4util9swap_321717_8abdbe47da3a37657_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }* undef, i64* %3, i64* %4)
  br label %return

return:                                           ; preds = %"function top level"
  ret void
}

define internal void @"_ZN4util9swap_321717_8abdbe47da3a37657_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }*, i64*, i64*) #2 {
static_allocas:
  %__arg = alloca i64*
  %__arg1 = alloca i64*
  %tmp = alloca i64
  %t = alloca i64*
  br label %"function top level"

"function top level":                             ; preds = %static_allocas
  store i64* %1, i64** %__arg
  store i64* %2, i64** %__arg1
  %3 = call i64 @"_ZN8unstable10intrinsics11uninit_321917_95a44b74b1a07b1f7_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }* undef)
  store i64 %3, i64* %tmp
  store i64* %tmp, i64** %t
  %4 = load i64** %t
  %5 = load i64** %__arg
  call void @"_ZN3ptr31copy_nonoverlapping_memory_322216_525ce544f73e4927_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }* undef, i64* %4, i64* %5, i64 1)
  %6 = load i64** %__arg
  %7 = load i64** %__arg1
  call void @"_ZN3ptr31copy_nonoverlapping_memory_322216_525ce544f73e4927_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }* undef, i64* %6, i64* %7, i64 1)
  %8 = load i64** %__arg1
  %9 = load i64** %t
  call void @"_ZN3ptr31copy_nonoverlapping_memory_322216_525ce544f73e4927_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }* undef, i64* %8, i64* %9, i64 1)
  %10 = load i64* %tmp
  call void @"_ZN4cast11forget_322817_36feec33e3f151c97_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }* undef, i64 %10)
  br label %return

return:                                           ; preds = %"function top level"
  ret void
}

define internal i64 @"_ZN8unstable10intrinsics11uninit_321917_95a44b74b1a07b1f7_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }*) {
static_allocas:
  %__make_return_pointer = alloca i64
  br label %"function top level"

return:                                           ; preds = %"function top level"
  %1 = load i64* %__make_return_pointer
  ret i64 %1

"function top level":                             ; preds = %static_allocas
  br label %return
}

define internal void @"_ZN3ptr31copy_nonoverlapping_memory_322216_525ce544f73e4927_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }*, i64*, i64*, i64) #2 {
static_allocas:
  %__arg = alloca i64*
  %__arg1 = alloca i64*
  %__arg2 = alloca i64
  br label %"function top level"

"function top level":                             ; preds = %static_allocas
  store i64* %1, i64** %__arg
  store i64* %2, i64** %__arg1
  store i64 %3, i64* %__arg2
  %4 = load i64** %__arg
  %5 = load i64** %__arg1
  %6 = load i64* %__arg2
  call void @"_ZN8unstable10intrinsics13memcpy64_322517_bdf0af8f1983aa627_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }* undef, i64* %4, i64* %5, i64 %6)
  br label %return

return:                                           ; preds = %"function top level"
  ret void
}

define internal void @"_ZN8unstable10intrinsics13memcpy64_322517_bdf0af8f1983aa627_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }*, i64*, i64*, i64) {
static_allocas:
  br label %"function top level"

return:                                           ; preds = %"function top level"
  ret void

"function top level":                             ; preds = %static_allocas
  %4 = bitcast i64* %1 to i8*
  %5 = bitcast i64* %2 to i8*
  %6 = mul i64 8, %3
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* %5, i64 %6, i32 8, i1 false)
  br label %return
}

define internal void @"_ZN4cast11forget_322817_36feec33e3f151c97_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }*, i64) #2 {
static_allocas:
  %__arg = alloca i64
  br label %"function top level"

"function top level":                             ; preds = %static_allocas
  store i64 %1, i64* %__arg
  %2 = load i64* %__arg
  call void @"_ZN8unstable10intrinsics11forget_323116_b1abe21e5555db37_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }* undef, i64 %2)
  br label %return

return:                                           ; preds = %"function top level"
  ret void
}

define internal void @"_ZN8unstable10intrinsics11forget_323116_b1abe21e5555db37_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }*, i64) {
static_allocas:
  br label %"function top level"

return:                                           ; preds = %"function top level"
  ret void

"function top level":                             ; preds = %static_allocas
  br label %return
}

define void @"_ZN4main16_eeec6a0647358e17_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }*) #3 {
static_allocas:
  %x = alloca i64
  %y = alloca i64
  br label %"function top level"

"function top level":                             ; preds = %static_allocas
  store i64 5, i64* %x
  store i64 10, i64* %y
  call void @"_ZN4swap17_8abdbe47da3a37657_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }* undef, i64* %x, i64* %y)
  br label %return

return:                                           ; preds = %"function top level"
  ret void
}

define void @_rust_main({ i64, %tydesc*, i8*, i8*, i8 }*) {
static_allocas:
  br label %"function top level"

return:                                           ; preds = %"function top level"
  ret void

"function top level":                             ; preds = %static_allocas
  call void @"_ZN4main16_eeec6a0647358e17_0$x2e0E"({ i64, %tydesc*, i8*, i8*, i8 }* %0)
  br label %return
}

define i64 @main(i64, i8**) {
top:
  %2 = call i64 @"_ZN8unstable4lang5start17_76d6c774aa357c7a14_0$x2e8$x2dpreE"({ i64, %tydesc*, i8*, i8*, i8 }* null, i8* bitcast (void ({ i64, %tydesc*, i8*, i8*, i8 }*)* @_rust_main to i8*), i64 %0, i8** %1, i8* bitcast ({ i32, i8*, i64, [2 x i64] }* @_rust_crate_map_toplevel to i8*))
  ret i64 %2
}

@pnkfelix (Member) commented Sep 4, 2013

visiting for triage, email from 2013-09-02.

This is "just" a work item, so no need for milestone nomination. (At most, one might consider trying to incorporate it into some sort of static code quality regression test based on the size of the generated IR. But I do not think that's necessary.)

Labels look right too; moving on.

@thestinger (Contributor, Author)

Things have improved a lot since I filed this, so I'll close the issue and file some more specific bugs.
