Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update README.md #8

Merged
merged 1 commit into from
Feb 23, 2023
Merged

Update README.md #8

merged 1 commit into from
Feb 23, 2023

Conversation

vtjnash
Copy link

@vtjnash vtjnash commented Feb 23, 2023

Some updates, though I am still not sure it is complete, as I get that it can't find autodiff:

$ cargo +enzyme run --release --example sin -Z unstable-options --config 'lto="fat"' --verbose
   Compiling adRust v0.1.0 (/data/vtjnash/rust-ml-autodiff-examples)
     Running `rustc --crate-name sin --edition=2021 examples/sin.rs --error-format=json --json=diagnostic-rendered-ansi,artifacts,future-incompat --diagnostic-width=196 --crate-type bin --emit=dep-info,link -C opt-level=3 -C lto=fat -C metadata=64de30f76d150efa -C extra-filename=-64de30f76d150efa --out-dir /data/vtjnash/rust-ml-autodiff-examples/target/release/examples -L dependency=/data/vtjnash/rust-ml-autodiff-examples/target/release/deps`
error: cannot find attribute `autodiff` in this scope
 --> examples/sin.rs:1:3
  |
1 | #[autodiff()]
  |   ^^^^^^^^

error: cannot find attribute `autodiff` in this scope
 --> examples/sin.rs:5:3
  |
5 | #[autodiff(mode = "reverse", Active, Active)]
  |   ^^^^^^^^

error: could not compile `adRust` due to 2 previous errors

Caused by:
  process didn't exit successfully: `rustc --crate-name sin --edition=2021 examples/sin.rs --error-format=json --json=diagnostic-rendered-ansi,artifacts,future-incompat --diagnostic-width=196 --crate-type bin --emit=dep-info,link -C opt-level=3 -C lto=fat -C metadata=64de30f76d150efa -C extra-filename=-64de30f76d150efa --out-dir /data/vtjnash/rust-ml-autodiff-examples/target/release/examples -L dependency=/data/vtjnash/rust-ml-autodiff-examples/target/release/deps` (exit status: 1)

@ZuseZ4 ZuseZ4 merged commit 366cb56 into EnzymeAD:master Feb 23, 2023
@ZuseZ4
Copy link
Member

ZuseZ4 commented Feb 23, 2023

Thanks!
Yes, we do need to update the examples since the macro changed quite a few times.

@vtjnash vtjnash deleted the patch-1 branch February 23, 2023 16:30
@vtjnash
Copy link
Author

vtjnash commented Feb 23, 2023

Great. Maybe we can talk today, since I tried a few different things, and seemed like it was almost working, but I was making rather random changes.

@ZuseZ4
Copy link
Member

ZuseZ4 commented Feb 23, 2023

Yes definitely! We should also have the fixed examples locally, we can sit together later and upload them.

@ZuseZ4
Copy link
Member

ZuseZ4 commented Feb 23, 2023

Examples are fixed, but if you have more examples which you want to test, we are always looking for new testcases.

ZuseZ4 pushed a commit that referenced this pull request Oct 27, 2024
…raheemdev

Optimize `Box::default` and `Arc::default` to construct more types in place

Both the `Arc` and `Box` `Default` impls currently call `T::default()` before allocating, and then moving the resulting `T` into the allocation.

Most `Default` impls are trivial, which should in theory allow
LLVM to construct `T: Default` directly in the `Box` allocation when calling
`<Box<T>>::default()`.

However, the allocation may fail, which necessitates calling `T`'s destructor if it has one.
If the destructor is non-trivial, then LLVM has a hard time proving that it's
sound to elide, which makes it construct `T` on the stack first, and then copy it into the allocation.

Change both of these impls to allocate first, and then call `T::default` into the uninitialized allocation, so that LLVM doesn't have to prove that it's sound to elide the destructor/initial stack copy.

For example, given the following Rust code:

```rust
#[derive(Default, Clone)]
struct Foo {
    x: Vec<u8>,
    z: String,
    y: Vec<u8>,
}

#[no_mangle]
pub fn src() -> Box<Foo> {
    Box::default()
}
```

<details open>
<summary>Before this PR:</summary>

```llvm
`@__rust_no_alloc_shim_is_unstable` = external global i8

; drop_in_place() generated in case the allocation fails

; core::ptr::drop_in_place<playground::Foo>
; Function Attrs: nounwind nonlazybind uwtable
define internal fastcc void `@"_ZN4core3ptr36drop_in_place$LT$playground..Foo$GT$17hff376aece491233bE"(ptr` noalias nocapture noundef readonly align 8 dereferenceable(72) %_1) unnamed_addr #0 personality ptr `@rust_eh_personality` {
start:
  %_1.val = load i64, ptr %_1, align 8
  %0 = icmp eq i64 %_1.val, 0
  br i1 %0, label %bb6, label %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i"

"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i": ; preds = %start
  %1 = getelementptr inbounds i8, ptr %_1, i64 8
  %_1.val6 = load ptr, ptr %1, align 8, !nonnull !3, !noundef !3
  tail call void `@__rust_dealloc(ptr` noundef nonnull %_1.val6, i64 noundef %_1.val, i64 noundef 1) #8
  br label %bb6

bb6:                                              ; preds = %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i", %start
  %2 = getelementptr inbounds i8, ptr %_1, i64 24
  %.val9 = load i64, ptr %2, align 8
  %3 = icmp eq i64 %.val9, 0
  br i1 %3, label %bb5, label %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i.i11"

"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i.i11": ; preds = %bb6
  %4 = getelementptr inbounds i8, ptr %_1, i64 32
  %.val10 = load ptr, ptr %4, align 8, !nonnull !3, !noundef !3
  tail call void `@__rust_dealloc(ptr` noundef nonnull %.val10, i64 noundef %.val9, i64 noundef 1) #8
  br label %bb5

bb5:                                              ; preds = %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i.i11", %bb6
  %5 = getelementptr inbounds i8, ptr %_1, i64 48
  %.val4 = load i64, ptr %5, align 8
  %6 = icmp eq i64 %.val4, 0
  br i1 %6, label %"_ZN4core3ptr46drop_in_place$LT$alloc..vec..Vec$LT$u8$GT$$GT$17hb5ca95423e113cf7E.exit16", label %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i15"

"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i15": ; preds = %bb5
  %7 = getelementptr inbounds i8, ptr %_1, i64 56
  %.val5 = load ptr, ptr %7, align 8, !nonnull !3, !noundef !3
  tail call void `@__rust_dealloc(ptr` noundef nonnull %.val5, i64 noundef %.val4, i64 noundef 1) #8
  br label %"_ZN4core3ptr46drop_in_place$LT$alloc..vec..Vec$LT$u8$GT$$GT$17hb5ca95423e113cf7E.exit16"

"_ZN4core3ptr46drop_in_place$LT$alloc..vec..Vec$LT$u8$GT$$GT$17hb5ca95423e113cf7E.exit16": ; preds = %bb5, %"_ZN63_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$10deallocate17heaa87468709346b1E.exit.i.i.i4.i15"
  ret void
}

; Function Attrs: nonlazybind uwtable
define noalias noundef nonnull align 8 ptr `@src()` unnamed_addr #1 personality ptr `@rust_eh_personality` {
start:

; alloca to place `Foo` in.
  %_1 = alloca [72 x i8], align 8
  call void `@llvm.lifetime.start.p0(i64` 72, ptr nonnull %_1)
  store i64 0, ptr %_1, align 8
  %_2.sroa.4.0._1.sroa_idx = getelementptr inbounds i8, ptr %_1, i64 8
  store ptr inttoptr (i64 1 to ptr), ptr %_2.sroa.4.0._1.sroa_idx, align 8
  %_2.sroa.5.0._1.sroa_idx = getelementptr inbounds i8, ptr %_1, i64 16
  %_3.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %_1, i64 32
  call void `@llvm.memset.p0.i64(ptr` noundef nonnull align 8 dereferenceable(16) %_2.sroa.5.0._1.sroa_idx, i8 0, i64 16, i1 false)
  store ptr inttoptr (i64 1 to ptr), ptr %_3.sroa.4.0..sroa_idx, align 8
  %_3.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %_1, i64 40
  %_4.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %_1, i64 56
  call void `@llvm.memset.p0.i64(ptr` noundef nonnull align 8 dereferenceable(16) %_3.sroa.5.0..sroa_idx, i8 0, i64 16, i1 false)
  store ptr inttoptr (i64 1 to ptr), ptr %_4.sroa.4.0..sroa_idx, align 8
  %_4.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %_1, i64 64
  store i64 0, ptr %_4.sroa.5.0..sroa_idx, align 8
  %0 = load volatile i8, ptr `@__rust_no_alloc_shim_is_unstable,` align 1, !noalias !4
  %_0.i.i.i = tail call noalias noundef align 8 dereferenceable_or_null(72) ptr `@__rust_alloc(i64` noundef 72, i64 noundef 8) #8, !noalias !4
  %1 = icmp eq ptr %_0.i.i.i, null
  br i1 %1, label %bb2.i, label %"_ZN5alloc5boxed12Box$LT$T$GT$3new17h0864de14f863a27aE.exit"

bb2.i:                                            ; preds = %start
; invoke alloc::alloc::handle_alloc_error
  invoke void `@_ZN5alloc5alloc18handle_alloc_error17h98142d0d8d74161bE(i64` noundef 8, i64 noundef 72) #9
          to label %.noexc unwind label %cleanup.i

.noexc:                                           ; preds = %bb2.i
  unreachable

cleanup.i:                                        ; preds = %bb2.i
  %2 = landingpad { ptr, i32 }
          cleanup
; call core::ptr::drop_in_place<playground::Foo>
  call fastcc void `@"_ZN4core3ptr36drop_in_place$LT$playground..Foo$GT$17hff376aece491233bE"(ptr` noalias noundef nonnull align 8 dereferenceable(72) %_1) #10
  resume { ptr, i32 } %2

"_ZN5alloc5boxed12Box$LT$T$GT$3new17h0864de14f863a27aE.exit": ; preds = %start

; Copy from stack to heap if allocation is successful
  call void `@llvm.memcpy.p0.p0.i64(ptr` noundef nonnull align 8 dereferenceable(72) %_0.i.i.i, ptr noundef nonnull align 8 dereferenceable(72) %_1, i64 72, i1 false)
  call void `@llvm.lifetime.end.p0(i64` 72, ptr nonnull %_1)
  ret ptr %_0.i.i.i
}

```
</details>

<details>
<summary>After this PR</summary>

```llvm
; Notice how there's no `drop_in_place()` generated as well

define noalias noundef nonnull align 8 ptr `@src()` unnamed_addr #0 personality ptr `@rust_eh_personality` {
start:
; no stack allocation

  %0 = load volatile i8, ptr `@__rust_no_alloc_shim_is_unstable,` align 1
  %_0.i.i.i.i.i = tail call noalias noundef align 8 dereferenceable_or_null(72) ptr `@__rust_alloc(i64` noundef 72, i64 noundef 8) #5
  %1 = icmp eq ptr %_0.i.i.i.i.i, null
  br i1 %1, label %bb3.i, label %"_ZN5alloc5boxed16Box$LT$T$C$A$GT$13new_uninit_in17h80d6355ef4b73ea3E.exit"

bb3.i:                                            ; preds = %start
; call alloc::alloc::handle_alloc_error
  tail call void `@_ZN5alloc5alloc18handle_alloc_error17h98142d0d8d74161bE(i64` noundef 8, i64 noundef 72) #6
  unreachable

"_ZN5alloc5boxed16Box$LT$T$C$A$GT$13new_uninit_in17h80d6355ef4b73ea3E.exit": ; preds = %start
; construct `Foo` directly into the allocation if successful

  store i64 0, ptr %_0.i.i.i.i.i, align 8
  %_8.sroa.4.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 8
  store ptr inttoptr (i64 1 to ptr), ptr %_8.sroa.4.0._1.sroa_idx, align 8
  %_8.sroa.5.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 16
  %_8.sroa.7.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 32
  tail call void `@llvm.memset.p0.i64(ptr` noundef nonnull align 8 dereferenceable(16) %_8.sroa.5.0._1.sroa_idx, i8 0, i64 16, i1 false)
  store ptr inttoptr (i64 1 to ptr), ptr %_8.sroa.7.0._1.sroa_idx, align 8
  %_8.sroa.8.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 40
  %_8.sroa.10.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 56
  tail call void `@llvm.memset.p0.i64(ptr` noundef nonnull align 8 dereferenceable(16) %_8.sroa.8.0._1.sroa_idx, i8 0, i64 16, i1 false)
  store ptr inttoptr (i64 1 to ptr), ptr %_8.sroa.10.0._1.sroa_idx, align 8
  %_8.sroa.11.0._1.sroa_idx = getelementptr inbounds i8, ptr %_0.i.i.i.i.i, i64 64
  store i64 0, ptr %_8.sroa.11.0._1.sroa_idx, align 8
  ret ptr %_0.i.i.i.i.i
}
```

</details>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants