diff --git a/tests/codegen-llvm/autodiff/abi_handling.rs b/tests/codegen-llvm/autodiff/abi_handling.rs index 5c8126898a8d7..a8bc482fc293f 100644 --- a/tests/codegen-llvm/autodiff/abi_handling.rs +++ b/tests/codegen-llvm/autodiff/abi_handling.rs @@ -38,14 +38,14 @@ fn square(x: f32) -> f32 { // CHECK-LABEL: ; abi_handling::df1 // CHECK-NEXT: Function Attrs // debug-NEXT: define internal { float, float } -// debug-SAME: (ptr align 4 %x, ptr align 4 %bx_0) +// debug-SAME: (ptr {{.*}}%x, ptr {{.*}}%bx_0) // release-NEXT: define internal fastcc float // release-SAME: (float %x.0.val, float %x.4.val) // CHECK-LABEL: ; abi_handling::f1 // CHECK-NEXT: Function Attrs // debug-NEXT: define internal float -// debug-SAME: (ptr align 4 %x) +// debug-SAME: (ptr {{.*}}%x) // release-NEXT: define internal fastcc noundef float // release-SAME: (float %x.0.val, float %x.4.val) #[autodiff_forward(df1, Dual, Dual)] @@ -58,7 +58,7 @@ fn f1(x: &[f32; 2]) -> f32 { // CHECK-NEXT: Function Attrs // debug-NEXT: define internal { float, float } // debug-SAME: (ptr %f, float %x, float %dret) -// release-NEXT: define internal fastcc float +// release-NEXT: define internal fastcc noundef float // release-SAME: (float noundef %x) // CHECK-LABEL: ; abi_handling::f2 @@ -77,13 +77,13 @@ fn f2(f: fn(f32) -> f32, x: f32) -> f32 { // CHECK-NEXT: Function Attrs // debug-NEXT: define internal { float, float } // debug-SAME: (ptr align 4 %x, ptr align 4 %bx_0, ptr align 4 %y, ptr align 4 %by_0) -// release-NEXT: define internal fastcc { float, float } +// release-NEXT: define internal fastcc float // release-SAME: (float %x.0.val) // CHECK-LABEL: ; abi_handling::f3 // CHECK-NEXT: Function Attrs // debug-NEXT: define internal float -// debug-SAME: (ptr align 4 %x, ptr align 4 %y) +// debug-SAME: (ptr {{.*}}%x, ptr {{.*}}%y) // release-NEXT: define internal fastcc noundef float // release-SAME: (float %x.0.val) #[autodiff_forward(df3, Dual, Dual, Dual)] @@ -160,7 +160,7 @@ fn f6(i: NestedInput) -> f32 { // CHECK-LABEL: ; abi_handling::f7 // CHECK-NEXT: Function Attrs // debug-NEXT: define internal float -// debug-SAME: (ptr align 4 %x.0, ptr align 4 %x.1) +// debug-SAME: (ptr {{.*}}%x.0, ptr {{.*}}%x.1) // release-NEXT: define internal fastcc noundef float // release-SAME: (float %x.0.0.val, float %x.1.0.val) #[autodiff_forward(df7, Dual, Dual)] diff --git a/tests/codegen-llvm/autodiff/batched.rs b/tests/codegen-llvm/autodiff/batched.rs index 0ff6134bc07d5..5a723ff041839 100644 --- a/tests/codegen-llvm/autodiff/batched.rs +++ b/tests/codegen-llvm/autodiff/batched.rs @@ -1,13 +1,11 @@ //@ compile-flags: -Zautodiff=Enable,NoTT,NoPostopt -C opt-level=3 -Clto=fat //@ no-prefer-dynamic //@ needs-enzyme -// -// In Enzyme, we test against a large range of LLVM versions (5+) and don't have overly many -// breakages. One benefit is that we match the IR generated by Enzyme only after running it -// through LLVM's O3 pipeline, which will remove most of the noise. -// However, our integration test could also be affected by changes in how rustc lowers MIR into -// LLVM-IR, which could cause additional noise and thus breakages. If that's the case, we should -// reduce this test to only match the first lines and the ret instructions. + +// This test combines two features of Enzyme, automatic differentiation and batching. As such, it is +// especially prone to breakages. I reduced it therefore to a minimal check matches argument/return +// types. Based on the original batching author, implementing the batching feature over MLIR instead +// of LLVM should give significantly more reliable performance. #![feature(autodiff)] @@ -22,69 +20,20 @@ fn square(x: &f32) -> f32 { x * x } +// The base ("scalar") case d_square3, without batching. +// CHECK: define internal fastcc float @fwddiffesquare(float %x.0.val, float %"x'.0.val") +// CHECK: %0 = fadd fast float %"x'.0.val", %"x'.0.val" +// CHECK-NEXT: %1 = fmul fast float %0, %x.0.val +// CHECK-NEXT: ret float %1 +// CHECK-NEXT: } + // d_square2 -// CHECK: define internal [4 x float] @fwddiffe4square(ptr noalias noundef readonly align 4 captures(none) dereferenceable(4) %x, [4 x ptr] %"x'") -// CHECK-NEXT: start: -// CHECK-NEXT: %0 = extractvalue [4 x ptr] %"x'", 0 -// CHECK-NEXT: %"_2'ipl" = load float, ptr %0, align 4 -// CHECK-NEXT: %1 = extractvalue [4 x ptr] %"x'", 1 -// CHECK-NEXT: %"_2'ipl1" = load float, ptr %1, align 4 -// CHECK-NEXT: %2 = extractvalue [4 x ptr] %"x'", 2 -// CHECK-NEXT: %"_2'ipl2" = load float, ptr %2, align 4 -// CHECK-NEXT: %3 = extractvalue [4 x ptr] %"x'", 3 -// CHECK-NEXT: %"_2'ipl3" = load float, ptr %3, align 4 -// CHECK-NEXT: %_2 = load float, ptr %x, align 4 -// CHECK-NEXT: %4 = fmul fast float %"_2'ipl", %_2 -// CHECK-NEXT: %5 = fmul fast float %"_2'ipl1", %_2 -// CHECK-NEXT: %6 = fmul fast float %"_2'ipl2", %_2 -// CHECK-NEXT: %7 = fmul fast float %"_2'ipl3", %_2 -// CHECK-NEXT: %8 = fmul fast float %"_2'ipl", %_2 -// CHECK-NEXT: %9 = fmul fast float %"_2'ipl1", %_2 -// CHECK-NEXT: %10 = fmul fast float %"_2'ipl2", %_2 -// CHECK-NEXT: %11 = fmul fast float %"_2'ipl3", %_2 -// CHECK-NEXT: %12 = fadd fast float %4, %8 -// CHECK-NEXT: %13 = insertvalue [4 x float] undef, float %12, 0 -// CHECK-NEXT: %14 = fadd fast float %5, %9 -// CHECK-NEXT: %15 = insertvalue [4 x float] %13, float %14, 1 -// CHECK-NEXT: %16 = fadd fast float %6, %10 -// CHECK-NEXT: %17 = insertvalue [4 x float] %15, float %16, 2 -// CHECK-NEXT: %18 = fadd fast float %7, %11 -// CHECK-NEXT: %19 = insertvalue [4 x float] %17, float %18, 3 -// CHECK-NEXT: ret [4 x float] %19 +// CHECK: define internal fastcc [4 x float] @fwddiffe4square(float %x.0.val, [4 x ptr] %"x'") +// CHECK: ret [4 x float] // CHECK-NEXT: } -// d_square3, the extra float is the original return value (x * x) -// CHECK: define internal { float, [4 x float] } @fwddiffe4square.1(ptr noalias noundef readonly align 4 captures(none) dereferenceable(4) %x, [4 x ptr] %"x'") -// CHECK-NEXT: start: -// CHECK-NEXT: %0 = extractvalue [4 x ptr] %"x'", 0 -// CHECK-NEXT: %"_2'ipl" = load float, ptr %0, align 4 -// CHECK-NEXT: %1 = extractvalue [4 x ptr] %"x'", 1 -// CHECK-NEXT: %"_2'ipl1" = load float, ptr %1, align 4 -// CHECK-NEXT: %2 = extractvalue [4 x ptr] %"x'", 2 -// CHECK-NEXT: %"_2'ipl2" = load float, ptr %2, align 4 -// CHECK-NEXT: %3 = extractvalue [4 x ptr] %"x'", 3 -// CHECK-NEXT: %"_2'ipl3" = load float, ptr %3, align 4 -// CHECK-NEXT: %_2 = load float, ptr %x, align 4 -// CHECK-NEXT: %_0 = fmul float %_2, %_2 -// CHECK-NEXT: %4 = fmul fast float %"_2'ipl", %_2 -// CHECK-NEXT: %5 = fmul fast float %"_2'ipl1", %_2 -// CHECK-NEXT: %6 = fmul fast float %"_2'ipl2", %_2 -// CHECK-NEXT: %7 = fmul fast float %"_2'ipl3", %_2 -// CHECK-NEXT: %8 = fmul fast float %"_2'ipl", %_2 -// CHECK-NEXT: %9 = fmul fast float %"_2'ipl1", %_2 -// CHECK-NEXT: %10 = fmul fast float %"_2'ipl2", %_2 -// CHECK-NEXT: %11 = fmul fast float %"_2'ipl3", %_2 -// CHECK-NEXT: %12 = fadd fast float %4, %8 -// CHECK-NEXT: %13 = insertvalue [4 x float] undef, float %12, 0 -// CHECK-NEXT: %14 = fadd fast float %5, %9 -// CHECK-NEXT: %15 = insertvalue [4 x float] %13, float %14, 1 -// CHECK-NEXT: %16 = fadd fast float %6, %10 -// CHECK-NEXT: %17 = insertvalue [4 x float] %15, float %16, 2 -// CHECK-NEXT: %18 = fadd fast float %7, %11 -// CHECK-NEXT: %19 = insertvalue [4 x float] %17, float %18, 3 -// CHECK-NEXT: %20 = insertvalue { float, [4 x float] } undef, float %_0, 0 -// CHECK-NEXT: %21 = insertvalue { float, [4 x float] } %20, [4 x float] %19, 1 -// CHECK-NEXT: ret { float, [4 x float] } %21 +// CHECK: define internal fastcc { float, [4 x float] } @fwddiffe4square.{{.*}}(float %x.0.val, [4 x ptr] %"x'") +// CHECK: ret { float, [4 x float] } // CHECK-NEXT: } fn main() { diff --git a/tests/codegen-llvm/autodiff/generic.rs b/tests/codegen-llvm/autodiff/generic.rs index 6f56460a2b6d1..b31468c91c9c2 100644 --- a/tests/codegen-llvm/autodiff/generic.rs +++ b/tests/codegen-llvm/autodiff/generic.rs @@ -1,6 +1,14 @@ //@ compile-flags: -Zautodiff=Enable -Zautodiff=NoPostopt -C opt-level=3 -Clto=fat //@ no-prefer-dynamic //@ needs-enzyme +//@ revisions: F32 F64 Main + +// Here we verify that the function `square` can be differentiated over f64. +// This is interesting to test, since the user never calls `square` with f64, so on it's own rustc +// would have no reason to monomorphize it that way. However, Enzyme needs the f64 version of +// `square` in order to be able to differentiate it, so we have logic to enforce the +// monomorphization. Here, we test this logic. + #![feature(autodiff)] use std::autodiff::autodiff_reverse; @@ -12,32 +20,37 @@ fn square + Copy>(x: &T) -> T { } // Ensure that `d_square::` code is generated -// -// CHECK: ; generic::square -// CHECK-NEXT: ; Function Attrs: {{.*}} -// CHECK-NEXT: define internal {{.*}} float -// CHECK-NEXT: start: -// CHECK-NOT: ret -// CHECK: fmul float + +// F32-LABEL: ; generic::square:: +// F32-NEXT: ; Function Attrs: {{.*}} +// F32-NEXT: define internal {{.*}} float +// F32-NEXT: start: +// F32-NOT: ret +// F32: fmul float // Ensure that `d_square::` code is generated even if `square::` was never called -// -// CHECK: ; generic::square -// CHECK-NEXT: ; Function Attrs: -// CHECK-NEXT: define internal {{.*}} double -// CHECK-NEXT: start: -// CHECK-NOT: ret -// CHECK: fmul double + +// F64-LABEL: ; generic::d_square:: +// F64-NEXT: ; Function Attrs: {{.*}} +// F64-NEXT: define internal {{.*}} void +// F64-NEXT: start: +// F64-NEXT: {{(tail )?}}call {{(fastcc )?}}void @diffe_{{.*}}(double {{.*}}, ptr {{.*}}) +// F64-NEXT: ret void + +// Main-LABEL: ; generic::main +// Main: ; call generic::square:: +// Main: ; call generic::d_square:: fn main() { let xf32: f32 = std::hint::black_box(3.0); let xf64: f64 = std::hint::black_box(3.0); + let seed: f64 = std::hint::black_box(1.0); let outputf32 = square::(&xf32); assert_eq!(9.0, outputf32); let mut df_dxf64: f64 = std::hint::black_box(0.0); - let output_f64 = d_square::(&xf64, &mut df_dxf64, 1.0); + let output_f64 = d_square::(&xf64, &mut df_dxf64, seed); assert_eq!(6.0, df_dxf64); } diff --git a/tests/codegen-llvm/autodiff/identical_fnc.rs b/tests/codegen-llvm/autodiff/identical_fnc.rs index 1c18e7acc4b63..a8b186c302ea1 100644 --- a/tests/codegen-llvm/autodiff/identical_fnc.rs +++ b/tests/codegen-llvm/autodiff/identical_fnc.rs @@ -8,7 +8,7 @@ // merged placeholder function anymore, and compilation would fail. We prevent this by disabling // LLVM's merge_function pass before AD. Here we implicetely test that our solution keeps working. // We also explicetly test that we keep running merge_function after AD, by checking for two -// identical function calls in the LLVM-IR, while having two different calls in the Rust code. +// identical function calls in the LLVM-IR, despite having two different calls in the Rust code. #![feature(autodiff)] use std::autodiff::autodiff_reverse; @@ -27,14 +27,14 @@ fn square2(x: &f64) -> f64 { // CHECK:; identical_fnc::main // CHECK-NEXT:; Function Attrs: -// CHECK-NEXT:define internal void @_ZN13identical_fnc4main17h6009e4f751bf9407E() +// CHECK-NEXT:define internal void // CHECK-NEXT:start: // CHECK-NOT:br // CHECK-NOT:ret // CHECK:; call identical_fnc::d_square -// CHECK-NEXT:call fastcc void @_ZN13identical_fnc8d_square[[HASH:.+]](double %x.val, ptr noalias noundef align 8 dereferenceable(8) %dx1) +// CHECK-NEXT:call fastcc void @[[HASH:.+]](double %x.val, ptr noalias noundef align 8 dereferenceable(8) %dx1) // CHECK:; call identical_fnc::d_square -// CHECK-NEXT:call fastcc void @_ZN13identical_fnc8d_square[[HASH]](double %x.val, ptr noalias noundef align 8 dereferenceable(8) %dx2) +// CHECK-NEXT:call fastcc void @[[HASH]](double %x.val, ptr noalias noundef align 8 dereferenceable(8) %dx2) fn main() { let x = std::hint::black_box(3.0);