From fe90883ef7124f5b9dae69374f9bfb37c9aebb8f Mon Sep 17 00:00:00 2001
From: Manuel Drehwald <git@manuel.drehwald.info>
Date: Fri, 21 Feb 2025 21:45:29 -0500
Subject: [PATCH 1/5] fix build regressions

---
 compiler/rustc_builtin_macros/src/autodiff.rs |  1 +
 src/bootstrap/src/core/build_steps/compile.rs | 24 +++++++++++++------
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/compiler/rustc_builtin_macros/src/autodiff.rs b/compiler/rustc_builtin_macros/src/autodiff.rs
index 24c54edd705a1..8d7ac6de8010b 100644
--- a/compiler/rustc_builtin_macros/src/autodiff.rs
+++ b/compiler/rustc_builtin_macros/src/autodiff.rs
@@ -242,6 +242,7 @@ mod llvm_enzyme {
             defaultness: ast::Defaultness::Final,
             sig: d_sig,
             generics: Generics::default(),
+            contract: None,
             body: Some(d_body),
         });
         let mut rustc_ad_attr =
diff --git a/src/bootstrap/src/core/build_steps/compile.rs b/src/bootstrap/src/core/build_steps/compile.rs
index aa08dd7e42488..8e9b5856096ae 100644
--- a/src/bootstrap/src/core/build_steps/compile.rs
+++ b/src/bootstrap/src/core/build_steps/compile.rs
@@ -1174,9 +1174,15 @@ pub fn rustc_cargo(
     // We want to link against registerEnzyme and in the future we want to use additional
     // functionality from Enzyme core. For that we need to link against Enzyme.
     if builder.config.llvm_enzyme {
-        let llvm_config = builder.llvm_config(builder.config.build).unwrap();
-        let llvm_version_major = llvm::get_llvm_version_major(builder, &llvm_config);
-        cargo.rustflag("-l").rustflag(&format!("Enzyme-{llvm_version_major}"));
+        let arch = builder.build.build;
+        let enzyme_dir = builder.build.out.join(arch).join("enzyme").join("lib");
+        cargo.rustflag("-L").rustflag(enzyme_dir.to_str().expect("Invalid path"));
+
+        if !builder.config.dry_run() {
+            let llvm_config = builder.llvm_config(builder.config.build).unwrap();
+            let llvm_version_major = llvm::get_llvm_version_major(builder, &llvm_config);
+            cargo.rustflag("-l").rustflag(&format!("Enzyme-{llvm_version_major}"));
+        }
     }
 
     // Building with protected visibility reduces the number of dynamic relocations needed, giving
@@ -2028,16 +2034,20 @@ impl Step for Assemble {
         let mut build_compiler = builder.compiler(target_compiler.stage - 1, builder.config.build);
 
         // Build enzyme
-        if builder.config.llvm_enzyme {
+        if builder.config.llvm_enzyme && !builder.config.dry_run() {
             debug!("`llvm_enzyme` requested");
             let enzyme_install = builder.ensure(llvm::Enzyme { target: build_compiler.host });
+            let llvm_config = builder.llvm_config(builder.config.build).unwrap();
+            let llvm_version_major = llvm::get_llvm_version_major(builder, &llvm_config);
             let lib_ext = std::env::consts::DLL_EXTENSION;
-            let src_lib = enzyme_install.join("build/Enzyme/libEnzyme-19").with_extension(lib_ext);
+            let libenzyme = format!("libEnzyme-{llvm_version_major}");
+            let src_lib =
+                enzyme_install.join("build/Enzyme").join(&libenzyme).with_extension(lib_ext);
             let libdir = builder.sysroot_target_libdir(build_compiler, build_compiler.host);
             let target_libdir =
                 builder.sysroot_target_libdir(target_compiler, target_compiler.host);
-            let dst_lib = libdir.join("libEnzyme-19").with_extension(lib_ext);
-            let target_dst_lib = target_libdir.join("libEnzyme-19").with_extension(lib_ext);
+            let dst_lib = libdir.join(&libenzyme).with_extension(lib_ext);
+            let target_dst_lib = target_libdir.join(&libenzyme).with_extension(lib_ext);
             builder.copy_link(&src_lib, &dst_lib);
             builder.copy_link(&src_lib, &target_dst_lib);
         }

From f4e2218b131b82097c720d28c1c2ea10922b0d47 Mon Sep 17 00:00:00 2001
From: Manuel Drehwald <git@manuel.drehwald.info>
Date: Fri, 21 Feb 2025 21:47:48 -0500
Subject: [PATCH 2/5] clean up autodiff code/comments

---
 compiler/rustc_ast/src/expand/autodiff_attrs.rs   |  1 -
 compiler/rustc_codegen_llvm/src/back/write.rs     | 15 +++++----------
 .../src/partitioning/autodiff.rs                  |  2 +-
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/compiler/rustc_ast/src/expand/autodiff_attrs.rs b/compiler/rustc_ast/src/expand/autodiff_attrs.rs
index 70222f4acabe1..c8ec185ee5e29 100644
--- a/compiler/rustc_ast/src/expand/autodiff_attrs.rs
+++ b/compiler/rustc_ast/src/expand/autodiff_attrs.rs
@@ -17,7 +17,6 @@ use crate::{Ty, TyKind};
 /// functions. The proper solution is to recognize and resolve this DAG of autodiff invocations,
 /// as it's already done in the C++ and Julia frontend of Enzyme.
 ///
-/// (FIXME) remove *First variants.
 /// Documentation for using [reverse](https://enzyme.mit.edu/rust/rev.html) and
 /// [forward](https://enzyme.mit.edu/rust/fwd.html) mode is available online.
 #[derive(Clone, Copy, Eq, PartialEq, Encodable, Decodable, Debug, HashStable_Generic)]
diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs
index 9fa10e960680b..b67890c046572 100644
--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@@ -564,19 +564,16 @@ pub(crate) unsafe fn llvm_optimize(
     // FIXME(ZuseZ4): In a future update we could figure out how to only optimize individual functions getting
     // differentiated.
 
+    let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
+    let run_enzyme = autodiff_stage == AutodiffStage::DuringAD;
     let unroll_loops;
     let vectorize_slp;
     let vectorize_loop;
-    let run_enzyme = cfg!(llvm_enzyme) && autodiff_stage == AutodiffStage::DuringAD;
 
     // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing
     // optimizations until after differentiation. Our pipeline is thus: (opt + enzyme), (full opt).
     // We therefore have two calls to llvm_optimize, if autodiff is used.
-    //
-    // FIXME(ZuseZ4): Before shipping on nightly,
-    // we should make this more granular, or at least check that the user has at least one autodiff
-    // call in their code, to justify altering the compilation pipeline.
-    if cfg!(llvm_enzyme) && autodiff_stage != AutodiffStage::PostAD {
+    if consider_ad && autodiff_stage != AutodiffStage::PostAD {
         unroll_loops = false;
         vectorize_slp = false;
         vectorize_loop = false;
@@ -706,10 +703,8 @@ pub(crate) unsafe fn optimize(
 
         // If we know that we will later run AD, then we disable vectorization and loop unrolling.
         // Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD).
-        // FIXME(ZuseZ4): Make this more granular, only set PreAD if we actually have autodiff
-        // usages, not just if we build rustc with autodiff support.
-        let autodiff_stage =
-            if cfg!(llvm_enzyme) { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
+        let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable);
+        let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD };
         return unsafe {
             llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage)
         };
diff --git a/compiler/rustc_monomorphize/src/partitioning/autodiff.rs b/compiler/rustc_monomorphize/src/partitioning/autodiff.rs
index bce31bf0748e0..0c855508434e0 100644
--- a/compiler/rustc_monomorphize/src/partitioning/autodiff.rs
+++ b/compiler/rustc_monomorphize/src/partitioning/autodiff.rs
@@ -66,7 +66,7 @@ pub(crate) fn find_autodiff_source_functions<'tcx>(
     let mut autodiff_items: Vec<AutoDiffItem> = vec![];
     for (item, instance) in autodiff_mono_items {
         let target_id = instance.def_id();
-        let cg_fn_attr = tcx.codegen_fn_attrs(target_id).autodiff_item.clone();
+        let cg_fn_attr = &tcx.codegen_fn_attrs(target_id).autodiff_item;
         let Some(target_attrs) = cg_fn_attr else {
             continue;
         };

From 161a4bf6ff3d0b10cd7e5b0984e908e4927d0890 Mon Sep 17 00:00:00 2001
From: Manuel Drehwald <git@manuel.drehwald.info>
Date: Fri, 21 Feb 2025 21:49:46 -0500
Subject: [PATCH 3/5] update enzyme submodule and users

---
 compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp | 9 ++++++---
 src/bootstrap/src/core/build_steps/llvm.rs       | 7 ++-----
 src/tools/enzyme                                 | 2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
index 7eb11f9e6087c..9ce4abdb432e8 100644
--- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
+++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
@@ -692,9 +692,12 @@ struct LLVMRustSanitizerOptions {
   bool SanitizeKernelAddressRecover;
 };
 
-// This symbol won't be available or used when Enzyme is not enabled
+// This symbol won't be available or used when Enzyme is not enabled.
+// Always set AugmentPassBuilder to true, since it registers optimizations which
+// will improve the performance for Enzyme.
 #ifdef ENZYME
-extern "C" void registerEnzyme(llvm::PassBuilder &PB);
+extern "C" void registerEnzymeAndPassPipeline(llvm::PassBuilder &PB,
+                                              /* augmentPassBuilder */ bool);
 #endif
 
 extern "C" LLVMRustResult LLVMRustOptimize(
@@ -1023,7 +1026,7 @@ extern "C" LLVMRustResult LLVMRustOptimize(
   // now load "-enzyme" pass:
 #ifdef ENZYME
   if (RunEnzyme) {
-    registerEnzyme(PB);
+    registerEnzymeAndPassPipeline(PB, true);
     if (auto Err = PB.parsePassPipeline(MPM, "enzyme")) {
       std::string ErrMsg = toString(std::move(Err));
       LLVMRustSetLastError(ErrMsg.c_str());
diff --git a/src/bootstrap/src/core/build_steps/llvm.rs b/src/bootstrap/src/core/build_steps/llvm.rs
index 3025f95566070..40d701f22c137 100644
--- a/src/bootstrap/src/core/build_steps/llvm.rs
+++ b/src/bootstrap/src/core/build_steps/llvm.rs
@@ -996,13 +996,11 @@ impl Step for Enzyme {
             .config
             .update_submodule(Path::new("src").join("tools").join("enzyme").to_str().unwrap());
         let mut cfg = cmake::Config::new(builder.src.join("src/tools/enzyme/enzyme/"));
-        // FIXME(ZuseZ4): Find a nicer way to use Enzyme Debug builds
-        //cfg.profile("Debug");
-        //cfg.define("CMAKE_BUILD_TYPE", "Debug");
         configure_cmake(builder, target, &mut cfg, true, LdFlags::default(), &[]);
 
         // Re-use the same flags as llvm to control the level of debug information
-        // generated for lld.
+        // generated by Enzyme.
+        // FIXME(ZuseZ4): Find a nicer way to use Enzyme Debug builds.
         let profile = match (builder.config.llvm_optimize, builder.config.llvm_release_debuginfo) {
             (false, _) => "Debug",
             (true, false) => "Release",
@@ -1015,7 +1013,6 @@ impl Step for Enzyme {
             .env("LLVM_CONFIG_REAL", &llvm_config)
             .define("LLVM_ENABLE_ASSERTIONS", "ON")
             .define("ENZYME_EXTERNAL_SHARED_LIB", "ON")
-            .define("ENZYME_RUNPASS", "ON")
             .define("LLVM_DIR", builder.llvm_out(target));
 
         cfg.build();
diff --git a/src/tools/enzyme b/src/tools/enzyme
index 7f3b207c4413c..5004a8f6f5d84 160000
--- a/src/tools/enzyme
+++ b/src/tools/enzyme
@@ -1 +1 @@
-Subproject commit 7f3b207c4413c9d715fd54b36b8a8fd3179e0b67
+Subproject commit 5004a8f6f5d8468b64fae457afb7d96e1784c783

From e2d250c3f63d14e068e92ab3048817af6e1770c2 Mon Sep 17 00:00:00 2001
From: Manuel Drehwald <git@manuel.drehwald.info>
Date: Fri, 21 Feb 2025 21:51:20 -0500
Subject: [PATCH 4/5] update autodiff flags

---
 compiler/rustc_codegen_llvm/messages.ftl      |  1 +
 compiler/rustc_codegen_llvm/src/back/lto.rs   | 85 ++++++++++++-----
 .../src/builder/autodiff.rs                   | 13 ++-
 compiler/rustc_codegen_llvm/src/errors.rs     |  5 +-
 .../rustc_codegen_llvm/src/llvm/enzyme_ffi.rs | 94 +++++++++++++++++++
 compiler/rustc_codegen_ssa/src/back/write.rs  |  3 +-
 compiler/rustc_interface/src/tests.rs         |  2 +-
 compiler/rustc_session/src/config.rs          | 25 ++---
 compiler/rustc_session/src/options.rs         | 39 ++++----
 .../src/compiler-flags/autodiff.md            |  9 +-
 tests/codegen/autodiff.rs                     |  2 +-
 11 files changed, 203 insertions(+), 75 deletions(-)

diff --git a/compiler/rustc_codegen_llvm/messages.ftl b/compiler/rustc_codegen_llvm/messages.ftl
index 399d7ffea8e6d..17f2e7ca9f702 100644
--- a/compiler/rustc_codegen_llvm/messages.ftl
+++ b/compiler/rustc_codegen_llvm/messages.ftl
@@ -1,3 +1,4 @@
+codegen_llvm_autodiff_without_enable = using the autodiff feature requires -Z autodiff=Enable
 codegen_llvm_autodiff_without_lto = using the autodiff feature requires using fat-lto
 
 codegen_llvm_copy_bitcode = failed to copy bitcode to object file: {$err}
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index 3e25b94961b00..99906ea7bce39 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -586,6 +586,42 @@ fn thin_lto(
     }
 }
 
+fn enable_autodiff_settings(ad: &[config::AutoDiff], module: &mut ModuleCodegen<ModuleLlvm>) {
+    for &val in ad {
+        match val {
+            config::AutoDiff::PrintModBefore => {
+                unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) };
+            }
+            config::AutoDiff::PrintPerf => {
+                llvm::set_print_perf(true);
+            }
+            config::AutoDiff::PrintAA => {
+                llvm::set_print_activity(true);
+            }
+            config::AutoDiff::PrintTA => {
+                llvm::set_print_type(true);
+            }
+            config::AutoDiff::Inline => {
+                llvm::set_inline(true);
+            }
+            config::AutoDiff::LooseTypes => {
+                llvm::set_loose_types(false);
+            }
+            config::AutoDiff::PrintSteps => {
+                llvm::set_print(true);
+            }
+            // We handle this below
+            config::AutoDiff::PrintModAfter => {}
+            // This is required and already checked
+            config::AutoDiff::Enable => {}
+        }
+    }
+    // This helps with handling enums for now.
+    llvm::set_strict_aliasing(false);
+    // FIXME(ZuseZ4): Test this, since it was added a long time ago.
+    llvm::set_rust_rules(true);
+}
+
 pub(crate) fn run_pass_manager(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     dcx: DiagCtxtHandle<'_>,
@@ -604,34 +640,37 @@ pub(crate) fn run_pass_manager(
     let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
     let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
 
-    // If this rustc version was build with enzyme/autodiff enabled, and if users applied the
-    // `#[autodiff]` macro at least once, then we will later call llvm_optimize a second time.
-    debug!("running llvm pm opt pipeline");
+    // The PostAD behavior is the same that we would have if no autodiff was used.
+    // It will run the default optimization pipeline. If AD is enabled we select
+    // the DuringAD stage, which will disable vectorization and loop unrolling, and
+    // schedule two autodiff optimization + differentiation passes.
+    // We then run the llvm_optimize function a second time, to optimize the code which we generated
+    // in the enzyme differentiation pass.
+    let enable_ad = config.autodiff.contains(&config::AutoDiff::Enable);
+    let stage =
+        if enable_ad { write::AutodiffStage::DuringAD } else { write::AutodiffStage::PostAD };
+
+    if enable_ad {
+        enable_autodiff_settings(&config.autodiff, module);
+    }
+
     unsafe {
-        write::llvm_optimize(
-            cgcx,
-            dcx,
-            module,
-            config,
-            opt_level,
-            opt_stage,
-            write::AutodiffStage::DuringAD,
-        )?;
+        write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
     }
-    // FIXME(ZuseZ4): Make this more granular
-    if cfg!(llvm_enzyme) && !thin {
+
+    if cfg!(llvm_enzyme) && enable_ad {
+        let opt_stage = llvm::OptStage::FatLTO;
+        let stage = write::AutodiffStage::PostAD;
         unsafe {
-            write::llvm_optimize(
-                cgcx,
-                dcx,
-                module,
-                config,
-                opt_level,
-                llvm::OptStage::FatLTO,
-                write::AutodiffStage::PostAD,
-            )?;
+            write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?;
+        }
+
+        // This is the final IR, so people should be able to inspect the optimized autodiff output.
+        if config.autodiff.contains(&config::AutoDiff::PrintModAfter) {
+            unsafe { llvm::LLVMDumpModule(module.module_llvm.llmod()) };
         }
     }
+
     debug!("lto done");
     Ok(())
 }
diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
index b2c1088e3fc02..2c7899975e3ee 100644
--- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
+++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs
@@ -10,7 +10,7 @@ use crate::back::write::llvm_err;
 use crate::builder::SBuilder;
 use crate::context::SimpleCx;
 use crate::declare::declare_simple_fn;
-use crate::errors::LlvmError;
+use crate::errors::{AutoDiffWithoutEnable, LlvmError};
 use crate::llvm::AttributePlace::Function;
 use crate::llvm::{Metadata, True};
 use crate::value::Value;
@@ -46,9 +46,6 @@ fn generate_enzyme_call<'ll>(
     let output = attrs.ret_activity;
 
     // We have to pick the name depending on whether we want forward or reverse mode autodiff.
-    // FIXME(ZuseZ4): The new pass based approach should not need the {Forward/Reverse}First method anymore, since
-    // it will handle higher-order derivatives correctly automatically (in theory). Currently
-    // higher-order derivatives fail, so we should debug that before adjusting this code.
     let mut ad_name: String = match attrs.mode {
         DiffMode::Forward => "__enzyme_fwddiff",
         DiffMode::Reverse => "__enzyme_autodiff",
@@ -291,6 +288,14 @@ pub(crate) fn differentiate<'ll>(
     let diag_handler = cgcx.create_dcx();
     let cx = SimpleCx { llmod: module.module_llvm.llmod(), llcx: module.module_llvm.llcx };
 
+    // First of all, did the user try to use autodiff without using the -Zautodiff=Enable flag?
+    if !diff_items.is_empty()
+        && !cgcx.opts.unstable_opts.autodiff.contains(&rustc_session::config::AutoDiff::Enable)
+    {
+        let dcx = cgcx.create_dcx();
+        return Err(dcx.handle().emit_almost_fatal(AutoDiffWithoutEnable));
+    }
+
     // Before dumping the module, we want all the TypeTrees to become part of the module.
     for item in diff_items.iter() {
         let name = item.source.clone();
diff --git a/compiler/rustc_codegen_llvm/src/errors.rs b/compiler/rustc_codegen_llvm/src/errors.rs
index 97f4925616595..4c5a78ca74fe4 100644
--- a/compiler/rustc_codegen_llvm/src/errors.rs
+++ b/compiler/rustc_codegen_llvm/src/errors.rs
@@ -92,9 +92,12 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for ParseTargetMachineConfig<'_> {
 
 #[derive(Diagnostic)]
 #[diag(codegen_llvm_autodiff_without_lto)]
-#[note]
 pub(crate) struct AutoDiffWithoutLTO;
 
+#[derive(Diagnostic)]
+#[diag(codegen_llvm_autodiff_without_enable)]
+pub(crate) struct AutoDiffWithoutEnable;
+
 #[derive(Diagnostic)]
 #[diag(codegen_llvm_lto_disallowed)]
 pub(crate) struct LtoDisallowed;
diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
index 39bac13a9680c..daa6696e9634e 100644
--- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs
@@ -35,3 +35,97 @@ pub enum LLVMRustVerifierFailureAction {
     LLVMPrintMessageAction = 1,
     LLVMReturnStatusAction = 2,
 }
+
+#[cfg(llvm_enzyme)]
+pub use self::Enzyme_AD::*;
+
+#[cfg(llvm_enzyme)]
+pub mod Enzyme_AD {
+    use libc::c_void;
+    extern "C" {
+        pub fn EnzymeSetCLBool(arg1: *mut ::std::os::raw::c_void, arg2: u8);
+    }
+    extern "C" {
+        static mut EnzymePrintPerf: c_void;
+        static mut EnzymePrintActivity: c_void;
+        static mut EnzymePrintType: c_void;
+        static mut EnzymePrint: c_void;
+        static mut EnzymeStrictAliasing: c_void;
+        static mut looseTypeAnalysis: c_void;
+        static mut EnzymeInline: c_void;
+        static mut RustTypeRules: c_void;
+    }
+    pub fn set_print_perf(print: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintPerf), print as u8);
+        }
+    }
+    pub fn set_print_activity(print: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintActivity), print as u8);
+        }
+    }
+    pub fn set_print_type(print: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrintType), print as u8);
+        }
+    }
+    pub fn set_print(print: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymePrint), print as u8);
+        }
+    }
+    pub fn set_strict_aliasing(strict: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymeStrictAliasing), strict as u8);
+        }
+    }
+    pub fn set_loose_types(loose: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(looseTypeAnalysis), loose as u8);
+        }
+    }
+    pub fn set_inline(val: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(EnzymeInline), val as u8);
+        }
+    }
+    pub fn set_rust_rules(val: bool) {
+        unsafe {
+            EnzymeSetCLBool(std::ptr::addr_of_mut!(RustTypeRules), val as u8);
+        }
+    }
+}
+
+#[cfg(not(llvm_enzyme))]
+pub use self::Fallback_AD::*;
+
+#[cfg(not(llvm_enzyme))]
+pub mod Fallback_AD {
+    #![allow(unused_variables)]
+
+    pub fn set_inline(val: bool) {
+        unimplemented!()
+    }
+    pub fn set_print_perf(print: bool) {
+        unimplemented!()
+    }
+    pub fn set_print_activity(print: bool) {
+        unimplemented!()
+    }
+    pub fn set_print_type(print: bool) {
+        unimplemented!()
+    }
+    pub fn set_print(print: bool) {
+        unimplemented!()
+    }
+    pub fn set_strict_aliasing(strict: bool) {
+        unimplemented!()
+    }
+    pub fn set_loose_types(loose: bool) {
+        unimplemented!()
+    }
+    pub fn set_rust_rules(val: bool) {
+        unimplemented!()
+    }
+}
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index f029c08a80821..d2548deb8c7a4 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -405,7 +405,8 @@ fn generate_lto_work<B: ExtraBackendMethods>(
             B::run_fat_lto(cgcx, needs_fat_lto, import_only_modules).unwrap_or_else(|e| e.raise());
         if cgcx.lto == Lto::Fat && !autodiff.is_empty() {
             let config = cgcx.config(ModuleKind::Regular);
-            module = unsafe { module.autodiff(cgcx, autodiff, config).unwrap() };
+            module =
+                unsafe { module.autodiff(cgcx, autodiff, config).unwrap_or_else(|e| e.raise()) };
         }
         // We are adding a single work item, so the cost doesn't matter.
         vec![(WorkItem::LTO(module), 0)]
diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs
index 233bfcb529710..aabd235bcabed 100644
--- a/compiler/rustc_interface/src/tests.rs
+++ b/compiler/rustc_interface/src/tests.rs
@@ -759,7 +759,7 @@ fn test_unstable_options_tracking_hash() {
     tracked!(allow_features, Some(vec![String::from("lang_items")]));
     tracked!(always_encode_mir, true);
     tracked!(assume_incomplete_release, true);
-    tracked!(autodiff, vec![AutoDiff::Print]);
+    tracked!(autodiff, vec![AutoDiff::Enable]);
     tracked!(binary_dep_depinfo, true);
     tracked!(box_noalias, false);
     tracked!(
diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs
index 85ef69ea2b746..4f44eaf6437b6 100644
--- a/compiler/rustc_session/src/config.rs
+++ b/compiler/rustc_session/src/config.rs
@@ -198,33 +198,26 @@ pub enum CoverageLevel {
 /// The different settings that the `-Z autodiff` flag can have.
 #[derive(Clone, Copy, PartialEq, Hash, Debug)]
 pub enum AutoDiff {
+    /// Enable the autodiff opt pipeline
+    Enable,
+
     /// Print TypeAnalysis information
     PrintTA,
     /// Print ActivityAnalysis Information
     PrintAA,
     /// Print Performance Warnings from Enzyme
     PrintPerf,
-    /// Combines the three print flags above.
-    Print,
+    /// Print intermediate IR generation steps
+    PrintSteps,
     /// Print the whole module, before running opts.
     PrintModBefore,
-    /// Print the whole module just before we pass it to Enzyme.
-    /// For Debug purpose, prefer the OPT flag below
-    PrintModAfterOpts,
     /// Print the module after Enzyme differentiated everything.
-    PrintModAfterEnzyme,
+    PrintModAfter,
 
-    /// Enzyme's loose type debug helper (can cause incorrect gradients)
+    /// Enzyme's loose type debug helper (can cause incorrect gradients!!)
+    /// Usable in cases where Enzyme errors with `can not deduce type of X`.
     LooseTypes,
-
-    /// More flags
-    NoModOptAfter,
-    /// Tell Enzyme to run LLVM Opts on each function it generated. By default off,
-    /// since we already optimize the whole module after Enzyme is done.
-    EnableFncOpt,
-    NoVecUnroll,
-    RuntimeActivity,
-    /// Runs Enzyme specific Inlining
+    /// Runs Enzyme's aggressive inlining
     Inline,
 }
 
diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs
index 351dad3f3e422..0b94b4f122c4a 100644
--- a/compiler/rustc_session/src/options.rs
+++ b/compiler/rustc_session/src/options.rs
@@ -707,7 +707,7 @@ mod desc {
     pub(crate) const parse_list: &str = "a space-separated list of strings";
     pub(crate) const parse_list_with_polarity: &str =
         "a comma-separated list of strings, with elements beginning with + or -";
-    pub(crate) const parse_autodiff: &str = "a comma separated list of settings: `Print`, `PrintTA`, `PrintAA`, `PrintPerf`, `PrintModBefore`, `PrintModAfterOpts`, `PrintModAfterEnzyme`, `LooseTypes`, `NoModOptAfter`, `EnableFncOpt`, `NoVecUnroll`, `Inline`";
+    pub(crate) const parse_autodiff: &str = "a comma separated list of settings: `Enable`, `PrintSteps`, `PrintTA`, `PrintAA`, `PrintPerf`, `PrintModBefore`, `PrintModAfter`, `LooseTypes`, `Inline`";
     pub(crate) const parse_comma_list: &str = "a comma-separated list of strings";
     pub(crate) const parse_opt_comma_list: &str = parse_comma_list;
     pub(crate) const parse_number: &str = "a number";
@@ -1348,17 +1348,14 @@ pub mod parse {
         v.sort_unstable();
         for &val in v.iter() {
             let variant = match val {
+                "Enable" => AutoDiff::Enable,
                 "PrintTA" => AutoDiff::PrintTA,
                 "PrintAA" => AutoDiff::PrintAA,
                 "PrintPerf" => AutoDiff::PrintPerf,
-                "Print" => AutoDiff::Print,
+                "PrintSteps" => AutoDiff::PrintSteps,
                 "PrintModBefore" => AutoDiff::PrintModBefore,
-                "PrintModAfterOpts" => AutoDiff::PrintModAfterOpts,
-                "PrintModAfterEnzyme" => AutoDiff::PrintModAfterEnzyme,
+                "PrintModAfter" => AutoDiff::PrintModAfter,
                 "LooseTypes" => AutoDiff::LooseTypes,
-                "NoModOptAfter" => AutoDiff::NoModOptAfter,
-                "EnableFncOpt" => AutoDiff::EnableFncOpt,
-                "NoVecUnroll" => AutoDiff::NoVecUnroll,
                 "Inline" => AutoDiff::Inline,
                 _ => {
                     // FIXME(ZuseZ4): print an error saying which value is not recognized
@@ -2081,21 +2078,19 @@ options! {
     assume_incomplete_release: bool = (false, parse_bool, [TRACKED],
         "make cfg(version) treat the current version as incomplete (default: no)"),
     autodiff: Vec<crate::config::AutoDiff> = (Vec::new(), parse_autodiff, [TRACKED],
-        "a list of optional autodiff flags to enable
-         Optional extra settings:
-         `=PrintTA`
-         `=PrintAA`
-         `=PrintPerf`
-         `=Print`
-         `=PrintModBefore`
-         `=PrintModAfterOpts`
-         `=PrintModAfterEnzyme`
-         `=LooseTypes`
-         `=NoModOptAfter`
-         `=EnableFncOpt`
-         `=NoVecUnroll`
-         `=Inline`
-         Multiple options can be combined with commas."),
+        "a list of autodiff flags to enable
+        Mandatory setting:
+        `=Enable`
+        Optional extra settings:
+        `=PrintTA`
+        `=PrintAA`
+        `=PrintPerf`
+        `=PrintSteps`
+        `=PrintModBefore`
+        `=PrintModAfter`
+        `=LooseTypes`
+        `=Inline`
+        Multiple options can be combined with commas."),
     #[rustc_lint_opt_deny_field_access("use `Session::binary_dep_depinfo` instead of this field")]
     binary_dep_depinfo: bool = (false, parse_bool, [TRACKED],
         "include artifacts (sysroot, crate dependencies) used during compilation in dep-info \
diff --git a/src/doc/unstable-book/src/compiler-flags/autodiff.md b/src/doc/unstable-book/src/compiler-flags/autodiff.md
index 4e55be0ad95a0..95c188d1f3b29 100644
--- a/src/doc/unstable-book/src/compiler-flags/autodiff.md
+++ b/src/doc/unstable-book/src/compiler-flags/autodiff.md
@@ -8,16 +8,13 @@ This feature allows you to differentiate functions using automatic differentiati
 Set the `-Zautodiff=<options>` compiler flag to adjust the behaviour of the autodiff feature.
 Multiple options can be separated with a comma. Valid options are:
 
+`Enable` - Required flag to enable autodiff
 `PrintTA` - print Type Analysis Information
 `PrintAA` - print Activity Analysis Information
 `PrintPerf` - print Performance Warnings from Enzyme
-`Print` - prints all intermediate transformations
+`PrintSteps` - prints all intermediate transformations
 `PrintModBefore` - print the whole module, before running opts
-`PrintModAfterOpts` - print the whole module just before we pass it to Enzyme
-`PrintModAfterEnzyme` - print the module after Enzyme differentiated everything
+`PrintModAfter` - print the module after Enzyme differentiated everything
 `LooseTypes` - Enzyme's loose type debug helper (can cause incorrect gradients)
 `Inline` - runs Enzyme specific Inlining
-`NoModOptAfter` - do not optimize the module after Enzyme is done
-`EnableFncOpt` - tell Enzyme to run LLVM Opts on each function it generated
-`NoVecUnroll` - do not unroll vectorized loops
 `RuntimeActivity` - allow specifying activity at runtime
diff --git a/tests/codegen/autodiff.rs b/tests/codegen/autodiff.rs
index abf7fcf3e4bcd..cace0edb2b544 100644
--- a/tests/codegen/autodiff.rs
+++ b/tests/codegen/autodiff.rs
@@ -1,4 +1,4 @@
-//@ compile-flags: -C opt-level=3  -Clto=fat
+//@ compile-flags: -Zautodiff=Enable -C opt-level=3  -Clto=fat
 //@ no-prefer-dynamic
 //@ needs-enzyme
 #![feature(autodiff)]

From 49e9630641e44756b679187bfeefa85b06dfc86f Mon Sep 17 00:00:00 2001
From: Manuel Drehwald <git@manuel.drehwald.info>
Date: Fri, 21 Feb 2025 21:53:31 -0500
Subject: [PATCH 5/5] enable rustc_autodiff cross-crate encoding

---
 compiler/rustc_feature/src/builtin_attrs.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler/rustc_feature/src/builtin_attrs.rs b/compiler/rustc_feature/src/builtin_attrs.rs
index 8eb9bf1582902..b2ada8fe61ef0 100644
--- a/compiler/rustc_feature/src/builtin_attrs.rs
+++ b/compiler/rustc_feature/src/builtin_attrs.rs
@@ -743,7 +743,7 @@ pub static BUILTIN_ATTRIBUTES: &[BuiltinAttribute] = &[
     rustc_attr!(
         rustc_autodiff, Normal,
         template!(Word, List: r#""...""#), DuplicatesOk,
-        EncodeCrossCrate::No, INTERNAL_UNSTABLE
+        EncodeCrossCrate::Yes, INTERNAL_UNSTABLE
     ),
 
     // ==========================================================================