diff --git a/.github/workflows/spack_default_build.yaml b/.github/workflows/spack_default_build.yaml index 7ce766d54..fcdb28c9b 100644 --- a/.github/workflows/spack_default_build.yaml +++ b/.github/workflows/spack_default_build.yaml @@ -100,6 +100,7 @@ jobs: # Minimal Build(s) - GHCR mirror speeds these up a lot! spack_spec: - gridkit@develop +enzyme + ^enzyme@0.0.173 steps: - name: Add LLVM diff --git a/cmake/FindEnzyme.cmake b/cmake/FindEnzyme.cmake index 8ef1371ef..00a76d372 100644 --- a/cmake/FindEnzyme.cmake +++ b/cmake/FindEnzyme.cmake @@ -32,13 +32,31 @@ find_library(ENZYME_LLVM_PLUGIN_LIBRARY REQUIRED) message(STATUS "Enzyme LLVM plugin library: ${ENZYME_LLVM_PLUGIN_LIBRARY}") +find_library(ENZYME_CLANG_PLUGIN_LIBRARY + NAMES + ClangEnzyme-${Enzyme_LLVM_VERSION_MAJOR}.so + ClangEnzyme-${Enzyme_LLVM_VERSION_MAJOR}.dylib + ClangEnzyme-${Enzyme_LLVM_VERSION_MAJOR}.dll + PATHS + ${ENZYME_DIR} + ENV LD_LIBRARY_PATH + ENV DYLD_LIBRARY_PATH + PATH_SUFFIXES + lib64 lib + REQUIRED) +message(STATUS "Enzyme Clang plugin library: ${ENZYME_CLANG_PLUGIN_LIBRARY}") + find_program(GRIDKIT_LLVM_LINK llvm-link PATHS ${Enzyme_LLVM_BINARY_DIR} + PATH_SUFFIXES + bin REQUIRED) message(STATUS "llvm-link: ${GRIDKIT_LLVM_LINK}") find_program(GRIDKIT_OPT opt PATHS ${Enzyme_LLVM_BINARY_DIR} + PATH_SUFFIXES + bin REQUIRED) message(STATUS "opt: ${GRIDKIT_OPT}") @@ -80,7 +98,7 @@ macro(enzyme_add_executable) add_custom_command( DEPENDS ${PHASE0} OUTPUT ${PHASE1} - COMMAND ${CMAKE_CXX_COMPILER} -flto -c ${PHASE0} ${INCLUDE_COMPILER_LIST} -O2 -fno-vectorize -ffast-math -fno-unroll-loops -o ${PHASE1} + COMMAND ${CMAKE_CXX_COMPILER} -flto -c ${PHASE0} ${INCLUDE_COMPILER_LIST} -O2 -fno-vectorize -ffast-math -fno-unroll-loops -fpass-plugin=${ENZYME_CLANG_PLUGIN_LIBRARY} -Xclang -load -Xclang ${ENZYME_CLANG_PLUGIN_LIBRARY} -mllvm -enable-load-pre=0 -mllvm -enzyme-auto-sparsity=1 -o ${PHASE1} COMMENT "Compiling ${SRC} to object file for target ${enzyme_add_executable_NAME}" ) set(OBJS "${OBJS} ${PHASE1}") diff --git a/examples/Enzyme/Standalone/CMakeLists.txt b/examples/Enzyme/Standalone/CMakeLists.txt index fe41dc744..d80ba4f50 100644 --- a/examples/Enzyme/Standalone/CMakeLists.txt +++ b/examples/Enzyme/Standalone/CMakeLists.txt @@ -9,5 +9,12 @@ enzyme_add_executable( LINK_LIBRARIES GRIDKIT::DenseMatrix ) +enzyme_add_executable( + NAME EnzymeStandaloneSparseCheck + SOURCES EnzymeSparse.cpp + LINK_LIBRARIES GRIDKIT::SparseMatrix +) + add_test(NAME "EnzymeStandaloneScalarCheck" COMMAND ${CMAKE_CURRENT_BINARY_DIR}/EnzymeStandaloneScalarCheck) add_test(NAME "EnzymeStandaloneVectorCheck" COMMAND ${CMAKE_CURRENT_BINARY_DIR}/EnzymeStandaloneVectorCheck) +add_test(NAME "EnzymeStandaloneSparseCheck" COMMAND ${CMAKE_CURRENT_BINARY_DIR}/EnzymeStandaloneSparseCheck) diff --git a/examples/Enzyme/Standalone/EnzymeSparse.cpp b/examples/Enzyme/Standalone/EnzymeSparse.cpp new file mode 100644 index 000000000..627cc78f9 --- /dev/null +++ b/examples/Enzyme/Standalone/EnzymeSparse.cpp @@ -0,0 +1,206 @@ +#include +#include +#include +#include +#include + +#include + +/** + * @brief Standalone example that computes the sparse Jacobian of a vector-valued function + * by automatic differentiation via Enzyme. + * + * TODO: Modify the sparse storage provided to Enzyme to directly operate on std::vector and COO_Matrix + * TODO: Convert this into a unit test. + */ + +using SparseMatrix = COO_Matrix; +extern int enzyme_dup; +extern int enzyme_const; +extern int enzyme_dupnoneed; + +template +extern T __enzyme_fwddiff(void*, Tys...) noexcept; + +template +extern T __enzyme_todense(Tys...) noexcept; + +/// Sparse storage for Enzyme +template +struct Triple +{ + size_t row; + size_t col; + T val; + Triple(Triple&&) = default; + + Triple(size_t row, size_t col, T val) + : row(row), + col(col), + val(val) + { + } +}; + +__attribute__((enzyme_sparse_accumulate)) static void inner_storeflt(int64_t row, int64_t col, float val, std::vector>& triplets) +{ + triplets.emplace_back(row, col, val); +} + +__attribute__((enzyme_sparse_accumulate)) static void inner_storedbl(int64_t row, int64_t col, double val, std::vector>& triplets) +{ + triplets.emplace_back(row, col, val); +} + +template +__attribute__((always_inline)) static void sparse_store(T val, int64_t idx, size_t i, std::vector>& triplets) +{ + if (val == 0.0) + return; + idx /= sizeof(T); + if constexpr (sizeof(T) == 4) + inner_storeflt(i, idx, val, triplets); + else + inner_storedbl(i, idx, val, triplets); +} + +template +__attribute__((always_inline)) static T sparse_load(int64_t idx, size_t i, std::vector>& triplets) +{ + return 0.0; +} + +template +__attribute__((always_inline)) static void ident_store(T, int64_t idx, size_t i) +{ + assert(0 && "should never load"); +} + +template +__attribute__((always_inline)) static T ident_load(int64_t idx, size_t i) +{ + idx /= sizeof(T); + return (T) (idx == i); +} + +/// Vector-valued function to differentiate +template +__attribute__((always_inline)) static void f(size_t N, T* input, T* output) +{ + for (size_t i = 0; i < N; i++) + { + output[i] = input[i] * input[i]; + } +} + +/// Reference Jacobian +template +void jac_f_ref(std::vector x, std::vector y, SparseMatrix& jac) +{ + std::vector ctemp{}; + std::vector rtemp{}; + std::vector valtemp{}; + for (int idy = 0; idy < y.size(); ++idy) + { + for (int idx = 0; idx < x.size(); ++idx) + { + if (idx == idy) + { + rtemp.push_back(idx); + ctemp.push_back(idy); + valtemp.push_back(2.0 * x[idx]); + } + } + } + jac.setValues(rtemp, ctemp, valtemp); +} + +/// Function that computes the Jacobian via automatic differentiation +template +__attribute__((noinline)) void jac_f(size_t N, T* input, SparseMatrix& jac) +{ + std::vector> triplets; + for (size_t i = 0; i < N; i++) + { + T* output = __enzyme_todense((void*) ident_load, (void*) ident_store, i); + T* d_output = __enzyme_todense((void*) sparse_load, (void*) sparse_store, i, &triplets); + + __enzyme_fwddiff((void*) f, + enzyme_const, + N, + enzyme_dup, + input, + output, + enzyme_dupnoneed, + (T*) 0x1, + d_output); + } + + std::vector ctemp{}; + std::vector rtemp{}; + std::vector valtemp{}; + for (auto& tup : triplets) + { + rtemp.push_back(tup.row); + ctemp.push_back(tup.col); + valtemp.push_back(tup.val); + } + jac.setValues(rtemp, ctemp, valtemp); +} + +/// Compare two sparse matrices +void check(SparseMatrix matrix_1, SparseMatrix matrix_2, int& fail) +{ + std::tuple&, std::vector&, std::vector&> entries_1 = matrix_1.getEntries(); + const auto [rcord_1, ccord_1, vals_1] = entries_1; + std::tuple&, std::vector&, std::vector&> entries_2 = matrix_2.getEntries(); + const auto [rcord_2, ccord_2, vals_2] = entries_2; + for (int ind = 0; ind < vals_1.size(); ++ind) + { + if (rcord_1[ind] != rcord_2[ind]) + fail++; + if (ccord_1[ind] != ccord_2[ind]) + fail++; + if (std::abs(vals_1[ind] - vals_2[ind]) > std::numeric_limits::epsilon()) + fail++; + } +} + +int main() +{ + /// Vector and matrix declarations + size_t N = 5; + std::vector x(N); + std::vector sq(N); + SparseMatrix dsq = SparseMatrix(N, N); + SparseMatrix dsq_ref = SparseMatrix(N, N); + + /// Input initialization + double val = 0.0; + for (int i = 0; i < N; ++i) + { + x[i] = val; + val += 1.0; + } + + /// Function evaluation + f(x.size(), x.data(), sq.data()); + + /// Reference Jacobian + jac_f_ref(x, sq, dsq_ref); + + /// Enzyme Jacobian + jac_f(N, x.data(), dsq); + + /// Check + int fail = 0; + check(dsq, dsq_ref, fail); + bool verbose = true; + if (verbose) + { + dsq.printMatrix("Autodiff Jacobian"); + dsq_ref.printMatrix("Reference Jacobian"); + } + + return fail; +}