Replies: 2 comments
-
See here: https://fwd.gymni.ch/Z7Oa8F
#include <cstdio>
#include <cassert>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <utility>
#include <vector>
int enzyme_dup;
int enzyme_dupnoneed;
int enzyme_out;
int enzyme_const;
#define __INLINE__ inline __attribute__((always_inline))
#if defined(CUDA_IMPLEMENTATION)
#define DECLARE_FUNCTION \
__host__ __device__ \
__INLINE__
#else
#define DECLARE_FUNCTION \
__INLINE__
#endif
using namespace std::placeholders;
template < typename return_type, typename ... T >
return_type __enzyme_fwddiff(void*, T ... );
template < typename return_type, typename ... T >
return_type __enzyme_autodiff(void*, T ... );
// --------------------------------------------------------------------
// Calls `f` with `args` and returns whatever `f` returns. The return type is
// deduced with plain `auto`, so a reference result is returned by value.
//
// `args` are forwarding references and are passed on with std::forward, so an
// rvalue argument reaches `f` as an rvalue. This lets move-only arguments
// (e.g. std::unique_ptr) and callables with rvalue-reference parameters work.
// Note: a temporary can therefore no longer bind to a non-const lvalue
// reference parameter of `f`.
template< typename T, typename ... arg_types >
auto wrapper(const T & f, arg_types && ... args) {
    return f(std::forward<arg_types>(args)...);
}
// --------------------------------------------------------------------
// Pointwise kernel body: for the single index `tx`, reads a[tx] and stores
// 3*a[tx]*a[tx] + 5 into b[tx]. No other element of either array is touched.
DECLARE_FUNCTION
void knl_simple_body0(
    const int tx,
    const double* __restrict__ a,
    double* __restrict__ b)
{
    const double a_tx = a[tx];
    // Evaluated left to right, i.e. ((3 * a) * a) + 5.
    b[tx] = 3. * a_tx * a_tx + 5.;
}
// Invokes `f(i)` for every index i in [0, nb_cell), in increasing order of i
// as written; the loop carries a `#pragma omp simd` annotation.
// `f` is taken by non-const lvalue reference and is not copied.
template<typename F>
DECLARE_FUNCTION
void loop_on_dof_cpu(const int nb_cell, F& f) {
    // Kept as a plain counted `for` directly under the pragma.
    #pragma omp simd
    for (int cell = 0; cell < nb_cell; ++cell) {
        f(cell);
    }
}
// --------------------------------------------------------------------
// Demo driver for the templated-loop example.
//
// 1. Fills a[] with 12 and evaluates b = 3*a*a + 5 through loop_on_dof_cpu
//    with a lambda wrapping knl_simple_body0, then checks b == 437.
// 2. Calls __enzyme_fwddiff on the same loop instantiation, passing the
//    closure `f` together with a second closure `df` that captures the
//    tangent buffers da/db, and checks db == 3*2*a*da == 72.
//
// Returns 0 on success and EXIT_FAILURE if a buffer cannot be allocated.
// (The asserts abort the program on a wrong result unless NDEBUG is defined.)
int main(int argc, char *argv[])
{
    std::printf("argc == %d\n", argc);

    constexpr int nb_cell = 1000000;
    const std::size_t nb_bytes = nb_cell * sizeof(double);

    // All four buffers are allocated up front so a failure can be handled in
    // one place; std::free accepts null pointers.
    double* a  = static_cast<double*>(std::malloc(nb_bytes));
    double* b  = static_cast<double*>(std::malloc(nb_bytes));
    double* da = static_cast<double*>(std::malloc(nb_bytes));
    double* db = static_cast<double*>(std::malloc(nb_bytes));
    if (a == nullptr || b == nullptr || da == nullptr || db == nullptr) {
        std::fprintf(stderr, "failed to allocate 4 buffers of %d doubles\n", nb_cell);
        std::free(db);
        std::free(da);
        std::free(b);
        std::free(a);
        return EXIT_FAILURE;
    }

    // Version 2: loop pattern
    for (int tx = 0; tx < nb_cell; tx++) {
        a[tx] = 12.;
        b[tx] = 0.;
    }

    // Primal closure: captures the pointer variables a and b by reference.
    auto f = [&](int tx) {
        knl_simple_body0(tx, a, b);
    };
    loop_on_dof_cpu(nb_cell, f);

    std::printf("[2, direct] a[0] == %f\n", a[0]);
    std::printf("[2, direct] a[nb_cell-1] == %f\n", a[nb_cell-1]);
    std::printf("[2, direct] b[0] == %f\n", b[0]);
    std::printf("[2, direct] b[nb_cell-1] == %f\n", b[nb_cell-1]);
    // 3*12*12 + 5 == 437 is exactly representable, so == is safe here.
    assert(a[0] == 12.);
    assert(a[nb_cell-1] == 12.);
    assert(b[0] == 437.);
    assert(b[nb_cell-1] == 437.);

    // b(a) = 3.*a*a + 5.
    // db   = (3.*2*a)*da
    for (int tx = 0; tx < nb_cell; tx++) {
        da[tx] = 1.;  // seed: unit tangent on every input
        db[tx] = 0.;
    }

    // Shadow closure: same shape as `f`, but capturing the tangent buffers.
    auto df = [&](int tx) {
        knl_simple_body0(tx, da, db);
    };

    // enzyme_dup announces that the next two arguments are a primal/shadow
    // pair (here: the closure and its tangent counterpart). The C-style casts
    // are kept exactly as in the original call.
    __enzyme_fwddiff<void>((void*)loop_on_dof_cpu<decltype(f)>, nb_cell, enzyme_dup, (void*)&f, (void*)&df);

    std::printf("[2] da[0] == %f\n", da[0]);
    std::printf("[2] da[nb_cell-1] == %f\n", da[nb_cell-1]);
    std::printf("[2] db[0] == %f\n", db[0]);
    std::printf("[2] db[nb_cell-1] == %f\n", db[nb_cell-1]);
    assert(da[0] == 1.);
    assert(da[nb_cell-1] == 1.);
    assert(db[0] == (3.*2.*12.*1.));
    assert(db[nb_cell-1] == (3.*2.*12.*1.));

    std::free(db);
    std::free(da);
    std::free(a);
    std::free(b);

    // Every check passed: report success (the original returned 1, which a
    // shell or test harness reads as failure).
    return 0;
}
|
Beta Was this translation helpful? Give feedback.
0 replies
-
Thanks a lot !!! |
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
Hi,
I'm currently testing Enzyme on toy examples but representative of what I am working on.
In this first example, a "body" function
knl_simple_cpu0(tx, ...)
is called inside another "loop" function knl_simple_body0(...)
which contains... a loop, as shown in the following example: https://fwd.gymni.ch/fZz4Ra
In this case, the forward differentiation computed by Enzyme works perfectly.
In this second example, the "loop" function
loop_on_dof_cpu(...)
is abstracted into a template whose argument becomes the "body" function knl_simple_cpu0(tx, ...).
It works fine for the direct computation: https://fwd.gymni.ch/1H7WtC
However, I cannot figure out how to get the differentiation with Enzyme on this template form.
I have tried this:
but an error is returned as follows:
I think I'm on the wrong track?!
Could you please help me understand how to write the appropriate code to differentiate this example with Enzyme-AD?
Thanks a lot,
Bertrand M.
Beta Was this translation helpful? Give feedback.
All reactions