Skip to content

Commit

Permalink
splitting onedim_f_series in two functions
Browse files Browse the repository at this point in the history
  • Loading branch information
DiamonDinoia committed Sep 11, 2024
1 parent 52cd6cc commit 1355818
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 36 deletions.
13 changes: 7 additions & 6 deletions include/cufinufft/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,13 @@ int setup_spreader_for_nufft(finufft_spread_opts &spopts, T eps, cufinufft_opts

void set_nf_type12(CUFINUFFT_BIGINT ms, cufinufft_opts opts, finufft_spread_opts spopts,
CUFINUFFT_BIGINT *nf, CUFINUFFT_BIGINT b);
// template<typename T>
// void onedim_fseries_kernel(CUFINUFFT_BIGINT nf, T *fwkerhalf, finufft_spread_opts
// opts);
template<typename T, bool>
void onedim_fseries_kernel_precomp(CUFINUFFT_BIGINT nf, T *f, T *a,
finufft_spread_opts opts);

template<typename T>
void onedim_uniformn_fseries_kernel_precomp(CUFINUFFT_BIGINT nf, T *f, T *a,
finufft_spread_opts opts);
template<typename T>
void onedim_non_uniform_fseries_kernel_precomp(T *f, T *a, finufft_spread_opts opts);

template<typename T>
void onedim_fseries_kernel_compute(CUFINUFFT_BIGINT nf, T *f, std::complex<double> *a,
T *fwkerhalf, finufft_spread_opts opts);
Expand Down
29 changes: 15 additions & 14 deletions include/cufinufft/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -297,14 +297,14 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran
T fseries_precomp_f[3 * MAX_NQUAD];
thrust::device_vector<T> d_fseries_precomp_a(3 * MAX_NQUAD);
thrust::device_vector<T> d_fseries_precomp_f(3 * MAX_NQUAD);
onedim_fseries_kernel_precomp<T, true>(d_plan->nf1, fseries_precomp_f,
fseries_precomp_a, d_plan->spopts);
onedim_uniformn_fseries_kernel_precomp<T>(d_plan->nf1, fseries_precomp_f,
fseries_precomp_a, d_plan->spopts);
if (d_plan->dim > 1)
onedim_fseries_kernel_precomp<T, true>(d_plan->nf2, fseries_precomp_f + MAX_NQUAD,
fseries_precomp_a + MAX_NQUAD,
d_plan->spopts);
onedim_uniformn_fseries_kernel_precomp<T>(
d_plan->nf2, fseries_precomp_f + MAX_NQUAD, fseries_precomp_a + MAX_NQUAD,
d_plan->spopts);
if (d_plan->dim > 2)
onedim_fseries_kernel_precomp<T, true>(
onedim_uniformn_fseries_kernel_precomp<T>(
d_plan->nf3, fseries_precomp_f + 2 * MAX_NQUAD,
fseries_precomp_a + 2 * MAX_NQUAD, d_plan->spopts);
// copy the precomputed data to the device using thrust
Expand Down Expand Up @@ -687,17 +687,18 @@ int cufinufft_setpts_impl(int M, T *d_kx, T *d_ky, T *d_kz, int N, T *d_s, T *d_
if (d_plan->dim > 2) {
phi_hat3.resize(N);
}
onedim_fseries_kernel_precomp<T, false>(0, fseries_precomp_f.data(),
fseries_precomp_a.data(), d_plan->spopts);
onedim_non_uniform_fseries_kernel_precomp<T>(
fseries_precomp_f.data(), fseries_precomp_a.data(), d_plan->spopts);
if (d_plan->dim > 1) {
onedim_fseries_kernel_precomp<T, false>(0, fseries_precomp_f.data() + MAX_NQUAD,
fseries_precomp_a.data() + MAX_NQUAD,
d_plan->spopts);
onedim_non_uniform_fseries_kernel_precomp<T>(fseries_precomp_f.data() + MAX_NQUAD,
fseries_precomp_a.data() + MAX_NQUAD,
d_plan->spopts);
}
if (d_plan->dim > 2) {
onedim_fseries_kernel_precomp<T, false>(0, fseries_precomp_f.data() + 2 * MAX_NQUAD,
fseries_precomp_a.data() + 2 * MAX_NQUAD,
d_plan->spopts);
onedim_non_uniform_fseries_kernel_precomp<T>(
fseries_precomp_f.data() + 2 * MAX_NQUAD,
fseries_precomp_a.data() + 2 * MAX_NQUAD,
d_plan->spopts);
}
// copy the precomputed data to the device using thrust
thrust::copy(fseries_precomp_a.begin(), fseries_precomp_a.end(),
Expand Down
46 changes: 30 additions & 16 deletions src/cuda/common.cu
Original file line number Diff line number Diff line change
Expand Up @@ -196,25 +196,39 @@ void set_nf_type12(CUFINUFFT_BIGINT ms, cufinufft_opts opts, finufft_spread_opts
f - funciton values at quadrature nodes multiplied with quadrature weights (a, f are
provided as the inputs of onedim_fseries_kernel_compute() defined below)
*/
template<typename T, bool phase_winding = true>
void onedim_fseries_kernel_precomp(CUFINUFFT_BIGINT nf, T *f, T *a,
finufft_spread_opts opts) {

template<typename T>
void onedim_uniformn_fseries_kernel_precomp(CUFINUFFT_BIGINT nf, T *f, T *a,
finufft_spread_opts opts) {
T J2 = opts.nspread / 2.0; // J/2, half-width of ker z-support
// # quadr nodes in z (from 0 to J/2; reflections will be added)...
int q = (int)(2 + 3.0 * J2); // not sure why so large? cannot
// exceed MAX_NQUAD
double z[2 * MAX_NQUAD];
double w[2 * MAX_NQUAD];
finufft::quadrature::legendre_compute_glr(2 * q, z, w); // only half the nodes used,
// eg on (0,1)
for (int n = 0; n < q; ++n) { // set up nodes z_n and vals f_n
z[n] *= J2; // rescale nodes
f[n] = J2 * w[n] * evaluate_kernel((T)z[n], opts); // vals & quadr wei
a[n] = ((T)(2.0 * M_PI) * (T)(nf / 2 - z[n]) / (T)nf); // phase winding rates
}
}

template<typename T>
void onedim_non_uniform_fseries_kernel_precomp(T *f, T *a, finufft_spread_opts opts) {
T J2 = opts.nspread / 2.0; // J/2, half-width of ker z-support
// # quadr nodes in z (from 0 to J/2; reflections will be added)...
int q = (int)(2 + (phase_winding ? 3.0 : 2.0) * J2); // not sure why so large? cannot
// exceed MAX_NQUAD
int q = (int)(2 + 2.0 * J2); // not sure why so large? cannot
// exceed MAX_NQUAD
double z[2 * MAX_NQUAD];
double w[2 * MAX_NQUAD];
finufft::quadrature::legendre_compute_glr(2 * q, z, w); // only half the nodes used,
// eg on (0,1)
for (int n = 0; n < q; ++n) { // set up nodes z_n and vals f_n
z[n] *= J2; // rescale nodes
f[n] = J2 * w[n] * evaluate_kernel((T)z[n], opts); // vals & quadr wei
if constexpr (phase_winding) {
a[n] = ((T)(2.0 * M_PI) * (T)(nf / 2 - z[n]) / (T)nf); // phase winding rates
} else {
a[n] = T(z[n]);
}
a[n] = T(z[n]);
}
}

Expand Down Expand Up @@ -335,14 +349,14 @@ template int setup_spreader_for_nufft(finufft_spread_opts &spopts, float eps,
cufinufft_opts opts);
template int setup_spreader_for_nufft(finufft_spread_opts &spopts, double eps,
cufinufft_opts opts);
template void onedim_fseries_kernel_precomp<float, true>(
CUFINUFFT_BIGINT nf, float *f, float *a, finufft_spread_opts opts);
template void onedim_fseries_kernel_precomp<double, true>(
CUFINUFFT_BIGINT nf, double *f, double *a, finufft_spread_opts opts);
template void onedim_fseries_kernel_precomp<float, false>(
template void onedim_uniformn_fseries_kernel_precomp<float>(
CUFINUFFT_BIGINT nf, float *f, float *a, finufft_spread_opts opts);
template void onedim_fseries_kernel_precomp<double, false>(
template void onedim_uniformn_fseries_kernel_precomp<double>(
CUFINUFFT_BIGINT nf, double *f, double *a, finufft_spread_opts opts);
template void onedim_non_uniform_fseries_kernel_precomp<float>(float *f, float *a,
finufft_spread_opts opts);
template void onedim_non_uniform_fseries_kernel_precomp<double>(double *f, double *a,
finufft_spread_opts opts);
template int cufserieskernelcompute(int dim, int nf1, int nf2, int nf3, float *d_f,
float *d_a, float *d_fwkerhalf1, float *d_fwkerhalf2,
float *d_fwkerhalf3, int ns, cudaStream_t stream);
Expand Down

0 comments on commit 1355818

Please sign in to comment.