diff --git a/include/cufinufft/common.h b/include/cufinufft/common.h index 09c940013..5e8a63b56 100644 --- a/include/cufinufft/common.h +++ b/include/cufinufft/common.h @@ -32,12 +32,13 @@ int setup_spreader_for_nufft(finufft_spread_opts &spopts, T eps, cufinufft_opts void set_nf_type12(CUFINUFFT_BIGINT ms, cufinufft_opts opts, finufft_spread_opts spopts, CUFINUFFT_BIGINT *nf, CUFINUFFT_BIGINT b); -// template -// void onedim_fseries_kernel(CUFINUFFT_BIGINT nf, T *fwkerhalf, finufft_spread_opts -// opts); -template -void onedim_fseries_kernel_precomp(CUFINUFFT_BIGINT nf, T *f, T *a, - finufft_spread_opts opts); + +template +void onedim_uniformn_fseries_kernel_precomp(CUFINUFFT_BIGINT nf, T *f, T *a, + finufft_spread_opts opts); +template +void onedim_non_uniform_fseries_kernel_precomp(T *f, T *a, finufft_spread_opts opts); + template void onedim_fseries_kernel_compute(CUFINUFFT_BIGINT nf, T *f, std::complex *a, T *fwkerhalf, finufft_spread_opts opts); diff --git a/include/cufinufft/impl.h b/include/cufinufft/impl.h index d85dfa22b..9d4dcfdf5 100644 --- a/include/cufinufft/impl.h +++ b/include/cufinufft/impl.h @@ -297,14 +297,14 @@ int cufinufft_makeplan_impl(int type, int dim, int *nmodes, int iflag, int ntran T fseries_precomp_f[3 * MAX_NQUAD]; thrust::device_vector d_fseries_precomp_a(3 * MAX_NQUAD); thrust::device_vector d_fseries_precomp_f(3 * MAX_NQUAD); - onedim_fseries_kernel_precomp(d_plan->nf1, fseries_precomp_f, - fseries_precomp_a, d_plan->spopts); + onedim_uniformn_fseries_kernel_precomp(d_plan->nf1, fseries_precomp_f, + fseries_precomp_a, d_plan->spopts); if (d_plan->dim > 1) - onedim_fseries_kernel_precomp(d_plan->nf2, fseries_precomp_f + MAX_NQUAD, - fseries_precomp_a + MAX_NQUAD, - d_plan->spopts); + onedim_uniformn_fseries_kernel_precomp( + d_plan->nf2, fseries_precomp_f + MAX_NQUAD, fseries_precomp_a + MAX_NQUAD, + d_plan->spopts); if (d_plan->dim > 2) - onedim_fseries_kernel_precomp( + onedim_uniformn_fseries_kernel_precomp( d_plan->nf3, fseries_precomp_f + 2 * MAX_NQUAD, fseries_precomp_a + 2 * MAX_NQUAD, d_plan->spopts); // copy the precomputed data to the device using thrust @@ -687,17 +687,18 @@ int cufinufft_setpts_impl(int M, T *d_kx, T *d_ky, T *d_kz, int N, T *d_s, T *d_ if (d_plan->dim > 2) { phi_hat3.resize(N); } - onedim_fseries_kernel_precomp(0, fseries_precomp_f.data(), - fseries_precomp_a.data(), d_plan->spopts); + onedim_non_uniform_fseries_kernel_precomp( + fseries_precomp_f.data(), fseries_precomp_a.data(), d_plan->spopts); if (d_plan->dim > 1) { - onedim_fseries_kernel_precomp(0, fseries_precomp_f.data() + MAX_NQUAD, - fseries_precomp_a.data() + MAX_NQUAD, - d_plan->spopts); + onedim_non_uniform_fseries_kernel_precomp(fseries_precomp_f.data() + MAX_NQUAD, + fseries_precomp_a.data() + MAX_NQUAD, + d_plan->spopts); } if (d_plan->dim > 2) { - onedim_fseries_kernel_precomp(0, fseries_precomp_f.data() + 2 * MAX_NQUAD, - fseries_precomp_a.data() + 2 * MAX_NQUAD, - d_plan->spopts); + onedim_non_uniform_fseries_kernel_precomp( + fseries_precomp_f.data() + 2 * MAX_NQUAD, + fseries_precomp_a.data() + 2 * MAX_NQUAD, + d_plan->spopts); } // copy the precomputed data to the device using thrust thrust::copy(fseries_precomp_a.begin(), fseries_precomp_a.end(), diff --git a/src/cuda/common.cu b/src/cuda/common.cu index 57c1e39dc..5072c06fe 100644 --- a/src/cuda/common.cu +++ b/src/cuda/common.cu @@ -196,13 +196,31 @@ void set_nf_type12(CUFINUFFT_BIGINT ms, cufinufft_opts opts, finufft_spread_opts f - funciton values at quadrature nodes multiplied with quadrature weights (a, f are provided as the inputs of onedim_fseries_kernel_compute() defined below) */ -template -void onedim_fseries_kernel_precomp(CUFINUFFT_BIGINT nf, T *f, T *a, - finufft_spread_opts opts) { + +template +void onedim_uniformn_fseries_kernel_precomp(CUFINUFFT_BIGINT nf, T *f, T *a, + finufft_spread_opts opts) { + T J2 = opts.nspread / 2.0; // J/2, half-width of ker z-support + // # quadr nodes in z (from 0 to J/2; reflections will be added)... + int q = (int)(2 + 3.0 * J2); // not sure why so large? cannot + // exceed MAX_NQUAD + double z[2 * MAX_NQUAD]; + double w[2 * MAX_NQUAD]; + finufft::quadrature::legendre_compute_glr(2 * q, z, w); // only half the nodes used, + // eg on (0,1) + for (int n = 0; n < q; ++n) { // set up nodes z_n and vals f_n + z[n] *= J2; // rescale nodes + f[n] = J2 * w[n] * evaluate_kernel((T)z[n], opts); // vals & quadr wei + a[n] = ((T)(2.0 * M_PI) * (T)(nf / 2 - z[n]) / (T)nf); // phase winding rates + } +} + +template +void onedim_non_uniform_fseries_kernel_precomp(T *f, T *a, finufft_spread_opts opts) { T J2 = opts.nspread / 2.0; // J/2, half-width of ker z-support // # quadr nodes in z (from 0 to J/2; reflections will be added)... - int q = (int)(2 + (phase_winding ? 3.0 : 2.0) * J2); // not sure why so large? cannot - // exceed MAX_NQUAD + int q = (int)(2 + 2.0 * J2); // not sure why so large? cannot + // exceed MAX_NQUAD double z[2 * MAX_NQUAD]; double w[2 * MAX_NQUAD]; finufft::quadrature::legendre_compute_glr(2 * q, z, w); // only half the nodes used, @@ -210,11 +228,7 @@ void onedim_fseries_kernel_precomp(CUFINUFFT_BIGINT nf, T *f, T *a, for (int n = 0; n < q; ++n) { // set up nodes z_n and vals f_n z[n] *= J2; // rescale nodes f[n] = J2 * w[n] * evaluate_kernel((T)z[n], opts); // vals & quadr wei - if constexpr (phase_winding) { - a[n] = ((T)(2.0 * M_PI) * (T)(nf / 2 - z[n]) / (T)nf); // phase winding rates - } else { - a[n] = T(z[n]); - } + a[n] = T(z[n]); } } @@ -335,14 +349,14 @@ template int setup_spreader_for_nufft(finufft_spread_opts &spopts, float eps, cufinufft_opts opts); template int setup_spreader_for_nufft(finufft_spread_opts &spopts, double eps, cufinufft_opts opts); -template void onedim_fseries_kernel_precomp( - CUFINUFFT_BIGINT nf, float *f, float *a, finufft_spread_opts opts); -template void onedim_fseries_kernel_precomp( - CUFINUFFT_BIGINT nf, double *f, double *a, finufft_spread_opts opts); -template void onedim_fseries_kernel_precomp( +template void onedim_uniformn_fseries_kernel_precomp( CUFINUFFT_BIGINT nf, float *f, float *a, finufft_spread_opts opts); -template void onedim_fseries_kernel_precomp( +template void onedim_uniformn_fseries_kernel_precomp( CUFINUFFT_BIGINT nf, double *f, double *a, finufft_spread_opts opts); +template void onedim_non_uniform_fseries_kernel_precomp(float *f, float *a, + finufft_spread_opts opts); +template void onedim_non_uniform_fseries_kernel_precomp(double *f, double *a, + finufft_spread_opts opts); template int cufserieskernelcompute(int dim, int nf1, int nf2, int nf3, float *d_f, float *d_a, float *d_fwkerhalf1, float *d_fwkerhalf2, float *d_fwkerhalf3, int ns, cudaStream_t stream);