Skip to content

Commit a52cae1

Browse files
committed
Added logic to defer upsampfac to setpts when auto
1 parent c875a85 commit a52cae1

File tree

3 files changed

+171
-105
lines changed

3 files changed

+171
-105
lines changed

include/finufft/fft.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,15 @@ template<> struct Finufft_FFT_plan<float> {
7979
uint64_t nf = 1;
8080
for (auto i : dims) nf *= i;
8181
lock();
82+
// Destroy existing plans before creating new ones (handles re-planning)
83+
if (plan_) {
84+
fftwf_destroy_plan(plan_);
85+
plan_ = nullptr;
86+
}
87+
if (plan_adj_) {
88+
fftwf_destroy_plan(plan_adj_);
89+
plan_adj_ = nullptr;
90+
}
8291
#ifdef _OPENMP
8392
fftwf_plan_with_nthreads(nthreads);
8493
#endif
@@ -155,6 +164,15 @@ template<> struct Finufft_FFT_plan<double> {
155164
uint64_t nf = 1;
156165
for (auto i : dims) nf *= i;
157166
lock();
167+
// Destroy existing plans before creating new ones (handles re-planning)
168+
if (plan_) {
169+
fftw_destroy_plan(plan_);
170+
plan_ = nullptr;
171+
}
172+
if (plan_adj_) {
173+
fftw_destroy_plan(plan_adj_);
174+
plan_adj_ = nullptr;
175+
}
158176
#ifdef _OPENMP
159177
fftw_plan_with_nthreads(nthreads);
160178
#endif

include/finufft/finufft_core.h

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,14 +84,16 @@ template<typename TF> struct FINUFFT_PLAN_T { // the main plan class, fully C++
8484
int dim; // overall dimension: 1,2 or 3
8585

8686
private:
87-
int ntrans; // how many transforms to do at once (vector or "many" mode)
88-
BIGINT nj; // num of NU pts in type 1,2 (for type 3, num input x pts)
89-
BIGINT nk; // number of NU freq pts (type 3 only)
90-
TF tol; // relative user tolerance
91-
int batchSize; // # strength vectors to group together for FFTW, etc
92-
int nbatch; // how many batches done to cover all ntrans vectors
87+
int ntrans; // how many transforms to do at once (vector or "many" mode)
88+
BIGINT nj; // num of NU pts in type 1,2 (for type 3, num input x pts)
89+
BIGINT nk; // number of NU freq pts (type 3 only)
90+
TF tol; // relative user tolerance
91+
int batchSize; // # strength vectors to group together for FFTW, etc
92+
int nbatch; // how many batches done to cover all ntrans vectors
9393

94-
int nc = 0; // number of Horner coefficients used for ES kernel (<= MAX_NC)
94+
int nc = 0; // number of Horner coefficients used for ES kernel (<= MAX_NC)
95+
bool upsamp_locked = false; // true if user specified upsampfac != 0, prevents auto
96+
// update
9597

9698
public:
9799
std::array<BIGINT, 3> mstu; // number of modes in x,y,z directions
@@ -148,6 +150,10 @@ template<typename TF> struct FINUFFT_PLAN_T { // the main plan class, fully C++
148150

149151
void precompute_horner_coeffs();
150152

153+
// Helper to initialize spreader, Horner coeffs, phiHat (Fourier series), and FFT plan.
154+
// Used by constructor (when upsampfac given) and setpts (when upsampfac deferred).
155+
int initSpreadAndFFT();
156+
151157
public:
152158
FINUFFT_PLAN_T(int type, int dim, const BIGINT *n_modes, int iflag, int ntrans, TF tol,
153159
const finufft_opts *opts, int &ier);

src/finufft_core.cpp

Lines changed: 140 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,88 @@ template<typename TF> void FINUFFT_PLAN_T<TF>::precompute_horner_coeffs() {
594594
}
595595
}
596596

597+
// Helper to initialize spreader, Horner coeffs, phiHat (Fourier series), and FFT plan.
598+
// Used by constructor (when upsampfac given) and setpts (when upsampfac deferred).
599+
// Returns 0 on success, or an error code if set_nf_type12 or alloc fails.
600+
template<typename TF> int FINUFFT_PLAN_T<TF>::initSpreadAndFFT() {
601+
CNTime timer{};
602+
spopts.spread_direction = type;
603+
constexpr TF EPSILON = std::numeric_limits<TF>::epsilon();
604+
605+
if (opts.spreadinterponly) { // (unusual case of no NUFFT, just report)
606+
// spreadinterp grid will simply be the user's "mode" grid...
607+
for (int idim = 0; idim < dim; ++idim) nfdim[idim] = mstu[idim];
608+
609+
if (opts.debug) { // "long long" here is to avoid warnings with printf...
610+
printf("[%s] %dd spreadinterponly(dir=%d): (ms,mt,mu)=(%lld,%lld,%lld)"
611+
"\n ntrans=%d nthr=%d batchSize=%d kernel width ns=%d",
612+
__func__, dim, type, (long long)mstu[0], (long long)mstu[1],
613+
(long long)mstu[2], ntrans, opts.nthreads, batchSize, spopts.nspread);
614+
if (batchSize == 1) // spread_thread has no effect in this case
615+
printf("\n");
616+
else
617+
printf(" spread_thread=%d\n", opts.spread_thread);
618+
}
619+
620+
} else { // ..... usual NUFFT: eval Fourier series, alloc workspace .....
621+
622+
if (opts.showwarn) { // user warn round-off error (due to prob condition #)...
623+
for (int idim = 0; idim < dim; ++idim)
624+
if (EPSILON * mstu[idim] > 1.0)
625+
fprintf(stderr,
626+
"%s warning: rounding err (due to cond # of prob) eps_mach*N%d = %.3g "
627+
"> 1 !\n",
628+
__func__, idim, (double)(EPSILON * mstu[idim]));
629+
}
630+
631+
// determine fine grid sizes, sanity check, then alloc...
632+
for (int idim = 0; idim < dim; ++idim) {
633+
int nfier = set_nf_type12(mstu[idim], opts, spopts, &nfdim[idim]);
634+
if (nfier) return nfier; // nf too big; we're done
635+
phiHat[idim].resize(nfdim[idim] / 2 + 1); // alloc fseries
636+
}
637+
638+
if (opts.debug) { // "long long" here is to avoid warnings with printf...
639+
printf("[%s] %dd%d: (ms,mt,mu)=(%lld,%lld,%lld) "
640+
"(nf1,nf2,nf3)=(%lld,%lld,%lld)\n ntrans=%d nthr=%d "
641+
"batchSize=%d ",
642+
__func__, dim, type, (long long)mstu[0], (long long)mstu[1],
643+
(long long)mstu[2], (long long)nfdim[0], (long long)nfdim[1],
644+
(long long)nfdim[2], ntrans, opts.nthreads, batchSize);
645+
if (batchSize == 1) // spread_thread has no effect in this case
646+
printf("\n");
647+
else
648+
printf(" spread_thread=%d\n", opts.spread_thread);
649+
}
650+
651+
// STEP 0: get Fourier coeffs of spreading kernel along each fine grid dim
652+
timer.restart();
653+
for (int idim = 0; idim < dim; ++idim)
654+
onedim_fseries_kernel(nfdim[idim], phiHat[idim], spopts, horner_coeffs.data(), nc);
655+
if (opts.debug)
656+
printf("[%s] kernel fser (ns=%d):\t\t%.3g s\n", __func__, spopts.nspread,
657+
timer.elapsedsec());
658+
659+
if (nf() * batchSize > MAX_NF) {
660+
fprintf(stderr,
661+
"[%s] fwBatch would be bigger than MAX_NF, not attempting memory "
662+
"allocation!\n",
663+
__func__);
664+
return FINUFFT_ERR_MAXNALLOC;
665+
}
666+
667+
timer.restart(); // plan the FFTW (to act in-place on the workspace fwBatch)
668+
int nthr_fft = opts.nthreads;
669+
const auto ns = gridsize_for_fft(*this);
670+
std::vector<TC, xsimd::aligned_allocator<TC, 64>> fwBatch(nf() * batchSize);
671+
fftPlan->plan(ns, batchSize, fwBatch.data(), fftSign, opts.fftw, nthr_fft);
672+
if (opts.debug)
673+
printf("[%s] FFT plan (mode %d, nthr=%d):\t%.3g s\n", __func__, opts.fftw, nthr_fft,
674+
timer.elapsedsec());
675+
}
676+
return 0;
677+
}
678+
597679
// --------------- rest is the five user guru (plan) interface drivers ----------
598680
// (they are not namespaced since have safe names finufft{f}_* )
599681
using namespace finufft::utils; // AHB since already given at top, needed again?
@@ -749,106 +831,28 @@ FINUFFT_PLAN_T<TF>::FINUFFT_PLAN_T(int type_, int dim_, const BIGINT *n_modes, i
749831
}
750832
}
751833

752-
// heuristic to choose default upsampfac... (currently two poss)
753-
if (opts.upsampfac == 0.0) { // init to auto choice
754-
// Let assume density=1 as the average use case.
755-
// TODO: make a decision on how to choose density properly.
756-
const auto density = TF{1};
757-
opts.upsampfac = bestUpsamplingFactor<TF>(opts.nthreads, density, dim, type, tol);
758-
if (opts.debug > 1)
759-
printf("[%s] threads %d, density %.3g, dim %d, nufft type %d, tol %.3g: auto "
760-
"upsampfac=%.2f\n",
761-
__func__, opts.nthreads, density, dim, type, tol, opts.upsampfac);
762-
}
763-
// use opts to choose and write into plan's spread options...
764-
ier = setup_spreader_for_nufft(spopts, tol, opts, dim);
765-
if (ier > 1) // proceed if success or warning
766-
throw int(ier);
767-
precompute_horner_coeffs();
768-
// ------------------------ types 1,2: planning needed ---------------------
769-
if (type == 1 || type == 2) {
770-
771-
int nthr_fft = nthr; // give FFT all threads (or use o.spread_thread?)
772-
// Note: batchSize not used since might be only 1.
773-
774-
spopts.spread_direction = type;
775-
constexpr TF EPSILON = std::numeric_limits<TF>::epsilon();
776-
777-
if (opts.spreadinterponly) { // (unusual case of no NUFFT, just report)
778-
779-
// spreadinterp grid will simply be the user's "mode" grid...
780-
for (int idim = 0; idim < dim; ++idim) nfdim[idim] = mstu[idim];
781-
782-
if (opts.debug) { // "long long" here is to avoid warnings with printf...
783-
printf("[%s] %dd spreadinterponly(dir=%d): (ms,mt,mu)=(%lld,%lld,%lld)"
784-
"\n ntrans=%d nthr=%d batchSize=%d kernel width ns=%d",
785-
__func__, dim, type, (long long)mstu[0], (long long)mstu[1],
786-
(long long)mstu[2], ntrans, nthr, batchSize, spopts.nspread);
787-
if (batchSize == 1) // spread_thread has no effect in this case
788-
printf("\n");
789-
else
790-
printf(" spread_thread=%d\n", opts.spread_thread);
791-
}
792-
793-
} else { // ..... usual NUFFT: eval Fourier series, alloc workspace .....
794-
795-
if (opts.showwarn) { // user warn round-off error (due to prob condition #)...
796-
for (int idim = 0; idim < dim; ++idim)
797-
if (EPSILON * mstu[idim] > 1.0)
798-
fprintf(
799-
stderr,
800-
"%s warning: rounding err (due to cond # of prob) eps_mach*N%d = %.3g "
801-
"> 1 !\n",
802-
__func__, idim, (double)(EPSILON * mstu[idim]));
803-
}
804-
805-
// determine fine grid sizes, sanity check, then alloc...
806-
for (int idim = 0; idim < dim; ++idim) {
807-
int nfier = set_nf_type12(mstu[idim], opts, spopts, &nfdim[idim]);
808-
if (nfier) throw nfier; // nf too big; we're done
809-
phiHat[idim].resize(nfdim[idim] / 2 + 1); // alloc fseries
810-
}
811-
812-
if (opts.debug) { // "long long" here is to avoid warnings with printf...
813-
printf("[%s] %dd%d: (ms,mt,mu)=(%lld,%lld,%lld) "
814-
"(nf1,nf2,nf3)=(%lld,%lld,%lld)\n ntrans=%d nthr=%d "
815-
"batchSize=%d ",
816-
__func__, dim, type, (long long)mstu[0], (long long)mstu[1],
817-
(long long)mstu[2], (long long)nfdim[0], (long long)nfdim[1],
818-
(long long)nfdim[2], ntrans, nthr, batchSize);
819-
if (batchSize == 1) // spread_thread has no effect in this case
820-
printf("\n");
821-
else
822-
printf(" spread_thread=%d\n", opts.spread_thread);
823-
}
824-
825-
// STEP 0: get Fourier coeffs of spreading kernel along each fine grid dim
826-
timer.restart();
827-
for (int idim = 0; idim < dim; ++idim)
828-
onedim_fseries_kernel(nfdim[idim], phiHat[idim], spopts, horner_coeffs.data(),
829-
nc);
830-
if (opts.debug)
831-
printf("[%s] kernel fser (ns=%d):\t\t%.3g s\n", __func__, spopts.nspread,
832-
timer.elapsedsec());
833-
834-
if (nf() * batchSize > MAX_NF) {
835-
fprintf(stderr,
836-
"[%s] fwBatch would be bigger than MAX_NF, not attempting memory "
837-
"allocation!\n",
838-
__func__);
839-
throw int(FINUFFT_ERR_MAXNALLOC);
840-
}
841-
842-
timer.restart(); // plan the FFTW (to act in-place on the workspace fwBatch)
843-
const auto ns = gridsize_for_fft(*this);
844-
std::vector<TC, xsimd::aligned_allocator<TC, 64>> fwBatch(nf() * batchSize);
845-
fftPlan->plan(ns, batchSize, fwBatch.data(), fftSign, opts.fftw, nthr_fft);
846-
if (opts.debug)
847-
printf("[%s] FFT plan (mode %d, nthr=%d):\t%.3g s\n", __func__, opts.fftw,
848-
nthr_fft, timer.elapsedsec());
834+
// heuristic to choose default upsampfac: defer selection to setpts unless
835+
// the user explicitly forced a nonzero value in opts. In that case initialize
836+
// spreader/Horner internals now using the provided upsampfac.
837+
if (opts.upsampfac != 0.0) {
838+
upsamp_locked = true; // user explicitly set upsampfac, don't auto-update
839+
ier = setup_spreader_for_nufft(spopts, tol, opts, dim);
840+
if (ier > 1) // proceed if success or warning
841+
throw int(ier);
842+
precompute_horner_coeffs();
843+
844+
// ------------------------ types 1,2: planning needed ---------------------
845+
if (type == 1 || type == 2) {
846+
int code = initSpreadAndFFT();
847+
if (code) throw code;
849848
}
849+
} else {
850+
// If upsampfac was left as 0.0 (auto) we defer setup_spreader to setpts.
851+
// However, we must still check if tol is too small to warn the user now.
852+
if (tol < std::numeric_limits<TF>::epsilon()) ier = FINUFFT_WARN_EPS_TOO_SMALL;
853+
}
850854

851-
} else { // -------------------------- type 3 (no planning) ------------
855+
if (type == 3) { // -------------------------- type 3 (no planning) ------------
852856

853857
if (opts.debug) printf("[%s] %dd%d: ntrans=%d\n", __func__, dim, type, ntrans);
854858
// Type 3 will call finufft_makeplan for type 2; no need to init FFTW
@@ -908,6 +912,27 @@ int FINUFFT_PLAN_T<TF>::setpts(BIGINT nj, const TF *xj, const TF *yj, const TF *
908912

909913
if (type != 3) { // ------------------ TYPE 1,2 SETPTS -------------------
910914
// (all we can do is check and maybe bin-sort the NU pts)
915+
// If upsampfac is not locked by user (auto mode), choose or update it now
916+
// based on the actual density nj/N(). Re-plan if density changed significantly.
917+
if (!upsamp_locked) {
918+
double density = double(nj) / double(N());
919+
double upsampfac = bestUpsamplingFactor<TF>(opts.nthreads, density, dim, type, tol);
920+
// Re-plan if this is the first call (upsampfac==0) or if upsampfac changed
921+
if (upsampfac != opts.upsampfac) {
922+
opts.upsampfac = upsampfac;
923+
if (opts.debug > 1)
924+
printf("[setpts] selected upsampfac=%.2f (density=%.3f, nj=%lld)\n",
925+
opts.upsampfac, density, (long long)nj);
926+
int code = setup_spreader_for_nufft(spopts, tol, opts, dim);
927+
if (code > 1) return code;
928+
precompute_horner_coeffs();
929+
930+
// Perform the planning steps (first call or re-plan due to density change).
931+
code = initSpreadAndFFT();
932+
if (code) return code;
933+
}
934+
}
935+
911936
XYZ = {xj, yj, zj}; // plan must keep pointers to user's fixed NU pts
912937
int ier = spreadcheck(nfdim[0], nfdim[1], nfdim[2], spopts);
913938
if (ier) // no warnings allowed here
@@ -935,6 +960,21 @@ int FINUFFT_PLAN_T<TF>::setpts(BIGINT nj, const TF *xj, const TF *yj, const TF *
935960
this->nk = nk; // user set # targ freq pts
936961
STU = {s, t, u};
937962

963+
// For type 3 with deferred upsampfac (not locked by user), pick and persist
964+
// an upsamp now using density=1.0 so that subsequent steps (set_nhg_type3 etc.)
965+
// have a concrete upsampfac to work with. This choice is persisted so inner
966+
// type-2 plans will inherit it.
967+
double upsampfac = bestUpsamplingFactor<TF>(opts.nthreads, 1.0, dim, type, tol);
968+
if (!upsamp_locked && (upsampfac != opts.upsampfac)) {
969+
opts.upsampfac = upsampfac;
970+
if (opts.debug > 1)
971+
printf("[setpts t3] selected upsampfac=%.2f (density=1 used; persisted)\n",
972+
opts.upsampfac);
973+
int sier = setup_spreader_for_nufft(spopts, tol, opts, dim);
974+
if (sier > 1) return sier;
975+
precompute_horner_coeffs();
976+
}
977+
938978
// pick x, s intervals & shifts & # fine grid pts (nf) in each dim...
939979
std::array<TF, 3> S = {0, 0, 0};
940980
if (opts.debug) printf("\tM=%lld N=%lld\n", (long long)nj, (long long)nk);
@@ -1038,6 +1078,8 @@ int FINUFFT_PLAN_T<TF>::setpts(BIGINT nj, const TF *xj, const TF *yj, const TF *
10381078
t2opts.debug = std::max(0, opts.debug - 1); // don't print as much detail
10391079
t2opts.spread_debug = std::max(0, opts.spread_debug - 1);
10401080
t2opts.showwarn = 0; // so don't see warnings 2x
1081+
if (!upsamp_locked) t2opts.upsampfac = 0.0; // if the upsampfac was auto, let inner
1082+
// t2 pick it again (from density=nj/Nf)
10411083
// (...could vary other t2opts here?)
10421084
// MR: temporary hack, until we have figured out the C++ interface.
10431085
FINUFFT_PLAN_T<TF> *tmpplan;

0 commit comments

Comments
 (0)