Skip to content

Commit 9c29d30

Browse files
committed
Defer upsampfact to setpts
This allows leveraging density to choose a better value.
1 parent f26415c commit 9c29d30

File tree

10 files changed

+10
-459
lines changed

10 files changed

+10
-459
lines changed

CHANGELOG

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ Master (working towards v2.5.0), 11/25/25
3737
the public API and share a unified STL-based implementation for kersum,
3838
mass and relative-error checks. This common kernel header will also enable
3939
reuse by the GPU. (Barbone, Barnett, Lu, PR #748)
40+
* `makeplan` now defers upsampfact choice in `setpts` if upsampfact is set to auto.
41+
This allows the decision to account for the `density`.
4042

4143
V 2.4.1 7/8/25
4244

docs/opts.rst

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -199,15 +199,6 @@ for only two settings, as follows. Otherwise, setting it to zero chooses a good
199199
* ``upsampfac=1.25`` : low-upsampling option, with lower RAM, smaller FFTs, but wider spreading kernel. The latter can be much faster than the standard when the number of nonuniform points is similar or smaller to the number of modes, and/or if low accuracy is required. It is especially much (2 to 3 times) faster for type 3 transforms. However, the kernel widths :math:`w` are about 50% larger in each dimension, which can lead to slower spreading (it can also be faster due to the smaller size of the fine grid). Because the kernel width is limited to 16, currently, thus only 9-digit accuracy can currently be reached when using ``upsampfac=1.25``.
200200

201201

202-
**hint_nj**: Estimated number of nonuniform points available at plan time.
203-
If nonzero, ``makeplan`` uses this estimate to choose ``upsampfac``.
204-
Each ``setpts`` call recomputes the factor using the actual ``nj`` and
205-
re-initializes the plan if it changes. A value of ``0`` defers the choice
206-
until ``setpts``. If ``upsampfac`` is set explicitly, ``hint_nj`` is ignored.
207-
208-
209-
210-
211202
**spread_thread**: in the case of multiple transforms per call (``ntr>1``, or the "many" interfaces), controls how multithreading is used to spread/interpolate each batch of data.
212203

213204
* ``spread_thread=0`` : makes an automatic choice between the below. Recommended.

include/finufft.fh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ c alg performance opts...
1616
integer nthreads, fftw, spread_sort, spread_kerevalmeth
1717
integer spread_kerpad
1818
real*8 upsampfac
19-
integer*4 hint_nj
2019
integer spread_thread, maxbatchsize, spread_nthr_atomic
2120
integer spread_max_sp_size
2221
integer fftw_lock_fun, fftw_unlock_fun, fftw_lock_data

include/finufft/fft.h

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ template<> struct Finufft_FFT_plan<float> {
8080
for (auto i : dims) nf *= i;
8181
#ifdef _OPENMP
8282
lock();
83+
fftwf_plan_with_nthreads(nthreads);
84+
#endif
8385
// If a previous plan exists, destroy it before creating a new one.
8486
if (plan_) {
8587
fftwf_destroy_plan(plan_);
@@ -89,12 +91,6 @@ template<> struct Finufft_FFT_plan<float> {
8991
fftwf_destroy_plan(plan_adj_);
9092
plan_adj_ = nullptr;
9193
}
92-
#else
93-
lock();
94-
#endif
95-
#ifdef _OPENMP
96-
fftwf_plan_with_nthreads(nthreads);
97-
#endif
9894
plan_ = fftwf_plan_many_dft(int(dims.size()), dims.data(), int(batchSize),
9995
reinterpret_cast<fftwf_complex *>(ptr), nullptr, 1,
10096
int(nf), reinterpret_cast<fftwf_complex *>(ptr), nullptr,
@@ -169,7 +165,9 @@ template<> struct Finufft_FFT_plan<double> {
169165
for (auto i : dims) nf *= i;
170166
#ifdef _OPENMP
171167
lock();
172-
// If a previous plan exists, destroy it before creating a new one.
168+
fftw_plan_with_nthreads(nthreads);
169+
#endif
170+
// If a previous plan exists, destroy it before creating a new on
173171
if (plan_) {
174172
fftw_destroy_plan(plan_);
175173
plan_ = nullptr;
@@ -178,12 +176,6 @@ template<> struct Finufft_FFT_plan<double> {
178176
fftw_destroy_plan(plan_adj_);
179177
plan_adj_ = nullptr;
180178
}
181-
#else
182-
lock();
183-
#endif
184-
#ifdef _OPENMP
185-
fftw_plan_with_nthreads(nthreads);
186-
#endif
187179
plan_ = fftw_plan_many_dft(int(dims.size()), dims.data(), int(batchSize),
188180
reinterpret_cast<fftw_complex *>(ptr), nullptr, 1, int(nf),
189181
reinterpret_cast<fftw_complex *>(ptr), nullptr, 1, int(nf),

include/finufft_mod.f90

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ module finufft_mod
1717
integer(kind=C_INT) :: nthreads,fftw,spread_sort,spread_kerevalmeth
1818
integer(kind=C_INT) :: spread_kerpad
1919
real(kind=C_DOUBLE) :: upsampfac
20-
integer(kind=C_INT) :: hint_nj
2120
integer(kind=C_INT) :: spread_thread, maxbatchsize
2221
integer(kind=C_INT) :: spread_nthr_atomic, spread_max_sp_size
2322
integer(kind=C_SIZE_T) :: fftw_lock_fun, fftw_unlock_fun, fftw_lock_data

include/finufft_opts.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
#ifndef FINUFFT_OPTS_H
66
#define FINUFFT_OPTS_H
77

8-
#include <stddef.h>
9-
108
typedef struct finufft_opts { // defaults see finufft_core.cpp:finufft_default_opts_t()
119
// sphinx tag (don't remove): @opts_start
1210
// FINUFFT options:
@@ -28,7 +26,6 @@ typedef struct finufft_opts { // defaults see finufft_core.cpp:finufft_default_o
2826
int spread_kerevalmeth; // spreader: 0 exp(sqrt()), 1 Horner piecewise poly (faster)
2927
int spread_kerpad; // (exp(sqrt()) only): 0 don't pad kernel to 4n, 1 do
3028
double upsampfac; // upsampling ratio sigma: 2.0 std, 1.25 small FFT, 0.0 auto
31-
int hint_nj; // estimated nj at plan time; 0 means unknown
3229
int spread_thread; // (vectorized ntr>1 only): 0 auto, 1 seq multithreaded,
3330
// 2 parallel single-thread spread
3431
int maxbatchsize; // (vectorized ntr>1 only): max transform batch, 0 auto

python/finufft/finufft/_finufft.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from ctypes import c_int
1414
from ctypes import c_longlong
1515
from ctypes import c_void_p
16-
from ctypes import c_size_t
1716

1817
import numpy as np
1918
import os
@@ -83,7 +82,6 @@ class FinufftOpts(ctypes.Structure):
8382
('spread_kerevalmeth', c_int),
8483
('spread_kerpad', c_int),
8584
('upsampfac', c_double),
86-
('hint_nj', c_int),
8785
('spread_thread', c_int),
8886
('maxbatchsize', c_int),
8987
('spread_nthr_atomic', c_int),

src/finufft_core.cpp

Lines changed: 3 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,6 @@ void finufft_default_opts_t(finufft_opts *o)
588588
o->spread_kerevalmeth = 1;
589589
o->spread_kerpad = 1;
590590
o->upsampfac = 0.0;
591-
o->hint_nj = 0;
592591
o->spread_thread = 0;
593592
o->maxbatchsize = 0;
594593
o->spread_nthr_atomic = -1;
@@ -788,22 +787,9 @@ FINUFFT_PLAN_T<TF>::FINUFFT_PLAN_T(int type_, int dim_, const BIGINT *n_modes, i
788787

789788
if (tol < std::numeric_limits<TF>::epsilon()) ier = FINUFFT_WARN_EPS_TOO_SMALL;
790789

791-
// If user hasn't forced upsampfac and hint_nj > 0 (types 1,2 only), choose an
792-
// initial upsampfac now using the hint. The actual upsampfac may still change
793-
// at setpts if the real nj differs from the hint. If hint_nj == 0, defer
794-
// upsampfac selection to setpts. For type 3, hint is ignored (always defers).
795-
if (!upsamp_locked && type != 3 && opts.hint_nj > 0) {
796-
double density = double(opts.hint_nj) / double(N());
797-
opts.upsampfac = bestUpsamplingFactor<TF>(opts.nthreads, density, dim, type, tol);
798-
if (opts.debug > 1)
799-
printf("[%s] chosen upsampfac=%.2f from hint (density=%.3g)\n", __func__,
800-
opts.upsampfac, density);
801-
// Note: don't set upsamp_locked - the actual setpts may still adjust
802-
}
803-
804-
// If upsampfac is known (forced, or from hint for types 1,2), init spreader/FFT now.
805-
// If type 3 with upsampfac=0, or types 1,2 with hint_nj==0, defer to setpts.
806-
if (upsamp_locked || opts.upsampfac != 0) {
790+
// If user hasn't forced upsampfac, defer selection to setpts.
791+
if (upsamp_locked) {
792+
// If upsampfac is known (forced), init spreader/FFT now.
807793
int ier2 = init_spreader_and_fft();
808794
if (ier2 > 1) throw int(ier2);
809795
ier = std::max(ier, ier2);
@@ -812,45 +798,6 @@ FINUFFT_PLAN_T<TF>::FINUFFT_PLAN_T(int type_, int dim_, const BIGINT *n_modes, i
812798
if (ier > 1) throw int(ier); // report setup_spreader status (could be warning)
813799
}
814800

815-
template<typename TF> void FINUFFT_PLAN_T<TF>::precompute_horner_coeffs() {
816-
const auto nspread = spopts.nspread;
817-
const auto upsampfac = opts.upsampfac;
818-
const auto max_degree =
819-
upsampfac == 2.00 ? horner_nc_200(nspread) : horner_nc_125(nspread);
820-
// get the xsimd padding
821-
static constexpr auto simd_size = xsimd::batch<TF>::size;
822-
const auto padded_ns = (nspread + simd_size - 1) & -simd_size;
823-
// pad degree if desired; keep equal to max_degree for now
824-
const auto padded_degree = max_degree;
825-
horner_coeffs.fill(0);
826-
827-
for (int j = 0; j < nspread; ++j) {
828-
const auto kernel = [&](const TF x) {
829-
// scale manually from -1, 1 to -w/2+j to -w/2+j+1
830-
return evaluate_kernel(TF(0.5) * TF(x - nspread + 2 * j + 1),
831-
TF(this->spopts.ES_beta), TF(this->spopts.ES_c));
832-
};
833-
// disable polyfit scaling
834-
static constexpr TF a = -1.0;
835-
static constexpr TF b = 1.0;
836-
const auto coeffs = fit_monomials(kernel, max_degree, a, b);
837-
// store transposed: for each degree k
838-
for (size_t k = 0; k < coeffs.size(); ++k)
839-
horner_coeffs[k * padded_ns + j] = coeffs[k];
840-
}
841-
842-
if (opts.debug > 2) {
843-
// Print transposed layout: all degree 0 coeffs for intervals, then degree 1, ...
844-
for (size_t k = 0; k < padded_degree; ++k) {
845-
printf("[%s] degree=%lu: ", __func__, k);
846-
for (size_t j = 0; j < padded_ns; ++j) // use padded_ns to show padding as well
847-
printf("%g ", horner_coeffs[k * padded_ns + j]);
848-
printf("\n");
849-
}
850-
}
851-
}
852-
853-
// MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM
854801
template<typename TF>
855802
int finufft_makeplan_t(int type, int dim, const BIGINT *n_modes, int iflag, int ntrans,
856803
TF tol, FINUFFT_PLAN_T<TF> **pp, const finufft_opts *opts)

test/CMakeLists.txt

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ set(TESTS
1111
finufft3dkernel_test
1212
spreadinterp1d_test
1313
adjointness
14-
hint_nj_test
1514
)
1615

1716
foreach(TEST ${TESTS})
@@ -99,8 +98,6 @@ function(add_tests_with_prec PREC REQ_TOL CHECK_TOL SUFFIX)
9998
)
10099

101100
add_test(NAME run_adjointness_${PREC} COMMAND adjointness${SUFFIX} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
102-
103-
add_test(NAME run_hint_nj_${PREC} COMMAND hint_nj_test${SUFFIX} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
104101
endfunction()
105102

106103
# use above function to actually add the tests, with certain requested and check

0 commit comments

Comments
 (0)