Skip to content

Commit

Permalink
Using vectors in spreader
Browse files Browse the repository at this point in the history
  • Loading branch information
Marco Barbone committed May 2, 2024
1 parent 108c5c5 commit 5bd82ad
Showing 1 changed file with 38 additions and 40 deletions.
78 changes: 38 additions & 40 deletions src/spreadinterp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,21 +41,21 @@ void interp_line(FLT *out,FLT *du, FLT *ker,BIGINT i1,BIGINT N1,int ns);
void interp_square(FLT *out,FLT *du, FLT *ker1, FLT *ker2, BIGINT i1,BIGINT i2,BIGINT N1,BIGINT N2,int ns);
void interp_cube(FLT *out,FLT *du, FLT *ker1, FLT *ker2, FLT *ker3,
BIGINT i1,BIGINT i2,BIGINT i3,BIGINT N1,BIGINT N2,BIGINT N3,int ns);
void spread_subproblem_1d(BIGINT off1, BIGINT size1,FLT *du0,BIGINT M0,FLT *kx0,
FLT *dd0,const finufft_spread_opts& opts);
void spread_subproblem_1d(BIGINT off1, BIGINT size1,std::vector<FLT>& du0,BIGINT M0,std::vector<FLT>& kx0,
std::vector<FLT>& dd0,const finufft_spread_opts& opts);
void spread_subproblem_2d(BIGINT off1, BIGINT off2, BIGINT size1,BIGINT size2,
FLT *du0,BIGINT M0,
FLT *kx0,FLT *ky0,FLT *dd0,const finufft_spread_opts& opts);
std::vector<FLT>& du0,BIGINT M0,
std::vector<FLT>& kx0,std::vector<FLT>& ky0,std::vector<FLT>& dd0,const finufft_spread_opts& opts);
void spread_subproblem_3d(BIGINT off1,BIGINT off2, BIGINT off3, BIGINT size1,
BIGINT size2,BIGINT size3,FLT *du0,BIGINT M0,
FLT *kx0,FLT *ky0,FLT *kz0,FLT *dd0,
BIGINT size2,BIGINT size3,std::vector<FLT>& du0,BIGINT M0,
std::vector<FLT>& kx0,std::vector<FLT>& ky0,std::vector<FLT>& kz0, std::vector<FLT>& dd0,
const finufft_spread_opts& opts);
void add_wrapped_subgrid(BIGINT offset1,BIGINT offset2,BIGINT offset3,
BIGINT size1,BIGINT size2,BIGINT size3,BIGINT N1,
BIGINT N2,BIGINT N3,FLT *data_uniform, FLT *du0);
BIGINT N2,BIGINT N3,FLT *data_uniform, std::vector<FLT>& du0);
void add_wrapped_subgrid_thread_safe(BIGINT offset1,BIGINT offset2,BIGINT offset3,
BIGINT size1,BIGINT size2,BIGINT size3,BIGINT N1,
BIGINT N2,BIGINT N3,FLT *data_uniform, FLT *du0);
BIGINT N2,BIGINT N3,FLT *data_uniform, std::vector<FLT>& du0);
void bin_sort_singlethread(BIGINT *ret, BIGINT M, FLT *kx, FLT *ky, FLT *kz,
BIGINT N1,BIGINT N2,BIGINT N3,int pirange,
double bin_size_x,double bin_size_y,double bin_size_z, int debug);
Expand All @@ -64,8 +64,8 @@ void bin_sort_multithread(BIGINT *ret, BIGINT M, FLT *kx, FLT *ky, FLT *kz,
double bin_size_x,double bin_size_y,double bin_size_z, int debug,
int nthr);
void get_subgrid(BIGINT &offset1,BIGINT &offset2,BIGINT &offset3,BIGINT &size1,
BIGINT &size2,BIGINT &size3,BIGINT M0,FLT* kx0,FLT* ky0,
FLT* kz0,int ns, int ndims);
BIGINT &size2,BIGINT &size3,BIGINT M0,std::vector<FLT>& kx0,std::vector<FLT>& ky0,
std::vector<FLT>& kz0,int ns, int ndims);



Expand Down Expand Up @@ -368,19 +368,24 @@ int spreadSorted(BIGINT* sort_indices,BIGINT N1, BIGINT N2, BIGINT N3,
printf("\tnthr big: switching add_wrapped OMP from critical to atomic (!)\n");

std::vector<BIGINT> brk(nb+1); // NU index breakpoints defining nb subproblems
//FIXME: Clang-Tidy: Casting (double + 0.5) to integer leads to incorrect rounding; consider using lround (#include <cmath>) instead
for (int p=0;p<=nb;++p)
brk[p] = (BIGINT)(0.5 + M*p/(double)nb);

#pragma omp parallel for num_threads(nthr) schedule(dynamic,1) // each is big
std::vector<FLT> kx0, ky0, kz0, dd0, du0; // subproblem data

#pragma omp parallel for num_threads(nthr) schedule(dynamic,1) firstprivate(kx0, ky0, kz0, dd0, du0) // each is big
for (int isub=0; isub<nb; isub++) { // Main loop through the subproblems
BIGINT M0 = brk[isub+1]-brk[isub]; // # NU pts in this subproblem
// copy the location and data vectors for the nonuniform points
FLT *kx0=(FLT*)malloc(sizeof(FLT)*M0), *ky0=NULL, *kz0=NULL;
if (N2>1)
ky0=(FLT*)malloc(sizeof(FLT)*M0);
if (N3>1)
kz0=(FLT*)malloc(sizeof(FLT)*M0);
FLT *dd0=(FLT*)malloc(sizeof(FLT)*M0*2); // complex strength data
kx0.resize(M0);
if (N2>1) {
ky0.resize(M0);
}
if (N3>1) {
kz0.resize(M0);
}
dd0.resize(2*M0); // complex strength data
for (BIGINT j=0; j<M0; j++) { // todo: can avoid this copying?
BIGINT kk=sort_indices[j+brk[isub]]; // NU pt from subprob index list
kx0[j]=FOLDRESCALE(kx[kk],N1,opts.pirange);
Expand All @@ -401,8 +406,7 @@ int spreadSorted(BIGINT* sort_indices,BIGINT N1, BIGINT N2, BIGINT N3,
printf("\tsubgrid: off %lld,%lld,%lld\t siz %lld,%lld,%lld\t #NU %lld\n",(long long)offset1,(long long)offset2,(long long)offset3,(long long)size1,(long long)size2,(long long)size3,(long long)M0);
}
// allocate output data for this subgrid
FLT *du0=(FLT*)malloc(sizeof(FLT)*2*size1*size2*size3); // complex

du0.resize(2*size1*size2*size3); // complex
// Spread to subgrid without need for bounds checking or wrapping
if (!(opts.flags & TF_OMIT_SPREADING)) {
if (ndims==1)
Expand All @@ -423,12 +427,6 @@ int spreadSorted(BIGINT* sort_indices,BIGINT N1, BIGINT N2, BIGINT N3,
}
}

// free up stuff from this subprob... (that was malloc'ed by hand)
free(dd0);
free(du0);
free(kx0);
if (N2>1) free(ky0);
if (N3>1) free(kz0);
} // end main loop over subprobs
if (opts.debug) printf("\tt1 fancy spread: \t%.3g s (%d subprobs)\n",timer.elapsedsec(), nb);
} // end of choice of which t1 spread type to use
Expand Down Expand Up @@ -938,8 +936,8 @@ void interp_cube(FLT *target,FLT *du, FLT *ker1, FLT *ker2, FLT *ker3,
target[1] = out[1];
}

void spread_subproblem_1d(BIGINT off1, BIGINT size1,FLT *du,BIGINT M,
FLT *kx,FLT *dd, const finufft_spread_opts& opts)
void spread_subproblem_1d(BIGINT off1, BIGINT size1, std::vector<FLT>& du,BIGINT M,
std::vector<FLT>& kx, std::vector<FLT>& dd, const finufft_spread_opts& opts)
/* 1D spreader from nonuniform to uniform subproblem grid, without wrapping.
Inputs:
off1 - integer offset of left end of du subgrid from that of overall fine
Expand Down Expand Up @@ -994,7 +992,7 @@ void spread_subproblem_1d(BIGINT off1, BIGINT size1,FLT *du,BIGINT M,
}

void spread_subproblem_2d(BIGINT off1,BIGINT off2,BIGINT size1,BIGINT size2,
FLT *du,BIGINT M, FLT *kx,FLT *ky,FLT *dd,
std::vector<FLT>& du,BIGINT M,std::vector<FLT>&kx,std::vector<FLT>& ky,std::vector<FLT>& dd,
const finufft_spread_opts& opts)
/* spreader from dd (NU) to du (uniform) in 2D without wrapping.
See above docs/notes for spread_subproblem_2d.
Expand Down Expand Up @@ -1039,7 +1037,7 @@ void spread_subproblem_2d(BIGINT off1,BIGINT off2,BIGINT size1,BIGINT size2,
for (int dy=0; dy<ns; ++dy) {
BIGINT j = size1*(i2-off2+dy) + i1-off1; // should be in subgrid
FLT kerval = ker2[dy];
FLT *trg = du+2*j;
FLT *trg = du.data()+2*j;
for (int dx=0; dx<2*ns; ++dx) {
trg[dx] += kerval*ker1val[dx];
}
Expand All @@ -1048,8 +1046,8 @@ void spread_subproblem_2d(BIGINT off1,BIGINT off2,BIGINT size1,BIGINT size2,
}

void spread_subproblem_3d(BIGINT off1,BIGINT off2,BIGINT off3,BIGINT size1,
BIGINT size2,BIGINT size3,FLT *du,BIGINT M,
FLT *kx,FLT *ky,FLT *kz,FLT *dd,
BIGINT size2,BIGINT size3,std::vector<FLT>& du,BIGINT M,
std::vector<FLT>& kx, std::vector<FLT>& ky,std::vector<FLT>& kz, std::vector<FLT>&dd,
const finufft_spread_opts& opts)
/* spreader from dd (NU) to du (uniform) in 3D without wrapping.
See above docs/notes for spread_subproblem_2d.
Expand Down Expand Up @@ -1101,7 +1099,7 @@ void spread_subproblem_3d(BIGINT off1,BIGINT off2,BIGINT off3,BIGINT size1,
for (int dy=0; dy<ns; ++dy) {
BIGINT j = oz + size1*(i2-off2+dy) + i1-off1; // should be in subgrid
FLT kerval = ker2[dy]*ker3[dz];
FLT *trg = du+2*j;
FLT *trg = du.data()+2*j;
for (int dx=0; dx<2*ns; ++dx) {
trg[dx] += kerval*ker1val[dx];
}
Expand All @@ -1112,7 +1110,7 @@ void spread_subproblem_3d(BIGINT off1,BIGINT off2,BIGINT off3,BIGINT size1,

void add_wrapped_subgrid(BIGINT offset1,BIGINT offset2,BIGINT offset3,
BIGINT size1,BIGINT size2,BIGINT size3,BIGINT N1,
BIGINT N2,BIGINT N3,FLT *data_uniform, FLT *du0)
BIGINT N2,BIGINT N3,FLT *data_uniform, std::vector<FLT>& du0)
/* Add a large subgrid (du0) to output grid (data_uniform),
with periodic wrapping to N1,N2,N3 box.
offset1,2,3 give the offset of the subgrid from the lowest corner of output.
Expand Down Expand Up @@ -1141,7 +1139,7 @@ void add_wrapped_subgrid(BIGINT offset1,BIGINT offset2,BIGINT offset3,
for (int dy=0; dy<size2; dy++) {
BIGINT oy = oz + N1*o2[dy]; // off due to y & z (0 in 1D)
FLT *out = data_uniform + 2*oy;
FLT *in = du0 + 2*size1*(dy + size2*dz); // ptr to subgrid array
FLT *in = du0.data() + 2*size1*(dy + size2*dz); // ptr to subgrid array
BIGINT o = 2*(offset1+N1); // 1d offset for output
for (int j=0; j<2*nlo; j++) // j is really dx/2 (since re,im parts)
out[j+o] += in[j];
Expand All @@ -1157,7 +1155,7 @@ void add_wrapped_subgrid(BIGINT offset1,BIGINT offset2,BIGINT offset3,

void add_wrapped_subgrid_thread_safe(BIGINT offset1,BIGINT offset2,BIGINT offset3,
BIGINT size1,BIGINT size2,BIGINT size3,BIGINT N1,
BIGINT N2,BIGINT N3,FLT *data_uniform, FLT *du0)
BIGINT N2,BIGINT N3,FLT *data_uniform, std::vector<FLT>& du0)
/* Add a large subgrid (du0) to output grid (data_uniform),
with periodic wrapping to N1,N2,N3 box.
offset1,2,3 give the offset of the subgrid from the lowest corner of output.
Expand Down Expand Up @@ -1186,7 +1184,7 @@ void add_wrapped_subgrid_thread_safe(BIGINT offset1,BIGINT offset2,BIGINT offset
for (int dy=0; dy<size2; dy++) {
BIGINT oy = oz + N1*o2[dy]; // off due to y & z (0 in 1D)
FLT *out = data_uniform + 2*oy;
FLT *in = du0 + 2*size1*(dy + size2*dz); // ptr to subgrid array
FLT *in = du0.data() + 2*size1*(dy + size2*dz); // ptr to subgrid array
BIGINT o = 2*(offset1+N1); // 1d offset for output
for (int j=0; j<2*nlo; j++) { // j is really dx/2 (since re,im parts)
#pragma omp atomic
Expand Down Expand Up @@ -1349,7 +1347,7 @@ void bin_sort_multithread(BIGINT *ret, BIGINT M, FLT *kx, FLT *ky, FLT *kz,
}


void get_subgrid(BIGINT &offset1,BIGINT &offset2,BIGINT &offset3,BIGINT &size1,BIGINT &size2,BIGINT &size3,BIGINT M,FLT* kx,FLT* ky,FLT* kz,int ns,int ndims)
void get_subgrid(BIGINT &offset1,BIGINT &offset2,BIGINT &offset3,BIGINT &size1,BIGINT &size2,BIGINT &size3,BIGINT M,std::vector<FLT>& kx,std::vector<FLT>& ky,std::vector<FLT>& kz,int ns,int ndims)
/* Writes out the integer offsets and sizes of a "subgrid" (cuboid subset of
Z^ndims) large enough to enclose all of the nonuniform points with
(non-periodic) padding of half the kernel width ns to each side in
Expand Down Expand Up @@ -1395,12 +1393,12 @@ void get_subgrid(BIGINT &offset1,BIGINT &offset2,BIGINT &offset3,BIGINT &size1,B
{
FLT ns2 = (FLT)ns/2;
FLT min_kx,max_kx; // 1st (x) dimension: get min/max of nonuniform points
arrayrange(M,kx,&min_kx,&max_kx);
arrayrange(M,kx.data(),&min_kx,&max_kx);
offset1 = (BIGINT)std::ceil(min_kx-ns2); // min index touched by kernel
size1 = (BIGINT)std::ceil(max_kx-ns2) - offset1 + ns; // int(ceil) first!
if (ndims>1) {
FLT min_ky,max_ky; // 2nd (y) dimension: get min/max of nonuniform points
arrayrange(M,ky,&min_ky,&max_ky);
arrayrange(M,ky.data(),&min_ky,&max_ky);
offset2 = (BIGINT)std::ceil(min_ky-ns2);
size2 = (BIGINT)std::ceil(max_ky-ns2) - offset2 + ns;
} else {
Expand All @@ -1409,7 +1407,7 @@ void get_subgrid(BIGINT &offset1,BIGINT &offset2,BIGINT &offset3,BIGINT &size1,B
}
if (ndims>2) {
FLT min_kz,max_kz; // 3rd (z) dimension: get min/max of nonuniform points
arrayrange(M,kz,&min_kz,&max_kz);
arrayrange(M,kz.data(),&min_kz,&max_kz);
offset3 = (BIGINT)std::ceil(min_kz-ns2);
size3 = (BIGINT)std::ceil(max_kz-ns2) - offset3 + ns;
} else {
Expand Down

0 comments on commit 5bd82ad

Please sign in to comment.