Skip to content

Commit

Permalink
Add first pass at support for heterogeneous systems
Browse files Browse the repository at this point in the history
Reference #46 for some background and usage instructions.
  • Loading branch information
maherou committed Sep 21, 2016
1 parent 2113157 commit aa4b5ac
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 27 deletions.
9 changes: 8 additions & 1 deletion src/GenerateCoarseProblem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,14 @@ void GenerateCoarseProblem(const SparseMatrix & Af) {

// Construct the geometry and linear system
Geometry * geomc = new Geometry;
GenerateGeometry(Af.geom->size, Af.geom->rank, Af.geom->numThreads, nxc, nyc, nzc, geomc);
local_int_t zlc = 0; // Coarsen nz for the lower block in the z processor dimension
local_int_t zuc = 0; // Coarsen nz for the upper block in the z processor dimension
int pz = Af.geom->pz;
if (pz>0) {
zlc = Af.geom->partz_nz[0]/2; // Coarsen nz for the lower block in the z processor dimension
zuc = Af.geom->partz_nz[1]/2; // Coarsen nz for the upper block in the z processor dimension
}
GenerateGeometry(Af.geom->size, Af.geom->rank, Af.geom->numThreads, Af.geom->pz, zlc, zuc, nxc, nyc, nzc, geomc);

SparseMatrix * Ac = new SparseMatrix;
InitializeSparseMatrix(*Ac, geomc);
Expand Down
71 changes: 64 additions & 7 deletions src/GenerateGeometry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <cmath>
#include <cstdlib>
#include <cassert>

#include "ComputeOptimalShapeXYZ.hpp"
#include "GenerateGeometry.hpp"
Expand All @@ -29,7 +30,6 @@
#include "hpcg.hpp"
using std::endl;

#include <cassert>
#endif

/*!
Expand All @@ -42,14 +42,40 @@ using std::endl;
@param[in] size total number of MPI processes
@param[in] rank this process' rank among other MPI processes
@param[in] numThreads number of OpenMP threads in this process
@param[in] pz z-dimension processor ID where second zone of nz values start
@param[in] nx, ny, nz number of grid points for each local block in the x, y, and z dimensions, respectively
@param[out] geom data structure that will store the above parameters and the factoring of total number of processes into three dimensions
*/
void GenerateGeometry(int size, int rank, int numThreads, int nx, int ny, int nz, Geometry * geom) {
void GenerateGeometry(int size, int rank, int numThreads, int pz, local_int_t zl, local_int_t zu, local_int_t nx, local_int_t ny, local_int_t nz, Geometry * geom) {

int npx, npy, npz;

ComputeOptimalShapeXYZ( size, npx, npy, npz );
int * partz_ids = 0;
local_int_t * partz_nz = 0;
int npartz = 0;
if (pz==0) { // No variation in nz sizes
npartz = 1;
partz_ids = new int[1];
partz_nz = new local_int_t[1];
partz_ids[0] = npz;
partz_nz[0] = nz;
}
else {
npartz = 2;
partz_ids = new int[2];
partz_ids[0] = pz;
partz_ids[1] = npz;
partz_nz = new local_int_t[2];
partz_nz[0] = zl;
partz_nz[1] = zu;
}
partz_ids[npartz-1] = npz; // The last element of this array is always npz
int ipartz_ids = 0;
for (int i=0; i< npartz; ++i) {
assert(ipartz_ids<partz_ids[i]); // Make sure that z partitioning is consistent with computed npz value
ipartz_ids = partz_ids[i];
}

// Now compute this process's indices in the 3D cube
int ipz = rank/(npx*npy);
Expand All @@ -59,9 +85,9 @@ void GenerateGeometry(int size, int rank, int numThreads, int nx, int ny, int nz
#ifdef HPCG_DEBUG
if (rank==0)
HPCG_fout << "size = "<< size << endl
<< "nx = " << nx << endl
<< "ny = " << ny << endl
<< "nz = " << nz << endl
<< "nx = " << params.nx << endl
<< "ny = " << params.ny << endl
<< "nz = " << params.nz << endl
<< "npx = " << npx << endl
<< "npy = " << npy << endl
<< "npz = " << npz << endl;
Expand All @@ -82,6 +108,10 @@ void GenerateGeometry(int size, int rank, int numThreads, int nx, int ny, int nz
geom->npx = npx;
geom->npy = npy;
geom->npz = npz;
geom->pz = pz;
geom->npartz = npartz;
geom->partz_ids = partz_ids;
geom->partz_nz = partz_nz;
geom->ipx = ipx;
geom->ipy = ipy;
geom->ipz = ipz;
Expand All @@ -90,10 +120,37 @@ void GenerateGeometry(int size, int rank, int numThreads, int nx, int ny, int nz
// due to variable local grid sizes
global_int_t gnx = npx*nx;
global_int_t gny = npy*ny;
global_int_t gnz = npz*nz;
//global_int_t gnz = npz*nz;
// We now permit varying values for nz for any nx-by-ny plane of MPI processes.
// npartz is the number of different groups of nx-by-ny groups of processes.
// partz_ids is an array of length npartz where each value indicates the z process of the last process in the ith nx-by-ny group.
// partz_nz is an array of length npartz containing the value of nz for the ith group.

// With no variation, npartz = 1, partz_ids[0] = npz, partz_nz[0] = nz

global_int_t gnz = 0;
ipartz_ids = 0;

for (int i=0; i< npartz; ++i) {
ipartz_ids = partz_ids[i] - ipartz_ids;
gnz += partz_nz[i]*ipartz_ids;
}
//global_int_t giz0 = ipz*nz;
global_int_t giz0 = 0;
ipartz_ids = 0;
for (int i=0; i< npartz; ++i) {
int ipart_nz = partz_nz[i];
if (ipz < partz_ids[i]) {
giz0 += (ipz-ipartz_ids)*ipart_nz;
break;
} else {
giz0 += ipartz_ids*ipart_nz;
}
ipartz_ids = partz_ids[i] - ipartz_ids;

}
global_int_t gix0 = ipx*nx;
global_int_t giy0 = ipy*ny;
global_int_t giz0 = ipz*nz;

// Keep these values for later
geom->gnx = gnx;
Expand Down
2 changes: 1 addition & 1 deletion src/GenerateGeometry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@
#ifndef GENERATEGEOMETRY_HPP
#define GENERATEGEOMETRY_HPP
#include "Geometry.hpp"
void GenerateGeometry(int size, int rank, int numThreads, int nx, int ny, int nz, Geometry * geom);
void GenerateGeometry(int size, int rank, int numThreads, int pz, local_int_t zl, local_int_t zu, local_int_t nx, local_int_t ny, local_int_t nz, Geometry * geom);
#endif // GENERATEGEOMETRY_HPP
54 changes: 43 additions & 11 deletions src/Geometry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,16 @@ struct Geometry_STRUCT {
int size; //!< Number of MPI processes
int rank; //!< This process' rank in the range [0 to size - 1]
int numThreads; //!< This process' number of threads
int nx; //!< Number of x-direction grid points for each local subdomain TODO: The type for nx, ny and nz should be local_int_t
int ny; //!< Number of y-direction grid points for each local subdomain
int nz; //!< Number of z-direction grid points for each local subdomain
local_int_t nx; //!< Number of x-direction grid points for each local subdomain
local_int_t ny; //!< Number of y-direction grid points for each local subdomain
local_int_t nz; //!< Number of z-direction grid points for each local subdomain
int npx; //!< Number of processors in x-direction
int npy; //!< Number of processors in y-direction
int npz; //!< Number of processors in z-direction
global_int_t npartz; //!< Number of partitions with varying nz values
global_int_t * partz_ids; //!< Array of partition ids of processor in z-direction where new value of nz starts (valid values are 0 to npz-1)
global_int_t * partz_nz; //!< Array of nz values for each partition
int pz; //!< partition ID of z-dimension process that starts the second region of nz values
int npartz; //!< Number of partitions with varying nz values
int * partz_ids; //!< Array of partition ids of processor in z-direction where new value of nz starts (valid values are 0 to npz-1)
local_int_t * partz_nz; //!< Array of length npartz containing the nz values for each partition
int ipx; //!< Current rank's x location in the npx by npy by npz processor grid
int ipy; //!< Current rank's y location in the npx by npy by npz processor grid
int ipz; //!< Current rank's z location in the npx by npy by npz processor grid
Expand Down Expand Up @@ -86,17 +87,48 @@ inline int ComputeRankOfMatrixRow(const Geometry & geom, global_int_t index) {
global_int_t iz = index/(gny*gnx);
global_int_t iy = (index-iz*gny*gnx)/gnx;
global_int_t ix = index%gnx;
global_int_t ipartz = 0;
int inz = geom.partz_nz[0];
// We now permit varying values for nz for any nx-by-ny plane of MPI processes.
// npartz is the number of different groups of nx-by-ny groups of processes.
// partz_ids is an array of length npartz where each value indicates the z process of the last process in the ith nx-by-ny group.
// partz_nz is an array of length npartz containing the value of nz for the ith group.

// With no variation, npartz = 1, partz_ids[0] = npz-1, partz_nz[0] = nz

int ipz = 0;
int ipartz_ids = 0;
for (int i=0; i< geom.npartz; ++i) {
int ipart_nz = geom.partz_nz[i];
ipartz_ids = geom.partz_ids[i] - ipartz_ids;
if (iz<= ipart_nz*ipartz_ids) {
ipz += iz/ipart_nz;
break;
} else {
ipz += ipartz_ids;
iz -= ipart_nz*ipartz_ids;
}

}
global_int_t ipz = iz/geom.nz;
global_int_t ipy = iy/geom.ny;
global_int_t ipx = ix/geom.nx;
// global_int_t ipz = iz/geom.nz;
int ipy = iy/geom.ny;
int ipx = ix/geom.nx;
int rank = ipx+ipy*geom.npx+ipz*geom.npy*geom.npx;
return rank;
}


/*!
Destructor for geometry data.
@param[inout] data the geometry data structure whose storage is deallocated
*/
inline void DeleteGeometry(Geometry & geom) {

delete [] geom.partz_nz;
delete [] geom.partz_ids;

return;
}



#endif // GEOMETRY_HPP
2 changes: 1 addition & 1 deletion src/SparseMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ inline void DeleteMatrix(SparseMatrix & A) {
if (A.sendBuffer) delete [] A.sendBuffer;
#endif

if (A.geom!=0) { delete A.geom; A.geom = 0;}
if (A.geom!=0) { DeleteGeometry(*A.geom); delete A.geom; A.geom = 0;}
if (A.Ac!=0) { DeleteMatrix(*A.Ac); delete A.Ac; A.Ac = 0;} // Delete coarse matrix
if (A.mgData!=0) { DeleteMGData(*A.mgData); delete A.mgData; A.mgData = 0;} // Delete MG data
return;
Expand Down
10 changes: 7 additions & 3 deletions src/hpcg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,21 @@
#define HPCG_HPP

#include <fstream>
#include "Geometry.hpp"

extern std::ofstream HPCG_fout;

struct HPCG_Params_STRUCT {
int comm_size; //!< Number of MPI processes in MPI_COMM_WORLD
int comm_rank; //!< This process' MPI rank in the range [0 to comm_size - 1]
int numThreads; //!< This process' number of threads
int nx; //!< Number of x-direction grid points for each local subdomain
int ny; //!< Number of y-direction grid points for each local subdomain
int nz; //!< Number of z-direction grid points for each local subdomain
local_int_t nx; //!< Number of x-direction grid points for each local subdomain
local_int_t ny; //!< Number of y-direction grid points for each local subdomain
local_int_t nz; //!< Number of z-direction grid points for each local subdomain
int runningTime; //!< Number of seconds to run the timed portion of the benchmark
int pz; //!< Partition in the z processor dimension, default is npz
local_int_t zl; //!< nz for processors in the z dimension with value less than pz
local_int_t zu; //!< nz for processors in the z dimension with value greater than pz
};
/*!
HPCG_Params is a shorthand for HPCG_Params_STRUCT
Expand Down
11 changes: 9 additions & 2 deletions src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,11 @@ HPCG_Init(int * argc_p, char ** *argv_p, HPCG_Params & params) {
char ** argv = *argv_p;
char fname[80];
int i, j, *iparams;
char cparams[][6] = {"--nx=", "--ny=", "--nz=", "--rt="};
char cparams[][6] = {"--nx=", "--ny=", "--nz=", "--rt=", "--pz=", "--zl=", "--zu="};
time_t rawtime;
tm * ptm;
const int nparams = (sizeof cparams) / (sizeof cparams[0]);
bool broadcastParams = false; // Make true if parameters read from file.

iparams = (int *)malloc(sizeof(int) * nparams);

Expand All @@ -95,6 +96,7 @@ HPCG_Init(int * argc_p, char ** *argv_p, HPCG_Params & params) {
if (! iparams[3]) rt = 0; // If --rt was specified, we already have the runtime, so don't read it from file
if (! iparams[0] && ! iparams[1] && ! iparams[2]) { /* no geometry arguments on the command line */
ReadHpcgDat(iparams, rt);
broadcastParams = true;
}

// Check for small or unspecified nx, ny, nz values
Expand All @@ -110,14 +112,19 @@ HPCG_Init(int * argc_p, char ** *argv_p, HPCG_Params & params) {

// Broadcast values of iparams to all MPI processes
#ifndef HPCG_NO_MPI
MPI_Bcast( iparams, nparams, MPI_INT, 0, MPI_COMM_WORLD );
if (broadcastParams) {
MPI_Bcast( iparams, nparams, MPI_INT, 0, MPI_COMM_WORLD );
}
#endif

params.nx = iparams[0];
params.ny = iparams[1];
params.nz = iparams[2];

params.runningTime = iparams[3];
params.pz = iparams[4];
params.zl = iparams[5];
params.zu = iparams[6];

#ifndef HPCG_NO_MPI
MPI_Comm_rank( MPI_COMM_WORLD, &params.comm_rank );
Expand Down
2 changes: 1 addition & 1 deletion src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ int main(int argc, char * argv[]) {

// Construct the geometry and linear system
Geometry * geom = new Geometry;
GenerateGeometry(size, rank, params.numThreads, nx, ny, nz, geom);
GenerateGeometry(size, rank, params.numThreads, params.pz, params.zl, params.zu, nx, ny, nz, geom);

ierr = CheckAspectRatio(0.125, geom->npx, geom->npy, geom->npz, "process grid", rank==0);
if (ierr)
Expand Down

0 comments on commit aa4b5ac

Please sign in to comment.