Skip to content

Commit

Permalink
Import latest changes (#183)
Browse files Browse the repository at this point in the history
* use correct glo_num numbering for cht
* call udf_setup before ellipticSetup
* fix OpenCL long long issues
* reorganize source files
* rename ins to nrs
* rename boundary functions
* add variable dt support (no automatic adjustment yet)
* fix wrong scalarNeumann BC calls
* add par-option stopAt = elapsedTime
  • Loading branch information
stgeke authored Dec 10, 2020
1 parent 636fe62 commit 9c31c62
Show file tree
Hide file tree
Showing 493 changed files with 8,091 additions and 40,347 deletions.
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
build/
*.so
*.o
CMakeFiles
*.tgz
45 changes: 45 additions & 0 deletions 3rd_party/gslib/ogs/include/ogstypes.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#ifndef ogstypes_h
#define ogstypes_h

// Scalar type selection for ogs.
//
// Each section below is a hand-edited switch: change the 0/1 on the #if
// line to pick the other alternative. For a type name X, a section
// provides four macros:
//   X        - the underlying C type
//   MPI_X    - the name of the matching MPI datatype constant
//   XFormat  - a format conversion specifier for values of that type
//   XString  - the type's name as a string literal

// floating-point data type: double is active, float is the alternative
#if 1
#define DFLOAT_DOUBLE
#define dfloat double
#define MPI_DFLOAT MPI_DOUBLE
#define dfloatFormat "%lf"
#define dfloatString "double"
#else
#define DFLOAT_SINGLE
#define dfloat float
#define MPI_DFLOAT MPI_FLOAT
#define dfloatFormat "%f"
#define dfloatString "float"
#endif

// host index data type: long long int is active, int is the alternative
#if 1
#define hlong long long int
#define MPI_HLONG MPI_LONG_LONG_INT
#define hlongFormat "%lld"
#define hlongString "long long int"
#else
#define hlong int
#define MPI_HLONG MPI_INT
#define hlongFormat "%d"
#define hlongString "int"
#endif

// device index data type: int is active, long long int is the alternative
#if 1
#define dlong int
#define MPI_DLONG MPI_INT
#define dlongFormat "%d"
#define dlongString "int"
#else
#define dlong long long int
#define MPI_DLONG MPI_LONG_LONG_INT
#define dlongFormat "%lld"
#define dlongString "long long int"
#endif

#endif // ogstypes_h
67 changes: 33 additions & 34 deletions 3rd_party/gslib/ogs/ogs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ SOFTWARE.
The code
dlong N;
hlong id[N]; // the hlong and dlong types are defined in "types.h"
int N;
long long int id[N]; // N and id use built-in types; "types.h" is no longer required
int haloFlag[N];
...
struct ogs_t *ogs = ogsSetup(N, id, &comm, verbose);
Expand Down Expand Up @@ -118,7 +118,6 @@ SOFTWARE.
#include <occa.hpp>

#include "mpi.h"
#include "types.h"

#define ogsFloat "float"
#define ogsDouble "double"
Expand All @@ -127,7 +126,7 @@ SOFTWARE.
#define ogsFloatCommHalf "floatCommHalf"
#define ogsInt "int"
#define ogsLong "long long int"
#define ogsDlong dlongString
// Type-name selector for the device index type. This header declares all
// device-side counts and index arrays as plain int, so the selector must
// expand to the string "int" (the previous expansion named an undefined
// identifier and would fail to compile at the first use).
#define ogsDlong "int"
#define ogsHlong hlongString

#define ogsAdd "add"
Expand All @@ -141,21 +140,21 @@ typedef struct {
MPI_Comm comm;
occa::device device;

dlong N;
dlong Ngather; // total number of gather nodes
dlong Nlocal; // number of local nodes
dlong NlocalGather; // number of local gathered nodes
dlong Nhalo; // number of halo nodes
dlong NhaloGather; // number of gathered nodes on halo
dlong NownedHalo; // number of owned halo nodes
int N;
int Ngather; // total number of gather nodes
int Nlocal; // number of local nodes
int NlocalGather; // number of local gathered nodes
int Nhalo; // number of halo nodes
int NhaloGather; // number of gathered nodes on halo
int NownedHalo; // number of owned halo nodes

dlong *localGatherOffsets;
dlong *localGatherIds;
int *localGatherOffsets;
int *localGatherIds;
occa::memory o_localGatherOffsets;
occa::memory o_localGatherIds;

dlong *haloGatherOffsets;
dlong *haloGatherIds;
int *haloGatherOffsets;
int *haloGatherIds;
occa::memory o_haloGatherOffsets;
occa::memory o_haloGatherIds;

Expand All @@ -171,59 +170,59 @@ typedef struct {
}ogs_t;


ogs_t *ogsSetup(dlong N, hlong *ids, MPI_Comm &comm,
ogs_t *ogsSetup(int N, long long int *ids, MPI_Comm &comm,
int verbose, occa::device device);

void ogsFree(ogs_t* ogs);

// Host array versions
void ogsGatherScatter (void *v, const char *type, const char *op, ogs_t *ogs); //wrapper for gslib call
void ogsGatherScatterVec (void *v, const int k, const char *type, const char *op, ogs_t *ogs); //wrapper for gslib call
void ogsGatherScatterMany(void *v, const int k, const dlong stride, const char *type, const char *op, ogs_t *ogs); //wrapper for gslib call
void ogsGatherScatterMany(void *v, const int k, const int stride, const char *type, const char *op, ogs_t *ogs); //wrapper for gslib call

void ogsGather (void *gv, void *v, const char *type, const char *op, ogs_t *ogs);
void ogsGatherVec (void *gv, void *v, const int k, const char *type, const char *op, ogs_t *ogs);
void ogsGatherMany(void *gv, void *v, const int k, const dlong stride, const char *type, const char *op, ogs_t *ogs);
void ogsGatherMany(void *gv, void *v, const int k, const int stride, const char *type, const char *op, ogs_t *ogs);

void ogsScatter (void *sv, void *v, const char *type, const char *op, ogs_t *ogs);
void ogsScatterVec (void *sv, void *v, const int k, const char *type, const char *op, ogs_t *ogs);
void ogsScatterMany(void *sv, void *v, const int k, const dlong stride, const char *type, const char *op, ogs_t *ogs);
void ogsScatterMany(void *sv, void *v, const int k, const int stride, const char *type, const char *op, ogs_t *ogs);


// Synchronous device buffer versions
void ogsGatherScatter (occa::memory o_v, const char *type, const char *op, ogs_t *ogs); //wrapper for gslib call
void ogsGatherScatterVec (occa::memory o_v, const int k, const char *type, const char *op, ogs_t *ogs); //wrapper for gslib call
void ogsGatherScatterMany(occa::memory o_v, const int k, const dlong stride, const char *type, const char *op, ogs_t *ogs); //wrapper for gslib call
void ogsGatherScatterMany(occa::memory o_v, const int k, const int stride, const char *type, const char *op, ogs_t *ogs); //wrapper for gslib call

void ogsGather (occa::memory o_gv, occa::memory o_v, const char *type, const char *op, ogs_t *ogs);
void ogsGatherVec (occa::memory o_gv, occa::memory o_v, const int k, const char *type, const char *op, ogs_t *ogs);
void ogsGatherMany(occa::memory o_gv, occa::memory o_v, const int k, const dlong stride, const char *type, const char *op, ogs_t *ogs);
void ogsGatherMany(occa::memory o_gv, occa::memory o_v, const int k, const int stride, const char *type, const char *op, ogs_t *ogs);

void ogsScatter (occa::memory o_sv, occa::memory o_v, const char *type, const char *op, ogs_t *ogs);
void ogsScatterVec (occa::memory o_sv, occa::memory o_v, const int k, const char *type, const char *op, ogs_t *ogs);
void ogsScatterMany(occa::memory o_sv, occa::memory o_v, const int k, const dlong stride, const char *type, const char *op, ogs_t *ogs);
void ogsScatterMany(occa::memory o_sv, occa::memory o_v, const int k, const int stride, const char *type, const char *op, ogs_t *ogs);

// Asynchronous device buffer versions
void ogsGatherScatterStart (occa::memory o_v, const char *type, const char *op, ogs_t *ogs);
void ogsGatherScatterFinish (occa::memory o_v, const char *type, const char *op, ogs_t *ogs);
void ogsGatherScatterVecStart (occa::memory o_v, const int k, const char *type, const char *op, ogs_t *ogs);
void ogsGatherScatterVecFinish (occa::memory o_v, const int k, const char *type, const char *op, ogs_t *ogs);
void ogsGatherScatterManyStart (occa::memory o_v, const int k, const dlong stride, const char *type, const char *op, ogs_t *ogs);
void ogsGatherScatterManyFinish(occa::memory o_v, const int k, const dlong stride, const char *type, const char *op, ogs_t *ogs);
void ogsGatherScatterManyStart (occa::memory o_v, const int k, const int stride, const char *type, const char *op, ogs_t *ogs);
void ogsGatherScatterManyFinish(occa::memory o_v, const int k, const int stride, const char *type, const char *op, ogs_t *ogs);

void ogsGatherStart (occa::memory o_Gv, occa::memory o_v, const char *type, const char *op, ogs_t *ogs);
void ogsGatherFinish (occa::memory o_Gv, occa::memory o_v, const char *type, const char *op, ogs_t *ogs);
void ogsGatherVecStart (occa::memory o_Gv, occa::memory o_v, const int k, const char *type, const char *op, ogs_t *ogs);
void ogsGatherVecFinish (occa::memory o_Gv, occa::memory o_v, const int k, const char *type, const char *op, ogs_t *ogs);
void ogsGatherManyStart (occa::memory o_Gv, occa::memory o_v, const int k, const dlong gstride, const dlong stride, const char *type, const char *op, ogs_t *ogs);
void ogsGatherManyFinish(occa::memory o_Gv, occa::memory o_v, const int k, const dlong gstride, const dlong stride, const char *type, const char *op, ogs_t *ogs);
void ogsGatherManyStart (occa::memory o_Gv, occa::memory o_v, const int k, const int gstride, const int stride, const char *type, const char *op, ogs_t *ogs);
void ogsGatherManyFinish(occa::memory o_Gv, occa::memory o_v, const int k, const int gstride, const int stride, const char *type, const char *op, ogs_t *ogs);

void ogsScatterStart (occa::memory o_Sv, occa::memory o_v, const char *type, const char *op, ogs_t *ogs);
void ogsScatterFinish (occa::memory o_Sv, occa::memory o_v, const char *type, const char *op, ogs_t *ogs);
void ogsScatterVecStart (occa::memory o_Sv, occa::memory o_v, const int k, const char *type, const char *op, ogs_t *ogs);
void ogsScatterVecFinish (occa::memory o_Sv, occa::memory o_v, const int k, const char *type, const char *op, ogs_t *ogs);
void ogsScatterManyStart (occa::memory o_Sv, occa::memory o_v, const int k, const dlong sstride, const dlong stride, const char *type, const char *op, ogs_t *ogs);
void ogsScatterManyFinish(occa::memory o_Sv, occa::memory o_v, const int k, const dlong sstride, const dlong stride, const char *type, const char *op, ogs_t *ogs);
void ogsScatterManyStart (occa::memory o_Sv, occa::memory o_v, const int k, const int sstride, const int stride, const char *type, const char *op, ogs_t *ogs);
void ogsScatterManyFinish(occa::memory o_Sv, occa::memory o_v, const int k, const int sstride, const int stride, const char *type, const char *op, ogs_t *ogs);

void *ogsHostMallocPinned(occa::device &device, size_t size, void *source, occa::memory &mem, occa::memory &h_mem);

Expand Down Expand Up @@ -255,12 +254,12 @@ typedef struct {

namespace oogs{

void start(occa::memory o_v, const int k, const dlong stride, const char *type, const char *op, oogs_t *h);
void finish(occa::memory o_v, const int k, const dlong stride, const char *type, const char *op, oogs_t *h);
void startFinish(void *v, const int k, const dlong stride, const char *type, const char *op, oogs_t *h);
void startFinish(occa::memory o_v, const int k, const dlong stride, const char *type, const char *op, oogs_t *h);
oogs_t *setup(ogs_t *ogs, int nVec, dlong stride, const char *type, std::function<void()> callback, oogs_mode gsMode);
oogs_t *setup(dlong N, hlong *ids, const int k, const dlong stride, const char *type, MPI_Comm &comm,
void start(occa::memory o_v, const int k, const int stride, const char *type, const char *op, oogs_t *h);
void finish(occa::memory o_v, const int k, const int stride, const char *type, const char *op, oogs_t *h);
void startFinish(void *v, const int k, const int stride, const char *type, const char *op, oogs_t *h);
void startFinish(occa::memory o_v, const int k, const int stride, const char *type, const char *op, oogs_t *h);
oogs_t *setup(ogs_t *ogs, int nVec, int stride, const char *type, std::function<void()> callback, oogs_mode gsMode);
oogs_t *setup(int N, long long int *ids, const int k, const int stride, const char *type, MPI_Comm &comm,
int verbose, occa::device device, std::function<void()> callback, oogs_mode mode);
void destroy(oogs_t *h);

Expand Down
24 changes: 12 additions & 12 deletions 3rd_party/gslib/ogs/okl/gather.okl
Original file line number Diff line number Diff line change
Expand Up @@ -94,15 +94,15 @@ SOFTWARE.
@kernel void gather_longAdd(const dlong Ngather,
@restrict const dlong * gatherStarts,
@restrict const dlong * gatherIds,
@restrict const long long int * q,
@restrict long long int * gatherq){
@restrict const hlong * q,
@restrict hlong * gatherq){

for(dlong g=0;g<Ngather;++g;@tile(256,@outer,@inner)){

const dlong start = gatherStarts[g];
const dlong end = gatherStarts[g+1];

long long int gq = 0;
hlong gq = 0;
for(dlong n=start;n<end;++n){
const dlong id = gatherIds[n];
gq += q[id];
Expand Down Expand Up @@ -182,15 +182,15 @@ SOFTWARE.
@kernel void gather_longMul(const dlong Ngather,
@restrict const dlong * gatherStarts,
@restrict const dlong * gatherIds,
@restrict const long long int * q,
@restrict long long int * gatherq){
@restrict const hlong * q,
@restrict hlong * gatherq){

for(dlong g=0;g<Ngather;++g;@tile(256,@outer,@inner)){

const dlong start = gatherStarts[g];
const dlong end = gatherStarts[g+1];

long long int gq = 1.f;
hlong gq = 1.f;
for(dlong n=start;n<end;++n){
const dlong id = gatherIds[n];
gq *= q[id];
Expand Down Expand Up @@ -276,16 +276,16 @@ SOFTWARE.
@kernel void gather_longMin(const dlong Ngather,
@restrict const dlong * gatherStarts,
@restrict const dlong * gatherIds,
@restrict const long long int * q,
@restrict long long int * gatherq){
@restrict const hlong * q,
@restrict hlong * gatherq){

for(dlong g=0;g<Ngather;++g;@tile(256,@outer,@inner)){

const dlong start = gatherStarts[g];
const dlong end = gatherStarts[g+1];

const dlong startId = gatherIds[start];
long long int gq = q[startId];
hlong gq = q[startId];
for(dlong n=start+1;n<end;++n){
const dlong id = gatherIds[n];
gq = (q[id] < gq) ? q[id] : gq;
Expand Down Expand Up @@ -369,16 +369,16 @@ SOFTWARE.
@kernel void gather_longMax(const dlong Ngather,
@restrict const dlong * gatherStarts,
@restrict const dlong * gatherIds,
@restrict const long long int * q,
@restrict long long int * gatherq){
@restrict const hlong * q,
@restrict hlong * gatherq){

for(dlong g=0;g<Ngather;++g;@tile(256,@outer,@inner)){

const dlong start = gatherStarts[g];
const dlong end = gatherStarts[g+1];

const dlong startId = gatherIds[start];
long long int gq = q[startId];
hlong gq = q[startId];
for(dlong n=start+1;n<end;++n){
const dlong id = gatherIds[n];
gq = (q[id] > gq) ? q[id] : gq;
Expand Down
24 changes: 12 additions & 12 deletions 3rd_party/gslib/ogs/okl/gatherMany.okl
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ SOFTWARE.
const dlong gstride,
@restrict const dlong * gatherStarts,
@restrict const dlong * gatherIds,
@restrict const long long int * q,
@restrict long long int * gatherq){
@restrict const hlong * q,
@restrict hlong * gatherq){

for(dlong g=0;g<Ngather*Nentries;++g;@tile(256,@outer,@inner)){

Expand All @@ -122,7 +122,7 @@ SOFTWARE.
const dlong start = gatherStarts[gid];
const dlong end = gatherStarts[gid+1];

long long int gq = 0;
hlong gq = 0;
for(dlong n=start;n<end;++n){
const dlong id = gatherIds[n];
gq += q[id+k*stride];
Expand Down Expand Up @@ -220,8 +220,8 @@ SOFTWARE.
const dlong gstride,
@restrict const dlong * gatherStarts,
@restrict const dlong * gatherIds,
@restrict const long long int * q,
@restrict long long int * gatherq){
@restrict const hlong * q,
@restrict hlong * gatherq){

for(dlong g=0;g<Ngather*Nentries;++g;@tile(256,@outer,@inner)){

Expand All @@ -230,7 +230,7 @@ SOFTWARE.
const dlong start = gatherStarts[gid];
const dlong end = gatherStarts[gid+1];

long long int gq = 1.f;
hlong gq = 1.f;
for(dlong n=start;n<end;++n){
const dlong id = gatherIds[n];
gq *= q[id+k*stride];
Expand Down Expand Up @@ -334,8 +334,8 @@ SOFTWARE.
const dlong gstride,
@restrict const dlong * gatherStarts,
@restrict const dlong * gatherIds,
@restrict const long long int * q,
@restrict long long int * gatherq){
@restrict const hlong * q,
@restrict hlong * gatherq){

for(dlong g=0;g<Ngather*Nentries;++g;@tile(256,@outer,@inner)){

Expand All @@ -345,7 +345,7 @@ SOFTWARE.
const dlong end = gatherStarts[gid+1];

const dlong startId = gatherIds[start];
long long int gq = q[startId+k*stride];
hlong gq = q[startId+k*stride];
for(dlong n=start+1;n<end;++n){
const dlong id = gatherIds[n];
gq = (q[id+k*stride] < gq) ? q[id+k*stride] : gq;
Expand Down Expand Up @@ -447,8 +447,8 @@ SOFTWARE.
const dlong gstride,
@restrict const dlong * gatherStarts,
@restrict const dlong * gatherIds,
@restrict const long long int * q,
@restrict long long int * gatherq){
@restrict const hlong * q,
@restrict hlong * gatherq){

for(dlong g=0;g<Ngather*Nentries;++g;@tile(256,@outer,@inner)){

Expand All @@ -458,7 +458,7 @@ SOFTWARE.
const dlong end = gatherStarts[gid+1];

const dlong startId = gatherIds[start];
long long int gq = q[startId+k*stride];
hlong gq = q[startId+k*stride];
for(dlong n=start+1;n<end;++n){
const dlong id = gatherIds[n];
gq = (q[id+k*stride] > gq) ? q[id+k*stride] : gq;
Expand Down
Loading

0 comments on commit 9c31c62

Please sign in to comment.