From 900342c808254e2efaac5692ebe039e0e1eb852b Mon Sep 17 00:00:00 2001 From: Stefan Date: Fri, 17 Apr 2020 09:19:25 -0500 Subject: [PATCH 01/22] changed nek_outfld interface --- src/nekInterface/nekInterface.f | 13 +++++++++++++ src/nekInterface/nekInterfaceAdapter.cpp | 14 +++++++++++--- src/nekInterface/nekInterfaceAdapter.hpp | 3 ++- src/plugins/avg.cpp | 7 ++++--- 4 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/nekInterface/nekInterface.f b/src/nekInterface/nekInterface.f index 604a0c1b2..95dace778 100644 --- a/src/nekInterface/nekInterface.f +++ b/src/nekInterface/nekInterface.f @@ -101,6 +101,8 @@ subroutine nekf_ptr(ptr,id,len) ptr = loc(gett) elseif (id .eq. 'ifgetps') then ptr = loc(getps) + elseif (id .eq. 'vmult') then + ptr = loc(vmult) else write(6,*) 'ERROR: nek_ptr cannot find ', id call exitt @@ -556,3 +558,14 @@ integer*8 function nekf_set_vert(npts, isTmsh) return end c----------------------------------------------------------------------- + subroutine nekf_dssum(u) + include 'SIZE' + include 'TOTAL' + + ifld = ifield + ifield = 1 + call dssum(u,lx1,ly1,lz1) + ifield = ifld + + return + end diff --git a/src/nekInterface/nekInterfaceAdapter.cpp b/src/nekInterface/nekInterfaceAdapter.cpp index 0ce89d9a7..c9d2bad53 100644 --- a/src/nekInterface/nekInterfaceAdapter.cpp +++ b/src/nekInterface/nekInterfaceAdapter.cpp @@ -40,6 +40,7 @@ static void (*nek_setics_ptr)(void); static int (*nek_bcmap_ptr)(int *, int*); static int (*nek_nbid_ptr)(int *); static long long (*nek_set_vert_ptr)(int *, int *); +static void (*nek_dssum_ptr)(double *); void noop_func(void) {} @@ -59,7 +60,7 @@ void nek_outfld(const char *suffix){ (*nek_outfld_ptr)((char*)suffix); } -void nek_outfld(const char *suffix, dfloat t, occa::memory o_x, +void nek_outfld(const char *suffix, dfloat t, int coords, occa::memory o_u, occa::memory o_p, occa::memory o_s, int NSfields, int FP64){ @@ -74,7 +75,7 @@ void nek_outfld(const char *suffix, dfloat t, occa::memory o_x, int po = 0; int so = 0; - if(o_x.ptr()) { + if(coords) { xo = 1; } if(o_u.ptr()) { @@ -261,7 +262,8 @@ void set_function_handles(const char *session_in,int verbose) { check_error(dlerror()); nek_set_vert_ptr = (long long (*)(int *, int *)) dlsym(handle,fname("nekf_set_vert")); check_error(dlerror()); - + nek_dssum_ptr = (void (*)(double *)) dlsym(handle,fname("nekf_dssum")); + check_error(dlerror()); #define postfix(x) x##_ptr #define load_or_noop(s) \ @@ -688,3 +690,9 @@ int nek_bcmap(int bid, int ifld) { long long nek_set_glo_num(int npts, int isTMesh) { return (*nek_set_vert_ptr)(&npts, &isTMesh); } + +void nek_dssum(dfloat *u) { + + (*nek_dssum_ptr)(u); +} + diff --git a/src/nekInterface/nekInterfaceAdapter.hpp b/src/nekInterface/nekInterfaceAdapter.hpp index ed6a9d449..05bc42ec6 100644 --- a/src/nekInterface/nekInterfaceAdapter.hpp +++ b/src/nekInterface/nekInterfaceAdapter.hpp @@ -89,7 +89,7 @@ DECLARE_USER_FUNC(userqtl) void* nek_ptr(const char *id); void nek_outfld(void); void nek_outfld(const char *suffix); -void nek_outfld(const char *suffix, dfloat t, occa::memory o_x, +void nek_outfld(const char *suffix, dfloat t, int coords, occa::memory o_u, occa::memory o_p, occa::memory o_s, int NSfields, int FP64); void nek_uic(int ifield); @@ -112,5 +112,6 @@ void nek_copyTo(dfloat &time); void nek_ocopyTo(dfloat &time); void nek_copyRestart(); long long nek_set_glo_num(int npts, int isTMesh); +void nek_dssum(dfloat *u); #endif diff --git a/src/plugins/avg.cpp b/src/plugins/avg.cpp index d0cd2e6bb..41c4d2b39 100644 --- a/src/plugins/avg.cpp +++ b/src/plugins/avg.cpp @@ -174,6 +174,7 @@ void avg::outfld() cds_t *cds = ins->cds; mesh_t *mesh = ins->mesh; const int FP64 = 1; + const int coords = 0; occa::memory o_null; occa::memory o_Tavg, o_Trms; @@ -184,21 +185,21 @@ void avg::outfld() occa::memory o_Trms = o_Srms; } - nek_outfld("avg", atime, o_null, + nek_outfld("avg", atime, coords, o_Uavg, o_Pavg, o_Tavg, Nscalar, FP64); - nek_outfld("rms", atime, o_null, + nek_outfld("rms", atime, coords, o_Urms, o_Prms, o_Trms, Nscalar, FP64); - nek_outfld("rm2", atime, o_null, + nek_outfld("rm2", atime, coords, o_Urm2, o_null, o_null, From 29ff7b64ac5369b884382ed4b69f2cc272e08ae3 Mon Sep 17 00:00:00 2001 From: Stefan Date: Fri, 17 Apr 2020 09:19:45 -0500 Subject: [PATCH 02/22] add diagnostics --- src/core/insSetup.cpp | 15 +++++++++- src/core/tombo.cpp | 68 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/src/core/insSetup.cpp b/src/core/insSetup.cpp index a6b191cf1..f2dd61e92 100644 --- a/src/core/insSetup.cpp +++ b/src/core/insSetup.cpp @@ -703,13 +703,26 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) ins->o_wrk1.copyTo(tmp, Nlocal*sizeof(dfloat)); dfloat sum1 = 0; for(int i=0; icomm); const dfloat err = abs(sum1-sum2)/sum2; if(err > 1e-15) { if(mesh->rank==0) cout << "ogsGatherScatter test failed!\n"; + fflush(stdout); exit(1); } + + mesh->ogs->o_invDegree.copyTo(tmp, Nlocal*sizeof(dfloat)); + double *vmult = (double *) nek_ptr("vmult"); + sum1 = 0; + for(int i=0; icomm); + if(sum1 > 1e-15) { + if(mesh->rank==0) cout << "multiplicity test failed!\n"; + fflush(stdout); + exit(1); + } + + free(tmp); } return ins; diff --git a/src/core/tombo.cpp b/src/core/tombo.cpp index ccfeb4bb7..86551d396 100644 --- a/src/core/tombo.cpp +++ b/src/core/tombo.cpp @@ -1,8 +1,46 @@ #include "nrs.hpp" #include "udf.hpp" +#include "nekInterfaceAdapter.hpp" + +#define NEKDSSUM + +int firstStep = 1; namespace tombo { +void nek_ogsGatherScatterMany(ins_t *ins, occa::memory o_u, int nfld) +{ + occa::memory o_vx = o_u + 0*ins->fieldOffset*sizeof(dfloat); + occa::memory o_vy = o_u + 1*ins->fieldOffset*sizeof(dfloat); + occa::memory o_vz = o_u + 2*ins->fieldOffset*sizeof(dfloat); + + dfloat *vx = ins->U + 0*ins->fieldOffset; + dfloat *vy = ins->U + 1*ins->fieldOffset; + dfloat *vz = ins->U + 2*ins->fieldOffset; + + *(nekData.istep) = 1; + o_vx.copyTo(vx, ins->fieldOffset*sizeof(dfloat)); + memcpy(nekData.vx, vx, sizeof(dfloat)*ins->Nlocal); + if(nfld==3) { + o_vy.copyTo(vy, ins->fieldOffset*sizeof(dfloat)); + memcpy(nekData.vy, vy, sizeof(dfloat)*ins->Nlocal); + + o_vz.copyTo(vz, ins->fieldOffset*sizeof(dfloat)); + memcpy(nekData.vz, vz, sizeof(dfloat)*ins->Nlocal); + } + + nek_userchk(); // call dssum for vx, vy, vz + + memcpy(vx, nekData.vx, sizeof(dfloat)*ins->Nlocal); + o_vx.copyFrom(vx, ins->fieldOffset*sizeof(dfloat)); + if(nfld==3) { + memcpy(vy, nekData.vy, sizeof(dfloat)*ins->Nlocal); + o_vy.copyFrom(vy, ins->fieldOffset*sizeof(dfloat)); + memcpy(vz, nekData.vz, sizeof(dfloat)*ins->Nlocal); + o_vz.copyFrom(vz, ins->fieldOffset*sizeof(dfloat)); + } +} + occa::memory pressureSolve(ins_t *ins, dfloat time) { mesh_t *mesh = ins->mesh; @@ -15,8 +53,16 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) ins->o_Ue, ins->o_wrk0); +#ifdef NEKDSSUM + ins->o_wrk0.copyTo(ins->U, ins->NVfields*ins->fieldOffset*sizeof(dfloat)); + nek_dssum(ins->U + 0*ins->fieldOffset); + nek_dssum(ins->U + 1*ins->fieldOffset); + nek_dssum(ins->U + 2*ins->fieldOffset); + ins->o_wrk0.copyFrom(ins->U, ins->NVfields*ins->fieldOffset*sizeof(dfloat)); +#else ogsGatherScatterMany(ins->o_wrk0, ins->NVfields, ins->fieldOffset, ogsDfloat, ogsAdd, mesh->ogs); +#endif ins->invMassMatrixKernel( mesh->Nelements, @@ -66,8 +112,16 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) ins->o_FU, ins->o_wrk0); +#ifdef NEKDSSUM + ins->o_wrk0.copyTo(ins->U, ins->NVfields*ins->fieldOffset*sizeof(dfloat)); + nek_dssum(ins->U + 0*ins->fieldOffset); + nek_dssum(ins->U + 1*ins->fieldOffset); + nek_dssum(ins->U + 2*ins->fieldOffset); + ins->o_wrk0.copyFrom(ins->U, ins->NVfields*ins->fieldOffset*sizeof(dfloat)); +#else ogsGatherScatterMany(ins->o_wrk0, ins->NVfields, ins->fieldOffset, ogsDfloat, ogsAdd, mesh->ogs); +#endif ins->invMassMatrixKernel( mesh->Nelements, @@ -125,7 +179,21 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) elliptic_t *solver = ins->pSolver; +#ifdef NEKDSSUM + ins->o_wrk3.copyTo(ins->U, ins->fieldOffset*sizeof(dfloat)); + nek_dssum(ins->U + 0*ins->fieldOffset); + ins->o_wrk3.copyFrom(ins->U, ins->fieldOffset*sizeof(dfloat)); +#else ogsGatherScatter(ins->o_wrk3, ogsDfloat, ogsAdd, mesh->ogs); +#endif + + if(firstStep){ + ins->o_wrk3.copyTo(ins->U, ins->fieldOffset*sizeof(dfloat)); //dumping respr (no masked) + nek_copyFrom(time, 0); + nek_outfld(); + firstStep = 0; + } + if (solver->Nmasked) mesh->maskKernel(solver->Nmasked, solver->o_maskIds, ins->o_wrk3); ins->setScalarKernel(ins->Ntotal, 0.0, ins->o_PI); From e362cc6c38bc37be8ab1bc8401cd588dcd9011c8 Mon Sep 17 00:00:00 2001 From: Stefan Kerkemeier Date: Fri, 17 Apr 2020 17:39:10 +0000 Subject: [PATCH 03/22] dont run ogs checks for buildOnly --- src/core/insSetup.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/insSetup.cpp b/src/core/insSetup.cpp index f2dd61e92..899491b2d 100644 --- a/src/core/insSetup.cpp +++ b/src/core/insSetup.cpp @@ -688,7 +688,7 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) MPI_Barrier(mesh->comm); } - { + if(!buildOnly) { dlong gNelements = mesh->Nelements; MPI_Allreduce(MPI_IN_PLACE, &gNelements, 1, MPI_DLONG, MPI_SUM, mesh->comm); const dfloat sum2 = (dfloat)gNelements * mesh->Np; From 590c9eb7964f8baafe70782bafda1da3bb39d8d2 Mon Sep 17 00:00:00 2001 From: Stefan Date: Sat, 18 Apr 2020 09:29:42 -0500 Subject: [PATCH 04/22] print raw p_rhs + multp err --- src/core/insSetup.cpp | 4 +- src/core/tombo.cpp | 53 ++++++++---------------- src/nekInterface/nekInterfaceAdapter.cpp | 6 +-- 3 files changed, 21 insertions(+), 42 deletions(-) diff --git a/src/core/insSetup.cpp b/src/core/insSetup.cpp index 899491b2d..c60be3d7a 100644 --- a/src/core/insSetup.cpp +++ b/src/core/insSetup.cpp @@ -688,7 +688,7 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) MPI_Barrier(mesh->comm); } - if(!buildOnly) { + { dlong gNelements = mesh->Nelements; MPI_Allreduce(MPI_IN_PLACE, &gNelements, 1, MPI_DLONG, MPI_SUM, mesh->comm); const dfloat sum2 = (dfloat)gNelements * mesh->Np; @@ -717,7 +717,7 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) for(int i=0; icomm); if(sum1 > 1e-15) { - if(mesh->rank==0) cout << "multiplicity test failed!\n"; + if(mesh->rank==0) printf("multiplicity test failed! err=%g\n", sum1); fflush(stdout); exit(1); } diff --git a/src/core/tombo.cpp b/src/core/tombo.cpp index 86551d396..69a2b8a8d 100644 --- a/src/core/tombo.cpp +++ b/src/core/tombo.cpp @@ -8,39 +8,6 @@ int firstStep = 1; namespace tombo { -void nek_ogsGatherScatterMany(ins_t *ins, occa::memory o_u, int nfld) -{ - occa::memory o_vx = o_u + 0*ins->fieldOffset*sizeof(dfloat); - occa::memory o_vy = o_u + 1*ins->fieldOffset*sizeof(dfloat); - occa::memory o_vz = o_u + 2*ins->fieldOffset*sizeof(dfloat); - - dfloat *vx = ins->U + 0*ins->fieldOffset; - dfloat *vy = ins->U + 1*ins->fieldOffset; - dfloat *vz = ins->U + 2*ins->fieldOffset; - - *(nekData.istep) = 1; - o_vx.copyTo(vx, ins->fieldOffset*sizeof(dfloat)); - memcpy(nekData.vx, vx, sizeof(dfloat)*ins->Nlocal); - if(nfld==3) { - o_vy.copyTo(vy, ins->fieldOffset*sizeof(dfloat)); - memcpy(nekData.vy, vy, sizeof(dfloat)*ins->Nlocal); - - o_vz.copyTo(vz, ins->fieldOffset*sizeof(dfloat)); - memcpy(nekData.vz, vz, sizeof(dfloat)*ins->Nlocal); - } - - nek_userchk(); // call dssum for vx, vy, vz - - memcpy(vx, nekData.vx, sizeof(dfloat)*ins->Nlocal); - o_vx.copyFrom(vx, ins->fieldOffset*sizeof(dfloat)); - if(nfld==3) { - memcpy(vy, nekData.vy, sizeof(dfloat)*ins->Nlocal); - o_vy.copyFrom(vy, ins->fieldOffset*sizeof(dfloat)); - memcpy(vz, nekData.vz, sizeof(dfloat)*ins->Nlocal); - o_vz.copyFrom(vz, ins->fieldOffset*sizeof(dfloat)); - } -} - occa::memory pressureSolve(ins_t *ins, dfloat time) { mesh_t *mesh = ins->mesh; @@ -179,6 +146,16 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) elliptic_t *solver = ins->pSolver; + if(firstStep){ + occa::memory o_null; + nek_outfld("rs0",time, 1, + o_null, + o_null, + ins->o_wrk3, + 1, + 0); + } + #ifdef NEKDSSUM ins->o_wrk3.copyTo(ins->U, ins->fieldOffset*sizeof(dfloat)); nek_dssum(ins->U + 0*ins->fieldOffset); @@ -188,9 +165,13 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) #endif if(firstStep){ - ins->o_wrk3.copyTo(ins->U, ins->fieldOffset*sizeof(dfloat)); //dumping respr (no masked) - nek_copyFrom(time, 0); - nek_outfld(); + occa::memory o_null; + nek_outfld("rs1",time, 1, + o_null, + o_null, + ins->o_wrk3, + 1, + 0); firstStep = 0; } diff --git a/src/nekInterface/nekInterfaceAdapter.cpp b/src/nekInterface/nekInterfaceAdapter.cpp index c9d2bad53..6d4310a7e 100644 --- a/src/nekInterface/nekInterfaceAdapter.cpp +++ b/src/nekInterface/nekInterfaceAdapter.cpp @@ -611,10 +611,8 @@ void nek_ocopyFrom(dfloat time, int tstep) { void nek_copyFrom(dfloat time, int tstep) { - if(time != timeLast) { - nek_copyFrom(time); - *(nekData.istep) = tstep; - } + nek_copyFrom(time); + *(nekData.istep) = tstep; } void nek_ocopyTo(dfloat &time) { From 31e961bad6f68f06c33a7f1d55d5df0d83ec1633 Mon Sep 17 00:00:00 2001 From: Stefan Date: Sat, 18 Apr 2020 09:43:06 -0500 Subject: [PATCH 05/22] don't run ogs check for buildOnly --- src/core/insSetup.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/insSetup.cpp b/src/core/insSetup.cpp index c60be3d7a..b95c56a5e 100644 --- a/src/core/insSetup.cpp +++ b/src/core/insSetup.cpp @@ -688,7 +688,7 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) MPI_Barrier(mesh->comm); } - { + if(!buildOnly) { dlong gNelements = mesh->Nelements; MPI_Allreduce(MPI_IN_PLACE, &gNelements, 1, MPI_DLONG, MPI_SUM, mesh->comm); const dfloat sum2 = (dfloat)gNelements * mesh->Np; From baf65982829b36e776715d1f3af7a30a8c5b39fb Mon Sep 17 00:00:00 2001 From: Stefan Kerkemeier Date: Sat, 18 Apr 2020 15:06:14 +0000 Subject: [PATCH 06/22] print residual in pressure --- src/core/tombo.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/core/tombo.cpp b/src/core/tombo.cpp index 69a2b8a8d..5e8d6d708 100644 --- a/src/core/tombo.cpp +++ b/src/core/tombo.cpp @@ -149,10 +149,10 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) if(firstStep){ occa::memory o_null; nek_outfld("rs0",time, 1, - o_null, o_null, ins->o_wrk3, - 1, + o_null, + 0, 0); } @@ -167,10 +167,10 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) if(firstStep){ occa::memory o_null; nek_outfld("rs1",time, 1, - o_null, o_null, ins->o_wrk3, - 1, + o_null, + 0, 0); firstStep = 0; } From e13c074cf3223b776e4e1795a0f0522a566ace51 Mon Sep 17 00:00:00 2001 From: Stefan Kerkemeier Date: Sat, 18 Apr 2020 16:58:09 +0000 Subject: [PATCH 07/22] add dssum check with nek gIds --- src/core/insSetup.cpp | 32 +++++++++++++++++------- src/mesh/meshNekReader.cpp | 2 +- src/nekInterface/nekInterface.f | 3 +-- src/nekInterface/nekInterfaceAdapter.cpp | 4 +-- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/src/core/insSetup.cpp b/src/core/insSetup.cpp index b95c56a5e..195a186ff 100644 --- a/src/core/insSetup.cpp +++ b/src/core/insSetup.cpp @@ -689,6 +689,7 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) } if(!buildOnly) { + dfloat err = 0; dlong gNelements = mesh->Nelements; MPI_Allreduce(MPI_IN_PLACE, &gNelements, 1, MPI_DLONG, MPI_SUM, mesh->comm); const dfloat sum2 = (dfloat)gNelements * mesh->Np; @@ -704,24 +705,37 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) dfloat sum1 = 0; for(int i=0; icomm); - const dfloat err = abs(sum1-sum2)/sum2; - if(err > 1e-15) { + if(abs(sum1-sum2)/sum2 > 1e-15) { if(mesh->rank==0) cout << "ogsGatherScatter test failed!\n"; fflush(stdout); - exit(1); } mesh->ogs->o_invDegree.copyTo(tmp, Nlocal*sizeof(dfloat)); double *vmult = (double *) nek_ptr("vmult"); sum1 = 0; - for(int i=0; icomm); - if(sum1 > 1e-15) { - if(mesh->rank==0) printf("multiplicity test failed! err=%g\n", sum1); - fflush(stdout); - exit(1); - } + if(mesh->rank==0) printf("multiplicity test err=%g\n", sum1); + fflush(stdout); + if(sum1 > 1e-15) err++; + + hlong ngv = nek_set_glo_num(mesh->N+1, 0); + hlong *gIds = (hlong *) calloc(Nlocal, sizeof(hlong)); + for(int i=0; iogs = ogsSetup(Nlocal, gIds, mesh->comm, 0, mesh->device); + + ins->setScalarKernel(ins->fieldOffset, 1.0, ins->o_wrk0); + ogsGatherScatter(ins->o_wrk0, ogsDfloat, ogsAdd, mesh->ogs); + ins->o_wrk0.copyTo(tmp, Nlocal*sizeof(dfloat)); + + sum1 = 0; + for(int i=0; icomm); + if(mesh->rank==0) printf("multiplicity test2 err=%g\n", sum1); + fflush(stdout); + if(sum1 > 1e-15) err++; + if(err) exit(1); free(tmp); } diff --git a/src/mesh/meshNekReader.cpp b/src/mesh/meshNekReader.cpp index 39b5962df..8537bc0bc 100644 --- a/src/mesh/meshNekReader.cpp +++ b/src/mesh/meshNekReader.cpp @@ -34,7 +34,7 @@ void meshNekReaderHex3D(int N, mesh_t *mesh, int isMeshT){ const int vtxmap[8] = {0, 1, 3, 2, 4, 5, 7, 6}; // build vertex numbering - mesh->Nnodes = nek_set_glo_num(mesh->Nverts, isMeshT); + mesh->Nnodes = nek_set_glo_num(2, isMeshT); mesh->EToV = (hlong*) calloc(mesh->Nelements * mesh->Nverts, sizeof(hlong)); diff --git a/src/nekInterface/nekInterface.f b/src/nekInterface/nekInterface.f index 95dace778..a550eb9b1 100644 --- a/src/nekInterface/nekInterface.f +++ b/src/nekInterface/nekInterface.f @@ -535,7 +535,7 @@ integer function nekf_nbid(isTmsh) return end c----------------------------------------------------------------------- - integer*8 function nekf_set_vert(npts, isTmsh) + integer*8 function nekf_set_vert(nx, isTmsh) include 'SIZE' include 'TOTAL' @@ -548,7 +548,6 @@ integer*8 function nekf_set_vert(npts, isTmsh) integer*8 ngv - nx = npts**(1./ndim) nel = nelt if (isTmsh.eq.0) nel = nelv call set_vert(glo_num,ngv,nx,nel,vertex,.false.) diff --git a/src/nekInterface/nekInterfaceAdapter.cpp b/src/nekInterface/nekInterfaceAdapter.cpp index 6d4310a7e..9b8bd0f18 100644 --- a/src/nekInterface/nekInterfaceAdapter.cpp +++ b/src/nekInterface/nekInterfaceAdapter.cpp @@ -685,8 +685,8 @@ int nek_bcmap(int bid, int ifld) { return (*nek_bcmap_ptr)(&bid, &ifld); } -long long nek_set_glo_num(int npts, int isTMesh) { - return (*nek_set_vert_ptr)(&npts, &isTMesh); +long long nek_set_glo_num(int nx, int isTMesh) { + return (*nek_set_vert_ptr)(&nx, &isTMesh); } void nek_dssum(dfloat *u) { From 2f95b0d8e8d898c01af3dd972ab284be31b6f835 Mon Sep 17 00:00:00 2001 From: Stefan Date: Sun, 19 Apr 2020 05:34:27 -0500 Subject: [PATCH 08/22] use nek globalIDs --- 3rd_party/libparanumal/libparanumal.makefile | 4 ++-- CMakeLists.txt | 1 + src/core/tombo.cpp | 2 +- src/mesh/meshParallelConnectNodes.cpp | 22 ++++++++++++++++++++ 4 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 src/mesh/meshParallelConnectNodes.cpp diff --git a/3rd_party/libparanumal/libparanumal.makefile b/3rd_party/libparanumal/libparanumal.makefile index 3607e7e29..c8d7ec78f 100644 --- a/3rd_party/libparanumal/libparanumal.makefile +++ b/3rd_party/libparanumal/libparanumal.makefile @@ -79,7 +79,6 @@ $(HDRDIR)/src/meshLoadReferenceNodesTet3D.o \ $(HDRDIR)/src/meshOccaSetup2D.o \ $(HDRDIR)/src/meshOccaSetup3D.o \ $(HDRDIR)/src/meshOccaSetupQuad3D.o \ -$(HDRDIR)/src/meshParallelConnectNodes.o \ $(HDRDIR)/src/meshParallelConnectOpt.o \ $(HDRDIR)/src/meshParallelConsecutiveGlobalNumbering.o\ $(HDRDIR)/src/meshParallelGatherScatterSetup.o \ @@ -123,7 +122,8 @@ $(HDRDIR)/src/hash.o\ $(HDRDIR)/src/setupAide.o \ $(HDRDIR)/src/readArray.o\ $(HDRDIR)/src/occaHostMallocPinned.o \ -$(HDRDIR)/src/timer.o +$(HDRDIR)/src/timer.o \ +#$(HDRDIR)/src/meshParallelConnectNodes.o ifeq ($(OS),Windows_NT) detected_OS := Windows diff --git a/CMakeLists.txt b/CMakeLists.txt index e9deefcd8..bd32af54f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -322,6 +322,7 @@ set(SRC src/mesh/meshSetup.cpp src/mesh/meshNekReader.cpp src/mesh/meshPhysicalNodesHex3D.cpp + src/mesh/meshParallelConnectNodes.cpp src/core/occaDeviceConfig.cpp src/nekInterface/nekInterfaceAdapter.cpp src/core/parReader.cpp diff --git a/src/core/tombo.cpp b/src/core/tombo.cpp index 5e8d6d708..bd37e5dbd 100644 --- a/src/core/tombo.cpp +++ b/src/core/tombo.cpp @@ -2,7 +2,7 @@ #include "udf.hpp" #include "nekInterfaceAdapter.hpp" -#define NEKDSSUM +//#define NEKDSSUM int firstStep = 1; diff --git a/src/mesh/meshParallelConnectNodes.cpp b/src/mesh/meshParallelConnectNodes.cpp new file mode 100644 index 000000000..d1fb2ee12 --- /dev/null +++ b/src/mesh/meshParallelConnectNodes.cpp @@ -0,0 +1,22 @@ +#include +#include +#include + +#include "mesh.h" +#include "nekInterfaceAdapter.hpp" + +// uniquely label each node with a global index, used for gatherScatter +void meshParallelConnectNodes(mesh_t *mesh) +{ + int rank, size; + rank = mesh->rank; + size = mesh->size; + + dlong localNodeCount = mesh->Np*mesh->Nelements; + + mesh->globalIds = (hlong*) calloc(localNodeCount, sizeof(hlong)); + hlong ngv = nek_set_glo_num(mesh->N+1, 0); + for(dlong id=0;idglobalIds[id] = nekData.glo_num[id]; + } +} From 8c4c0aaeb89cad024f1f45f7b40984a58124cf46 Mon Sep 17 00:00:00 2001 From: Stefan Date: Sun, 19 Apr 2020 05:42:54 -0500 Subject: [PATCH 09/22] disable mult test2 --- src/core/insSetup.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/insSetup.cpp b/src/core/insSetup.cpp index 195a186ff..cf083597c 100644 --- a/src/core/insSetup.cpp +++ b/src/core/insSetup.cpp @@ -719,6 +719,7 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) fflush(stdout); if(sum1 > 1e-15) err++; +/* hlong ngv = nek_set_glo_num(mesh->N+1, 0); hlong *gIds = (hlong *) calloc(Nlocal, sizeof(hlong)); for(int i=0; irank==0) printf("multiplicity test2 err=%g\n", sum1); fflush(stdout); if(sum1 > 1e-15) err++; +*/ if(err) exit(1); free(tmp); From 3d3f6b7227aefb547c9fba577c438d854e82bc7e Mon Sep 17 00:00:00 2001 From: Stefan Date: Sun, 19 Apr 2020 06:15:26 -0500 Subject: [PATCH 10/22] use libP globalIDs if buildOnly --- src/mesh/meshParallelConnectNodes.cpp | 111 +++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 1 deletion(-) diff --git a/src/mesh/meshParallelConnectNodes.cpp b/src/mesh/meshParallelConnectNodes.cpp index d1fb2ee12..532652f68 100644 --- a/src/mesh/meshParallelConnectNodes.cpp +++ b/src/mesh/meshParallelConnectNodes.cpp @@ -5,8 +5,18 @@ #include "mesh.h" #include "nekInterfaceAdapter.hpp" +extern int nrsBuildOnly; + +typedef struct{ + + int baseRank; + hlong baseId; + +}parallelNode_t; + + // uniquely label each node with a global index, used for gatherScatter -void meshParallelConnectNodes(mesh_t *mesh) +void meshNekParallelConnectNodes(mesh_t *mesh) { int rank, size; rank = mesh->rank; @@ -20,3 +30,102 @@ void meshParallelConnectNodes(mesh_t *mesh) mesh->globalIds[id] = nekData.glo_num[id]; } } + +void meshParallelConnectNodes(mesh_t *mesh){ + + if(!nrsBuildOnly) meshNekParallelConnectNodes(mesh); + + int rank, size; + rank = mesh->rank; + size = mesh->size; + + dlong localNodeCount = mesh->Np*mesh->Nelements; + dlong *allLocalNodeCounts = (dlong*) calloc(size, sizeof(dlong)); + + MPI_Allgather(&localNodeCount, 1, MPI_DLONG, + allLocalNodeCounts, 1, MPI_DLONG, + mesh->comm); + + hlong gatherNodeStart = 0; + for(int r=0;rvirtual gather) + parallelNode_t *localNodes = + (parallelNode_t*) calloc((mesh->totalHaloPairs+mesh->Nelements)*mesh->Np, + sizeof(parallelNode_t)); + + // use local numbering + for(dlong e=0;eNelements;++e){ + for(int n=0;nNp;++n){ + dlong id = e*mesh->Np+n; + + localNodes[id].baseRank = rank; + localNodes[id].baseId = 1 + id + mesh->Nnodes + gatherNodeStart; + + } + + // use vertex ids for vertex nodes to reduce iterations + for(int v=0;vNverts;++v){ + dlong id = e*mesh->Np + mesh->vertexNodes[v]; + hlong gid = mesh->EToV[e*mesh->Nverts+v] + 1; + localNodes[id].baseId = gid; + } + } + + dlong localChange = 0, gatherChange = 1; + + parallelNode_t *sendBuffer = + (parallelNode_t*) calloc(mesh->totalHaloPairs*mesh->Np, sizeof(parallelNode_t)); + + // keep comparing numbers on positive and negative traces until convergence + while(gatherChange>0){ + + // reset change counter + localChange = 0; + + // send halo data and recv into extension of buffer + meshHaloExchange(mesh, mesh->Np*sizeof(parallelNode_t), + localNodes, sendBuffer, localNodes+localNodeCount); + + // compare trace nodes + for(dlong e=0;eNelements;++e){ + for(int n=0;nNfp*mesh->Nfaces;++n){ + dlong id = e*mesh->Nfp*mesh->Nfaces + n; + dlong idM = mesh->vmapM[id]; + dlong idP = mesh->vmapP[id]; + hlong gidM = localNodes[idM].baseId; + hlong gidP = localNodes[idP].baseId; + + int baseRankM = localNodes[idM].baseRank; + int baseRankP = localNodes[idP].baseRank; + + if(gidMcomm); + } + + //make a locally-ordered version + mesh->globalIds = (hlong*) calloc(localNodeCount, sizeof(hlong)); + for(dlong id=0;idglobalIds[id] = localNodes[id].baseId; + } + + free(localNodes); + free(sendBuffer); +} From 12feae197c275829d46e87006812bf2e0f76038d Mon Sep 17 00:00:00 2001 From: Stefan Date: Sun, 19 Apr 2020 13:02:40 -0500 Subject: [PATCH 11/22] revert some changes --- src/core/insSetup.cpp | 34 ++++++------------- src/core/tombo.cpp | 49 --------------------------- src/mesh/meshParallelConnectNodes.cpp | 5 ++- 3 files changed, 14 insertions(+), 74 deletions(-) diff --git a/src/core/insSetup.cpp b/src/core/insSetup.cpp index cf083597c..4f7865605 100644 --- a/src/core/insSetup.cpp +++ b/src/core/insSetup.cpp @@ -689,7 +689,7 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) } if(!buildOnly) { - dfloat err = 0; + int err = 0; dlong gNelements = mesh->Nelements; MPI_Allreduce(MPI_IN_PLACE, &gNelements, 1, MPI_DLONG, MPI_SUM, mesh->comm); const dfloat sum2 = (dfloat)gNelements * mesh->Np; @@ -705,9 +705,11 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) dfloat sum1 = 0; for(int i=0; icomm); - if(abs(sum1-sum2)/sum2 > 1e-15) { - if(mesh->rank==0) cout << "ogsGatherScatter test failed!\n"; + sum1 = abs(sum1-sum2)/sum2; + if(sum1 > 1e-15) { + if(mesh->rank==0) printf("ogsGatherScatter test err=%g!\n", sum1); fflush(stdout); + err++; } mesh->ogs->o_invDegree.copyTo(tmp, Nlocal*sizeof(dfloat)); @@ -715,27 +717,11 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) sum1 = 0; for(int i=0; icomm); - if(mesh->rank==0) printf("multiplicity test err=%g\n", sum1); - fflush(stdout); - if(sum1 > 1e-15) err++; - -/* - hlong ngv = nek_set_glo_num(mesh->N+1, 0); - hlong *gIds = (hlong *) calloc(Nlocal, sizeof(hlong)); - for(int i=0; iogs = ogsSetup(Nlocal, gIds, mesh->comm, 0, mesh->device); - - ins->setScalarKernel(ins->fieldOffset, 1.0, ins->o_wrk0); - ogsGatherScatter(ins->o_wrk0, ogsDfloat, ogsAdd, mesh->ogs); - ins->o_wrk0.copyTo(tmp, Nlocal*sizeof(dfloat)); - - sum1 = 0; - for(int i=0; icomm); - if(mesh->rank==0) printf("multiplicity test2 err=%g\n", sum1); - fflush(stdout); - if(sum1 > 1e-15) err++; -*/ + if(sum1 > 1e-15) { + if(mesh->rank==0) printf("multiplicity test err=%g!\n", sum1); + fflush(stdout); + err++; + } if(err) exit(1); free(tmp); diff --git a/src/core/tombo.cpp b/src/core/tombo.cpp index bd37e5dbd..ccfeb4bb7 100644 --- a/src/core/tombo.cpp +++ b/src/core/tombo.cpp @@ -1,10 +1,5 @@ #include "nrs.hpp" #include "udf.hpp" -#include "nekInterfaceAdapter.hpp" - -//#define NEKDSSUM - -int firstStep = 1; namespace tombo { @@ -20,16 +15,8 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) ins->o_Ue, ins->o_wrk0); -#ifdef NEKDSSUM - ins->o_wrk0.copyTo(ins->U, ins->NVfields*ins->fieldOffset*sizeof(dfloat)); - nek_dssum(ins->U + 0*ins->fieldOffset); - nek_dssum(ins->U + 1*ins->fieldOffset); - nek_dssum(ins->U + 2*ins->fieldOffset); - ins->o_wrk0.copyFrom(ins->U, ins->NVfields*ins->fieldOffset*sizeof(dfloat)); -#else ogsGatherScatterMany(ins->o_wrk0, ins->NVfields, ins->fieldOffset, ogsDfloat, ogsAdd, mesh->ogs); -#endif ins->invMassMatrixKernel( mesh->Nelements, @@ -79,16 +66,8 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) ins->o_FU, ins->o_wrk0); -#ifdef NEKDSSUM - ins->o_wrk0.copyTo(ins->U, ins->NVfields*ins->fieldOffset*sizeof(dfloat)); - nek_dssum(ins->U + 0*ins->fieldOffset); - nek_dssum(ins->U + 1*ins->fieldOffset); - nek_dssum(ins->U + 2*ins->fieldOffset); - ins->o_wrk0.copyFrom(ins->U, ins->NVfields*ins->fieldOffset*sizeof(dfloat)); -#else ogsGatherScatterMany(ins->o_wrk0, ins->NVfields, ins->fieldOffset, ogsDfloat, ogsAdd, mesh->ogs); -#endif ins->invMassMatrixKernel( mesh->Nelements, @@ -146,35 +125,7 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) elliptic_t *solver = ins->pSolver; - if(firstStep){ - occa::memory o_null; - nek_outfld("rs0",time, 1, - o_null, - ins->o_wrk3, - o_null, - 0, - 0); - } - -#ifdef NEKDSSUM - ins->o_wrk3.copyTo(ins->U, ins->fieldOffset*sizeof(dfloat)); - nek_dssum(ins->U + 0*ins->fieldOffset); - ins->o_wrk3.copyFrom(ins->U, ins->fieldOffset*sizeof(dfloat)); -#else ogsGatherScatter(ins->o_wrk3, ogsDfloat, ogsAdd, mesh->ogs); -#endif - - if(firstStep){ - occa::memory o_null; - nek_outfld("rs1",time, 1, - o_null, - ins->o_wrk3, - o_null, - 0, - 0); - firstStep = 0; - } - if (solver->Nmasked) mesh->maskKernel(solver->Nmasked, solver->o_maskIds, ins->o_wrk3); ins->setScalarKernel(ins->Ntotal, 0.0, ins->o_PI); diff --git a/src/mesh/meshParallelConnectNodes.cpp b/src/mesh/meshParallelConnectNodes.cpp index 532652f68..4d03a6788 100644 --- a/src/mesh/meshParallelConnectNodes.cpp +++ b/src/mesh/meshParallelConnectNodes.cpp @@ -33,7 +33,10 @@ void meshNekParallelConnectNodes(mesh_t *mesh) void meshParallelConnectNodes(mesh_t *mesh){ - if(!nrsBuildOnly) meshNekParallelConnectNodes(mesh); + if(!nrsBuildOnly) { + meshNekParallelConnectNodes(mesh); + return; + } int rank, size; rank = mesh->rank; From b084b59b800f72164707173c6c918abef2e55962 Mon Sep 17 00:00:00 2001 From: Stefan Kerkemeier Date: Sun, 19 Apr 2020 20:47:23 +0000 Subject: [PATCH 12/22] fix CHT globalIds --- src/mesh/meshParallelConnectNodes.cpp | 4 +++- src/mesh/meshSetup.cpp | 10 +++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/mesh/meshParallelConnectNodes.cpp b/src/mesh/meshParallelConnectNodes.cpp index 4d03a6788..3e9f002b9 100644 --- a/src/mesh/meshParallelConnectNodes.cpp +++ b/src/mesh/meshParallelConnectNodes.cpp @@ -6,6 +6,7 @@ #include "nekInterfaceAdapter.hpp" extern int nrsBuildOnly; +extern int isTmesh; typedef struct{ @@ -25,7 +26,7 @@ void meshNekParallelConnectNodes(mesh_t *mesh) dlong localNodeCount = mesh->Np*mesh->Nelements; mesh->globalIds = (hlong*) calloc(localNodeCount, sizeof(hlong)); - hlong ngv = nek_set_glo_num(mesh->N+1, 0); + hlong ngv = nek_set_glo_num(mesh->N+1, isTmesh); for(dlong id=0;idglobalIds[id] = nekData.glo_num[id]; } @@ -34,6 +35,7 @@ void meshNekParallelConnectNodes(mesh_t *mesh) void meshParallelConnectNodes(mesh_t *mesh){ if(!nrsBuildOnly) { + // hotfix as libP version seems to be broken meshNekParallelConnectNodes(mesh); return; } diff --git a/src/mesh/meshSetup.cpp b/src/mesh/meshSetup.cpp index 8630ac42b..7f5364fc2 100644 --- a/src/mesh/meshSetup.cpp +++ b/src/mesh/meshSetup.cpp @@ -2,6 +2,8 @@ #include "bcMap.hpp" #include "meshNekReader.hpp" +int isTmesh = 0; + void meshVOccaSetup3D(mesh_t *mesh, setupAide &options, occa::properties &kernelInfo); mesh_t *createMeshDummy(MPI_Comm comm, int N, setupAide &options, occa::properties& kernelInfo) { @@ -136,7 +138,7 @@ mesh_t *createMeshDummy(MPI_Comm comm, int N, setupAide &options, occa::properti libParanumal::meshSurfaceGeometricFactorsHex3D(mesh); // global nodes - libParanumal::meshParallelConnectNodes(mesh); + meshParallelConnectNodes(mesh); meshOccaSetup3D(mesh, options, kernelInfo); @@ -187,7 +189,9 @@ mesh_t *createMeshT(MPI_Comm comm, int N, int isMeshT, setupAide &options, occa: libParanumal::meshSurfaceGeometricFactorsHex3D(mesh); // global nodes - libParanumal::meshParallelConnectNodes(mesh); + isTmesh = 1; + meshParallelConnectNodes(mesh); + isTmesh = 0; bcMap::check(mesh); @@ -248,7 +252,7 @@ mesh_t *createMeshV(MPI_Comm comm, int N, mesh_t *meshT, setupAide &options, occ // uniquely label each node with a global index, used for gatherScatter // mesh->globalIds - libParanumal::meshParallelConnectNodes(mesh); + meshParallelConnectNodes(mesh); bcMap::check(mesh); meshVOccaSetup3D(mesh, options, kernelInfo); From 492e3e74603dcaf9cbd160208aebeeed71257d15 Mon Sep 17 00:00:00 2001 From: Stefan Date: Fri, 27 Mar 2020 15:01:24 -0500 Subject: [PATCH 13/22] add ktauChannel example --- examples/ktauChannel/channel.co2 | Bin 0 -> 2296 bytes examples/ktauChannel/channel.oudf | 18 +++++ examples/ktauChannel/channel.par | 43 +++++++++++ examples/ktauChannel/channel.re2 | Bin 0 -> 14276 bytes examples/ktauChannel/channel.udf | 75 +++++++++++++++++++ examples/ktauChannel/channel.usr | 115 ++++++++++++++++++++++++++++++ examples/ktauChannel/input.box | 22 ++++++ 7 files changed, 273 insertions(+) create mode 100644 examples/ktauChannel/channel.co2 create mode 100644 examples/ktauChannel/channel.oudf create mode 100644 examples/ktauChannel/channel.par create mode 100644 examples/ktauChannel/channel.re2 create mode 100644 examples/ktauChannel/channel.udf create mode 100644 examples/ktauChannel/channel.usr create mode 100644 examples/ktauChannel/input.box diff --git a/examples/ktauChannel/channel.co2 b/examples/ktauChannel/channel.co2 new file mode 100644 index 0000000000000000000000000000000000000000..e6b93cad0ad82f1cd52b696b1aca185af0dd92d2 GIT binary patch literal 2296 zcmb`G^>R~D7zcy9O9i{MxTZkURG`7#-BaA1;!t=)UWdQfow>h~2cR=MJ^i*hv$^-2 z?$K(srx^dKRmYb_>$<4_oB#LKpON2<36)Akmcb-g0TW?4Ooo-^-}+wA_kq3!`a1Yo z{b|sj1^pS&p9A;PKL-6%&_4nFGnfK;oAmChw@L3FQz5PX|Eh9si>`oakXC&^q_to@ z0Mj9@`ty)h{RNl-Y1Kc6wCZ2LOh~KVrnKs9npMtiv28V^)s8Nh4QbU6LIJkuhhPq* zReurEs=owtA+7qCkXHRGmUTq0^?P6;q*Z?z z(yG4#iy*D~*N|5I8(0i!)!UR-y-iEXxh+2PwUAby?>bltY1I!yTJ;TRgS6_eLRuTF zuR%McRsR;!s(%L^kXF4-Y1P}*SzT>odZa0^6TRfv%z-OFR{Xs~p{t)=w)2hD>Y1KEu@10it zCrE3P^=I%|q*ZTITJ<*Bl2-4?)^cu(_hB2@msb5@NUQ!xi?>BT0%_IX0Y8&g{TFD0 zE&8wE{%O_QlvaJ3(&~NO-ZJ;@Rm)jg^+zGC`eWelwCe9dTJ`s!2hytl25HrQhaHes zy-jJ=+qAQs+tLQ^o7Q&L?y(Efsy_~C)t`V~NUQ!nq*ea_YLHg_4@j$i4Ei9gdYjTZ z#@ePjq_u;!-yyA?tnKZGwCYbnTJ@)30Me>|2x-+nfsP = 0; + if(bc->scalarId == 0) bc->sP = 0; +} diff --git a/examples/ktauChannel/channel.par b/examples/ktauChannel/channel.par new file mode 100644 index 000000000..169c154fe --- /dev/null +++ b/examples/ktauChannel/channel.par @@ -0,0 +1,43 @@ +[OCCA] +backend = CUDA +deviceNumber = LOCAL-RANK + +[GENERAL] +polynomialOrder = 7 +startFrom = r.fld+time=0 +stopAt = endTime +endTime = 500 +dt = 2e-02 +timeStepper = tombo2 + +writeControl = runTime +writeInterval = 50 + +[PRESSURE] +preconditioner = semg_amg +residualTol = 1e-04 + +[VELOCITY] +boundaryTypeMap = wall, slipY +residualTol = 1e-06 +density = 1.0 +viscosity = -43500. + +[TEMPERATURE] +#solver = none +boundaryTypeMap = inlet, insulated +residualTol = 1e-06 +rhoCp = 1.0 +conductivity = -43500. + +[SCALAR01] # k +boundaryTypeMap = inlet, insulated +residualTol = 1e-06 +rho = 1.0 +diffusivity = -43500. + +[SCALAR02] # tau +boundaryTypeMap = inlet, insulated +residualTol = 1e-06 +rho = 1.0 +diffusivity = -43500. diff --git a/examples/ktauChannel/channel.re2 b/examples/ktauChannel/channel.re2 new file mode 100644 index 0000000000000000000000000000000000000000..0584fd052d81599a14d2267ba30e9f061eba9574 GIT binary patch literal 14276 zcmd7YF{>O^7{>7_wG*^3-37%`E+z_ExT}aF8WBXrQn0YLu->nsA3RfOCU>{E|C#4KXU_A^nP=zbl9#@}ynOAVeBQXcxVTx5zy0RB z@ni=6{`%MT5I#Tt@aVH$VJHuDjCJRqUwwXAAsy*RZ}YEJsb?I5zh~FQC)U2EdacLx z`NO&OIKJ&)U;21^rKN zf4b-D#=6X$pYxv2UOhTKe9mviIxEYu5m~+Xtj|#ye|OC9-H>;=u65UMGxJZkA9;?a z{^c9x{9O7tHRspgM=!^{`guK9r{?_n_d~v>IoR&KXWR9f+p<6R#(dw~*}Md@A>jRJU-{q z_fJ=!-&WYGTfa>?$Qb>5Ph;(?>>C}u&+GRS|M+zKSf2Bq&tAQ~531Qd(EqRB+s|b` zmgl^8zk79eyiNB#W1Zc8U&kEP?*Tc$f37~iuaJ&(tPWa7nSXlpC%@RwIe*9>(vglW z_qE1vMtv;LIe*9>(vgnUYwIZUPd`29{2_lxM>@9L*BZYW?Z@(*^N0K)9qCxTwvICY z^g4HG&iTRO_x|`kzrUhmtXBuEBYrd54|$evq$3@-U7!5plkH=9&iTXWla8_8+J#!n z_JMwS&iTXWla8@oy|#|{&FHyWo^$>%`lMs5x7^oS=AT|iF3oxLRele7F#h}e!7lH{ z#d>wnI^s8@{m64X?N7dO+x5vmKG{B&=bS%`KIs_itzD?KY#->S=bS%`KIs_i)obgB z-;AECw=AUjqmgoF9pHHigK6-yo@72$x zkMi6v&-uCZQGCRl9REI-XZ+&oi|Nl5(vgnVZ&RK#M*n^vNBKuOZg<~HnNK?Mk8S^| znSZ)Imgk&5+JUX zTEB8qzX$lia>cqH9b>&ZXdTt-W%H;0Ru{u?Y#+;W&L2jf zbd2@3f7N~;z4vvm&L2jfbc}UQ+wAvo>ie8Oj6UfY>uvw4{XWXbch-e*yzAura{Pbw z)A`S>%lQi<2Q2bqKR=z!H;#|T1}2Bo^`BXnVN2)py>z~QPn{p{17G_$VS4{w9Q!x^ zc)i~ISHk+Q&Rjo^kJsz1e;L-lHFNzqK3=c4{_A1=H)pON$H(jS)_*Ik z|MtxFJ-ukQQ`WxRXM?QahAFns;eJ1N~tT*Ow z^ZvZ`Jz14@%{|DoG^8ewy^@lh +#include "udf.hpp" +#include "plugins/RANSktau.hpp" + +/* User Functions */ + +static dfloat rho, mueLam; +occa::kernel userfKernel; + +void userf(ins_t *ins, dfloat time, occa::memory o_U, occa::memory o_FU) +{ + const dfloat Re_tau = 2000.0 + const dfloat Re_B = rho/mueLam; + const dfloat DPDX = (Re_tau/Re_B)*(Re_TAU/Re_B); + userfKernel(ins->Nlocal, 0*ins->fieldOffset, DPDX, o_FU); +} + +void userq(ins_t *ins, dfloat time, occa::memory o_S, occa::memory o_FS) +{ + mesh_t *mesh = ins->mesh; + cds_t *cds = ins->cds; + + RANSktau::updateSourceTerms(); +} + +void uservp(ins_t *ins, dfloat time, occa::memory o_U, occa::memory o_S, + occa::memory o_UProp, occa::memory o_SProp) +{ + mesh_t *mesh = ins->mesh; + cds_t *cds = ins->cds; + + RANSktau::updateProperties(); + + dfloat conductivity; + ins->options.getArgs("SCALAR00 DIFFUSIVITY", conductivity); + const dfloat Pr_t = 0.7; + occa::memory o_mue_t = RANSktau::o_mue_t(); + occa::memory o_temp_mue = cds->o_diff + 0*cds->fieldOffset*sizeof(dfloat); + ins->scalarScaledAddKernel(ins->Nlocal, mueLam, 1/Pr_t, o_mue_t, o_temp_mue); +} + +void UDF_LoadKernels(ins_t *ins) +{ + userfKernel = udfBuildKernel(ins, "cfill"); + RANSktau::buildKernel(ins); +} + +void UDF_Setup(ins_t *ins) +{ + mesh_t *mesh = ins->mesh; + cds_t *cds = ins->cds; + + // get IC from nek + if (!ins->readRestartFile) nek_copyTo(ins->startTime); + + udf.properties = &uservp; + udf.uEqnSource = &userf; + udf.sEqnSource = &userq; + + const int scalarFieldStart = 1; + ins->options.getArgs("VISCOSITY", mueLam); + ins->options.getArgs("DENSITY", rho); + RANSktau::setup(ins, mueLam, rho, scalarFieldStart); +} + +void UDF_ExecuteStep(ins_t *ins, dfloat time, int tstep) +{ + if (ins->isOutputStep) { + nek_ocopyFrom(time, tstep); + nek_userchk(); + } +} diff --git a/examples/ktauChannel/channel.usr b/examples/ktauChannel/channel.usr new file mode 100644 index 000000000..221058124 --- /dev/null +++ b/examples/ktauChannel/channel.usr @@ -0,0 +1,115 @@ +C +C USER SPECIFIED ROUTINES: +C +C - boundary conditions +C - initial conditions +C - variable properties +C - forcing function for fluid (f) +C - forcing function for passive scalar (q) +C - general purpose routine for checking errors etc. +C +c----------------------------------------------------------------------- + subroutine useric (ix,iy,iz,ieg) + include 'SIZE' + include 'TOTAL' + include 'NEKUSE' + + real kmax + + Re = 1/param(2) + kmax = 3.9201E-03 + omax = 0.5 + + ux = 1.0 + uy = 0.0 + uz = 0.0 + + yd = 1 + y + tau = 0.0 + if(yd.ne.0) then + omeg = omax + 6./Re/0.075/yd**2 + tau = 1/omeg + endif + + if (ifield.eq.2) then + temp = 1.0 + elseif (ifield.eq.3) then + temp = kmax + elseif (ifield.eq.4) then + temp = tau + endif + + return + end +c----------------------------------------------------------------------- + subroutine userchk + include 'SIZE' + include 'TOTAL' + +c ubar = glsc2(vx,bm1,nx1*ny1*nz1*nelt)/volvm1 +c if (nid.eq.0) write(6,*) 'ubar=', ubar + + return + end +c----------------------------------------------------------------------- + subroutine usrdat ! This routine to modify element vertices + include 'SIZE' ! _before_ mesh is generated, which + include 'TOTAL' ! guarantees GLL mapping of mesh. + + return + end +c----------------------------------------------------------------------- + subroutine usrdat2() ! This routine to modify mesh coordinates + include 'SIZE' + include 'TOTAL' + + parameter(BETAM = 1.8) + + call rescale_x(xm1, 0.0,8.0) + call rescale_x(ym1,-1.0,0.0) + call rescale_x(zm1, 0.0,1.0) + + ntot = nx1*ny1*nz1*nelt + + do i=1,ntot + ym1(i,1,1,1) = tanh(BETAM*ym1(i,1,1,1))/tanh(BETAM) + enddo + + do iel=1,nelt + cbc(5,iel,1) = 'P ' + cbc(6,iel,1) = 'P ' + do ifc=1,2*ndim + cbc(ifc,iel,2) = cbc(ifc,iel,1) + if (cbc(ifc,iel,1) .eq. 'W ') cbc(ifc,iel,2) = 't ' + if (cbc(ifc,iel,1) .eq. 'SYM') cbc(ifc,iel,2) = 'I ' + if (cbc(ifc,iel,1) .eq. 'W ') boundaryID(ifc,iel) = 1 + if (cbc(ifc,iel,1) .eq. 'SYM') boundaryID(ifc,iel) = 2 + enddo + enddo + + return + end +c----------------------------------------------------------------------- + subroutine usrdat3 + include 'SIZE' + include 'TOTAL' + + return + end +c----------------------------------------------------------------------- + subroutine usrsetvert(glo_num,nel,nx,ny,nz) ! to modify glo_num + integer*8 glo_num(1) + + ! kludge for periodic bc in z + nxy = nx*ny + nxyz = nx*ny*nz + do iel = 1,nel + ioff = nxyz*(iel-1) + do ixy = 1,nxy + glo_num(ioff + nxy*(nz-1) + ixy) = glo_num(ioff + ixy) + enddo + enddo + + return + end +c----------------------------------------------------------------------- diff --git a/examples/ktauChannel/input.box b/examples/ktauChannel/input.box new file mode 100644 index 000000000..850c6c7e1 --- /dev/null +++ b/examples/ktauChannel/input.box @@ -0,0 +1,22 @@ +base.rea +-3 spatial dimension ( < 0 --> generate .rea/.re2 pair) +1 number of fields +#======================================================================= +# +# Example of .box file for Taylor-Green +# +# If nelx (y or z) < 0, then genbox automatically generates the +# grid spacing in the x (y or z) direction +# with a geometric ratio given by "ratio". +# ( ratio=1 implies uniform spacing ) +# +# Note that the character bcs _must_ have 3 spaces. +# +#======================================================================= +# +Box +-5 -12 -1 nelx,nely,nelz for Box +0 8 1. x0,x1,gain (rescaled in usrdat) +0 1 1. y0,y1,gain (rescaled in usrdat) +0 1 1. z0,z1,gain +P ,P ,W ,SYM,E ,E bc's (3 chars each!) From 856817466f4bc53c72ecc95aaae463345e3c4405 Mon Sep 17 00:00:00 2001 From: Stefan Date: Fri, 27 Mar 2020 15:03:30 -0500 Subject: [PATCH 14/22] add missing files --- src/plugins/RANSktau.cpp | 201 +++++++++++++++++++++++++++++++++++++++ src/plugins/RANSktau.hpp | 13 +++ 2 files changed, 214 insertions(+) create mode 100644 src/plugins/RANSktau.cpp create mode 100644 src/plugins/RANSktau.hpp diff --git a/src/plugins/RANSktau.cpp b/src/plugins/RANSktau.cpp new file mode 100644 index 000000000..178a316cc --- /dev/null +++ b/src/plugins/RANSktau.cpp @@ -0,0 +1,201 @@ +#include +#include +#include "RANSktau.hpp" + +// private members +namespace { + static ins_t *ins; + + int kFieldIndex; + + dfloat rho; + dfloat mueLam; + + static occa::memory o_mut; + + static occa::memory o_k; + static occa::memory o_tau; + + static occa::kernel computeKernel; + static occa::kernel SijOijKernel; + static occa::kernel SijOijMag2Kernel; + static occa::kernel limitKernel; + static occa::kernel mueKernel; + + static bool setupCalled = 0; + + static dfloat coeff[] = { + 0.6, // sigma_k + 0.5, // sigma_tau + 1.0, // alpinf_str + 0.0708, // beta0 + 0.41, // kappa + 0.09, // betainf_str + 0.0, // sigd_min + 1.0/8.0, // sigd_max + 400.0, // fb_c1st + 400.0, // fb_c2st + 85.0, // fb_c1 + 100.0, // fb_c2 + 0.52, // alp_inf + 1e-8 // TINY + }; +} + + +void RANSktau::buildKernel(ins_t *ins) +{ + mesh_t *mesh = ins->mesh; + + occa::properties kernelInfo = *(ins->kernelInfo); + kernelInfo["defines/p_sigma_k"] = coeff[0]; + kernelInfo["defines/p_sigma_tau"] = coeff[1]; + kernelInfo["defines/p_alpinf_str"] = coeff[2]; + kernelInfo["defines/p_beta0"] = coeff[3]; + kernelInfo["defines/p_kappa"] = coeff[4]; + kernelInfo["defines/p_betainf_str"] = coeff[5]; + kernelInfo["defines/p_ibetainf_str3"] = 1/pow(coeff[5],3); + kernelInfo["defines/p_sigd_min"] = coeff[6]; + kernelInfo["defines/p_sigd_max"] = coeff[7]; + kernelInfo["defines/p_fb_c1st"] = coeff[8]; + kernelInfo["defines/p_fb_c2st"] = coeff[9]; + kernelInfo["defines/p_fb_c1"] = coeff[10]; + kernelInfo["defines/p_fb_c2"] = coeff[11]; + kernelInfo["defines/p_alp_inf"] = coeff[12]; + kernelInfo["defines/p_tiny"] = coeff[13]; + + string fileName; + int rank = mesh->rank; + fileName.assign(getenv("NEKRS_INSTALL_DIR")); + fileName += "/okl/plugins/RANSktau.okl"; + for (int r=0;r<2;r++){ + if ((r==0 && rank==0) || (r==1 && rank>0)) { + computeKernel = mesh->device.buildKernel(fileName.c_str(), "computeHex3D", kernelInfo); + SijOijKernel = mesh->device.buildKernel(fileName.c_str(), "SijOijHex3D", kernelInfo); + SijOijMag2Kernel = mesh->device.buildKernel(fileName.c_str(), "SijOijMag2", kernelInfo); + limitKernel = mesh->device.buildKernel(fileName.c_str(), "limit", kernelInfo); + mueKernel = mesh->device.buildKernel(fileName.c_str(), "mue", kernelInfo); + } + MPI_Barrier(mesh->comm); + } +} + +void RANSktau::updateProperties() +{ + mesh_t *mesh = ins->mesh; + cds_t *cds = ins->cds; + + occa::memory o_mue = ins->o_mue; + occa::memory o_diff = cds->o_diff + kFieldIndex*cds->fieldOffset*sizeof(dfloat); + + limitKernel(mesh->Nelements*mesh->Np, o_k, o_tau); + mueKernel(mesh->Nelements*mesh->Np, + ins->fieldOffset, + rho, + mueLam, + o_k, + o_tau, + o_mut, + o_mue, + o_diff); +} + +occa::memory RANSktau::o_mue_t() +{ + return o_mut; +} + +void RANSktau::updateSourceTerms() +{ + mesh_t *mesh = ins->mesh; + cds_t *cds = ins->cds; + + occa::memory o_OiOjSk = ins->o_wrk0; + occa::memory o_SijMag2 = ins->o_wrk1; + occa::memory o_SijOij = ins->o_wrk2; + + occa::memory o_FS = cds->o_FS + kFieldIndex*cds->fieldOffset*sizeof(dfloat); + occa::memory o_BFDiag = cds->o_BFDiag + kFieldIndex*cds->fieldOffset*sizeof(dfloat); + + const int NSOfields = 9; + SijOijKernel(mesh->Nelements, + ins->fieldOffset, + mesh->o_vgeo, + mesh->o_Dmatrices, + ins->o_U, + o_SijOij); + + ogsGatherScatterMany(o_SijOij, + NSOfields, + ins->fieldOffset, + ogsDfloat, + ogsAdd, + mesh->ogs); + + ins->invMassMatrixKernel( + mesh->Nelements, + ins->fieldOffset, + NSOfields, + mesh->o_vgeo, + ins->o_InvM, + o_SijOij); + + SijOijMag2Kernel(mesh->Nelements*mesh->Np, + ins->fieldOffset, + o_SijOij, + o_OiOjSk, + o_SijMag2); + + limitKernel(mesh->Nelements*mesh->Np, o_k, o_tau); + + computeKernel(mesh->Nelements, + ins->cds->fieldOffset, + rho, + mueLam, + mesh->o_vgeo, + mesh->o_Dmatrices, + o_k, + o_tau, + o_SijMag2, + o_OiOjSk, + o_BFDiag, + o_FS); +} + +void RANSktau::setup(ins_t *insIn, dfloat mueIn, dfloat rhoIn, int ifld) +{ + setup(insIn, mueIn, rhoIn, ifld, NULL); +} + +void RANSktau::setup(ins_t *insIn, dfloat mueIn, dfloat rhoIn, + int ifld, const dfloat *coeffIn) +{ + if(setupCalled) return; + + ins = insIn; + mueLam = mueIn; + rho = rhoIn; + kFieldIndex = ifld; + + cds_t *cds = ins->cds; + mesh_t *mesh = ins->mesh; + + if(ins->Nscalar < 2) { + if(mesh->rank == 0) cout << "RANSktau: Nscalar needs to be >= 2!\n"; + exit(1); + } + + if(coeffIn) memcpy(coeff, coeffIn, sizeof(coeff)); + + o_k = cds->o_S + kFieldIndex*cds->fieldOffset*sizeof(dfloat); + o_tau = cds->o_S + (kFieldIndex+1)*cds->fieldOffset*sizeof(dfloat); + + o_mut = mesh->device.malloc(cds->fieldOffset*sizeof(dfloat)); + + if(!cds->o_BFDiag.ptr()) { + cds->o_BFDiag = mesh->device.malloc(cds->NSfields*cds->fieldOffset*sizeof(dfloat)); + ins->setScalarKernel(cds->NSfields*cds->fieldOffset, 0.0, cds->o_BFDiag); + } + + setupCalled = 1; +} diff --git a/src/plugins/RANSktau.hpp b/src/plugins/RANSktau.hpp new file mode 100644 index 000000000..954dec3b6 --- /dev/null +++ b/src/plugins/RANSktau.hpp @@ -0,0 +1,13 @@ +#include +#include + +namespace RANSktau { + +void buildKernel(ins_t *ins); +void updateSourceTerms(); +void setup(ins_t *insIn, dfloat mue, dfloat rho, int startIndex); +void setup(ins_t *insIn, dfloat mue, dfloat rho, int startIndex, const dfloat *coeffIn); +void updateProperties(); +occa::memory o_mue_t(); + +} From e8e45a40c55af3378cad16e769368f6a988863d8 Mon Sep 17 00:00:00 2001 From: Stefan Kerkemeier Date: Sat, 28 Mar 2020 13:21:17 +0000 Subject: [PATCH 15/22] fix channel example --- CMakeLists.txt | 1 + examples/ktauChannel/channel.par | 2 +- examples/ktauChannel/channel.udf | 6 +++--- src/main.cpp | 1 + 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bd32af54f..aa03ad517 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -331,6 +331,7 @@ set(SRC src/core/timer.cpp src/plugins/avg.cpp src/plugins/velRecycling.cpp + src/plugins/RANSktau.cpp ## To get fortran flags src/dummy.f ) diff --git a/examples/ktauChannel/channel.par b/examples/ktauChannel/channel.par index 169c154fe..5df55e4c9 100644 --- a/examples/ktauChannel/channel.par +++ b/examples/ktauChannel/channel.par @@ -4,7 +4,7 @@ deviceNumber = LOCAL-RANK [GENERAL] polynomialOrder = 7 -startFrom = r.fld+time=0 +#startFrom = r.fld+time=0 stopAt = endTime endTime = 500 dt = 2e-02 diff --git a/examples/ktauChannel/channel.udf b/examples/ktauChannel/channel.udf index 0d64ee09e..7351b89b7 100644 --- a/examples/ktauChannel/channel.udf +++ b/examples/ktauChannel/channel.udf @@ -12,9 +12,9 @@ occa::kernel userfKernel; void userf(ins_t *ins, dfloat time, occa::memory o_U, occa::memory o_FU) { - const dfloat Re_tau = 2000.0 - const dfloat Re_B = rho/mueLam; - const dfloat DPDX = (Re_tau/Re_B)*(Re_TAU/Re_B); + const dfloat Re_tau = 2000.0; + const dfloat Re_b = rho/mueLam; + const dfloat DPDX = (Re_tau/Re_b)*(Re_tau/Re_b); userfKernel(ins->Nlocal, 0*ins->fieldOffset, DPDX, o_FU); } diff --git a/src/main.cpp b/src/main.cpp index e5e3e3d39..22876bf01 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -148,6 +148,7 @@ int main(int argc, char **argv) } ++tStep; + printf("tdiff %.15e %.15e %.15e\n", time, finalTime, finalTime-time); } MPI_Pcontrol(0); From 562ce878f893b224e7c5869f34ac9d6e5a8b2288 Mon Sep 17 00:00:00 2001 From: Stefan Date: Sat, 28 Mar 2020 08:19:14 -0500 Subject: [PATCH 16/22] add variable viscocity term to pressure rhs --- okl/core/insPressureRhsHex3D.okl | 97 +++++++++----------------------- src/core/ins.h | 4 +- src/core/insSetup.cpp | 11 ++-- src/core/parReader.cpp | 1 + src/core/tombo.cpp | 39 +++++++------ src/plugins/RANSktau.cpp | 2 + 6 files changed, 57 insertions(+), 97 deletions(-) diff --git a/okl/core/insPressureRhsHex3D.okl b/okl/core/insPressureRhsHex3D.okl index c7cf3c21a..44457fbb8 100644 --- a/okl/core/insPressureRhsHex3D.okl +++ b/okl/core/insPressureRhsHex3D.okl @@ -1,72 +1,31 @@ -/* - - The MIT License (MIT) - - Copyright (c) 2017 Tim Warburton, Noel Chalmers, Jesse Chan, Ali Karakus - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. - -*/ - -// F (o_rkU) = sum_j (alfa_j U_j)/dt- sum_j ( beta_j (NU_j + NC_j) ) -@kernel void insPressureRhsTOMBOHex3D(const dlong Nelements, - @restrict const dfloat * vgeo, - @restrict const dfloat * MM, - const dfloat idt, - const dfloat g0, - @restrict const dfloat * extbdfA, - @restrict const dfloat * extbdfB, - const dlong fieldOffset, - @restrict const dfloat * U, - @restrict const dfloat * BF, - @restrict const dfloat * NC, - @restrict const dfloat * FU, - @restrict dfloat * rhsU){ - - for(dlong eo=0;eopressureRhsKernel = mesh->device.buildKernel(fileName.c_str(), kernelName.c_str(), kernelInfo); + fileName = oklpath + "insPressureStress" + suffix + ".okl"; + kernelName = "insPressureStress" + suffix; + ins->pressureStressKernel = + mesh->device.buildKernel(fileName.c_str(), kernelName.c_str(), kernelInfo); + fileName = oklpath + "insPressureBC" + suffix + ".okl"; kernelName = "insPressureAddBCTOMBO" + suffix; ins->pressureAddBCKernel = @@ -678,12 +683,6 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) kernelName = "insPQ"; ins->pqKernel = mesh->device.buildKernel(fileName, kernelName, kernelInfo); - - fileName = oklpath + "insNC.okl"; - kernelName = "insNC"; - ins->ncKernel = - mesh->device.buildKernel(fileName, kernelName, kernelInfo); - } MPI_Barrier(mesh->comm); } diff --git a/src/core/parReader.cpp b/src/core/parReader.cpp index 88c72eddc..59de5b430 100644 --- a/src/core/parReader.cpp +++ b/src/core/parReader.cpp @@ -68,6 +68,7 @@ void setDefaultSettings(libParanumal::setupAide &options, string casename, int r options.setArgs("VELOCITY PRECONDITIONER", "JACOBI"); options.setArgs("VELOCITY DISCRETIZATION", "CONTINUOUS"); + options.setArgs("VARIABLE VISCOSITY", "FALSE"); options.setArgs("LOWMACH", "FALSE"); options.setArgs("ELLIPTIC INTEGRATION", "NODAL"); diff --git a/src/core/tombo.cpp b/src/core/tombo.cpp index ccfeb4bb7..bbbdf4036 100644 --- a/src/core/tombo.cpp +++ b/src/core/tombo.cpp @@ -35,6 +35,17 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) ins->o_wrk0, ins->o_wrk3); + if(ins->options.compareArgs("VARIABLE VISCOSITY", "TRUE")) + ins->pressureStressKernel( + mesh->Nelements, + mesh->o_vgeo, + mesh->o_Dmatrices, + ins->fieldOffset, + ins->o_mue, + ins->o_Ue, + ins->o_div, + ins->o_wrk3); + ins->gradientVolumeKernel( mesh->Nelements, mesh->o_vgeo, @@ -42,31 +53,19 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) ins->fieldOffset, ins->o_div, ins->o_wrk0); + occa::memory o_irho = ins->o_ellipticCoeff; - ins->ncKernel( - mesh->Np*mesh->Nelements, + ins->pressureRhsKernel( + mesh->Nelements*mesh->Np, ins->fieldOffset, ins->o_mue, o_irho, - ins->o_wrk0, - ins->o_wrk3); - - ins->pressureRhsKernel( - mesh->Nelements, - mesh->o_vgeo, - mesh->o_MM, - ins->idt, - ins->g0, - ins->o_extbdfA, - ins->o_extbdfB, - ins->fieldOffset, - ins->o_U, ins->o_BF, ins->o_wrk3, - ins->o_FU, - ins->o_wrk0); + ins->o_wrk0, + ins->o_wrk6); - ogsGatherScatterMany(ins->o_wrk0, ins->NVfields, ins->fieldOffset, + ogsGatherScatterMany(ins->o_wrk6, ins->NVfields, ins->fieldOffset, ogsDfloat, ogsAdd, mesh->ogs); ins->invMassMatrixKernel( @@ -75,14 +74,14 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) ins->NVfields, mesh->o_vgeo, ins->o_InvM, - ins->o_wrk0); + ins->o_wrk6); ins->divergenceVolumeKernel( mesh->Nelements, mesh->o_vgeo, mesh->o_Dmatrices, ins->fieldOffset, - ins->o_wrk0, + ins->o_wrk6, ins->o_wrk3); const dfloat lambda = ins->g0*ins->idt; diff --git a/src/plugins/RANSktau.cpp b/src/plugins/RANSktau.cpp index 178a316cc..35fe10331 100644 --- a/src/plugins/RANSktau.cpp +++ b/src/plugins/RANSktau.cpp @@ -185,6 +185,8 @@ void RANSktau::setup(ins_t *insIn, dfloat mueIn, dfloat rhoIn, exit(1); } + options.setArgs("VARIABLE VISCOSITY", "TRUE"); + if(coeffIn) memcpy(coeff, coeffIn, sizeof(coeff)); o_k = cds->o_S + kFieldIndex*cds->fieldOffset*sizeof(dfloat); From 7fe3844029d6c4859342c703078345a7a7604716 Mon Sep 17 00:00:00 2001 From: Stefan Date: Sat, 28 Mar 2020 09:50:56 -0500 Subject: [PATCH 17/22] fix --- src/main.cpp | 9 +++------ src/nekrs.cpp | 2 +- src/plugins/RANSktau.cpp | 13 ++++++------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 22876bf01..842357b84 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -129,12 +129,10 @@ int main(int argc, char **argv) double time = startTime; int tStep = 1; MPI_Pcontrol(1); - while (finalTime-time > 1e-10) { + while (finalTime-time >= nekrs::dt()) { - const double dt = nekrs::dt(); - - nekrs::runStep(time, dt, tStep); - time += dt; + nekrs::runStep(time, nekrs::dt(), tStep); + time += nekrs::dt(); int isOutputStep = 0; if (outputStep > 0) { @@ -148,7 +146,6 @@ int main(int argc, char **argv) } ++tStep; - printf("tdiff %.15e %.15e %.15e\n", time, finalTime, finalTime-time); } MPI_Pcontrol(0); diff --git a/src/nekrs.cpp b/src/nekrs.cpp index 02b22a9f5..f0426e09b 100644 --- a/src/nekrs.cpp +++ b/src/nekrs.cpp @@ -125,7 +125,7 @@ void setup(MPI_Comm comm_in, int buildOnly, int sizeTarget, if(rank == 0) { cout << "\nsettings:\n" << endl; if(!ins->options.compareArgs("VELOCITY SOLVER", "NONE")) - cout << ins->vOptions << endl; + cout << ins->options << endl; else if(ins->Nscalar) cout << ins->cds->options << endl; size_t dMB = ins->mesh->device.memoryAllocated() / 1e6; diff --git a/src/plugins/RANSktau.cpp b/src/plugins/RANSktau.cpp index 35fe10331..8c3c2514b 100644 --- a/src/plugins/RANSktau.cpp +++ b/src/plugins/RANSktau.cpp @@ -78,6 +78,12 @@ void RANSktau::buildKernel(ins_t *ins) } MPI_Barrier(mesh->comm); } + + if(ins->Nscalar < 2) { + if(mesh->rank == 0) cout << "RANSktau: Nscalar needs to be >= 2!\n"; + exit(1); + } + ins->options.setArgs("VARIABLE VISCOSITY", "TRUE"); } void RANSktau::updateProperties() @@ -180,13 +186,6 @@ void RANSktau::setup(ins_t *insIn, dfloat mueIn, dfloat rhoIn, cds_t *cds = ins->cds; mesh_t *mesh = ins->mesh; - if(ins->Nscalar < 2) { - if(mesh->rank == 0) cout << "RANSktau: Nscalar needs to be >= 2!\n"; - exit(1); - } - - options.setArgs("VARIABLE VISCOSITY", "TRUE"); - if(coeffIn) memcpy(coeff, coeffIn, sizeof(coeff)); o_k = cds->o_S + kFieldIndex*cds->fieldOffset*sizeof(dfloat); From fa16e82b1a472ab054e0d456fd53d4fd7147522b Mon Sep 17 00:00:00 2001 From: Stefan Date: Sat, 28 Mar 2020 10:13:17 -0500 Subject: [PATCH 18/22] add missing file --- okl/core/insPressureStressHex3D.okl | 121 ++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 okl/core/insPressureStressHex3D.okl diff --git a/okl/core/insPressureStressHex3D.okl b/okl/core/insPressureStressHex3D.okl new file mode 100644 index 000000000..65f5678ce --- /dev/null +++ b/okl/core/insPressureStressHex3D.okl @@ -0,0 +1,121 @@ +@kernel void insPressureStressHex3D(const dlong Nelements, + @restrict const dfloat * vgeo, + @restrict const dfloat * D, + const dlong offset, + @restrict const dfloat * MUE, + @restrict const dfloat * U, + @restrict const dfloat * DIV, + @restrict dfloat * OUT){ + + for(dlong e=0; e Date: Sat, 11 Apr 2020 05:37:00 -0500 Subject: [PATCH 19/22] remove unneccesary input arguments --- okl/core/insAxHex3D.okl | 1 - okl/core/insCurlHex3D.okl | 1 - okl/core/insDivergenceHex3D.okl | 2 -- src/core/tombo.cpp | 7 +------ 4 files changed, 1 insertion(+), 10 deletions(-) diff --git a/okl/core/insAxHex3D.okl b/okl/core/insAxHex3D.okl index 5788b63aa..c0a6a5375 100644 --- a/okl/core/insAxHex3D.okl +++ b/okl/core/insAxHex3D.okl @@ -30,7 +30,6 @@ @restrict const dfloat * ggeo, @restrict const dfloat * D, @restrict const dfloat * S, - @restrict const dfloat * MM, @restrict const dfloat * q, @restrict const dfloat * lambda, @restrict dfloat * Aq){ diff --git a/okl/core/insCurlHex3D.okl b/okl/core/insCurlHex3D.okl index 6e35ec0c0..4f98337f9 100644 --- a/okl/core/insCurlHex3D.okl +++ b/okl/core/insCurlHex3D.okl @@ -28,7 +28,6 @@ SOFTWARE. @kernel void insCurlHex3D(const dlong Nelements, @restrict const dfloat * vgeo, @restrict const dfloat * const D, - @restrict const dfloat * const M, const dlong offset, @restrict const dfloat * U, @restrict dfloat * W){ diff --git a/okl/core/insDivergenceHex3D.okl b/okl/core/insDivergenceHex3D.okl index 1f1dd64ef..52e8449b7 100644 --- a/okl/core/insDivergenceHex3D.okl +++ b/okl/core/insDivergenceHex3D.okl @@ -165,9 +165,7 @@ @kernel void insDivergenceSurfaceTOMBOHex3D(const dlong Nelements, @restrict const dfloat * vgeo, @restrict const dfloat * sgeo, - @restrict const dfloat * LIFTT, @restrict const dlong * vmapM, - @restrict const dlong * mapB, @restrict const int * EToBM, @restrict const int * EToB, const dfloat time, diff --git a/src/core/tombo.cpp b/src/core/tombo.cpp index bbbdf4036..6eea9c84c 100644 --- a/src/core/tombo.cpp +++ b/src/core/tombo.cpp @@ -10,7 +10,6 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) ins->curlKernel(mesh->Nelements, mesh->o_vgeo, mesh->o_Dmatrices, - mesh->o_MM, ins->fieldOffset, ins->o_Ue, ins->o_wrk0); @@ -30,7 +29,6 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) mesh->Nelements, mesh->o_vgeo, mesh->o_Dmatrices, - mesh->o_MM, ins->fieldOffset, ins->o_wrk0, ins->o_wrk3); @@ -89,9 +87,7 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) mesh->Nelements, mesh->o_vgeo, mesh->o_sgeo, - mesh->o_LIFTT, mesh->o_vmapM, - mesh->o_vmapP, mesh->o_EToB, ins->o_EToB, time, @@ -101,7 +97,7 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) mesh->o_z, ins->fieldOffset, ins->o_usrwrk, - ins->o_wrk0, + ins->U, ins->o_wrk3); ins->AxKernel( @@ -110,7 +106,6 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) mesh->o_ggeo, mesh->o_Dmatrices, mesh->o_Smatrices, - mesh->o_MM, ins->o_P, ins->o_ellipticCoeff, ins->o_wrk3); From 3d606d642ad6745a100462ea8bd62b01738f0b07 Mon Sep 17 00:00:00 2001 From: Stefan Date: Mon, 20 Apr 2020 04:37:06 -0500 Subject: [PATCH 20/22] fix Nstep issue --- okl/plugins/RANSktau.okl | 319 +++++++++++++++++++++++++++++++++++++++ src/core/insSetup.cpp | 6 +- src/core/tombo.cpp | 2 +- src/main.cpp | 2 +- src/nekrs.cpp | 6 +- 5 files changed, 325 insertions(+), 10 deletions(-) create mode 100644 okl/plugins/RANSktau.okl diff --git a/okl/plugins/RANSktau.okl b/okl/plugins/RANSktau.okl new file mode 100644 index 000000000..3d1137e35 --- /dev/null +++ b/okl/plugins/RANSktau.okl @@ -0,0 +1,319 @@ +@kernel void computeHex3D(const dlong Nelements, + const dlong offset, + const dfloat rho, + const dfloat mue, + @restrict const dfloat * vgeo, + @restrict const dfloat * D, + @restrict const dfloat * K, + @restrict const dfloat * TAU, + @restrict const dfloat * STMAG2, + @restrict const dfloat * OIOJSK, + @restrict dfloat * SRCDIAG, + @restrict dfloat * SRC){ + + for(dlong e=0; e 0) itau = 1/tau; + + dfloat sigd = p_sigd_min; + dfloat f_beta_str = 1.0; + if (xk > 0) { + const dfloat xk3 = xk*xk*tau*tau; + sigd = p_sigd_max; + f_beta_str = (1.0 + p_fb_c1st*xk3)/(1.0 + p_fb_c2st*xk3); + } + + // compute source term for k + const dfloat Y_k = rho*p_betainf_str*f_beta_str*itau; + const dfloat kSrc = mu_t*stMag2; + const dfloat kDiag = Y_k; + + // compute rource term for omega + const dfloat x_w = abs(OiOjSk)*(tau*tau*tau*p_ibetainf_str3); + const dfloat f_b = (1.0 + p_fb_c1*x_w)/(1.0 + p_fb_c2*x_w); + dfloat tauSrc = rho*(p_beta0*f_b - sigd*xk*tau); + dfloat tauDiag = rho*tau*p_alp_inf*stMag2 + + 8.0*rho*p_alpinf_str*kk*xtq*p_sigma_tau; + + // apply correction + const dfloat S_tau = 8.0*mue*xtq; + if (tau <= p_tiny) + tauSrc -= S_tau; + else + tauDiag += S_tau*itau; + + SRC[id + 0*offset] = kSrc; + SRC[id + 1*offset] = tauSrc; + SRCDIAG[id + 0*offset] = kDiag; + SRCDIAG[id + 1*offset] = tauDiag; + } + } + } + } +} + +@kernel void SijOijHex3D(const dlong Nelements, + const dlong offset, + @restrict const dfloat * vgeo, + @restrict const dfloat * D, + @restrict const dfloat * U, + @restrict dfloat * SO){ + + for(dlong e=0; eelementType); int flow = 1; - if(ins->options.compareArgs("VELOCITY SOLVER", "NONE")) flow = 0; + if(options.compareArgs("VELOCITY SOLVER", "NONE")) flow = 0; ins->cht = 0; if (nekData.nelv != nekData.nelt && ins->Nscalar) ins->cht = 1; @@ -213,7 +213,7 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) ins->o_rho = ins->o_prop.slice(1*ins->fieldOffset*sizeof(dfloat)); ins->lowMach = 0; - if(ins->options.compareArgs("LOWMACH", "TRUE")) ins->lowMach = 1; + if(options.compareArgs("LOWMACH", "TRUE")) ins->lowMach = 1; ins->div = (dfloat*) calloc(ins->fieldOffset,sizeof(dfloat)); ins->o_div = mesh->device.malloc(ins->fieldOffset*sizeof(dfloat), ins->div); @@ -281,7 +281,7 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) options.getArgs("DATA FILE", boundaryHeaderFileName); kernelInfoBC["includes"] += realpath(boundaryHeaderFileName.c_str(), NULL); - if(ins->options.compareArgs("FILTER STABILIZATION", "RELAXATION")) + if(options.compareArgs("FILTER STABILIZATION", "RELAXATION")) filterSetup(ins); const int nbrBIDs = bcMap::size(); diff --git a/src/core/tombo.cpp b/src/core/tombo.cpp index 6eea9c84c..49f9b3e69 100644 --- a/src/core/tombo.cpp +++ b/src/core/tombo.cpp @@ -97,7 +97,7 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) mesh->o_z, ins->fieldOffset, ins->o_usrwrk, - ins->U, + ins->o_U, ins->o_wrk3); ins->AxKernel( diff --git a/src/main.cpp b/src/main.cpp index 842357b84..6feadd9f2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -129,7 +129,7 @@ int main(int argc, char **argv) double time = startTime; int tStep = 1; MPI_Pcontrol(1); - while (finalTime-time >= nekrs::dt()) { + while ((finalTime-time)/finalTime > 0.1*nekrs::dt()) { nekrs::runStep(time, nekrs::dt(), tStep); time += nekrs::dt(); diff --git a/src/nekrs.cpp b/src/nekrs.cpp index f0426e09b..5a93737a2 100644 --- a/src/nekrs.cpp +++ b/src/nekrs.cpp @@ -123,11 +123,7 @@ void setup(MPI_Comm comm_in, int buildOnly, int sizeTarget, timer::init(ins->mesh->comm, ins->mesh->device, 0); if(rank == 0) { - cout << "\nsettings:\n" << endl; - if(!ins->options.compareArgs("VELOCITY SOLVER", "NONE")) - cout << ins->options << endl; - else - if(ins->Nscalar) cout << ins->cds->options << endl; + cout << "\nsettings:\n" << endl << options << endl; size_t dMB = ins->mesh->device.memoryAllocated() / 1e6; cout << "device memory allocation: " << dMB << " MB" << endl; cout << "initialization took " << MPI_Wtime() - t0 << " seconds" << endl; From 95ba914e3a84f9d8da6a5a7b89282940c5c1af3c Mon Sep 17 00:00:00 2001 From: Stefan Date: Mon, 20 Apr 2020 04:45:13 -0500 Subject: [PATCH 21/22] change name --- src/core/ins.h | 2 +- src/core/insSetup.cpp | 2 +- src/core/tombo.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/core/ins.h b/src/core/ins.h index d5086179b..e987601b5 100644 --- a/src/core/ins.h +++ b/src/core/ins.h @@ -208,7 +208,7 @@ typedef struct { occa::kernel setEllipticCoeffPressureKernel; int TOMBO; - occa::kernel AxKernel; + occa::kernel pressureAxKernel; occa::kernel curlKernel; occa::kernel invMassMatrixKernel; occa::kernel massMatrixKernel; diff --git a/src/core/insSetup.cpp b/src/core/insSetup.cpp index 16e10654b..c7252b715 100644 --- a/src/core/insSetup.cpp +++ b/src/core/insSetup.cpp @@ -535,7 +535,7 @@ ins_t *insSetup(MPI_Comm comm, setupAide &options, int buildOnly) fileName = oklpath + "insAx" + suffix + ".okl"; kernelName = "insAx" + suffix; - ins->AxKernel = mesh->device.buildKernel(fileName, kernelName, kernelInfo); + ins->pressureAxKernel = mesh->device.buildKernel(fileName, kernelName, kernelInfo); fileName = oklpath + "insCurl" + suffix + ".okl"; kernelName = "insCurl" + suffix; diff --git a/src/core/tombo.cpp b/src/core/tombo.cpp index 49f9b3e69..90cc3337d 100644 --- a/src/core/tombo.cpp +++ b/src/core/tombo.cpp @@ -100,7 +100,7 @@ occa::memory pressureSolve(ins_t *ins, dfloat time) ins->o_U, ins->o_wrk3); - ins->AxKernel( + ins->pressureAxKernel( mesh->Nelements, ins->fieldOffset, mesh->o_ggeo, From 83f7130ce99f01d3fce15b7ea95e4dae4ed9fe8c Mon Sep 17 00:00:00 2001 From: Stefan Kerkemeier Date: Mon, 20 Apr 2020 11:55:15 +0000 Subject: [PATCH 22/22] change name --- okl/core/insNC.okl | 19 ------------------- ...{insAxHex3D.okl => insPressureAxHex3D.okl} | 2 +- src/core/insSetup.cpp | 4 ++-- 3 files changed, 3 insertions(+), 22 deletions(-) delete mode 100644 okl/core/insNC.okl rename okl/core/{insAxHex3D.okl => insPressureAxHex3D.okl} (98%) diff --git a/okl/core/insNC.okl b/okl/core/insNC.okl deleted file mode 100644 index d7de6d94f..000000000 --- a/okl/core/insNC.okl +++ /dev/null @@ -1,19 +0,0 @@ -@kernel void insNC(const dlong N, - const dlong offset, - @restrict const dfloat * MUE, - @restrict const dfloat * iRHO, - @restrict const dfloat * gQTL, - @restrict dfloat * NC) -{ - for(dlong n=0;nadvectionStrongCubatureVolumeKernel = mesh->device.buildKernel(fileName.c_str(), kernelName.c_str(), kernelInfo); - fileName = oklpath + "insAx" + suffix + ".okl"; - kernelName = "insAx" + suffix; + fileName = oklpath + "insPressureAx" + suffix + ".okl"; + kernelName = "insPressureAx" + suffix; ins->pressureAxKernel = mesh->device.buildKernel(fileName, kernelName, kernelInfo); fileName = oklpath + "insCurl" + suffix + ".okl";