Skip to content

Commit

Permalink
Merge pull request #144 from valassi/fpsingle
Browse files Browse the repository at this point in the history
[epoch1] Single precision: fix build failures, improve NaN determination
  • Loading branch information
valassi authored Mar 31, 2021
2 parents 44460dd + 69d8d8f commit 8fc6978
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 30 deletions.
102 changes: 81 additions & 21 deletions epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/check.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,64 @@ bool is_number(const char *s) {
return (int)strlen(s) == t - s;
}

// Disabling fast math is essential here, otherwise results are undefined
// See https://stackoverflow.com/a/40702790 about the __attribute__ syntax
__attribute__((optimize("-fno-fast-math")))
bool fp_is_abnormal( const fptype& fp )
{
if ( std::isnan( fp ) ) return true;
if ( fp != fp ) return true;
return false;
}

__attribute__((optimize("-fno-fast-math")))
bool fp_is_zero( const fptype& fp )
{
if ( fp == 0 ) return true;
return false;
}

// See https://en.cppreference.com/w/cpp/numeric/math/FP_categories
__attribute__((optimize("-fno-fast-math")))
const char* fp_show_class( const fptype& fp )
{
switch( std::fpclassify( fp ) ) {
case FP_INFINITE: return "Inf";
case FP_NAN: return "NaN";
case FP_NORMAL: return "normal";
case FP_SUBNORMAL: return "subnormal";
case FP_ZERO: return "zero";
default: return "unknown";
}
}

__attribute__((optimize("-fno-fast-math")))
void debug_me_is_abnormal( const fptype& me, int ievtALL )
{
std::cout << "DEBUG[" << ievtALL << "]"
<< " ME=" << me
<< " fpisabnormal=" << fp_is_abnormal( me )
<< " fpclass=" << fp_show_class( me )
<< " (me==me)=" << ( me == me )
<< " (me==me+1)=" << ( me == me+1 )
<< " isnan=" << std::isnan( me )
<< " isfinite=" << std::isfinite( me )
<< " isnormal=" << std::isnormal( me )
<< " is0=" << ( me == 0 )
<< " is1=" << ( me == 1 )
<< " abs(ME)=" << std::abs( me )
<< " isnan=" << std::isnan( std::abs( me ) )
<< std::endl;
}

int usage(char* argv0, int ret = 1) {
std::cout << "Usage: " << argv0
<< " [--verbose|-v] [--debug|-d] [--performance|-p] [--json|-j]"
<< " [#gpuBlocksPerGrid #gpuThreadsPerBlock] #iterations" << std::endl << std::endl;
std::cout << "The number of events per iteration is #gpuBlocksPerGrid * #gpuThreadsPerBlock" << std::endl;
std::cout << "(also in CPU/C++ code, where only the product of these two parameters counts)" << std::endl << std::endl;
std::cout << "Summary stats are always computed: '-p' and '-j' only control their printout" << std::endl;
std::cout << "The '-d' flag only enables nan warnings and OMP debugging" << std::endl;
std::cout << "The '-d' flag only enables NaN/abnormal warnings and OMP debugging" << std::endl;
#ifndef __CUDACC__
std::cout << std::endl << "Use the OMP_NUM_THREADS environment variable to control OMP multi-threading" << std::endl;
std::cout << "(OMP multithreading will be disabled if OMP_NUM_THREADS is not set)" << std::endl;
Expand Down Expand Up @@ -193,7 +243,7 @@ int main(int argc, char **argv)
struct CudaTearDown {
CudaTearDown(bool print) : _print(print) { }
~CudaTearDown() {
if ( _print ) std::cout << "Calling cudaDeviceReset()." << std::endl;
//if ( _print ) std::cout << "Calling cudaDeviceReset()." << std::endl;
checkCuda( cudaDeviceReset() ); // this is needed by cuda-memcheck --leak-check full
}
bool _print{false};
Expand Down Expand Up @@ -530,19 +580,30 @@ int main(int argc, char **argv)
double meanwtim = sumwtim / niter;
//double stdwtim = std::sqrt( sqswtim / niter - meanwtim * meanwtim );

int nnan = 0;
int nabn = 0;
int nzero = 0;
double minelem = matrixelementALL[0];
double maxelem = matrixelementALL[0];
double minweig = weightALL[0];
double maxweig = weightALL[0];
for ( int ievtALL = 0; ievtALL < nevtALL; ++ievtALL )
{
// The following events are abnormal in a run with "-p 2048 256 12 -d"
// - check.exe/commonrand: ME[310744,451171,3007871,3163868,4471038,5473927] with fast math
// - check.exe/curand: ME[578162,1725762,2163579,5407629,5435532,6014690] with fast math
// - gcheck.exe/curand: ME[596016,1446938] with fast math
// Debug NaN/abnormal issues
//if ( ievtALL == 310744 ) // this ME is abnormal both with and without fast math
// debug_me_is_abnormal( matrixelementALL[ievtALL], ievtALL );
//if ( ievtALL == 5473927 ) // this ME is abnormal only with fast math
// debug_me_is_abnormal( matrixelementALL[ievtALL], ievtALL );
// Compute min/max
if ( std::isnan( matrixelementALL[ievtALL] ) )
if ( fp_is_zero( matrixelementALL[ievtALL] ) ) nzero++;
if ( fp_is_abnormal( matrixelementALL[ievtALL] ) )
{
if ( debug ) // only printed out with "-p -d" (matrixelementALL is not filled without -p)
std::cout << "WARNING! ME[" << ievtALL << "} is nan" << std::endl;
nnan++;
std::cout << "WARNING! ME[" << ievtALL << "] is NaN/abnormal" << std::endl;
nabn++;
continue;
}
minelem = std::min( minelem, (double)matrixelementALL[ievtALL] );
Expand All @@ -555,23 +616,23 @@ int main(int argc, char **argv)
for ( int ievtALL = 0; ievtALL < nevtALL; ++ievtALL )
{
// Compute mean from the sum of diff to min
if ( std::isnan( matrixelementALL[ievtALL] ) ) continue;
if ( fp_is_abnormal( matrixelementALL[ievtALL] ) ) continue;
sumelemdiff += ( matrixelementALL[ievtALL] - minelem );
sumweigdiff += ( weightALL[ievtALL] - minweig );
}
double meanelem = minelem + sumelemdiff / ( nevtALL - nnan );
double meanweig = minweig + sumweigdiff / ( nevtALL - nnan );
double meanelem = minelem + sumelemdiff / ( nevtALL - nabn );
double meanweig = minweig + sumweigdiff / ( nevtALL - nabn );
double sqselemdiff = 0;
double sqsweigdiff = 0;
for ( int ievtALL = 0; ievtALL < nevtALL; ++ievtALL )
{
// Compute stddev from the squared sum of diff to mean
if ( std::isnan( matrixelementALL[ievtALL] ) ) continue;
if ( fp_is_abnormal( matrixelementALL[ievtALL] ) ) continue;
sqselemdiff += std::pow( matrixelementALL[ievtALL] - meanelem, 2 );
sqsweigdiff += std::pow( weightALL[ievtALL] - meanweig, 2 );
}
double stdelem = std::sqrt( sqselemdiff / ( nevtALL - nnan ) );
double stdweig = std::sqrt( sqsweigdiff / ( nevtALL - nnan ) );
double stdelem = std::sqrt( sqselemdiff / ( nevtALL - nabn ) );
double stdweig = std::sqrt( sqsweigdiff / ( nevtALL - nabn ) );

// === STEP 9 FINALISE
// --- 9a. Destroy curand generator
Expand Down Expand Up @@ -611,9 +672,9 @@ int main(int argc, char **argv)
<< "NumIterations = " << niter << std::endl
<< "-----------------------------------------------------------------------" << std::endl
#if defined MGONGPU_FPTYPE_DOUBLE
<< "FP precision = DOUBLE (nan=" << nnan << ")" << std::endl
<< "FP precision = DOUBLE (NaN/abnormal=" << nabn << ", zero=" << nzero << " )" << std::endl
#elif defined MGONGPU_FPTYPE_FLOAT
<< "FP precision = FLOAT (nan=" << nnan << ")" << std::endl
<< "FP precision = FLOAT (NaN/abnormal=" << nabn << ", zero=" << nzero << ")" << std::endl
#endif
#ifdef __CUDACC__
#if defined MGONGPU_CXTYPE_CUCOMPLEX
Expand Down Expand Up @@ -676,15 +737,15 @@ int main(int argc, char **argv)
<< std::string(16, ' ') << " ) sec^-1" << std::endl
<< std::defaultfloat; // default format: affects all floats
std::cout << "***********************************************************************" << std::endl
<< "NumMatrixElements(notNan) = " << nevtALL - nnan << std::endl
<< "NumMatrixElems(notAbnormal) = " << nevtALL - nabn << std::endl
<< std::scientific // fixed format: affects all floats (default precision: 6)
<< "MeanMatrixElemValue = ( " << meanelem
<< " +- " << stdelem/sqrt(nevtALL - nnan) << " ) GeV^" << meGeVexponent << std::endl // standard error
<< " +- " << stdelem/sqrt(nevtALL - nabn) << " ) GeV^" << meGeVexponent << std::endl // standard error
<< "[Min,Max]MatrixElemValue = [ " << minelem
<< " , " << maxelem << " ] GeV^" << meGeVexponent << std::endl
<< "StdDevMatrixElemValue = ( " << stdelem << std::string(16, ' ') << " ) GeV^" << meGeVexponent << std::endl
<< "MeanWeight = ( " << meanweig
<< " +- " << stdweig/sqrt(nevtALL - nnan) << " )" << std::endl // standard error
<< " +- " << stdweig/sqrt(nevtALL - nabn) << " )" << std::endl // standard error
<< "[Min,Max]Weight = [ " << minweig
<< " , " << maxweig << " ]" << std::endl
<< "StdDevWeight = ( " << stdweig << std::string(16, ' ') << " )" << std::endl
Expand Down Expand Up @@ -727,10 +788,9 @@ int main(int argc, char **argv)
<< "\"NumThreadsPerBlock\": " << gputhreads << ", " << std::endl
<< "\"NumBlocksPerGrid\": " << gpublocks << ", " << std::endl
#if defined MGONGPU_FPTYPE_DOUBLE
<< "\"FP precision\": "
<< "\"DOUBLE (nan=" << nnan << ")\"," << std::endl
<< "\"FP precision\": " << "\"DOUBLE (NaN/abnormal=" << nabn << ")\"," << std::endl
#elif defined MGONGPU_FPTYPE_FLOAT
<< "\"FP precision\": " << "FLOAT (nan=" << nnan << ")," << std::endl
<< "\"FP precision\": " << "\"FLOAT (NaN/abnormal=" << nabn << ")\"," << std::endl
#endif
<< "\"Complex type\": "
#ifdef __CUDACC__
Expand Down Expand Up @@ -797,7 +857,7 @@ int main(int argc, char **argv)
<< std::endl
<< "\"EvtsPerSec[MatrixElems] (3)\": \""
<< std::to_string(nevtALL/sumwtim) << " sec^-1\"," << std::endl
<< "\"NumMatrixElements(notNan)\": " << nevtALL - nnan << "," << std::endl
<< "\"NumMatrixElems(notAbnormal)\": " << nevtALL - nabn << "," << std::endl
<< std::scientific
<< "\"MeanMatrixElemValue\": "
<< "\"" << std::to_string(meanelem) << " GeV^"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@



struct CUDA_CPU_TestBase : public TestDriverBase<double> {
struct CUDA_CPU_TestBase : public TestDriverBase<fptype> {
static_assert( gputhreads%mgOnGpu::neppR == 0, "ERROR! #threads/block should be a multiple of neppR" );
static_assert( gputhreads%mgOnGpu::neppM == 0, "ERROR! #threads/block should be a multiple of neppM" );
static_assert( gputhreads <= mgOnGpu::ntpbMAX, "ERROR! #threads/block should be <= ntpbMAX" );
Expand Down Expand Up @@ -82,15 +82,15 @@ struct CPUTest : public CUDA_CPU_TestBase {



double getMomentum(std::size_t evtNo, unsigned int particle, unsigned int component) const override {
fptype getMomentum(std::size_t evtNo, unsigned int particle, unsigned int component) const override {
assert(component < mgOnGpu::np4);
assert(particle < mgOnGpu::npar);
const auto page = evtNo / mgOnGpu::neppM; // #eventpage in this iteration
const auto ieppM = evtNo % mgOnGpu::neppM; // #event in the current eventpage in this iteration
return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*mgOnGpu::neppM + particle * mgOnGpu::neppM*mgOnGpu::np4 + component * mgOnGpu::neppM + ieppM];
};

double getMatrixElement(std::size_t evtNo) const override {
fptype getMatrixElement(std::size_t evtNo) const override {
return hstMEs[evtNo];
}
};
Expand Down Expand Up @@ -187,21 +187,23 @@ struct CUDATest : public CUDA_CPU_TestBase {
}


double getMomentum(std::size_t evtNo, unsigned int particle, unsigned int component) const override {
fptype getMomentum(std::size_t evtNo, unsigned int particle, unsigned int component) const override {
assert(component < mgOnGpu::np4);
assert(particle < mgOnGpu::npar);
const auto page = evtNo / mgOnGpu::neppM; // #eventpage in this iteration
const auto ieppM = evtNo % mgOnGpu::neppM; // #event in the current eventpage in this iteration
return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*mgOnGpu::neppM + particle * mgOnGpu::neppM*mgOnGpu::np4 + component * mgOnGpu::neppM + ieppM];
};

double getMatrixElement(std::size_t evtNo) const override {
fptype getMatrixElement(std::size_t evtNo) const override {
return hstMEs[evtNo];
}
};
#endif


#if defined MGONGPU_FPTYPE_DOUBLE

#ifdef __CUDACC__
INSTANTIATE_TEST_SUITE_P(EP1_CUDA_GPU, MadgraphTestDouble,
testing::Values( [](){ return new CUDATest; } )
Expand All @@ -212,4 +214,9 @@ INSTANTIATE_TEST_SUITE_P(EP1_CUDA_CPU, MadgraphTestDouble,
);
#endif

#else

#warning runTest.cc has not been ported to single precision yet (issue #143)

#endif

Original file line number Diff line number Diff line change
Expand Up @@ -285,10 +285,10 @@ int main(int argc, char **argv)
const std::string rngnKey = "1b GenRnGen";
timermap.start( rngnKey );
#ifdef MGONGPU_COMMONRAND_ONHOST
std::vector<double> commonRnd = commonRandomPromises[iiter].get_future().get();
std::vector<fptype> commonRnd = commonRandomPromises[iiter].get_future().get();
assert( nRnarray == static_cast<int>( commonRnd.size() ) );
// NB (PR #45): memcpy is strictly needed only in CUDA (copy to pinned memory), but keep it also in C++ for consistency
memcpy( hstRnarray.get(), commonRnd.data(), nRnarray * sizeof(hstRnarray[0]) );
memcpy( hstRnarray.get(), commonRnd.data(), nRnarray * sizeof(fptype) );
#elif defined __CUDACC__
#ifdef MGONGPU_CURAND_ONDEVICE
grambo2toNm0::generateRnarray( rnGen, devRnarray.get(), nevt );
Expand Down
4 changes: 2 additions & 2 deletions test/include/MadgraphTest.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ class TestDriverBase {
// ------------------------------------------------
// Interface for retrieving info from madgraph
// ------------------------------------------------
virtual double getMomentum(std::size_t evtNo, unsigned int particleNo, unsigned int component) const = 0;
virtual double getMatrixElement(std::size_t evtNo) const = 0;
virtual fptype getMomentum(std::size_t evtNo, unsigned int particleNo, unsigned int component) const = 0;
virtual fptype getMatrixElement(std::size_t evtNo) const = 0;


// ------------------------------------------------
Expand Down

0 comments on commit 8fc6978

Please sign in to comment.