From 8145b0e8ccfe41f46662e31c9cb5fce64ec130b9 Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Fri, 11 Dec 2020 13:57:29 +0100 Subject: [PATCH 1/6] [tests eemm] Move common test code into test/eemumu. To run their own test, each epoch/abstraction implements a test driver interface, and instantiates a test suite defined in test/src. Refactor testing code, make it independent of CUDA and Epoch1. To run their own test, each epoch/abstraction implements a test driver interface, and instantiates a test suite defined in test/src. In more detail: - Move reference file for process 1 in eemumu to test/. - Split framework-specific parts and generic test code in epoch1. - Create base test class as general interface for madgraph runs. - Move googletest and common testing code into test/. - Instantiate epoch1 cuda test for eemumu. --- .gitignore | 2 +- epoch1/cuda/ee_mumu/SubProcesses/Makefile | 26 +- .../P1_Sigma_sm_epem_mupmum/runTest.cc | 341 ++++-------------- test/Makefile | 22 ++ .../eemumu}/dump_CPUTest.eemumu.txt | 1 + test/include/MadgraphTest.h | 110 ++++++ test/src/MadgraphTest.cc | 159 ++++++++ tools/Makefile | 16 - 8 files changed, 384 insertions(+), 293 deletions(-) create mode 100644 test/Makefile rename {epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum => test/eemumu}/dump_CPUTest.eemumu.txt (99%) create mode 100644 test/include/MadgraphTest.h create mode 100644 test/src/MadgraphTest.cc diff --git a/.gitignore b/.gitignore index b2c6044a89..9792d863d5 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,4 @@ profile_* ***/perf/params*.txt # googletest -tools/googletest +test/googletest diff --git a/epoch1/cuda/ee_mumu/SubProcesses/Makefile b/epoch1/cuda/ee_mumu/SubProcesses/Makefile index 63392ec925..9abb6cbc84 100644 --- a/epoch1/cuda/ee_mumu/SubProcesses/Makefile +++ b/epoch1/cuda/ee_mumu/SubProcesses/Makefile @@ -1,5 +1,6 @@ LIBDIR = ../../lib TOOLSDIR = ../../../../../tools +TESTDIR = ../../../../../test INCFLAGS = -I. 
-I../../src -I$(TOOLSDIR) MODELLIB = model_sm OPTFLAGS = -O3 @@ -47,7 +48,7 @@ else endif endif -GTESTLIBDIR = $(TOOLSDIR)/googletest/build/lib/ +GTESTLIBDIR = $(TESTDIR)/googletest/build/lib/ GTESTLIBS = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a MAKEDEBUG= @@ -88,20 +89,23 @@ $(cxx_main): check.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) runTest.o: $(GTESTLIBS) runTest.exe: $(GTESTLIBS) -runTest.exe: INCFLAGS += -I$(TOOLSDIR)/googletest/googletest/include/ +runTest.exe: INCFLAGS += -I$(TESTDIR)/googletest/googletest/include/ +runTest.exe: INCFLAGS += -I$(TESTDIR)/include/ runTest.exe: LIBFLAGS += -L$(GTESTLIBDIR)/ -lgtest -lgtest_main +runTest.exe: runTest.o $(TESTDIR)/src/MadgraphTest.o $(TESTDIR)/include/*.h +runTest.exe: cxx_objects += runTest.o $(TESTDIR)/src/MadgraphTest.o +runTest.exe: cu_objects += runTest_cu.o ifeq ($(NVCC),) -runTest.exe: runTest.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS) - $(CXX) -o $@ $(cxx_objects) runTest.o $(CPPFLAGS) $(CXXFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS) +runTest.exe: $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS) + $(CXX) -o $@ $(cxx_objects) $(CPPFLAGS) $(CXXFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS) else -runTest.exe: runTest.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS) - ln -sf runTest.cc runTest_tmp.cu - $(NVCC) -o $@ $(cxx_objects) runTest.o $(cu_objects) runTest_tmp.cu $(CPPFLAGS) $(CUFLAGS) -ldl $(LIBFLAGS) $(CULIBFLAGS) -lcuda -lgomp - unlink runTest_tmp.cu +runTest.exe runTest_cu.o &: runTest.cc $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(cu_objects) $(GTESTLIBS) + $(NVCC) -o runTest_cu.o -c -x cu runTest.cc $(CPPFLAGS) $(CUFLAGS) + $(NVCC) -o $@ $(cxx_objects) $(cu_objects) $(CPPFLAGS) $(CUFLAGS) -ldl $(LIBFLAGS) $(CULIBFLAGS) -lcuda -lgomp endif $(GTESTLIBS): - $(MAKE) -C $(TOOLSDIR) + $(MAKE) -C $(TESTDIR) check: runTest.exe ./runTest.exe @@ -112,6 +116,10 @@ clean: make -C ../../src clean rm -f *.o *.exe +distclean: clean + make -C $(TOOLSDIR) clean + 
make -C $(TESTDIR) clean + memcheck: $(cu_main) /usr/local/cuda/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes ./gcheck.exe 2 32 2 diff --git a/epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/runTest.cc b/epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/runTest.cc index 2aa32994a3..5bbf1b64e4 100644 --- a/epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/runTest.cc +++ b/epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/runTest.cc @@ -1,6 +1,8 @@ #include "mgOnGpuConfig.h" #include "mgOnGpuTypes.h" +#include "MadgraphTest.h" + #include "CommonRandomNumbers.h" #include "CPPProcess.h" #include "Memory.h" @@ -10,111 +12,38 @@ #include "rambo.h" #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -struct ReferenceData { - std::vector< std::array, mgOnGpu::npar> > momenta; - std::vector MEs; -}; +struct CUDA_CPU_TestBase : public TestDriverBase { + static_assert( gputhreads%mgOnGpu::neppR == 0, "ERROR! #threads/block should be a multiple of neppR" ); + static_assert( gputhreads%mgOnGpu::neppM == 0, "ERROR! #threads/block should be a multiple of neppM" ); + static_assert( gputhreads <= mgOnGpu::ntpbMAX, "ERROR! 
#threads/block should be <= ntpbMAX" ); -std::map readReferenceData(const std::string& refFileName); + const std::size_t nRnarray{ mgOnGpu::np4 * mgOnGpu::nparf * nevt }; // (NB: ASA layout with nevt=npagR*neppR events per iteration) + const std::size_t nMomenta{ mgOnGpu::np4 * mgOnGpu::npar * nevt }; // (NB: nevt=npagM*neppM for ASA layouts) + const std::size_t nWeights{ nevt }; + const std::size_t nMEs { nevt }; -#ifndef __CUDACC__ -std::map readReferenceData(const std::string& refFileName) -{ - std::ifstream referenceFile(refFileName.c_str()); - EXPECT_TRUE(referenceFile.is_open()) << refFileName; - std::map referenceData; - unsigned int evtNo; - unsigned int batchNo; - - for (std::string line; std::getline(referenceFile, line); ) + CUDA_CPU_TestBase() : + TestDriverBase() { - std::stringstream lineStr(line); - if (line.empty()) - { - continue; - } - else if (line.find("Event") != std::string::npos) - { - std::string dummy; - lineStr >> dummy >> evtNo >> dummy >> batchNo; - } - else if (line.find("ME") != std::string::npos) - { - if (evtNo <= referenceData[batchNo].MEs.size()) - referenceData[batchNo].MEs.resize(evtNo + 1); - - std::string dummy; - lineStr >> dummy >> referenceData[batchNo].MEs[evtNo]; - } - else - { - unsigned int particleIndex; - lineStr >> particleIndex; - - if (evtNo <= referenceData[batchNo].momenta.size()) - referenceData[batchNo].momenta.resize(evtNo + 1); - - for (unsigned int i=0; i < mgOnGpu::np4; ++i) { - EXPECT_TRUE(lineStr.good()); - lineStr >> referenceData[batchNo].momenta[evtNo][particleIndex][i]; - } - EXPECT_TRUE(lineStr.eof()); - } + TestDriverBase::nparticle = mgOnGpu::npar; } - return referenceData; -} -#endif -class BaseTest : public ::testing::Test { - protected: - - static constexpr unsigned niter = 2; - static constexpr unsigned gpublocks = 2; - static constexpr unsigned gputhreads = 128; - static constexpr std::size_t nevt = gpublocks * gputhreads; - - const std::size_t nRnarray; // (NB: ASA layout with nevt=npagR*neppR 
events per iteration) - const std::size_t nMomenta; // (NB: nevt=npagM*neppM for ASA layouts) - const std::size_t nWeights; - const std::size_t nMEs; - - BaseTest() : - nRnarray{ mgOnGpu::np4 * mgOnGpu::nparf * nevt }, // (NB: ASA layout with nevt=npagR*neppR events per iteration) - nMomenta{ mgOnGpu::np4 * mgOnGpu::npar * nevt },// (NB: nevt=npagM*neppM for ASA layouts) - nWeights{ nevt }, - nMEs { nevt } - { } - - virtual void prepareRandomNumbers(int iiter) = 0; - virtual void prepareMomenta(fptype energy) = 0; - virtual void runSigmaKin(std::size_t iiter) = 0; }; #ifndef __CUDACC__ -struct CPUTest : public BaseTest { +struct CPUTest : public CUDA_CPU_TestBase { Proc::CPPProcess process; - unique_ptr_host hstRnarray; - unique_ptr_host hstMomenta; - unique_ptr_host hstIsGoodHel; - unique_ptr_host hstWeights; - unique_ptr_host hstMEs; + // --- 0b. Allocate memory structures + // Memory structures for random numbers, momenta, matrix elements and weights on host and device + unique_ptr_host hstRnarray { hstMakeUnique( nRnarray ) }; // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR) + unique_ptr_host hstMomenta { hstMakeUnique( nMomenta ) }; // AOSOA[npagM][npar][np4][neppM] (previously was: lp) + unique_ptr_host hstIsGoodHel{ hstMakeUnique( mgOnGpu::ncomb ) }; + unique_ptr_host hstWeights { hstMakeUnique( nWeights ) }; + unique_ptr_host hstMEs { hstMakeUnique( nMEs ) }; // Create a process object // Read param_card and set parameters @@ -122,23 +51,15 @@ struct CPUTest : public BaseTest { // The CPPProcess constructor has side effects on the globals Proc::cHel, which is needed in ME calculations. // Don't remove! CPUTest() : - BaseTest(), + CUDA_CPU_TestBase(), process(niter, gpublocks, gputhreads, /*verbose=*/false) { process.initProc("../../Cards/param_card.dat"); - - // --- 0b. 
Allocate memory structures - // Memory structures for random numbers, momenta, matrix elements and weights on host and device - hstRnarray = hstMakeUnique( nRnarray ); // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR) - hstMomenta = hstMakeUnique( nMomenta ); // AOSOA[npagM][npar][np4][neppM] (previously was: lp) - hstIsGoodHel = hstMakeUnique( mgOnGpu::ncomb ); - hstWeights = hstMakeUnique( nWeights ); // (previously was: meHostPtr) - hstMEs = hstMakeUnique( nMEs ); // (previously was: meHostPtr) } virtual ~CPUTest() { } - void prepareRandomNumbers(int iiter) override { + void prepareRandomNumbers(unsigned int iiter) override { std::vector rnd = CommonRandomNumbers::generate(nRnarray, 1337 + iiter); std::copy(rnd.begin(), rnd.end(), hstRnarray.get()); } @@ -158,11 +79,26 @@ struct CPUTest : public BaseTest { // --- 3a. SigmaKin Proc::sigmaKin(hstMomenta.get(), hstMEs.get(), nevt); } + + + + double getMomentum(std::size_t evtNo, unsigned int particle, unsigned int component) const override { + assert(component < mgOnGpu::np4); + assert(particle < mgOnGpu::npar); + const auto page = evtNo / mgOnGpu::neppM; // #eventpage in this iteration + const auto ieppM = evtNo % mgOnGpu::neppM; // #event in the current eventpage in this iteration + return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*mgOnGpu::neppM + particle * mgOnGpu::neppM*mgOnGpu::np4 + component * mgOnGpu::neppM + ieppM]; + }; + + double getMatrixElement(std::size_t evtNo) const override { + return hstMEs[evtNo]; + } }; #endif + #ifdef __CUDACC__ -struct CUDATest : public BaseTest { +struct CUDATest : public CUDA_CPU_TestBase { // Reset the device when our test goes out of scope. Note that this should happen after // the frees, i.e. be declared before the pointers to device memory. 
struct DeviceReset { @@ -171,17 +107,20 @@ struct CUDATest : public BaseTest { } } deviceResetter; - unique_ptr_host hstRnarray; - unique_ptr_host hstMomenta; - unique_ptr_host hstIsGoodHel; - unique_ptr_host hstWeights; - unique_ptr_host hstMEs; + // --- 0b. Allocate memory structures + // Memory structures for random numbers, momenta, matrix elements and weights on host and device + unique_ptr_host hstRnarray { hstMakeUnique( nRnarray ) }; // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR) + unique_ptr_host hstMomenta { hstMakeUnique( nMomenta ) }; // AOSOA[npagM][npar][np4][neppM] (previously was: lp) + unique_ptr_host hstIsGoodHel{ hstMakeUnique( mgOnGpu::ncomb ) }; + unique_ptr_host hstWeights { hstMakeUnique( nWeights ) }; + unique_ptr_host hstMEs { hstMakeUnique( nMEs ) }; - unique_ptr_dev devRnarray; - unique_ptr_dev devMomenta; - unique_ptr_dev devIsGoodHel; - unique_ptr_dev devWeights; - unique_ptr_dev devMEs; + + unique_ptr_dev devRnarray { devMakeUnique( nRnarray ) }; // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR) + unique_ptr_dev devMomenta { devMakeUnique( nMomenta ) }; // (previously was: allMomenta) + unique_ptr_dev devIsGoodHel{ devMakeUnique( mgOnGpu::ncomb ) }; + unique_ptr_dev devWeights { devMakeUnique( nWeights ) }; // (previously was: meDevPtr) + unique_ptr_dev devMEs { devMakeUnique( nMEs ) }; // (previously was: meDevPtr) gProc::CPPProcess process; @@ -191,31 +130,15 @@ struct CUDATest : public BaseTest { // The CPPProcess constructor has side effects on the globals Proc::cHel, which is needed in ME calculations. // Don't remove! CUDATest() : - BaseTest(), + CUDA_CPU_TestBase(), process(niter, gpublocks, gputhreads, /*verbose=*/false) { process.initProc("../../Cards/param_card.dat"); - - checkCuda( cudaFree( 0 ) ); // SLOW! - - // --- 0b. 
Allocate memory structures - // Memory structures for random numbers, momenta, matrix elements and weights on host and device - hstRnarray = hstMakeUnique( nRnarray ); // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR) - hstMomenta = hstMakeUnique( nMomenta ); // AOSOA[npagM][npar][np4][neppM] (previously was: lp) - hstIsGoodHel = hstMakeUnique( mgOnGpu::ncomb ); - hstWeights = hstMakeUnique( nWeights ); // (previously was: meHostPtr) - hstMEs = hstMakeUnique( nMEs ); // (previously was: meHostPtr) - - devRnarray = devMakeUnique( nRnarray ); // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR) - devMomenta = devMakeUnique( nMomenta ); // (previously was: allMomenta) - devIsGoodHel = devMakeUnique( mgOnGpu::ncomb ); - devWeights = devMakeUnique( nWeights ); // (previously was: meDevPtr) - devMEs = devMakeUnique( nMEs ); // (previously was: meDevPtr) } - virtual ~CUDATest() { } - void prepareRandomNumbers(int iiter) override { + + void prepareRandomNumbers(unsigned int iiter) override { std::vector rnd = CommonRandomNumbers::generate(nRnarray, 1337 + iiter); std::copy(rnd.begin(), rnd.end(), hstRnarray.get()); checkCuda( cudaMemcpy( devRnarray.get(), hstRnarray.get(), nRnarray * sizeof(decltype(devRnarray)::element_type), cudaMemcpyHostToDevice ) ); @@ -237,6 +160,7 @@ struct CUDATest : public BaseTest { checkCuda( cudaMemcpy( hstMomenta.get(), devMomenta.get(), nMomenta * sizeof(decltype(hstMomenta)::element_type), cudaMemcpyDeviceToHost ) ); } + void runSigmaKin(std::size_t iiter) override { // --- 0d. 
SGoodHel if ( iiter == 0 ) @@ -262,147 +186,30 @@ struct CUDATest : public BaseTest { checkCuda( cudaMemcpy( hstMEs.get(), devMEs.get(), nMEs * sizeof(decltype(hstMEs)::element_type), cudaMemcpyDeviceToHost ) ); } + + double getMomentum(std::size_t evtNo, unsigned int particle, unsigned int component) const override { + assert(component < mgOnGpu::np4); + assert(particle < mgOnGpu::npar); + const auto page = evtNo / mgOnGpu::neppM; // #eventpage in this iteration + const auto ieppM = evtNo % mgOnGpu::neppM; // #event in the current eventpage in this iteration + return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*mgOnGpu::neppM + particle * mgOnGpu::neppM*mgOnGpu::np4 + component * mgOnGpu::neppM + ieppM]; + }; + + double getMatrixElement(std::size_t evtNo) const override { + return hstMEs[evtNo]; + } }; #endif #ifdef __CUDACC__ -TEST_F(CUDATest, eemumu) +INSTANTIATE_TEST_SUITE_P(EP1_CUDA_GPU, MadgraphTestDouble, + testing::Values( [](){ return new CUDATest; } ) +); #else -TEST_F(CPUTest, eemumu) +INSTANTIATE_TEST_SUITE_P(EP1_CUDA_CPU, MadgraphTestDouble, + testing::Values([](){ return new CPUTest; }) +); #endif -{ - // Set to dump events: - constexpr bool dumpEvents = false; - constexpr fptype toleranceMomenta = std::is_same::value ? 5.E-12 : 1.E-5; - constexpr fptype toleranceMEs = std::is_same::value ? 1.E-7 : 1.E-5; - - const std::string dumpFileName = std::string("dump_") - + testing::UnitTest::GetInstance()->current_test_info()->test_suite_name() - + "." - + testing::UnitTest::GetInstance()->current_test_info()->name() - + ".txt"; - const std::string refFileName = "dump_CPUTest.eemumu.txt"; - - const int neppR = mgOnGpu::neppR; // ASA layout: constant at compile-time - static_assert( gputhreads%neppR == 0, "ERROR! #threads/block should be a multiple of neppR" ); - - const int neppM = mgOnGpu::neppM; // ASA layout: constant at compile-time - static_assert( gputhreads%neppM == 0, "ERROR! 
#threads/block should be a multiple of neppM" ); - - using mgOnGpu::ntpbMAX; - static_assert( gputhreads <= ntpbMAX, "ERROR! #threads/block should be <= ntpbMAX" ); - - std::ofstream dumpFile; - if ( dumpEvents ) - { - dumpFile.open(dumpFileName, std::ios::trunc); - } - - std::map referenceData = readReferenceData(refFileName); - ASSERT_FALSE(HasFailure()); // It doesn't make any sense to continue if we couldn't read the reference file. - constexpr fptype energy = 1500; // historical default, Ecms = 1500 GeV = 1.5 TeV (above the Z peak) - - // ************************************** - // *** START MAIN LOOP ON #ITERATIONS *** - // ************************************** - - for (unsigned int iiter = 0; iiter < niter; ++iiter) - { - prepareRandomNumbers(iiter); - - prepareMomenta(energy); - - runSigmaKin(iiter); - - // --- Run checks on all events produced in this iteration - for (std::size_t ievt = 0; ievt < nevt && !HasFailure(); ++ievt) - { - auto getMomentum = [&](std::size_t evtNo, int particle, int component) - { - assert(component < mgOnGpu::np4); - assert(particle < mgOnGpu::npar); - const auto page = evtNo / neppM; // #eventpage in this iteration - const auto ieppM = evtNo % neppM; // #event in the current eventpage in this iteration - return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*neppM + particle * neppM*mgOnGpu::np4 + component * neppM + ieppM]; - }; - auto dumpParticles = [&](std::ostream& stream, unsigned precision, bool dumpReference) - { - const auto width = precision + 8; - for (int ipar = 0; ipar < mgOnGpu::npar; ipar++) - { - // NB: 'setw' affects only the next field (of any type) - stream << std::scientific // fixed format: affects all floats (default precision: 6) - << std::setprecision(precision) - << std::setw(4) << ipar - << std::setw(width) << getMomentum(ievt, ipar, 0) - << std::setw(width) << getMomentum(ievt, ipar, 1) - << std::setw(width) << getMomentum(ievt, ipar, 2) - << std::setw(width) << getMomentum(ievt, ipar, 3) - << "\n"; - if 
(dumpReference) { - stream << "ref" << ipar; - if (ievt < referenceData[iiter].momenta.size()) { - stream << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][0] - << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][1] - << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][2] - << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][3] - << "\n\n"; - } else { - stream << " --- No reference ---\n\n"; - } - } - stream << std::flush << std::defaultfloat; // default format: affects all floats - } - }; - - if (dumpEvents) { - ASSERT_TRUE(dumpFile.is_open()); - dumpFile << "Event " << std::setw(8) << ievt << " " - << "Batch " << std::setw(4) << iiter << "\n"; - dumpParticles(dumpFile, 15, false); - // Dump matrix element - dumpFile << std::setw(4) << "ME" << std::scientific << std::setw(15+8) << hstMEs[ievt] << "\n" << std::endl << std::defaultfloat; - continue; - } - - ASSERT_GT(referenceData.size(), iiter) << "Don't have enough reference data for iteration " << iiter << ". 
Ref file:" << refFileName; - ASSERT_GT(referenceData[iiter].MEs.size(), ievt) << "Don't have enough reference events for iteration " << iiter << " event " << ievt << ".\nRef file: " << refFileName; - - // This trace will only be printed in case of failures: - std::stringstream eventTrace; - eventTrace << "In comparing event " << ievt << " from iteration " << iiter << "\n"; - dumpParticles(eventTrace, 15, true); - eventTrace << std::setw(4) << "ME" << std::scientific << std::setw(15+8) << hstMEs[ievt] << "\n" - << std::setw(4) << "r.ME" << std::scientific << std::setw(15+8) << referenceData[iiter].MEs[ievt] << std::endl << std::defaultfloat; - SCOPED_TRACE(eventTrace.str()); - - ASSERT_LT( ievt, referenceData[iiter].momenta.size() ) << "Don't have enough events in reference file #ref=" << referenceData[iiter].momenta.size(); - - - // Compare Momenta - for (unsigned int ipar = 0; ipar < mgOnGpu::npar; ++ipar) { - std::stringstream momentumErrors; - for (unsigned int icomp = 0; icomp < mgOnGpu::np4; ++icomp) { - const double pMadg = getMomentum(ievt, ipar, icomp); - const double pOrig = referenceData[iiter].momenta[ievt][ipar][icomp]; - const double relDelta = fabs( (pMadg - pOrig)/pOrig ); - if (relDelta > toleranceMomenta) { - momentumErrors << std::setprecision(15) << std::scientific << "\nparticle " << ipar << "\tcomponent " << icomp - << "\n\t madGraph: " << std::setw(22) << pMadg - << "\n\t reference: " << std::setw(22) << pOrig - << "\n\t rel delta: " << std::setw(22) << relDelta << " exceeds tolerance of " << toleranceMomenta; - } - } - ASSERT_TRUE(momentumErrors.str().empty()) << momentumErrors.str(); - } - - // Compare ME: - EXPECT_NEAR(hstMEs[ievt], referenceData[iiter].MEs[ievt], toleranceMEs * referenceData[iiter].MEs[ievt]); - } - - - } -} diff --git a/test/Makefile b/test/Makefile new file mode 100644 index 0000000000..d65fd1f8ed --- /dev/null +++ b/test/Makefile @@ -0,0 +1,22 @@ +CXXFLAGS += -Igoogletest/googletest/include/ -std=c++11 + +all: gtest 
src/MadgraphTest.o + +%.o: %.cc include/*.h + $(CXX) $< -c -o $@ $(CPPFLAGS) $(CXXFLAGS) -Iinclude/ + +.PHONY: gtest + +googletest: + git clone https://github.com/google/googletest.git -b release-1.10.0 googletest + +googletest/build: googletest + mkdir -p $@ + cd googletest/build && cmake -DBUILD_GMOCK=OFF ../ + +gtest: googletest/build + $(MAKE) -C googletest/build + +clean: + rm -rf googletest + rm src/*.o diff --git a/epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/dump_CPUTest.eemumu.txt b/test/eemumu/dump_CPUTest.eemumu.txt similarity index 99% rename from epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/dump_CPUTest.eemumu.txt rename to test/eemumu/dump_CPUTest.eemumu.txt index db1a950a3d..b1e601a35d 100644 --- a/epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/dump_CPUTest.eemumu.txt +++ b/test/eemumu/dump_CPUTest.eemumu.txt @@ -1,3 +1,4 @@ +# Dumped in epoch1 cuda Event 0 Batch 0 0 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 7.500000000000000e+02 1 7.500000000000000e+02 0.000000000000000e+00 0.000000000000000e+00 -7.500000000000000e+02 diff --git a/test/include/MadgraphTest.h b/test/include/MadgraphTest.h new file mode 100644 index 0000000000..bc2a634b4e --- /dev/null +++ b/test/include/MadgraphTest.h @@ -0,0 +1,110 @@ +// Stephan Hageboeck, CERN, 12/2020 +#ifndef MADGRAPHTEST_H_ +#define MADGRAPHTEST_H_ + +#include +#include +#include +#include + +#include + +struct ReferenceData { + std::vector< std::vector> > momenta; + std::vector MEs; +}; + +std::map readReferenceData(const std::string& refFileName); + +/** + * Test driver providing a common interface for testing different implementations. + * Users need to implement: + * - Functions to retrieve matrix element and 4-momenta. These are used in the tests. + * - Driver functions that run the madgraph workflow. 
+ * + * Usage: + * ``` + * class TestImplementation : public BaseTest { + * + * } + * + * TEST_F(TestImplementation, ) { + * + * } + */ +template +class TestDriverBase { + public: + using fptype = FpType; + unsigned int nparticle = 4; + static constexpr unsigned int np4 = 4; + static constexpr unsigned int niter = 2; + static constexpr unsigned int gpublocks = 2; + static constexpr unsigned int gputhreads = 128; + static constexpr unsigned int nevt = gpublocks * gputhreads; + + TestDriverBase() { } + virtual ~TestDriverBase() { } + + + // ------------------------------------------------ + // Interface for retrieving info from madgraph + // ------------------------------------------------ + virtual double getMomentum(std::size_t evtNo, unsigned int particleNo, unsigned int component) const = 0; + virtual double getMatrixElement(std::size_t evtNo) const = 0; + + + // ------------------------------------------------ + // Interface for steering madgraph run + // ------------------------------------------------ + virtual void prepareRandomNumbers(unsigned int iiter) = 0; + virtual void prepareMomenta(fptype energy) = 0; + virtual void runSigmaKin(std::size_t iiter) = 0; + + // Print the requested event into the stream. If the reference data has enough events, it will be printed as well. 
+ void dumpParticles(std::ostream& stream, std::size_t ievt, unsigned int numParticles, unsigned int nDigit, const ReferenceData& referenceData) + { + const auto width = nDigit + 8; + for (unsigned int ipar = 0; ipar < numParticles; ipar++) + { + // NB: 'setw' affects only the next field (of any type) + stream << std::scientific // fixed format: affects all floats (default nDigit: 6) + << std::setprecision(nDigit) + << std::setw(4) << ipar + << std::setw(width) << getMomentum(ievt, ipar, 0) + << std::setw(width) << getMomentum(ievt, ipar, 1) + << std::setw(width) << getMomentum(ievt, ipar, 2) + << std::setw(width) << getMomentum(ievt, ipar, 3) + << "\n"; + if (ievt < referenceData.momenta.size()) { + stream << "ref" << ipar; + stream << std::setw(width) << referenceData.momenta[ievt][ipar][0] + << std::setw(width) << referenceData.momenta[ievt][ipar][1] + << std::setw(width) << referenceData.momenta[ievt][ipar][2] + << std::setw(width) << referenceData.momenta[ievt][ipar][3] + << "\n\n"; + } + stream << std::flush << std::defaultfloat; // default format: affects all floats + } + }; +}; + + +// Test class that's using the driver to run the test(s) below. 
+class MadgraphTestDouble : public testing::TestWithParam*()>> { +protected: + using fptype = double; + using TestDriver_t = TestDriverBase; + std::unique_ptr> testDriver; + + +public: + MadgraphTestDouble() : + TestWithParam(), + testDriver{ GetParam()() } + { } +}; + + + +#endif /* MADGRAPHTEST_H_ */ diff --git a/test/src/MadgraphTest.cc b/test/src/MadgraphTest.cc new file mode 100644 index 0000000000..4c39b042e0 --- /dev/null +++ b/test/src/MadgraphTest.cc @@ -0,0 +1,159 @@ +/* + * MadgraphTest.cc + * + * Created on: 11.12.2020 + * Author: shageboeck + */ + +#include "MadgraphTest.h" + +#include +#include +#include + +std::map readReferenceData(const std::string& refFileName) +{ + std::ifstream referenceFile(refFileName.c_str()); + EXPECT_TRUE(referenceFile.is_open()) << refFileName; + std::map referenceData; + unsigned int evtNo; + unsigned int batchNo; + + for (std::string line; std::getline(referenceFile, line); ) + { + std::stringstream lineStr(line); + if (line.empty() || line[0] == '#') + { + continue; + } + else if (line.find("Event") != std::string::npos) + { + std::string dummy; + lineStr >> dummy >> evtNo >> dummy >> batchNo; + } + else if (line.find("ME") != std::string::npos) + { + if (evtNo <= referenceData[batchNo].MEs.size()) + referenceData[batchNo].MEs.resize(evtNo + 1); + + std::string dummy; + lineStr >> dummy >> referenceData[batchNo].MEs[evtNo]; + } + else + { + unsigned int particleIndex; + lineStr >> particleIndex; + + if (evtNo <= referenceData[batchNo].momenta.size()) + referenceData[batchNo].momenta.resize(evtNo + 1); + if (particleIndex <= referenceData[batchNo].momenta[evtNo].size()) + referenceData[batchNo].momenta[evtNo].resize(particleIndex + 1); + + auto& fourVec = referenceData[batchNo].momenta[evtNo][particleIndex]; + for (unsigned int i=0; i < fourVec.size(); ++i) { + EXPECT_TRUE(lineStr.good()); + lineStr >> fourVec[i]; + } + EXPECT_TRUE(lineStr.eof()); + } + } + return referenceData; +} + + +TEST_P(MadgraphTestDouble, eemumu) 
+{ + // Set to dump events: + constexpr bool dumpEvents = false; + constexpr fptype toleranceMomenta = std::is_same::value ? 5.E-12 : 1.E-5; + constexpr fptype toleranceMEs = std::is_same::value ? 1.E-7 : 1.E-5; + constexpr fptype energy = 1500; // historical default, Ecms = 1500 GeV = 1.5 TeV (above the Z peak) + + std::string dumpFileName = std::string("dump_") + + testing::UnitTest::GetInstance()->current_test_info()->name() + + ".txt"; + while (dumpFileName.find('/') != std::string::npos) { + dumpFileName.replace(dumpFileName.find('/'), 1, "_"); + } + const std::string refFileName = "../../../../../test/eemumu/dump_CPUTest.eemumu.txt"; + + std::ofstream dumpFile; + if ( dumpEvents ) + { + dumpFile.open(dumpFileName, std::ios::trunc); + } + + // Read reference data + std::map referenceData = readReferenceData(refFileName); + ASSERT_FALSE(HasFailure()); // It doesn't make any sense to continue if we couldn't read the reference file. + + + // ************************************** + // *** START MAIN LOOP ON #ITERATIONS *** + // ************************************** + for (unsigned int iiter = 0; iiter < testDriver->niter; ++iiter) + { + testDriver->prepareRandomNumbers(iiter); + + testDriver->prepareMomenta(energy); + + testDriver->runSigmaKin(iiter); + + // --- Run checks on all events produced in this iteration + for (std::size_t ievt = 0; ievt < testDriver->nevt && !HasFailure(); ++ievt) + { + if (dumpEvents) { + ASSERT_TRUE(dumpFile.is_open()) << dumpFileName; + dumpFile << "Event " << std::setw(8) << ievt << " " + << "Batch " << std::setw(4) << iiter << "\n"; + testDriver->dumpParticles(dumpFile, ievt, testDriver->nparticle, 15, ReferenceData()); + // Dump matrix element + dumpFile << std::setw(4) << "ME" << std::scientific << std::setw(15+8) + << testDriver->getMatrixElement(ievt) << "\n" << std::endl << std::defaultfloat; + continue; + } + + + // Check that we have the required reference data + ASSERT_GT(referenceData.size(), iiter) << "Don't have enough 
reference data for iteration " << iiter << ". Ref file:" << refFileName; + ASSERT_GT(referenceData[iiter].MEs.size(), ievt) << "Don't have enough reference MEs for iteration " << iiter << " event " << ievt << ".\nRef file: " << refFileName; + ASSERT_GT(referenceData[iiter].momenta.size(), ievt) << "Don't have enough reference momenta for iteration " << iiter << " event " << ievt << ".\nRef file: " << refFileName; + ASSERT_GE(referenceData[iiter].momenta[ievt].size(), testDriver->nparticle) << "Don't have enough reference particles for iteration " << iiter << " event " << ievt << ".\nRef file: " << refFileName; + + + // This trace will help to understand the event that is being checked. + // It will only be printed in case of failures: + std::stringstream eventTrace; + eventTrace << "In comparing event " << ievt << " from iteration " << iiter << "\n"; + testDriver->dumpParticles(eventTrace, ievt, testDriver->nparticle, 15, referenceData[iiter]); + eventTrace << std::setw(4) << "ME" << std::scientific << std::setw(15+8) << testDriver->getMatrixElement(ievt) << "\n" + << std::setw(4) << "r.ME" << std::scientific << std::setw(15+8) << referenceData[iiter].MEs[ievt] << std::endl << std::defaultfloat; + SCOPED_TRACE(eventTrace.str()); + + + // Compare Momenta + for (unsigned int ipar = 0; ipar < testDriver->nparticle; ++ipar) { + std::stringstream momentumErrors; + for (unsigned int icomp = 0; icomp < testDriver->np4; ++icomp) { + const double pMadg = testDriver->getMomentum(ievt, ipar, icomp); + const double pOrig = referenceData[iiter].momenta[ievt][ipar][icomp]; + const double relDelta = fabs( (pMadg - pOrig)/pOrig ); + if (relDelta > toleranceMomenta) { + momentumErrors << std::setprecision(15) << std::scientific << "\nparticle " << ipar << "\tcomponent " << icomp + << "\n\t madGraph: " << std::setw(22) << pMadg + << "\n\t reference: " << std::setw(22) << pOrig + << "\n\t rel delta: " << std::setw(22) << relDelta << " exceeds tolerance of " << toleranceMomenta; + } + 
} + ASSERT_TRUE(momentumErrors.str().empty()) << momentumErrors.str(); + } + + + // Compare ME: + EXPECT_NEAR(testDriver->getMatrixElement(ievt), + referenceData[iiter].MEs[ievt], + toleranceMEs * referenceData[iiter].MEs[ievt]); + } + } +} + diff --git a/tools/Makefile b/tools/Makefile index 762e36e6a5..bc328fcee3 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,18 +1,2 @@ -# Set up tools, e.g. googletest. - -.PHONY: gtest - -all: gtest - -googletest: - git clone https://github.com/google/googletest.git -b release-1.10.0 googletest - -googletest/build: googletest - mkdir -p $@ - cd googletest/build && cmake -DBUILD_GMOCK=OFF ../ - -gtest: googletest/build - $(MAKE) -C googletest/build - clean: rm -rf googletest From 6afa818ff368ff527b199d4ef5ea0300bf55e26e Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Wed, 16 Dec 2020 16:57:44 +0100 Subject: [PATCH 2/6] [tests] Raise matrix element test threshold. To port tests from ep1 to ep2, the threshold for comparing matrix elements has to be raised to 1.E-6. This is presumably because of reordered floating-point computations. --- test/src/MadgraphTest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/src/MadgraphTest.cc b/test/src/MadgraphTest.cc index 4c39b042e0..6a78493c1a 100644 --- a/test/src/MadgraphTest.cc +++ b/test/src/MadgraphTest.cc @@ -66,7 +66,7 @@ TEST_P(MadgraphTestDouble, eemumu) // Set to dump events: constexpr bool dumpEvents = false; constexpr fptype toleranceMomenta = std::is_same::value ? 5.E-12 : 1.E-5; - constexpr fptype toleranceMEs = std::is_same::value ? 1.E-7 : 1.E-5; + constexpr fptype toleranceMEs = std::is_same::value ? 
1.E-6 : 1.E-5; constexpr fptype energy = 1500; // historical default, Ecms = 1500 GeV = 1.5 TeV (above the Z peak) std::string dumpFileName = std::string("dump_") From 597ef64ea06b28a26a994c5224b8ae8e2368d140 Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Wed, 16 Dec 2020 17:05:24 +0100 Subject: [PATCH 3/6] [ep2 cuda eemm] Port CUDA tests to epoch2. --- epoch2/cuda/ee_mumu/SubProcesses/Makefile | 26 +- epoch2/cuda/ee_mumu/SubProcesses/runTest.cc | 333 +++++--------------- 2 files changed, 91 insertions(+), 268 deletions(-) diff --git a/epoch2/cuda/ee_mumu/SubProcesses/Makefile b/epoch2/cuda/ee_mumu/SubProcesses/Makefile index 2ebef6abac..598dc30d2d 100644 --- a/epoch2/cuda/ee_mumu/SubProcesses/Makefile +++ b/epoch2/cuda/ee_mumu/SubProcesses/Makefile @@ -1,5 +1,6 @@ LIBDIR = ../../lib TOOLSDIR = ../../../../../tools/ +TESTDIR = ../../../../../test INCFLAGS = -I. -I../../src -I$(TOOLSDIR) MODELLIB = model_sm OPTFLAGS = -O3 @@ -38,7 +39,7 @@ else endif endif -GTESTLIBDIR = $(TOOLSDIR)/googletest/build/lib/ +GTESTLIBDIR = $(TESTDIR)/googletest/build/lib/ GTESTLIBS = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a MAKEDEBUG= @@ -80,20 +81,23 @@ $(cxx_main): check_sa.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) runTest.o: $(GTESTLIBS) runTest.exe: $(GTESTLIBS) -runTest.exe: INCFLAGS += -I$(TOOLSDIR)/googletest/googletest/include/ +runTest.exe: INCFLAGS += -I$(TESTDIR)/googletest/googletest/include/ +runTest.exe: INCFLAGS += -I$(TESTDIR)/include/ runTest.exe: LIBFLAGS += -L$(GTESTLIBDIR)/ -lgtest -lgtest_main +runTest.exe: runTest.o $(TESTDIR)/src/MadgraphTest.o $(TESTDIR)/include/*.h +runTest.exe: cxx_objects += runTest.o $(TESTDIR)/src/MadgraphTest.o +runTest.exe: cu_objects += runTest_cu.o ifeq ($(NVCC),) -runTest.exe: runTest.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS) - $(CXX) -o $@ $(cxx_objects) runTest.o $(CPPFLAGS) $(CXXFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS) +runTest.exe: $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS) 
+ $(CXX) -o $@ $(cxx_objects) $(CPPFLAGS) $(CXXFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS) else -runTest.exe: runTest.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS) - ln -sf runTest.cc runTest_tmp.cu - $(NVCC) -o $@ $(cxx_objects) runTest.o $(cu_objects) runTest_tmp.cu $(CPPFLAGS) $(CUFLAGS) -ldl $(LIBFLAGS) $(CULIBFLAGS) -lcuda - unlink runTest_tmp.cu +runTest.exe runTest_cu.o &: runTest.cc $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(cu_objects) $(GTESTLIBS) + $(NVCC) -o runTest_cu.o -c -x cu runTest.cc $(CPPFLAGS) $(CUFLAGS) + $(NVCC) -o $@ $(cxx_objects) $(cu_objects) $(CPPFLAGS) $(CUFLAGS) -ldl $(LIBFLAGS) $(CULIBFLAGS) -lcuda -lgomp endif $(GTESTLIBS): - $(MAKE) -C $(TOOLSDIR) + $(MAKE) -C $(TESTDIR) check: runTest.exe ./runTest.exe @@ -104,6 +108,10 @@ clean: cd ../../src && make clean rm -f *.o *.exe +distclean: clean + make -C $(TOOLSDIR) clean + make -C $(TESTDIR) clean + memcheck: $(cu_main) /usr/local/cuda/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes ./gcheck.exe 2 32 2 diff --git a/epoch2/cuda/ee_mumu/SubProcesses/runTest.cc b/epoch2/cuda/ee_mumu/SubProcesses/runTest.cc index 8f3e4c6024..dbde364896 100644 --- a/epoch2/cuda/ee_mumu/SubProcesses/runTest.cc +++ b/epoch2/cuda/ee_mumu/SubProcesses/runTest.cc @@ -1,6 +1,8 @@ #include "mgOnGpuConfig.h" #include "mgOnGpuTypes.h" +#include "MadgraphTest.h" + #include "CommonRandomNumbers.h" #include "gCPPProcess.h" #include "Memory.h" @@ -10,110 +12,38 @@ #include "rambo.h" #endif -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -struct ReferenceData { - std::vector< std::array, mgOnGpu::npar> > momenta; - std::vector MEs; -}; +struct CUDA_CPU_TestBase : public TestDriverBase { + static_assert( gputhreads%mgOnGpu::neppR == 0, 
"ERROR! #threads/block should be a multiple of neppR" ); + static_assert( gputhreads%mgOnGpu::neppM == 0, "ERROR! #threads/block should be a multiple of neppM" ); + static_assert( gputhreads <= mgOnGpu::ntpbMAX, "ERROR! #threads/block should be <= ntpbMAX" ); -std::map readReferenceData(const std::string& refFileName); + const std::size_t nRnarray{ mgOnGpu::np4 * mgOnGpu::nparf * nevt }; // (NB: ASA layout with nevt=npagR*neppR events per iteration) + const std::size_t nMomenta{ mgOnGpu::np4 * mgOnGpu::npar * nevt }; // (NB: nevt=npagM*neppM for ASA layouts) + const std::size_t nWeights{ nevt }; + const std::size_t nMEs { nevt }; -#ifndef __CUDACC__ -std::map readReferenceData(const std::string& refFileName) -{ - std::ifstream referenceFile(refFileName.c_str()); - EXPECT_TRUE(referenceFile.is_open()) << refFileName; - std::map referenceData; - unsigned int evtNo; - unsigned int batchNo; - - for (std::string line; std::getline(referenceFile, line); ) + CUDA_CPU_TestBase() : + TestDriverBase() { - std::stringstream lineStr(line); - if (line.empty()) - { - continue; - } - else if (line.find("Event") != std::string::npos) - { - std::string dummy; - lineStr >> dummy >> evtNo >> dummy >> batchNo; - } - else if (line.find("ME") != std::string::npos) - { - if (evtNo <= referenceData[batchNo].MEs.size()) - referenceData[batchNo].MEs.resize(evtNo + 1); - - std::string dummy; - lineStr >> dummy >> referenceData[batchNo].MEs[evtNo]; - } - else - { - unsigned int particleIndex; - lineStr >> particleIndex; - - if (evtNo <= referenceData[batchNo].momenta.size()) - referenceData[batchNo].momenta.resize(evtNo + 1); - - for (unsigned int i=0; i < mgOnGpu::np4; ++i) { - EXPECT_TRUE(lineStr.good()); - lineStr >> referenceData[batchNo].momenta[evtNo][particleIndex][i]; - } - EXPECT_TRUE(lineStr.eof()); - } + TestDriverBase::nparticle = mgOnGpu::npar; } - return referenceData; -} -#endif -class BaseTest : public ::testing::Test { - protected: - - static constexpr unsigned niter = 2; - 
static constexpr unsigned gpublocks = 2; - static constexpr unsigned gputhreads = 32; - static constexpr std::size_t nevt = gpublocks * gputhreads; - - const std::size_t nRnarray; // (NB: ASA layout with nevt=npagR*neppR events per iteration) - const std::size_t nMomenta; // (NB: nevt=npagM*neppM for ASA layouts) - const std::size_t nWeights; - const std::size_t nMEs; - - BaseTest() : - nRnarray{ mgOnGpu::np4 * mgOnGpu::nparf * nevt }, // (NB: ASA layout with nevt=npagR*neppR events per iteration) - nMomenta{ mgOnGpu::np4 * mgOnGpu::npar * nevt },// (NB: nevt=npagM*neppM for ASA layouts) - nWeights{ nevt }, - nMEs { nevt } - { } - - virtual void prepareRandomNumbers(int iiter) = 0; - virtual void prepareMomenta(fptype energy) = 0; - virtual void runSigmaKin(std::size_t iiter) = 0; }; #ifndef __CUDACC__ -struct CPUTest : public BaseTest { +struct CPUTest : public CUDA_CPU_TestBase { Proc::CPPProcess process; - unique_ptr_host hstRnarray; - unique_ptr_host hstMomenta; - unique_ptr_host hstIsGoodHel; - unique_ptr_host hstWeights; - unique_ptr_host hstMEs; + // --- 0b. Allocate memory structures + // Memory structures for random numbers, momenta, matrix elements and weights on host and device + unique_ptr_host hstRnarray { hstMakeUnique( nRnarray ) }; // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR) + unique_ptr_host hstMomenta { hstMakeUnique( nMomenta ) }; // AOSOA[npagM][npar][np4][neppM] (previously was: lp) + unique_ptr_host hstIsGoodHel{ hstMakeUnique( mgOnGpu::ncomb ) }; + unique_ptr_host hstWeights { hstMakeUnique( nWeights ) }; + unique_ptr_host hstMEs { hstMakeUnique( nMEs ) }; // Create a process object // Read param_card and set parameters @@ -121,23 +51,15 @@ struct CPUTest : public BaseTest { // The CPPProcess constructor has side effects on the globals Proc::cHel, which is needed in ME calculations. // Don't remove! 
CPUTest() : - BaseTest(), + CUDA_CPU_TestBase(), process(niter, gpublocks, gputhreads, /*verbose=*/false) { process.initProc("../../Cards/param_card.dat"); - - // --- 0b. Allocate memory structures - // Memory structures for random numbers, momenta, matrix elements and weights on host and device - hstRnarray = hstMakeUnique( nRnarray ); // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR) - hstMomenta = hstMakeUnique( nMomenta ); // AOSOA[npagM][npar][np4][neppM] (previously was: lp) - hstIsGoodHel = hstMakeUnique( mgOnGpu::ncomb ); - hstWeights = hstMakeUnique( nWeights ); // (previously was: meHostPtr) - hstMEs = hstMakeUnique( nMEs ); // (previously was: meHostPtr) } virtual ~CPUTest() { } - void prepareRandomNumbers(int iiter) override { + void prepareRandomNumbers(unsigned int iiter) override { std::vector rnd = CommonRandomNumbers::generate(nRnarray, 1337 + iiter); std::copy(rnd.begin(), rnd.end(), hstRnarray.get()); } @@ -157,11 +79,26 @@ struct CPUTest : public BaseTest { // --- 3a. SigmaKin Proc::sigmaKin(hstMomenta.get(), hstMEs.get(), nevt); } + + + + double getMomentum(std::size_t evtNo, unsigned int particle, unsigned int component) const override { + assert(component < mgOnGpu::np4); + assert(particle < mgOnGpu::npar); + const auto page = evtNo / mgOnGpu::neppM; // #eventpage in this iteration + const auto ieppM = evtNo % mgOnGpu::neppM; // #event in the current eventpage in this iteration + return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*mgOnGpu::neppM + particle * mgOnGpu::neppM*mgOnGpu::np4 + component * mgOnGpu::neppM + ieppM]; + }; + + double getMatrixElement(std::size_t evtNo) const override { + return hstMEs[evtNo]; + } }; #endif + #ifdef __CUDACC__ -struct CUDATest : public BaseTest { +struct CUDATest : public CUDA_CPU_TestBase { // Reset the device when our test goes out of scope. Note that this should happen after // the frees, i.e. be declared before the pointers to device memory. 
struct DeviceReset { @@ -170,17 +107,20 @@ struct CUDATest : public BaseTest { } } deviceResetter; - unique_ptr_host hstRnarray; - unique_ptr_host hstMomenta; - unique_ptr_host hstIsGoodHel; - unique_ptr_host hstWeights; - unique_ptr_host hstMEs; + // --- 0b. Allocate memory structures + // Memory structures for random numbers, momenta, matrix elements and weights on host and device + unique_ptr_host hstRnarray { hstMakeUnique( nRnarray ) }; // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR) + unique_ptr_host hstMomenta { hstMakeUnique( nMomenta ) }; // AOSOA[npagM][npar][np4][neppM] (previously was: lp) + unique_ptr_host hstIsGoodHel{ hstMakeUnique( mgOnGpu::ncomb ) }; + unique_ptr_host hstWeights { hstMakeUnique( nWeights ) }; + unique_ptr_host hstMEs { hstMakeUnique( nMEs ) }; + - unique_ptr_dev devRnarray; - unique_ptr_dev devMomenta; - unique_ptr_dev devIsGoodHel; - unique_ptr_dev devWeights; - unique_ptr_dev devMEs; + unique_ptr_dev devRnarray { devMakeUnique( nRnarray ) }; // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR) + unique_ptr_dev devMomenta { devMakeUnique( nMomenta ) }; // (previously was: allMomenta) + unique_ptr_dev devIsGoodHel{ devMakeUnique( mgOnGpu::ncomb ) }; + unique_ptr_dev devWeights { devMakeUnique( nWeights ) }; // (previously was: meDevPtr) + unique_ptr_dev devMEs { devMakeUnique( nMEs ) }; // (previously was: meDevPtr) gProc::CPPProcess process; @@ -190,31 +130,15 @@ struct CUDATest : public BaseTest { // The CPPProcess constructor has side effects on the globals Proc::cHel, which is needed in ME calculations. // Don't remove! CUDATest() : - BaseTest(), + CUDA_CPU_TestBase(), process(niter, gpublocks, gputhreads, /*verbose=*/false) { process.initProc("../../Cards/param_card.dat"); - - checkCuda( cudaFree( 0 ) ); // SLOW! - - // --- 0b. 
Allocate memory structures - // Memory structures for random numbers, momenta, matrix elements and weights on host and device - hstRnarray = hstMakeUnique( nRnarray ); // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR) - hstMomenta = hstMakeUnique( nMomenta ); // AOSOA[npagM][npar][np4][neppM] (previously was: lp) - hstIsGoodHel = hstMakeUnique( mgOnGpu::ncomb ); - hstWeights = hstMakeUnique( nWeights ); // (previously was: meHostPtr) - hstMEs = hstMakeUnique( nMEs ); // (previously was: meHostPtr) - - devRnarray = devMakeUnique( nRnarray ); // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR) - devMomenta = devMakeUnique( nMomenta ); // (previously was: allMomenta) - devIsGoodHel = devMakeUnique( mgOnGpu::ncomb ); - devWeights = devMakeUnique( nWeights ); // (previously was: meDevPtr) - devMEs = devMakeUnique( nMEs ); // (previously was: meDevPtr) } - virtual ~CUDATest() { } - void prepareRandomNumbers(int iiter) override { + + void prepareRandomNumbers(unsigned int iiter) override { std::vector rnd = CommonRandomNumbers::generate(nRnarray, 1337 + iiter); std::copy(rnd.begin(), rnd.end(), hstRnarray.get()); checkCuda( cudaMemcpy( devRnarray.get(), hstRnarray.get(), nRnarray * sizeof(decltype(devRnarray)::element_type), cudaMemcpyHostToDevice ) ); @@ -236,6 +160,7 @@ struct CUDATest : public BaseTest { checkCuda( cudaMemcpy( hstMomenta.get(), devMomenta.get(), nMomenta * sizeof(decltype(hstMomenta)::element_type), cudaMemcpyDeviceToHost ) ); } + void runSigmaKin(std::size_t iiter) override { // --- 0d. 
SGoodHel if ( iiter == 0 ) @@ -261,140 +186,30 @@ struct CUDATest : public BaseTest { checkCuda( cudaMemcpy( hstMEs.get(), devMEs.get(), nMEs * sizeof(decltype(hstMEs)::element_type), cudaMemcpyDeviceToHost ) ); } + + double getMomentum(std::size_t evtNo, unsigned int particle, unsigned int component) const override { + assert(component < mgOnGpu::np4); + assert(particle < mgOnGpu::npar); + const auto page = evtNo / mgOnGpu::neppM; // #eventpage in this iteration + const auto ieppM = evtNo % mgOnGpu::neppM; // #event in the current eventpage in this iteration + return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*mgOnGpu::neppM + particle * mgOnGpu::neppM*mgOnGpu::np4 + component * mgOnGpu::neppM + ieppM]; + }; + + double getMatrixElement(std::size_t evtNo) const override { + return hstMEs[evtNo]; + } }; #endif #ifdef __CUDACC__ -TEST_F(CUDATest, eemumu) +INSTANTIATE_TEST_SUITE_P(EP2_CUDA_GPU, MadgraphTestDouble, + testing::Values( [](){ return new CUDATest; } ) +); #else -TEST_F(CPUTest, eemumu) +INSTANTIATE_TEST_SUITE_P(EP2_CUDA_CPU, MadgraphTestDouble, + testing::Values([](){ return new CPUTest; }) +); #endif -{ - // Set to dump events: - constexpr bool dumpEvents = false; - const std::string dumpFileName = dumpEvents ? - std::string("dump_") + testing::UnitTest::GetInstance()->current_test_info()->test_suite_name() + "." + testing::UnitTest::GetInstance()->current_test_info()->name() + ".txt" : - ""; - const std::string refFileName = "dump_CPUTest.eemumu.txt"; - - const int neppR = mgOnGpu::neppR; // ASA layout: constant at compile-time - static_assert( gputhreads%neppR == 0, "ERROR! #threads/block should be a multiple of neppR" ); - - const int neppM = mgOnGpu::neppM; // ASA layout: constant at compile-time - static_assert( gputhreads%neppM == 0, "ERROR! #threads/block should be a multiple of neppM" ); - - using mgOnGpu::ntpbMAX; - static_assert( gputhreads <= ntpbMAX, "ERROR! 
#threads/block should be <= ntpbMAX" ); - - std::ofstream dumpFile; - if ( !dumpFileName.empty() ) - { - dumpFile.open(dumpFileName, std::ios::trunc); - } - std::map referenceData = readReferenceData(refFileName); - ASSERT_FALSE(HasFailure()); // It doesn't make any sense to continue if we couldn't read the reference file. - constexpr fptype energy = 1500; // historical default, Ecms = 1500 GeV = 1.5 TeV (above the Z peak) - - - // ************************************** - // *** START MAIN LOOP ON #ITERATIONS *** - // ************************************** - - for (unsigned int iiter = 0; iiter < niter; ++iiter) - { - prepareRandomNumbers(iiter); - - prepareMomenta(energy); - - runSigmaKin(iiter); - - // --- Run checks on all events produced in this iteration - for (std::size_t ievt = 0; ievt < nevt && !HasFailure(); ++ievt) - { - auto getMomentum = [&](std::size_t evtNo, int particle, int component) - { - assert(component < mgOnGpu::np4); - assert(particle < mgOnGpu::npar); - const auto page = evtNo / neppM; // #eventpage in this iteration - const auto ieppM = evtNo % neppM; // #event in the current eventpage in this iteration - return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*neppM + particle * neppM*mgOnGpu::np4 + component * neppM + ieppM]; - }; - auto dumpParticles = [&](std::ostream& stream, std::size_t evtNo, unsigned precision, bool dumpReference) - { - const auto width = precision + 8; - for (int ipar = 0; ipar < mgOnGpu::npar; ipar++) - { - // NB: 'setw' affects only the next field (of any type) - stream << std::scientific // fixed format: affects all floats (default precision: 6) - << std::setprecision(precision) - << std::setw(4) << ipar - << std::setw(width) << getMomentum(ievt, ipar, 0) - << std::setw(width) << getMomentum(ievt, ipar, 1) - << std::setw(width) << getMomentum(ievt, ipar, 2) - << std::setw(width) << getMomentum(ievt, ipar, 3) - << "\n"; - if (dumpReference) { - stream << "ref" << ipar; - if (ievt < referenceData[iiter].momenta.size()) 
{ - stream << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][0] - << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][1] - << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][2] - << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][3] - << "\n\n"; - } else { - stream << " --- No reference ---\n\n"; - } - } - stream << std::flush << std::defaultfloat; // default format: affects all floats - } - }; - - if (dumpFile.is_open()) { - dumpFile << "Event " << std::setw(8) << ievt << " " - << "Batch " << std::setw(4) << iiter << "\n"; - dumpParticles(dumpFile, ievt, 15, false); - // Dump matrix element - dumpFile << std::setw(4) << "ME" << std::scientific << std::setw(15+8) << hstMEs[ievt] << "\n" << std::endl << std::defaultfloat; - continue; - } - - // This trace will only be printed in case of failures: - std::stringstream eventTrace; - eventTrace << "In comparing event " << ievt << " from iteration " << iiter << "\n"; - dumpParticles(eventTrace, ievt, 15, true); - eventTrace << std::setw(4) << "ME" << std::scientific << std::setw(15+8) << hstMEs[ievt] << "\n" - << std::setw(4) << "r.ME" << std::scientific << std::setw(15+8) << referenceData[iiter].MEs[ievt] << std::endl << std::defaultfloat; - SCOPED_TRACE(eventTrace.str()); - - ASSERT_LT( ievt, referenceData[iiter].momenta.size() ) << "Don't have enough events in reference file #ref=" << referenceData[iiter].momenta.size(); - - - // Compare Momenta - const fptype toleranceMomenta = 200. 
* std::pow(10., -std::numeric_limits::digits10); - for (unsigned int ipar = 0; ipar < mgOnGpu::npar; ++ipar) { - std::stringstream momentumErrors; - for (unsigned int icomp = 0; icomp < mgOnGpu::np4; ++icomp) { - const double pMadg = getMomentum(ievt, ipar, icomp); - const double pOrig = referenceData[iiter].momenta[ievt][ipar][icomp]; - const double relDelta = fabs( (pMadg - pOrig)/pOrig ); - if (relDelta > toleranceMomenta) { - momentumErrors << std::setprecision(15) << std::scientific << "\nparticle " << ipar << "\tcomponent " << icomp - << "\n\t madGraph: " << std::setw(22) << pMadg - << "\n\t reference: " << std::setw(22) << pOrig - << "\n\t rel delta: " << std::setw(22) << relDelta << " exceeds tolerance of " << toleranceMomenta; - } - } - ASSERT_TRUE(momentumErrors.str().empty()) << momentumErrors.str(); - } - - // Compare ME: - const fptype toleranceMEs = 500. * std::pow(10., -std::numeric_limits::digits10); - EXPECT_NEAR(hstMEs[ievt], referenceData[iiter].MEs[ievt], toleranceMEs * referenceData[iiter].MEs[ievt]); - } - - - } -} From 041bdf1af14492e94dd0e4062e222f9a261d87c9 Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Tue, 1 Dec 2020 15:38:42 +0100 Subject: [PATCH 4/6] Implement epoch2 github action. 
--- .github/workflows/c-cpp.yml | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 66cbdf28ec..68cb15fe5e 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -12,7 +12,20 @@ jobs: defaults: run: working-directory: epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum - + steps: + - uses: actions/checkout@v2 + - name: make gtest + working-directory: tools + run: make + - name: make + run: make + - name: make check + run: make check + epoch2_eemumu: + runs-on: ubuntu-latest + defaults: + run: + working-directory: epoch2/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum steps: - uses: actions/checkout@v2 - name: make gtest From 33d908c2ad205b9e36c122ddc7140e9804411005 Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Wed, 16 Dec 2020 18:22:26 +0100 Subject: [PATCH 5/6] [ep2 cuda eemm] Port fixes in Makefile to epoch2. --- epoch2/cuda/ee_mumu/SubProcesses/Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/epoch2/cuda/ee_mumu/SubProcesses/Makefile b/epoch2/cuda/ee_mumu/SubProcesses/Makefile index 598dc30d2d..60a772656a 100644 --- a/epoch2/cuda/ee_mumu/SubProcesses/Makefile +++ b/epoch2/cuda/ee_mumu/SubProcesses/Makefile @@ -1,6 +1,6 @@ LIBDIR = ../../lib TOOLSDIR = ../../../../../tools/ -TESTDIR = ../../../../../test +TESTDIR = ../../../../../test/ INCFLAGS = -I. 
-I../../src -I$(TOOLSDIR) MODELLIB = model_sm OPTFLAGS = -O3 @@ -25,7 +25,8 @@ ifdef CUDA_HOME CUARCHFLAGS= -arch=compute_$(CUARCHNUM) CUINC = -I$(CUDA_HOME)/include/ CULIBFLAGS = -L$(CUDA_HOME)/lib64/ -lcuda -lcurand - CUFLAGS= $(OPTFLAGS) -std=c++14 $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math -lineinfo $(MGONGPU_CONFIG) + CUOPTFLAGS = -lineinfo + CUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) -std=c++14 $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math $(MGONGPU_CONFIG) cu_main = gcheck.exe cu_objects = gCPPProcess.o @@ -54,9 +55,8 @@ endif all: ../../src $(cu_main) $(cxx_main) runTest.exe -debug: OPTFLAGS = -g -O0 -DDEBUG2 -debug: CUFLAGS := $(filter-out -lineinfo,$(CUFLAGS)) -debug: CUFLAGS += -G +debug: OPTFLAGS = -g -O0 -DDEBUG2 +debug: CUOPTFLAGS = -G debug: MAKEDEBUG := debug debug: all @@ -105,7 +105,7 @@ check: runTest.exe .PHONY: clean clean: - cd ../../src && make clean + make -C ../../src clean rm -f *.o *.exe distclean: clean @@ -125,7 +125,7 @@ test: force info: ifdef CUDA_HOME $(NVCC) --version - echo "" + @echo "" endif $(CXX) --version From 2136265faf37a95b83feede882c4b04546fe39c0 Mon Sep 17 00:00:00 2001 From: Stephan Hageboeck Date: Wed, 16 Dec 2020 18:32:24 +0100 Subject: [PATCH 6/6] [CI] Add CI step for debug builds. 
--- .github/workflows/c-cpp.yml | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 68cb15fe5e..d3373d8ee6 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -14,13 +14,21 @@ jobs: working-directory: epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum steps: - uses: actions/checkout@v2 - - name: make gtest - working-directory: tools - run: make - name: make run: make - name: make check run: make check + epoch1_eemumu_debug: + runs-on: ubuntu-latest + defaults: + run: + working-directory: epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum + steps: + - uses: actions/checkout@v2 + - name: make + run: make debug + - name: make check + run: make check epoch2_eemumu: runs-on: ubuntu-latest defaults: @@ -28,10 +36,18 @@ jobs: working-directory: epoch2/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum steps: - uses: actions/checkout@v2 - - name: make gtest - working-directory: tools - run: make - name: make run: make - name: make check run: make check + epoch2_eemumu_debug: + runs-on: ubuntu-latest + defaults: + run: + working-directory: epoch2/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum + steps: + - uses: actions/checkout@v2 + - name: make + run: make debug + - name: make check + run: make check