From 8145b0e8ccfe41f46662e31c9cb5fce64ec130b9 Mon Sep 17 00:00:00 2001
From: Stephan Hageboeck <stephan.hageboeck@cern.ch>
Date: Fri, 11 Dec 2020 13:57:29 +0100
Subject: [PATCH 1/6] [tests eemm] Move common test code into test/eemumu.

Refactor testing code, make it independent of CUDA and Epoch1.
To run their own test, each epoch/abstraction implements a test driver
interface, and instantiates a test suite defined in test/src.

In more detail:
- Move reference file for process 1 in eemumu to test/.
- Split framework-specific parts and generic test code in epoch1.
- Create base test class as general interface for madgraph runs.
- Move googletest and common testing code into test/.
- Instantiate epoch1 cuda test for eemumu.
---
 .gitignore                                    |   2 +-
 epoch1/cuda/ee_mumu/SubProcesses/Makefile     |  26 +-
 .../P1_Sigma_sm_epem_mupmum/runTest.cc        | 341 ++++--------------
 test/Makefile                                 |  22 ++
 .../eemumu}/dump_CPUTest.eemumu.txt           |   1 +
 test/include/MadgraphTest.h                   | 110 ++++++
 test/src/MadgraphTest.cc                      | 159 ++++++++
 tools/Makefile                                |  16 -
 8 files changed, 384 insertions(+), 293 deletions(-)
 create mode 100644 test/Makefile
 rename {epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum => test/eemumu}/dump_CPUTest.eemumu.txt (99%)
 create mode 100644 test/include/MadgraphTest.h
 create mode 100644 test/src/MadgraphTest.cc

diff --git a/.gitignore b/.gitignore
index b2c6044a89..9792d863d5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,4 +18,4 @@ profile_*
 ***/perf/params*.txt
 
 # googletest
-tools/googletest
+test/googletest
diff --git a/epoch1/cuda/ee_mumu/SubProcesses/Makefile b/epoch1/cuda/ee_mumu/SubProcesses/Makefile
index 63392ec925..9abb6cbc84 100644
--- a/epoch1/cuda/ee_mumu/SubProcesses/Makefile
+++ b/epoch1/cuda/ee_mumu/SubProcesses/Makefile
@@ -1,5 +1,6 @@
 LIBDIR   = ../../lib
 TOOLSDIR = ../../../../../tools
+TESTDIR  = ../../../../../test
 INCFLAGS = -I. -I../../src -I$(TOOLSDIR)
 MODELLIB = model_sm
 OPTFLAGS = -O3
@@ -47,7 +48,7 @@ else
   endif
 endif
 
-GTESTLIBDIR = $(TOOLSDIR)/googletest/build/lib/
+GTESTLIBDIR = $(TESTDIR)/googletest/build/lib/
 GTESTLIBS   = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a
 MAKEDEBUG=
 
@@ -88,20 +89,23 @@ $(cxx_main): check.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects)
 
 runTest.o:   $(GTESTLIBS)
 runTest.exe: $(GTESTLIBS)
-runTest.exe: INCFLAGS += -I$(TOOLSDIR)/googletest/googletest/include/
+runTest.exe: INCFLAGS += -I$(TESTDIR)/googletest/googletest/include/
+runTest.exe: INCFLAGS += -I$(TESTDIR)/include/
 runTest.exe: LIBFLAGS += -L$(GTESTLIBDIR)/ -lgtest -lgtest_main
+runTest.exe: runTest.o $(TESTDIR)/src/MadgraphTest.o $(TESTDIR)/include/*.h
+runTest.exe: cxx_objects += runTest.o $(TESTDIR)/src/MadgraphTest.o
+runTest.exe: cu_objects  += runTest_cu.o
 ifeq ($(NVCC),)
-runTest.exe: runTest.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS)
-	$(CXX) -o $@ $(cxx_objects) runTest.o $(CPPFLAGS) $(CXXFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS)
+runTest.exe: $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS)
+	$(CXX) -o $@ $(cxx_objects) $(CPPFLAGS) $(CXXFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS)
 else
-runTest.exe: runTest.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS)
-	ln -sf runTest.cc runTest_tmp.cu
-	$(NVCC) -o $@ $(cxx_objects) runTest.o $(cu_objects) runTest_tmp.cu $(CPPFLAGS) $(CUFLAGS) -ldl $(LIBFLAGS) $(CULIBFLAGS) -lcuda -lgomp
-	unlink runTest_tmp.cu
+runTest.exe runTest_cu.o &: runTest.cc $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(cu_objects) $(GTESTLIBS)
+	$(NVCC) -o runTest_cu.o -c -x cu runTest.cc $(CPPFLAGS) $(CUFLAGS)
+	$(NVCC) -o runTest.exe $(cxx_objects) $(cu_objects) $(CPPFLAGS) $(CUFLAGS) -ldl $(LIBFLAGS) $(CULIBFLAGS) -lcuda -lgomp
 endif
 
 $(GTESTLIBS):
-	$(MAKE) -C $(TOOLSDIR)
+	$(MAKE) -C $(TESTDIR)
 
 check: runTest.exe
 	./runTest.exe
@@ -112,6 +116,10 @@ clean:
 	make -C ../../src clean
 	rm -f *.o *.exe
 
+distclean: clean
+	$(MAKE) -C $(TOOLSDIR) clean
+	$(MAKE) -C $(TESTDIR) clean
+
 memcheck: $(cu_main)
 	/usr/local/cuda/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes ./gcheck.exe 2 32 2
 
diff --git a/epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/runTest.cc b/epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/runTest.cc
index 2aa32994a3..5bbf1b64e4 100644
--- a/epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/runTest.cc
+++ b/epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/runTest.cc
@@ -1,6 +1,8 @@
 #include "mgOnGpuConfig.h"
 #include "mgOnGpuTypes.h"
 
+#include "MadgraphTest.h"
+
 #include "CommonRandomNumbers.h"
 #include "CPPProcess.h"
 #include "Memory.h"
@@ -10,111 +12,38 @@
 #include "rambo.h"
 #endif
 
-#include <algorithm>
-#include <array>
-#include <cmath>
-#include <cstring>
-#include <fstream>
-#include <iomanip>
-#include <iostream>
-#include <numeric>
-#include <string>
-#include <type_traits>
-#include <unistd.h>
-
-#include <gtest/gtest.h>
 
 
-struct ReferenceData {
-  std::vector< std::array<std::array<fptype,mgOnGpu::np4>, mgOnGpu::npar> > momenta;
-  std::vector<fptype> MEs;
-};
+struct CUDA_CPU_TestBase : public TestDriverBase<double> {
+  static_assert( gputhreads%mgOnGpu::neppR == 0, "ERROR! #threads/block should be a multiple of neppR" );
+  static_assert( gputhreads%mgOnGpu::neppM == 0, "ERROR! #threads/block should be a multiple of neppM" );
+  static_assert( gputhreads <= mgOnGpu::ntpbMAX, "ERROR! #threads/block should be <= ntpbMAX" );
 
-std::map<unsigned int, ReferenceData> readReferenceData(const std::string& refFileName);
+  const std::size_t nRnarray{ mgOnGpu::np4 * mgOnGpu::nparf * nevt }; // (NB: ASA layout with nevt=npagR*neppR events per iteration)
+  const std::size_t nMomenta{ mgOnGpu::np4 * mgOnGpu::npar  * nevt }; // (NB: nevt=npagM*neppM for ASA layouts)
+  const std::size_t nWeights{ nevt };
+  const std::size_t nMEs    { nevt };
 
-#ifndef __CUDACC__
-std::map<unsigned int, ReferenceData> readReferenceData(const std::string& refFileName)
-{
-  std::ifstream referenceFile(refFileName.c_str());
-  EXPECT_TRUE(referenceFile.is_open()) << refFileName;
-  std::map<unsigned int, ReferenceData> referenceData;
-  unsigned int evtNo;
-  unsigned int batchNo;
-
-  for (std::string line; std::getline(referenceFile, line); )
+  CUDA_CPU_TestBase() :
+  TestDriverBase()
   {
-    std::stringstream lineStr(line);
-    if (line.empty())
-    {
-      continue;
-    }
-    else if (line.find("Event") != std::string::npos)
-    {
-      std::string dummy;
-      lineStr >> dummy >> evtNo >> dummy >> batchNo;
-    }
-    else if (line.find("ME") != std::string::npos)
-    {
-      if (evtNo <= referenceData[batchNo].MEs.size())
-        referenceData[batchNo].MEs.resize(evtNo + 1);
-
-      std::string dummy;
-      lineStr >> dummy >> referenceData[batchNo].MEs[evtNo];
-    }
-    else
-    {
-      unsigned int particleIndex;
-      lineStr >> particleIndex;
-
-      if (evtNo <= referenceData[batchNo].momenta.size())
-        referenceData[batchNo].momenta.resize(evtNo + 1);
-
-      for (unsigned int i=0; i < mgOnGpu::np4; ++i) {
-        EXPECT_TRUE(lineStr.good());
-        lineStr >> referenceData[batchNo].momenta[evtNo][particleIndex][i];
-      }
-      EXPECT_TRUE(lineStr.eof());
-    }
+    TestDriverBase::nparticle = mgOnGpu::npar;
   }
-  return referenceData;
-}
-#endif
 
-class BaseTest : public ::testing::Test {
- protected:
-
-  static constexpr unsigned niter = 2;
-  static constexpr unsigned gpublocks = 2;
-  static constexpr unsigned gputhreads = 128;
-  static constexpr std::size_t nevt = gpublocks * gputhreads;
-
-  const std::size_t nRnarray; // (NB: ASA layout with nevt=npagR*neppR events per iteration)
-  const std::size_t nMomenta; // (NB: nevt=npagM*neppM for ASA layouts)
-  const std::size_t nWeights;
-  const std::size_t nMEs;
-
-  BaseTest() :
-    nRnarray{ mgOnGpu::np4 * mgOnGpu::nparf * nevt }, // (NB: ASA layout with nevt=npagR*neppR events per iteration)
-    nMomenta{ mgOnGpu::np4 * mgOnGpu::npar  * nevt },// (NB: nevt=npagM*neppM for ASA layouts)
-    nWeights{ nevt },
-    nMEs    { nevt }
-  { }
-
-  virtual void prepareRandomNumbers(int iiter) = 0;
-  virtual void prepareMomenta(fptype energy) = 0;
-  virtual void runSigmaKin(std::size_t iiter) = 0;
 };
 
 
 #ifndef __CUDACC__
-struct CPUTest : public BaseTest {
+struct CPUTest : public CUDA_CPU_TestBase {
   Proc::CPPProcess process;
 
-  unique_ptr_host<fptype> hstRnarray;
-  unique_ptr_host<fptype> hstMomenta;
-  unique_ptr_host<bool  > hstIsGoodHel;
-  unique_ptr_host<fptype> hstWeights;
-  unique_ptr_host<fptype> hstMEs;
+  // --- 0b. Allocate memory structures
+  // Memory structures for random numbers, momenta, matrix elements and weights on host and device
+  unique_ptr_host<fptype> hstRnarray  { hstMakeUnique<fptype>( nRnarray ) }; // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR)
+  unique_ptr_host<fptype> hstMomenta  { hstMakeUnique<fptype>( nMomenta ) }; // AOSOA[npagM][npar][np4][neppM] (previously was: lp)
+  unique_ptr_host<bool  > hstIsGoodHel{ hstMakeUnique<bool  >( mgOnGpu::ncomb ) };
+  unique_ptr_host<fptype> hstWeights  { hstMakeUnique<fptype>( nWeights ) };
+  unique_ptr_host<fptype> hstMEs      { hstMakeUnique<fptype>( nMEs ) };
 
   // Create a process object
   // Read param_card and set parameters
@@ -122,23 +51,15 @@ struct CPUTest : public BaseTest {
   // The CPPProcess constructor has side effects on the globals Proc::cHel, which is needed in ME calculations.
   // Don't remove!
   CPUTest() :
-  BaseTest(),
+  CUDA_CPU_TestBase(),
   process(niter, gpublocks, gputhreads, /*verbose=*/false)
   {
     process.initProc("../../Cards/param_card.dat");
-
-    // --- 0b. Allocate memory structures
-    // Memory structures for random numbers, momenta, matrix elements and weights on host and device
-    hstRnarray   = hstMakeUnique<fptype>( nRnarray ); // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR)
-    hstMomenta   = hstMakeUnique<fptype>( nMomenta ); // AOSOA[npagM][npar][np4][neppM] (previously was: lp)
-    hstIsGoodHel = hstMakeUnique<bool  >( mgOnGpu::ncomb );
-    hstWeights   = hstMakeUnique<fptype>( nWeights ); // (previously was: meHostPtr)
-    hstMEs       = hstMakeUnique<fptype>( nMEs ); // (previously was: meHostPtr)
   }
   virtual ~CPUTest() { }
 
 
-  void prepareRandomNumbers(int iiter) override {
+  void prepareRandomNumbers(unsigned int iiter) override {
     std::vector<fptype> rnd = CommonRandomNumbers::generate<fptype>(nRnarray, 1337 + iiter);
     std::copy(rnd.begin(), rnd.end(), hstRnarray.get());
   }
@@ -158,11 +79,26 @@ struct CPUTest : public BaseTest {
     // --- 3a. SigmaKin
     Proc::sigmaKin(hstMomenta.get(), hstMEs.get(), nevt);
   }
+
+
+
+  double getMomentum(std::size_t evtNo, unsigned int particle, unsigned int component) const override {
+    assert(component < mgOnGpu::np4);
+    assert(particle  < mgOnGpu::npar);
+    const auto page  = evtNo / mgOnGpu::neppM; // #eventpage in this iteration
+    const auto ieppM = evtNo % mgOnGpu::neppM; // #event in the current eventpage in this iteration
+    return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*mgOnGpu::neppM + particle * mgOnGpu::neppM*mgOnGpu::np4 + component * mgOnGpu::neppM + ieppM];
+  };
+
+  double getMatrixElement(std::size_t evtNo) const override {
+    return hstMEs[evtNo];
+  }
 };
 #endif
 
+
 #ifdef __CUDACC__
-struct CUDATest : public BaseTest {
+struct CUDATest : public CUDA_CPU_TestBase {
   // Reset the device when our test goes out of scope. Note that this should happen after
   // the frees, i.e. be declared before the pointers to device memory.
   struct DeviceReset {
@@ -171,17 +107,20 @@ struct CUDATest : public BaseTest {
     }
   } deviceResetter;
 
-  unique_ptr_host<fptype> hstRnarray;
-  unique_ptr_host<fptype> hstMomenta;
-  unique_ptr_host<bool  > hstIsGoodHel;
-  unique_ptr_host<fptype> hstWeights;
-  unique_ptr_host<fptype> hstMEs;
+  // --- 0b. Allocate memory structures
+  // Memory structures for random numbers, momenta, matrix elements and weights on host and device
+  unique_ptr_host<fptype> hstRnarray  { hstMakeUnique<fptype>( nRnarray ) }; // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR)
+  unique_ptr_host<fptype> hstMomenta  { hstMakeUnique<fptype>( nMomenta ) }; // AOSOA[npagM][npar][np4][neppM] (previously was: lp)
+  unique_ptr_host<bool  > hstIsGoodHel{ hstMakeUnique<bool  >( mgOnGpu::ncomb ) };
+  unique_ptr_host<fptype> hstWeights  { hstMakeUnique<fptype>( nWeights ) };
+  unique_ptr_host<fptype> hstMEs      { hstMakeUnique<fptype>( nMEs ) };
 
-  unique_ptr_dev<fptype> devRnarray;
-  unique_ptr_dev<fptype> devMomenta;
-  unique_ptr_dev<bool  > devIsGoodHel;
-  unique_ptr_dev<fptype> devWeights;
-  unique_ptr_dev<fptype> devMEs;
+
+  unique_ptr_dev<fptype> devRnarray  { devMakeUnique<fptype>( nRnarray ) }; // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR)
+  unique_ptr_dev<fptype> devMomenta  { devMakeUnique<fptype>( nMomenta ) }; // (previously was: allMomenta)
+  unique_ptr_dev<bool  > devIsGoodHel{ devMakeUnique<bool  >( mgOnGpu::ncomb ) };
+  unique_ptr_dev<fptype> devWeights  { devMakeUnique<fptype>( nWeights ) }; // (previously was: meDevPtr)
+  unique_ptr_dev<fptype> devMEs      { devMakeUnique<fptype>( nMEs )     }; // (previously was: meDevPtr)
 
   gProc::CPPProcess process;
 
@@ -191,31 +130,15 @@ struct CUDATest : public BaseTest {
   // The CPPProcess constructor has side effects on the globals Proc::cHel, which is needed in ME calculations.
   // Don't remove!
   CUDATest() :
-  BaseTest(),
+  CUDA_CPU_TestBase(),
   process(niter, gpublocks, gputhreads, /*verbose=*/false)
   {
     process.initProc("../../Cards/param_card.dat");
-
-    checkCuda( cudaFree( 0 ) ); // SLOW!
-
-    // --- 0b. Allocate memory structures
-    // Memory structures for random numbers, momenta, matrix elements and weights on host and device
-    hstRnarray   = hstMakeUnique<fptype>( nRnarray ); // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR)
-    hstMomenta   = hstMakeUnique<fptype>( nMomenta ); // AOSOA[npagM][npar][np4][neppM] (previously was: lp)
-    hstIsGoodHel = hstMakeUnique<bool  >( mgOnGpu::ncomb );
-    hstWeights   = hstMakeUnique<fptype>( nWeights ); // (previously was: meHostPtr)
-    hstMEs       = hstMakeUnique<fptype>( nMEs ); // (previously was: meHostPtr)
-
-    devRnarray   = devMakeUnique<fptype>( nRnarray ); // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR)
-    devMomenta   = devMakeUnique<fptype>( nMomenta ); // (previously was: allMomenta)
-    devIsGoodHel = devMakeUnique<bool  >( mgOnGpu::ncomb );
-    devWeights   = devMakeUnique<fptype>( nWeights ); // (previously was: meDevPtr)
-    devMEs       = devMakeUnique<fptype>( nMEs ); // (previously was: meDevPtr)
   }
-
   virtual ~CUDATest() { }
 
-  void prepareRandomNumbers(int iiter) override {
+
+  void prepareRandomNumbers(unsigned int iiter) override {
     std::vector<fptype> rnd = CommonRandomNumbers::generate<fptype>(nRnarray, 1337 + iiter);
     std::copy(rnd.begin(), rnd.end(), hstRnarray.get());
     checkCuda( cudaMemcpy( devRnarray.get(), hstRnarray.get(), nRnarray * sizeof(decltype(devRnarray)::element_type), cudaMemcpyHostToDevice ) );
@@ -237,6 +160,7 @@ struct CUDATest : public BaseTest {
     checkCuda( cudaMemcpy( hstMomenta.get(), devMomenta.get(), nMomenta * sizeof(decltype(hstMomenta)::element_type), cudaMemcpyDeviceToHost ) );
   }
 
+
   void runSigmaKin(std::size_t iiter) override {
     // --- 0d. SGoodHel
     if ( iiter == 0 )
@@ -262,147 +186,30 @@ struct CUDATest : public BaseTest {
     checkCuda( cudaMemcpy( hstMEs.get(), devMEs.get(), nMEs * sizeof(decltype(hstMEs)::element_type), cudaMemcpyDeviceToHost ) );
   }
 
+
+  double getMomentum(std::size_t evtNo, unsigned int particle, unsigned int component) const override {
+    assert(component < mgOnGpu::np4);
+    assert(particle  < mgOnGpu::npar);
+    const auto page  = evtNo / mgOnGpu::neppM; // #eventpage in this iteration
+    const auto ieppM = evtNo % mgOnGpu::neppM; // #event in the current eventpage in this iteration
+    return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*mgOnGpu::neppM + particle * mgOnGpu::neppM*mgOnGpu::np4 + component * mgOnGpu::neppM + ieppM];
+  };
+
+  double getMatrixElement(std::size_t evtNo) const override {
+    return hstMEs[evtNo];
+  }
 };
 #endif
 
 
 #ifdef __CUDACC__
-TEST_F(CUDATest, eemumu)
+INSTANTIATE_TEST_SUITE_P(EP1_CUDA_GPU, MadgraphTestDouble,
+    testing::Values( [](){ return new CUDATest; } )
+);
 #else
-TEST_F(CPUTest, eemumu)
+INSTANTIATE_TEST_SUITE_P(EP1_CUDA_CPU, MadgraphTestDouble,
+    testing::Values([](){ return new CPUTest; })
+);
 #endif
-{
-  // Set to dump events:
-  constexpr bool dumpEvents = false;
-  constexpr fptype toleranceMomenta = std::is_same<fptype, double>::value ? 5.E-12 : 1.E-5;
-  constexpr fptype toleranceMEs     = std::is_same<fptype, double>::value ? 1.E-7  : 1.E-5;
-
-  const std::string dumpFileName = std::string("dump_")
-      + testing::UnitTest::GetInstance()->current_test_info()->test_suite_name()
-      + "."
-      + testing::UnitTest::GetInstance()->current_test_info()->name()
-      + ".txt";
-  const std::string refFileName = "dump_CPUTest.eemumu.txt";
-
-  const int neppR = mgOnGpu::neppR; // ASA layout: constant at compile-time
-  static_assert( gputhreads%neppR == 0, "ERROR! #threads/block should be a multiple of neppR" );
-
-  const int neppM = mgOnGpu::neppM; // ASA layout: constant at compile-time
-  static_assert( gputhreads%neppM == 0, "ERROR! #threads/block should be a multiple of neppM" );
-
-  using mgOnGpu::ntpbMAX;
-  static_assert( gputhreads <= ntpbMAX, "ERROR! #threads/block should be <= ntpbMAX" );
-
-  std::ofstream dumpFile;
-  if ( dumpEvents )
-  {
-    dumpFile.open(dumpFileName, std::ios::trunc);
-  }
-
-  std::map<unsigned int, ReferenceData> referenceData = readReferenceData(refFileName);
-  ASSERT_FALSE(HasFailure()); // It doesn't make any sense to continue if we couldn't read the reference file.
 
-  constexpr fptype energy = 1500; // historical default, Ecms = 1500 GeV = 1.5 TeV (above the Z peak)
 
-
-  // **************************************
-  // *** START MAIN LOOP ON #ITERATIONS ***
-  // **************************************
-
-  for (unsigned int iiter = 0; iiter < niter; ++iiter)
-  {
-    prepareRandomNumbers(iiter);
-
-    prepareMomenta(energy);
-
-    runSigmaKin(iiter);
-
-    // --- Run checks on all events produced in this iteration
-    for (std::size_t ievt = 0; ievt < nevt && !HasFailure(); ++ievt)
-    {
-      auto getMomentum = [&](std::size_t evtNo, int particle, int component)
-      {
-        assert(component < mgOnGpu::np4);
-        assert(particle  < mgOnGpu::npar);
-        const auto page  = evtNo / neppM; // #eventpage in this iteration
-        const auto ieppM = evtNo % neppM; // #event in the current eventpage in this iteration
-        return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*neppM + particle * neppM*mgOnGpu::np4 + component * neppM + ieppM];
-      };
-      auto dumpParticles = [&](std::ostream& stream, unsigned precision, bool dumpReference)
-      {
-        const auto width = precision + 8;
-        for (int ipar = 0; ipar < mgOnGpu::npar; ipar++)
-        {
-          // NB: 'setw' affects only the next field (of any type)
-          stream << std::scientific // fixed format: affects all floats (default precision: 6)
-                 << std::setprecision(precision)
-                 << std::setw(4) << ipar
-                 << std::setw(width) << getMomentum(ievt, ipar, 0)
-                 << std::setw(width) << getMomentum(ievt, ipar, 1)
-                 << std::setw(width) << getMomentum(ievt, ipar, 2)
-                 << std::setw(width) << getMomentum(ievt, ipar, 3)
-                 << "\n";
-          if (dumpReference) {
-            stream << "ref" << ipar;
-            if (ievt < referenceData[iiter].momenta.size()) {
-              stream << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][0]
-                     << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][1]
-                     << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][2]
-                     << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][3]
-                     << "\n\n";
-            } else {
-              stream << "  --- No reference ---\n\n";
-            }
-          }
-          stream << std::flush << std::defaultfloat; // default format: affects all floats
-        }
-      };
-
-      if (dumpEvents) {
-        ASSERT_TRUE(dumpFile.is_open());
-        dumpFile << "Event " << std::setw(8) << ievt << "  "
-                 << "Batch " << std::setw(4) << iiter << "\n";
-        dumpParticles(dumpFile, 15, false);
-        // Dump matrix element
-        dumpFile << std::setw(4) << "ME" << std::scientific << std::setw(15+8) << hstMEs[ievt] << "\n" << std::endl << std::defaultfloat;
-        continue;
-      }
-
-      ASSERT_GT(referenceData.size(), iiter) << "Don't have enough reference data for iteration " << iiter << ". Ref file:" << refFileName;
-      ASSERT_GT(referenceData[iiter].MEs.size(), ievt) << "Don't have enough reference events for iteration " << iiter << " event " << ievt << ".\nRef file: " << refFileName;
-
-      // This trace will only be printed in case of failures:
-      std::stringstream eventTrace;
-      eventTrace << "In comparing event " << ievt << " from iteration " << iiter << "\n";
-      dumpParticles(eventTrace, 15, true);
-      eventTrace << std::setw(4) << "ME"   << std::scientific << std::setw(15+8) << hstMEs[ievt] << "\n"
-                 << std::setw(4) << "r.ME" << std::scientific << std::setw(15+8) << referenceData[iiter].MEs[ievt] << std::endl << std::defaultfloat;
-      SCOPED_TRACE(eventTrace.str());
-
-      ASSERT_LT( ievt, referenceData[iiter].momenta.size() ) << "Don't have enough events in reference file #ref=" << referenceData[iiter].momenta.size();
-
-
-      // Compare Momenta
-      for (unsigned int ipar = 0; ipar < mgOnGpu::npar; ++ipar) {
-        std::stringstream momentumErrors;
-        for (unsigned int icomp = 0; icomp < mgOnGpu::np4; ++icomp) {
-          const double pMadg = getMomentum(ievt, ipar, icomp);
-          const double pOrig = referenceData[iiter].momenta[ievt][ipar][icomp];
-          const double relDelta = fabs( (pMadg - pOrig)/pOrig );
-          if (relDelta > toleranceMomenta) {
-            momentumErrors << std::setprecision(15) << std::scientific << "\nparticle " << ipar << "\tcomponent " << icomp
-                << "\n\t madGraph:  " << std::setw(22) << pMadg
-                << "\n\t reference: " << std::setw(22) << pOrig
-                << "\n\t rel delta: " << std::setw(22) << relDelta << " exceeds tolerance of " << toleranceMomenta;
-          }
-        }
-        ASSERT_TRUE(momentumErrors.str().empty()) << momentumErrors.str();
-      }
-
-      // Compare ME:
-      EXPECT_NEAR(hstMEs[ievt], referenceData[iiter].MEs[ievt], toleranceMEs * referenceData[iiter].MEs[ievt]);
-    }
-
-
-  }
-}
diff --git a/test/Makefile b/test/Makefile
new file mode 100644
index 0000000000..d65fd1f8ed
--- /dev/null
+++ b/test/Makefile
@@ -0,0 +1,22 @@
+CXXFLAGS += -Igoogletest/googletest/include/ -std=c++11
+# Default goal: build googletest and the shared MadgraphTest object.
+all: gtest src/MadgraphTest.o
+
+%.o: %.cc include/*.h | googletest
+	$(CXX) $< -c -o $@ $(CPPFLAGS) $(CXXFLAGS) -Iinclude/
+# None of these goals names a file; 'gtest' always re-runs the googletest build.
+.PHONY: all gtest clean
+
+googletest:
+	git clone https://github.com/google/googletest.git -b release-1.10.0 googletest
+
+googletest/build: googletest
+	mkdir -p $@
+	cd googletest/build && cmake -DBUILD_GMOCK=OFF ../
+
+gtest: googletest/build
+	$(MAKE) -C googletest/build
+# Use -f so that cleaning an already clean tree does not fail.
+clean:
+	rm -rf googletest
+	rm -f src/*.o
diff --git a/epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/dump_CPUTest.eemumu.txt b/test/eemumu/dump_CPUTest.eemumu.txt
similarity index 99%
rename from epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/dump_CPUTest.eemumu.txt
rename to test/eemumu/dump_CPUTest.eemumu.txt
index db1a950a3d..b1e601a35d 100644
--- a/epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/dump_CPUTest.eemumu.txt
+++ b/test/eemumu/dump_CPUTest.eemumu.txt
@@ -1,3 +1,4 @@
+# Dumped in epoch1 cuda
 Event        0  Batch    0
    0  7.500000000000000e+02  0.000000000000000e+00  0.000000000000000e+00  7.500000000000000e+02
    1  7.500000000000000e+02  0.000000000000000e+00  0.000000000000000e+00 -7.500000000000000e+02
diff --git a/test/include/MadgraphTest.h b/test/include/MadgraphTest.h
new file mode 100644
index 0000000000..bc2a634b4e
--- /dev/null
+++ b/test/include/MadgraphTest.h
@@ -0,0 +1,110 @@
+// Stephan Hageboeck, CERN, 12/2020
+#ifndef MADGRAPHTEST_H_
+#define MADGRAPHTEST_H_
+
+#include <array>
+#include <iomanip>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+struct ReferenceData {
+  std::vector< std::vector<std::array<double, 4>> > momenta;
+  std::vector<double> MEs;
+};
+
+std::map<unsigned int, ReferenceData> readReferenceData(const std::string& refFileName);
+
+/**
+ * Test driver providing a common interface for testing different implementations.
+ * Users need to implement:
+ * - Functions to retrieve matrix element and 4-momenta. These are used in the tests.
+ * - Driver functions that run the madgraph workflow.
+ *
+ * Usage:
+ * ```
+ * struct TestImplementation : public TestDriverBase<double> {
+ *   <implement functions>
+ * };
+ *
+ * INSTANTIATE_TEST_SUITE_P(<prefix>, MadgraphTestDouble,
+ *     testing::Values( [](){ return new TestImplementation; } ) );
+ * ```
+ */
+template<typename FpType>
+class TestDriverBase {
+ public:
+  using fptype = FpType;
+  unsigned int nparticle = 4;
+  static constexpr unsigned int np4 = 4;
+  static constexpr unsigned int niter = 2;
+  static constexpr unsigned int gpublocks = 2;
+  static constexpr unsigned int gputhreads = 128;
+  static constexpr unsigned int nevt = gpublocks * gputhreads;
+
+  TestDriverBase() { }
+  virtual ~TestDriverBase() { }
+
+
+  // ------------------------------------------------
+  // Interface for retrieving info from madgraph
+  // ------------------------------------------------
+  virtual double getMomentum(std::size_t evtNo, unsigned int particleNo, unsigned int component) const = 0;
+  virtual double getMatrixElement(std::size_t evtNo) const = 0;
+
+
+  // ------------------------------------------------
+  // Interface for steering madgraph run
+  // ------------------------------------------------
+  virtual void prepareRandomNumbers(unsigned int iiter) = 0;
+  virtual void prepareMomenta(fptype energy) = 0;
+  virtual void runSigmaKin(std::size_t iiter) = 0;
+
+  // Print the requested event into the stream. If the reference data has enough events, it will be printed as well.
+  void dumpParticles(std::ostream& stream, std::size_t ievt, unsigned int numParticles, unsigned int nDigit, const ReferenceData& referenceData)
+  {
+    const auto width = nDigit + 8;
+    for (unsigned int ipar = 0; ipar < numParticles; ipar++)
+    {
+      // NB: 'setw' affects only the next field (of any type)
+      stream << std::scientific // fixed format: affects all floats (default nDigit: 6)
+             << std::setprecision(nDigit)
+             << std::setw(4) << ipar
+             << std::setw(width) << getMomentum(ievt, ipar, 0)
+             << std::setw(width) << getMomentum(ievt, ipar, 1)
+             << std::setw(width) << getMomentum(ievt, ipar, 2)
+             << std::setw(width) << getMomentum(ievt, ipar, 3)
+             << "\n";
+      if (ievt < referenceData.momenta.size()) {
+        stream << "ref" << ipar;
+        stream << std::setw(width) << referenceData.momenta[ievt][ipar][0]
+            << std::setw(width) << referenceData.momenta[ievt][ipar][1]
+            << std::setw(width) << referenceData.momenta[ievt][ipar][2]
+            << std::setw(width) << referenceData.momenta[ievt][ipar][3]
+            << "\n\n";
+      }
+      stream << std::flush << std::defaultfloat; // default format: affects all floats
+    }
+  };
+};
+
+
+// Test class that's using the driver to run the test(s) below.
+class MadgraphTestDouble : public testing::TestWithParam<std::function<TestDriverBase<double>*()>> {
+protected:
+  using fptype = double;
+  using TestDriver_t = TestDriverBase<fptype>;
+  std::unique_ptr<TestDriverBase<fptype>> testDriver;
+
+
+public:
+  MadgraphTestDouble() :
+    TestWithParam(),
+    testDriver{ GetParam()() }
+  { }
+};
+
+
+
+#endif /* MADGRAPHTEST_H_ */
diff --git a/test/src/MadgraphTest.cc b/test/src/MadgraphTest.cc
new file mode 100644
index 0000000000..4c39b042e0
--- /dev/null
+++ b/test/src/MadgraphTest.cc
@@ -0,0 +1,159 @@
+/*
+ * MadgraphTest.cc
+ *
+ *  Created on: 11.12.2020
+ *      Author: shageboeck
+ */
+
+#include "MadgraphTest.h"
+
+#include <cmath>
+#include <fstream>
+#include <sstream>
+
+std::map<unsigned int, ReferenceData> readReferenceData(const std::string& refFileName)
+{
+  std::ifstream referenceFile(refFileName.c_str());
+  EXPECT_TRUE(referenceFile.is_open()) << refFileName;
+  std::map<unsigned int, ReferenceData> referenceData;
+  unsigned int evtNo = 0;
+  unsigned int batchNo = 0;
+  // Line kinds: '#' comment, "Event <n> Batch <m>" header, "ME <value>", or "<particle> <four components>".
+  for (std::string line; std::getline(referenceFile, line); )
+  {
+    std::stringstream lineStr(line);
+    if (line.empty() || line[0] == '#')
+    {
+      continue;
+    }
+    else if (line.find("Event") != std::string::npos)
+    {
+      std::string dummy;
+      lineStr >> dummy >> evtNo >> dummy >> batchNo;
+    }
+    else if (line.find("ME") != std::string::npos)
+    {
+      if (evtNo >= referenceData[batchNo].MEs.size())
+        referenceData[batchNo].MEs.resize(evtNo + 1);
+
+      std::string dummy;
+      lineStr >> dummy >> referenceData[batchNo].MEs[evtNo];
+    }
+    else
+    {
+      unsigned int particleIndex;
+      lineStr >> particleIndex;
+      // Only ever grow the containers: entries that were already read must not be discarded.
+      if (evtNo >= referenceData[batchNo].momenta.size())
+        referenceData[batchNo].momenta.resize(evtNo + 1);
+      if (particleIndex >= referenceData[batchNo].momenta[evtNo].size())
+        referenceData[batchNo].momenta[evtNo].resize(particleIndex + 1);
+
+      auto& fourVec = referenceData[batchNo].momenta[evtNo][particleIndex];
+      for (unsigned int i=0; i < fourVec.size(); ++i) {
+        EXPECT_TRUE(lineStr.good());
+        lineStr >> fourVec[i];
+      }
+      EXPECT_TRUE(lineStr.eof());
+    }
+  }
+  return referenceData;
+}
+
+
+TEST_P(MadgraphTestDouble, eemumu)
+{
+  // Set to dump events:
+  constexpr bool dumpEvents = false;
+  constexpr fptype toleranceMomenta = std::is_same<fptype, double>::value ? 5.E-12 : 1.E-5;
+  constexpr fptype toleranceMEs     = std::is_same<fptype, double>::value ? 1.E-7  : 1.E-5;
+  constexpr fptype energy = 1500; // historical default, Ecms = 1500 GeV = 1.5 TeV (above the Z peak)
+
+  std::string dumpFileName = std::string("dump_")
+      + testing::UnitTest::GetInstance()->current_test_info()->name()
+      + ".txt";
+  while (dumpFileName.find('/') != std::string::npos) {
+    dumpFileName.replace(dumpFileName.find('/'), 1, "_");
+  }
+  const std::string refFileName = "../../../../../test/eemumu/dump_CPUTest.eemumu.txt";
+
+  std::ofstream dumpFile;
+  if ( dumpEvents )
+  {
+    dumpFile.open(dumpFileName, std::ios::trunc);
+  }
+
+  // Read reference data
+  std::map<unsigned int, ReferenceData> referenceData = readReferenceData(refFileName);
+  ASSERT_FALSE(HasFailure()); // It doesn't make any sense to continue if we couldn't read the reference file.
+
+
+  // **************************************
+  // *** START MAIN LOOP ON #ITERATIONS ***
+  // **************************************
+  for (unsigned int iiter = 0; iiter < testDriver->niter; ++iiter)
+  {
+    testDriver->prepareRandomNumbers(iiter);
+
+    testDriver->prepareMomenta(energy);
+
+    testDriver->runSigmaKin(iiter);
+
+    // --- Run checks on all events produced in this iteration
+    for (std::size_t ievt = 0; ievt < testDriver->nevt && !HasFailure(); ++ievt)
+    {
+      if (dumpEvents) {
+        ASSERT_TRUE(dumpFile.is_open()) << dumpFileName;
+        dumpFile << "Event " << std::setw(8) << ievt << "  "
+                 << "Batch " << std::setw(4) << iiter << "\n";
+        testDriver->dumpParticles(dumpFile, ievt, testDriver->nparticle, 15, ReferenceData());
+        // Dump matrix element
+        dumpFile << std::setw(4) << "ME" << std::scientific << std::setw(15+8)
+            << testDriver->getMatrixElement(ievt) << "\n" << std::endl << std::defaultfloat;
+        continue;
+      }
+
+
+      // Check that we have the required reference data
+      ASSERT_GT(referenceData.size(), iiter) << "Don't have enough reference data for iteration " << iiter << ". Ref file:" << refFileName;
+      ASSERT_GT(referenceData[iiter].MEs.size(), ievt)     << "Don't have enough reference MEs for iteration " << iiter << " event " << ievt << ".\nRef file: " << refFileName;
+      ASSERT_GT(referenceData[iiter].momenta.size(), ievt) << "Don't have enough reference momenta for iteration " << iiter << " event " << ievt << ".\nRef file: " << refFileName;
+      ASSERT_GE(referenceData[iiter].momenta[ievt].size(), testDriver->nparticle) << "Don't have enough reference particles for iteration " << iiter << " event " << ievt << ".\nRef file: " << refFileName;
+
+
+      // This trace will help to understand the event that is being checked.
+      // It will only be printed in case of failures:
+      std::stringstream eventTrace;
+      eventTrace << "In comparing event " << ievt << " from iteration " << iiter << "\n";
+      testDriver->dumpParticles(eventTrace, ievt, testDriver->nparticle, 15, referenceData[iiter]);
+      eventTrace << std::setw(4) << "ME"   << std::scientific << std::setw(15+8) << testDriver->getMatrixElement(ievt) << "\n"
+                 << std::setw(4) << "r.ME" << std::scientific << std::setw(15+8) << referenceData[iiter].MEs[ievt] << std::endl << std::defaultfloat;
+      SCOPED_TRACE(eventTrace.str());
+
+
+      // Compare Momenta
+      for (unsigned int ipar = 0; ipar < testDriver->nparticle; ++ipar) {
+        std::stringstream momentumErrors;
+        for (unsigned int icomp = 0; icomp < testDriver->np4; ++icomp) {
+          const double pMadg = testDriver->getMomentum(ievt, ipar, icomp);
+          const double pOrig = referenceData[iiter].momenta[ievt][ipar][icomp];
+          const double relDelta = fabs( (pMadg - pOrig)/pOrig );
+          if (relDelta > toleranceMomenta) {
+            momentumErrors << std::setprecision(15) << std::scientific << "\nparticle " << ipar << "\tcomponent " << icomp
+                << "\n\t madGraph:  " << std::setw(22) << pMadg
+                << "\n\t reference: " << std::setw(22) << pOrig
+                << "\n\t rel delta: " << std::setw(22) << relDelta << " exceeds tolerance of " << toleranceMomenta;
+          }
+        }
+        ASSERT_TRUE(momentumErrors.str().empty()) << momentumErrors.str();
+      }
+
+
+      // Compare ME:
+      EXPECT_NEAR(testDriver->getMatrixElement(ievt),
+          referenceData[iiter].MEs[ievt],
+          toleranceMEs * referenceData[iiter].MEs[ievt]);
+    }
+  }
+}
+
diff --git a/tools/Makefile b/tools/Makefile
index 762e36e6a5..bc328fcee3 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -1,18 +1,2 @@
-# Set up tools, e.g. googletest.
-
-.PHONY: gtest
-
-all: gtest
-
-googletest:
-	git clone https://github.com/google/googletest.git -b release-1.10.0 googletest
-
-googletest/build: googletest
-	mkdir -p $@
-	cd googletest/build && cmake -DBUILD_GMOCK=OFF ../
-
-gtest: googletest/build
-	$(MAKE) -C googletest/build
-
 clean:
 	rm -rf googletest

From 6afa818ff368ff527b199d4ef5ea0300bf55e26e Mon Sep 17 00:00:00 2001
From: Stephan Hageboeck <stephan.hageboeck@cern.ch>
Date: Wed, 16 Dec 2020 16:57:44 +0100
Subject: [PATCH 2/6] [tests] Raise matrix element test threshold.

To port tests from ep1 to ep2, the threshold for comparing matrix
elements has to be raised to 1.E-6.
This is presumably because of reordered floating-point computations.
---
 test/src/MadgraphTest.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/src/MadgraphTest.cc b/test/src/MadgraphTest.cc
index 4c39b042e0..6a78493c1a 100644
--- a/test/src/MadgraphTest.cc
+++ b/test/src/MadgraphTest.cc
@@ -66,7 +66,7 @@ TEST_P(MadgraphTestDouble, eemumu)
   // Set to dump events:
   constexpr bool dumpEvents = false;
   constexpr fptype toleranceMomenta = std::is_same<fptype, double>::value ? 5.E-12 : 1.E-5;
-  constexpr fptype toleranceMEs     = std::is_same<fptype, double>::value ? 1.E-7  : 1.E-5;
+  constexpr fptype toleranceMEs     = std::is_same<fptype, double>::value ? 1.E-6  : 1.E-5;
   constexpr fptype energy = 1500; // historical default, Ecms = 1500 GeV = 1.5 TeV (above the Z peak)
 
   std::string dumpFileName = std::string("dump_")

From 597ef64ea06b28a26a994c5224b8ae8e2368d140 Mon Sep 17 00:00:00 2001
From: Stephan Hageboeck <stephan.hageboeck@cern.ch>
Date: Wed, 16 Dec 2020 17:05:24 +0100
Subject: [PATCH 3/6] [ep2 cuda eemm] Port CUDA tests to epoch2.

---
 epoch2/cuda/ee_mumu/SubProcesses/Makefile   |  26 +-
 epoch2/cuda/ee_mumu/SubProcesses/runTest.cc | 333 +++++---------------
 2 files changed, 91 insertions(+), 268 deletions(-)

diff --git a/epoch2/cuda/ee_mumu/SubProcesses/Makefile b/epoch2/cuda/ee_mumu/SubProcesses/Makefile
index 2ebef6abac..598dc30d2d 100644
--- a/epoch2/cuda/ee_mumu/SubProcesses/Makefile
+++ b/epoch2/cuda/ee_mumu/SubProcesses/Makefile
@@ -1,5 +1,6 @@
 LIBDIR   = ../../lib
 TOOLSDIR = ../../../../../tools/
+TESTDIR  = ../../../../../test
 INCFLAGS = -I. -I../../src -I$(TOOLSDIR)
 MODELLIB = model_sm
 OPTFLAGS = -O3
@@ -38,7 +39,7 @@ else
   endif
 endif
 
-GTESTLIBDIR = $(TOOLSDIR)/googletest/build/lib/
+GTESTLIBDIR = $(TESTDIR)/googletest/build/lib/
 GTESTLIBS   = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a
 MAKEDEBUG=
 
@@ -80,20 +81,23 @@ $(cxx_main): check_sa.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects)
 
 runTest.o:   $(GTESTLIBS)
 runTest.exe: $(GTESTLIBS)
-runTest.exe: INCFLAGS += -I$(TOOLSDIR)/googletest/googletest/include/
+runTest.exe: INCFLAGS += -I$(TESTDIR)/googletest/googletest/include/
+runTest.exe: INCFLAGS += -I$(TESTDIR)/include/
 runTest.exe: LIBFLAGS += -L$(GTESTLIBDIR)/ -lgtest -lgtest_main
+runTest.exe: runTest.o $(TESTDIR)/src/MadgraphTest.o $(TESTDIR)/include/*.h
+runTest.exe: cxx_objects += runTest.o $(TESTDIR)/src/MadgraphTest.o
+runTest.exe: cu_objects  += runTest_cu.o
 ifeq ($(NVCC),)
-runTest.exe: runTest.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS)
-	$(CXX) -o $@ $(cxx_objects) runTest.o $(CPPFLAGS) $(CXXFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS)
+runTest.exe: $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS)
+	$(CXX) -o $@ $(cxx_objects) $(CPPFLAGS) $(CXXFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS)
 else
-runTest.exe: runTest.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS)
-	ln -sf runTest.cc runTest_tmp.cu
-	$(NVCC) -o $@ $(cxx_objects) runTest.o $(cu_objects) runTest_tmp.cu $(CPPFLAGS) $(CUFLAGS) -ldl $(LIBFLAGS) $(CULIBFLAGS) -lcuda
-	unlink runTest_tmp.cu
+runTest.exe runTest_cu.o &: runTest.cc $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(cu_objects) $(GTESTLIBS)
+	$(NVCC) -o runTest_cu.o -c -x cu runTest.cc $(CPPFLAGS) $(CUFLAGS)
+	$(NVCC) -o $@ $(cxx_objects) $(cu_objects) $(CPPFLAGS) $(CUFLAGS) -ldl $(LIBFLAGS) $(CULIBFLAGS) -lcuda -lgomp
 endif
 
 $(GTESTLIBS):
-	$(MAKE) -C $(TOOLSDIR)
+	$(MAKE) -C $(TESTDIR)
 
 check: runTest.exe
 	./runTest.exe
@@ -104,6 +108,10 @@ clean:
 	cd ../../src && make clean
 	rm -f *.o *.exe
 
+distclean: clean
+	$(MAKE) -C $(TOOLSDIR) clean
+	$(MAKE) -C $(TESTDIR) clean
+
 memcheck: $(cu_main)
 	/usr/local/cuda/bin/cuda-memcheck --check-api-memory-access yes --check-deprecated-instr yes --check-device-heap yes --demangle full --language c --leak-check full --racecheck-report all --report-api-errors all --show-backtrace yes --tool memcheck --track-unused-memory yes ./gcheck.exe 2 32 2
 
diff --git a/epoch2/cuda/ee_mumu/SubProcesses/runTest.cc b/epoch2/cuda/ee_mumu/SubProcesses/runTest.cc
index 8f3e4c6024..dbde364896 100644
--- a/epoch2/cuda/ee_mumu/SubProcesses/runTest.cc
+++ b/epoch2/cuda/ee_mumu/SubProcesses/runTest.cc
@@ -1,6 +1,8 @@
 #include "mgOnGpuConfig.h"
 #include "mgOnGpuTypes.h"
 
+#include "MadgraphTest.h"
+
 #include "CommonRandomNumbers.h"
 #include "gCPPProcess.h"
 #include "Memory.h"
@@ -10,110 +12,38 @@
 #include "rambo.h"
 #endif
 
-#include <algorithm>
-#include <array>
-#include <cmath>
-#include <cstring>
-#include <fstream>
-#include <iomanip>
-#include <iostream>
-#include <numeric>
-#include <string>
-#include <unistd.h>
-
-#include <gtest/gtest.h>
 
 
-struct ReferenceData {
-  std::vector< std::array<std::array<fptype,mgOnGpu::np4>, mgOnGpu::npar> > momenta;
-  std::vector<fptype> MEs;
-};
+struct CUDA_CPU_TestBase : public TestDriverBase<double> {
+  static_assert( gputhreads%mgOnGpu::neppR == 0, "ERROR! #threads/block should be a multiple of neppR" );
+  static_assert( gputhreads%mgOnGpu::neppM == 0, "ERROR! #threads/block should be a multiple of neppM" );
+  static_assert( gputhreads <= mgOnGpu::ntpbMAX, "ERROR! #threads/block should be <= ntpbMAX" );
 
-std::map<unsigned int, ReferenceData> readReferenceData(const std::string& refFileName);
+  const std::size_t nRnarray{ mgOnGpu::np4 * mgOnGpu::nparf * nevt }; // (NB: ASA layout with nevt=npagR*neppR events per iteration)
+  const std::size_t nMomenta{ mgOnGpu::np4 * mgOnGpu::npar  * nevt }; // (NB: nevt=npagM*neppM for ASA layouts)
+  const std::size_t nWeights{ nevt };
+  const std::size_t nMEs    { nevt };
 
-#ifndef __CUDACC__
-std::map<unsigned int, ReferenceData> readReferenceData(const std::string& refFileName)
-{
-  std::ifstream referenceFile(refFileName.c_str());
-  EXPECT_TRUE(referenceFile.is_open()) << refFileName;
-  std::map<unsigned int, ReferenceData> referenceData;
-  unsigned int evtNo;
-  unsigned int batchNo;
-
-  for (std::string line; std::getline(referenceFile, line); )
+  CUDA_CPU_TestBase() :
+  TestDriverBase()
   {
-    std::stringstream lineStr(line);
-    if (line.empty())
-    {
-      continue;
-    }
-    else if (line.find("Event") != std::string::npos)
-    {
-      std::string dummy;
-      lineStr >> dummy >> evtNo >> dummy >> batchNo;
-    }
-    else if (line.find("ME") != std::string::npos)
-    {
-      if (evtNo <= referenceData[batchNo].MEs.size())
-        referenceData[batchNo].MEs.resize(evtNo + 1);
-
-      std::string dummy;
-      lineStr >> dummy >> referenceData[batchNo].MEs[evtNo];
-    }
-    else
-    {
-      unsigned int particleIndex;
-      lineStr >> particleIndex;
-
-      if (evtNo <= referenceData[batchNo].momenta.size())
-        referenceData[batchNo].momenta.resize(evtNo + 1);
-
-      for (unsigned int i=0; i < mgOnGpu::np4; ++i) {
-        EXPECT_TRUE(lineStr.good());
-        lineStr >> referenceData[batchNo].momenta[evtNo][particleIndex][i];
-      }
-      EXPECT_TRUE(lineStr.eof());
-    }
+    TestDriverBase::nparticle = mgOnGpu::npar;
   }
-  return referenceData;
-}
-#endif
 
-class BaseTest : public ::testing::Test {
- protected:
-
-  static constexpr unsigned niter = 2;
-  static constexpr unsigned gpublocks = 2;
-  static constexpr unsigned gputhreads = 32;
-  static constexpr std::size_t nevt = gpublocks * gputhreads;
-
-  const std::size_t nRnarray; // (NB: ASA layout with nevt=npagR*neppR events per iteration)
-  const std::size_t nMomenta; // (NB: nevt=npagM*neppM for ASA layouts)
-  const std::size_t nWeights;
-  const std::size_t nMEs;
-
-  BaseTest() :
-    nRnarray{ mgOnGpu::np4 * mgOnGpu::nparf * nevt }, // (NB: ASA layout with nevt=npagR*neppR events per iteration)
-    nMomenta{ mgOnGpu::np4 * mgOnGpu::npar  * nevt },// (NB: nevt=npagM*neppM for ASA layouts)
-    nWeights{ nevt },
-    nMEs    { nevt }
-  { }
-
-  virtual void prepareRandomNumbers(int iiter) = 0;
-  virtual void prepareMomenta(fptype energy) = 0;
-  virtual void runSigmaKin(std::size_t iiter) = 0;
 };
 
 
 #ifndef __CUDACC__
-struct CPUTest : public BaseTest {
+struct CPUTest : public CUDA_CPU_TestBase {
   Proc::CPPProcess process;
 
-  unique_ptr_host<fptype> hstRnarray;
-  unique_ptr_host<fptype> hstMomenta;
-  unique_ptr_host<bool  > hstIsGoodHel;
-  unique_ptr_host<fptype> hstWeights;
-  unique_ptr_host<fptype> hstMEs;
+  // --- 0b. Allocate memory structures
+  // Memory structures for random numbers, momenta, matrix elements and weights on host and device
+  unique_ptr_host<fptype> hstRnarray  { hstMakeUnique<fptype>( nRnarray ) }; // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR)
+  unique_ptr_host<fptype> hstMomenta  { hstMakeUnique<fptype>( nMomenta ) }; // AOSOA[npagM][npar][np4][neppM] (previously was: lp)
+  unique_ptr_host<bool  > hstIsGoodHel{ hstMakeUnique<bool  >( mgOnGpu::ncomb ) };
+  unique_ptr_host<fptype> hstWeights  { hstMakeUnique<fptype>( nWeights ) };
+  unique_ptr_host<fptype> hstMEs      { hstMakeUnique<fptype>( nMEs ) };
 
   // Create a process object
   // Read param_card and set parameters
@@ -121,23 +51,15 @@ struct CPUTest : public BaseTest {
   // The CPPProcess constructor has side effects on the globals Proc::cHel, which is needed in ME calculations.
   // Don't remove!
   CPUTest() :
-  BaseTest(),
+  CUDA_CPU_TestBase(),
   process(niter, gpublocks, gputhreads, /*verbose=*/false)
   {
     process.initProc("../../Cards/param_card.dat");
-
-    // --- 0b. Allocate memory structures
-    // Memory structures for random numbers, momenta, matrix elements and weights on host and device
-    hstRnarray   = hstMakeUnique<fptype>( nRnarray ); // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR)
-    hstMomenta   = hstMakeUnique<fptype>( nMomenta ); // AOSOA[npagM][npar][np4][neppM] (previously was: lp)
-    hstIsGoodHel = hstMakeUnique<bool  >( mgOnGpu::ncomb );
-    hstWeights   = hstMakeUnique<fptype>( nWeights ); // (previously was: meHostPtr)
-    hstMEs       = hstMakeUnique<fptype>( nMEs ); // (previously was: meHostPtr)
   }
   virtual ~CPUTest() { }
 
 
-  void prepareRandomNumbers(int iiter) override {
+  void prepareRandomNumbers(unsigned int iiter) override {
     std::vector<fptype> rnd = CommonRandomNumbers::generate<fptype>(nRnarray, 1337 + iiter);
     std::copy(rnd.begin(), rnd.end(), hstRnarray.get());
   }
@@ -157,11 +79,26 @@ struct CPUTest : public BaseTest {
     // --- 3a. SigmaKin
     Proc::sigmaKin(hstMomenta.get(), hstMEs.get(), nevt);
   }
+
+
+
+  double getMomentum(std::size_t evtNo, unsigned int particle, unsigned int component) const override {
+    assert(component < mgOnGpu::np4);
+    assert(particle  < mgOnGpu::npar);
+    const auto page  = evtNo / mgOnGpu::neppM; // #eventpage in this iteration
+    const auto ieppM = evtNo % mgOnGpu::neppM; // #event in the current eventpage in this iteration
+    return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*mgOnGpu::neppM + particle * mgOnGpu::neppM*mgOnGpu::np4 + component * mgOnGpu::neppM + ieppM];
+  };
+
+  double getMatrixElement(std::size_t evtNo) const override {
+    return hstMEs[evtNo];
+  }
 };
 #endif
 
+
 #ifdef __CUDACC__
-struct CUDATest : public BaseTest {
+struct CUDATest : public CUDA_CPU_TestBase {
   // Reset the device when our test goes out of scope. Note that this should happen after
   // the frees, i.e. be declared before the pointers to device memory.
   struct DeviceReset {
@@ -170,17 +107,20 @@ struct CUDATest : public BaseTest {
     }
   } deviceResetter;
 
-  unique_ptr_host<fptype> hstRnarray;
-  unique_ptr_host<fptype> hstMomenta;
-  unique_ptr_host<bool  > hstIsGoodHel;
-  unique_ptr_host<fptype> hstWeights;
-  unique_ptr_host<fptype> hstMEs;
+  // --- 0b. Allocate memory structures
+  // Memory structures for random numbers, momenta, matrix elements and weights on host and device
+  unique_ptr_host<fptype> hstRnarray  { hstMakeUnique<fptype>( nRnarray ) }; // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR)
+  unique_ptr_host<fptype> hstMomenta  { hstMakeUnique<fptype>( nMomenta ) }; // AOSOA[npagM][npar][np4][neppM] (previously was: lp)
+  unique_ptr_host<bool  > hstIsGoodHel{ hstMakeUnique<bool  >( mgOnGpu::ncomb ) };
+  unique_ptr_host<fptype> hstWeights  { hstMakeUnique<fptype>( nWeights ) };
+  unique_ptr_host<fptype> hstMEs      { hstMakeUnique<fptype>( nMEs ) };
+
 
-  unique_ptr_dev<fptype> devRnarray;
-  unique_ptr_dev<fptype> devMomenta;
-  unique_ptr_dev<bool  > devIsGoodHel;
-  unique_ptr_dev<fptype> devWeights;
-  unique_ptr_dev<fptype> devMEs;
+  unique_ptr_dev<fptype> devRnarray  { devMakeUnique<fptype>( nRnarray ) }; // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR)
+  unique_ptr_dev<fptype> devMomenta  { devMakeUnique<fptype>( nMomenta ) }; // (previously was: allMomenta)
+  unique_ptr_dev<bool  > devIsGoodHel{ devMakeUnique<bool  >( mgOnGpu::ncomb ) };
+  unique_ptr_dev<fptype> devWeights  { devMakeUnique<fptype>( nWeights ) }; // (previously was: meDevPtr)
+  unique_ptr_dev<fptype> devMEs      { devMakeUnique<fptype>( nMEs )     }; // (previously was: meDevPtr)
 
   gProc::CPPProcess process;
 
@@ -190,31 +130,15 @@ struct CUDATest : public BaseTest {
   // The CPPProcess constructor has side effects on the globals Proc::cHel, which is needed in ME calculations.
   // Don't remove!
   CUDATest() :
-  BaseTest(),
+  CUDA_CPU_TestBase(),
   process(niter, gpublocks, gputhreads, /*verbose=*/false)
   {
     process.initProc("../../Cards/param_card.dat");
-
-    checkCuda( cudaFree( 0 ) ); // SLOW!
-
-    // --- 0b. Allocate memory structures
-    // Memory structures for random numbers, momenta, matrix elements and weights on host and device
-    hstRnarray   = hstMakeUnique<fptype>( nRnarray ); // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR)
-    hstMomenta   = hstMakeUnique<fptype>( nMomenta ); // AOSOA[npagM][npar][np4][neppM] (previously was: lp)
-    hstIsGoodHel = hstMakeUnique<bool  >( mgOnGpu::ncomb );
-    hstWeights   = hstMakeUnique<fptype>( nWeights ); // (previously was: meHostPtr)
-    hstMEs       = hstMakeUnique<fptype>( nMEs ); // (previously was: meHostPtr)
-
-    devRnarray   = devMakeUnique<fptype>( nRnarray ); // AOSOA[npagR][nparf][np4][neppR] (NB: nevt=npagR*neppR)
-    devMomenta   = devMakeUnique<fptype>( nMomenta ); // (previously was: allMomenta)
-    devIsGoodHel = devMakeUnique<bool  >( mgOnGpu::ncomb );
-    devWeights   = devMakeUnique<fptype>( nWeights ); // (previously was: meDevPtr)
-    devMEs       = devMakeUnique<fptype>( nMEs ); // (previously was: meDevPtr)
   }
-
   virtual ~CUDATest() { }
 
-  void prepareRandomNumbers(int iiter) override {
+
+  void prepareRandomNumbers(unsigned int iiter) override {
     std::vector<fptype> rnd = CommonRandomNumbers::generate<fptype>(nRnarray, 1337 + iiter);
     std::copy(rnd.begin(), rnd.end(), hstRnarray.get());
     checkCuda( cudaMemcpy( devRnarray.get(), hstRnarray.get(), nRnarray * sizeof(decltype(devRnarray)::element_type), cudaMemcpyHostToDevice ) );
@@ -236,6 +160,7 @@ struct CUDATest : public BaseTest {
     checkCuda( cudaMemcpy( hstMomenta.get(), devMomenta.get(), nMomenta * sizeof(decltype(hstMomenta)::element_type), cudaMemcpyDeviceToHost ) );
   }
 
+
   void runSigmaKin(std::size_t iiter) override {
     // --- 0d. SGoodHel
     if ( iiter == 0 )
@@ -261,140 +186,30 @@ struct CUDATest : public BaseTest {
     checkCuda( cudaMemcpy( hstMEs.get(), devMEs.get(), nMEs * sizeof(decltype(hstMEs)::element_type), cudaMemcpyDeviceToHost ) );
   }
 
+
+  double getMomentum(std::size_t evtNo, unsigned int particle, unsigned int component) const override {
+    assert(component < mgOnGpu::np4);
+    assert(particle  < mgOnGpu::npar);
+    const auto page  = evtNo / mgOnGpu::neppM; // #eventpage in this iteration
+    const auto ieppM = evtNo % mgOnGpu::neppM; // #event in the current eventpage in this iteration
+    return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*mgOnGpu::neppM + particle * mgOnGpu::neppM*mgOnGpu::np4 + component * mgOnGpu::neppM + ieppM];
+  };
+
+  double getMatrixElement(std::size_t evtNo) const override {
+    return hstMEs[evtNo];
+  }
 };
 #endif
 
 
 #ifdef __CUDACC__
-TEST_F(CUDATest, eemumu)
+INSTANTIATE_TEST_SUITE_P(EP2_CUDA_GPU, MadgraphTestDouble,
+    testing::Values( [](){ return new CUDATest; } )
+);
 #else
-TEST_F(CPUTest, eemumu)
+INSTANTIATE_TEST_SUITE_P(EP2_CUDA_CPU, MadgraphTestDouble,
+    testing::Values([](){ return new CPUTest; })
+);
 #endif
-{
-  // Set to dump events:
-  constexpr bool dumpEvents = false;
-  const std::string dumpFileName = dumpEvents ?
-      std::string("dump_") + testing::UnitTest::GetInstance()->current_test_info()->test_suite_name() + "." + testing::UnitTest::GetInstance()->current_test_info()->name() + ".txt" :
-      "";
-  const std::string refFileName = "dump_CPUTest.eemumu.txt";
-
-  const int neppR = mgOnGpu::neppR; // ASA layout: constant at compile-time
-  static_assert( gputhreads%neppR == 0, "ERROR! #threads/block should be a multiple of neppR" );
-
-  const int neppM = mgOnGpu::neppM; // ASA layout: constant at compile-time
-  static_assert( gputhreads%neppM == 0, "ERROR! #threads/block should be a multiple of neppM" );
-
-  using mgOnGpu::ntpbMAX;
-  static_assert( gputhreads <= ntpbMAX, "ERROR! #threads/block should be <= ntpbMAX" );
-
-  std::ofstream dumpFile;
-  if ( !dumpFileName.empty() )
-  {
-    dumpFile.open(dumpFileName, std::ios::trunc);
-  }
 
-  std::map<unsigned int, ReferenceData> referenceData = readReferenceData(refFileName);
-  ASSERT_FALSE(HasFailure()); // It doesn't make any sense to continue if we couldn't read the reference file.
 
-  constexpr fptype energy = 1500; // historical default, Ecms = 1500 GeV = 1.5 TeV (above the Z peak)
-
-
-  // **************************************
-  // *** START MAIN LOOP ON #ITERATIONS ***
-  // **************************************
-
-  for (unsigned int iiter = 0; iiter < niter; ++iiter)
-  {
-    prepareRandomNumbers(iiter);
-
-    prepareMomenta(energy);
-
-    runSigmaKin(iiter);
-
-    // --- Run checks on all events produced in this iteration
-    for (std::size_t ievt = 0; ievt < nevt && !HasFailure(); ++ievt)
-    {
-      auto getMomentum = [&](std::size_t evtNo, int particle, int component)
-      {
-        assert(component < mgOnGpu::np4);
-        assert(particle  < mgOnGpu::npar);
-        const auto page  = evtNo / neppM; // #eventpage in this iteration
-        const auto ieppM = evtNo % neppM; // #event in the current eventpage in this iteration
-        return hstMomenta[page * mgOnGpu::npar*mgOnGpu::np4*neppM + particle * neppM*mgOnGpu::np4 + component * neppM + ieppM];
-      };
-      auto dumpParticles = [&](std::ostream& stream, std::size_t evtNo, unsigned precision, bool dumpReference)
-      {
-        const auto width = precision + 8;
-        for (int ipar = 0; ipar < mgOnGpu::npar; ipar++)
-        {
-          // NB: 'setw' affects only the next field (of any type)
-          stream << std::scientific // fixed format: affects all floats (default precision: 6)
-                 << std::setprecision(precision)
-                 << std::setw(4) << ipar
-                 << std::setw(width) << getMomentum(ievt, ipar, 0)
-                 << std::setw(width) << getMomentum(ievt, ipar, 1)
-                 << std::setw(width) << getMomentum(ievt, ipar, 2)
-                 << std::setw(width) << getMomentum(ievt, ipar, 3)
-                 << "\n";
-          if (dumpReference) {
-            stream << "ref" << ipar;
-            if (ievt < referenceData[iiter].momenta.size()) {
-              stream << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][0]
-                     << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][1]
-                     << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][2]
-                     << std::setw(width) << referenceData[iiter].momenta[ievt][ipar][3]
-                     << "\n\n";
-            } else {
-              stream << "  --- No reference ---\n\n";
-            }
-          }
-          stream << std::flush << std::defaultfloat; // default format: affects all floats
-        }
-      };
-
-      if (dumpFile.is_open()) {
-        dumpFile << "Event " << std::setw(8) << ievt << "  "
-                 << "Batch " << std::setw(4) << iiter << "\n";
-        dumpParticles(dumpFile, ievt, 15, false);
-        // Dump matrix element
-        dumpFile << std::setw(4) << "ME" << std::scientific << std::setw(15+8) << hstMEs[ievt] << "\n" << std::endl << std::defaultfloat;
-        continue;
-      }
-
-      // This trace will only be printed in case of failures:
-      std::stringstream eventTrace;
-      eventTrace << "In comparing event " << ievt << " from iteration " << iiter << "\n";
-      dumpParticles(eventTrace, ievt, 15, true);
-      eventTrace << std::setw(4) << "ME"   << std::scientific << std::setw(15+8) << hstMEs[ievt] << "\n"
-                 << std::setw(4) << "r.ME" << std::scientific << std::setw(15+8) << referenceData[iiter].MEs[ievt] << std::endl << std::defaultfloat;
-      SCOPED_TRACE(eventTrace.str());
-
-      ASSERT_LT( ievt, referenceData[iiter].momenta.size() ) << "Don't have enough events in reference file #ref=" << referenceData[iiter].momenta.size();
-
-
-      // Compare Momenta
-      const fptype toleranceMomenta = 200. * std::pow(10., -std::numeric_limits<fptype>::digits10);
-      for (unsigned int ipar = 0; ipar < mgOnGpu::npar; ++ipar) {
-        std::stringstream momentumErrors;
-        for (unsigned int icomp = 0; icomp < mgOnGpu::np4; ++icomp) {
-          const double pMadg = getMomentum(ievt, ipar, icomp);
-          const double pOrig = referenceData[iiter].momenta[ievt][ipar][icomp];
-          const double relDelta = fabs( (pMadg - pOrig)/pOrig );
-          if (relDelta > toleranceMomenta) {
-            momentumErrors << std::setprecision(15) << std::scientific << "\nparticle " << ipar << "\tcomponent " << icomp
-                << "\n\t madGraph:  " << std::setw(22) << pMadg
-                << "\n\t reference: " << std::setw(22) << pOrig
-                << "\n\t rel delta: " << std::setw(22) << relDelta << " exceeds tolerance of " << toleranceMomenta;
-          }
-        }
-        ASSERT_TRUE(momentumErrors.str().empty()) << momentumErrors.str();
-      }
-
-      // Compare ME:
-      const fptype toleranceMEs = 500. * std::pow(10., -std::numeric_limits<fptype>::digits10);
-      EXPECT_NEAR(hstMEs[ievt], referenceData[iiter].MEs[ievt], toleranceMEs * referenceData[iiter].MEs[ievt]);
-    }
-
-
-  }
-}

From 041bdf1af14492e94dd0e4062e222f9a261d87c9 Mon Sep 17 00:00:00 2001
From: Stephan Hageboeck <stephan.hageboeck@cern.ch>
Date: Tue, 1 Dec 2020 15:38:42 +0100
Subject: [PATCH 4/6] Implement epoch2 github action.

---
 .github/workflows/c-cpp.yml | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml
index 66cbdf28ec..68cb15fe5e 100644
--- a/.github/workflows/c-cpp.yml
+++ b/.github/workflows/c-cpp.yml
@@ -12,7 +12,20 @@ jobs:
     defaults:
       run:
         working-directory: epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum
-
+    steps:
+    - uses: actions/checkout@v2
+    - name: make gtest
+      working-directory: tools
+      run: make
+    - name: make
+      run: make
+    - name: make check
+      run: make check
+  epoch2_eemumu:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: epoch2/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum
     steps:
     - uses: actions/checkout@v2
     - name: make gtest

From 33d908c2ad205b9e36c122ddc7140e9804411005 Mon Sep 17 00:00:00 2001
From: Stephan Hageboeck <stephan.hageboeck@cern.ch>
Date: Wed, 16 Dec 2020 18:22:26 +0100
Subject: [PATCH 5/6] [ep2 cuda eemm] Port fixes in Makefile to epoch2.

---
 epoch2/cuda/ee_mumu/SubProcesses/Makefile | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/epoch2/cuda/ee_mumu/SubProcesses/Makefile b/epoch2/cuda/ee_mumu/SubProcesses/Makefile
index 598dc30d2d..60a772656a 100644
--- a/epoch2/cuda/ee_mumu/SubProcesses/Makefile
+++ b/epoch2/cuda/ee_mumu/SubProcesses/Makefile
@@ -1,6 +1,6 @@
 LIBDIR   = ../../lib
 TOOLSDIR = ../../../../../tools/
-TESTDIR  = ../../../../../test
+TESTDIR  = ../../../../../test/
 INCFLAGS = -I. -I../../src -I$(TOOLSDIR)
 MODELLIB = model_sm
 OPTFLAGS = -O3
@@ -25,7 +25,8 @@ ifdef CUDA_HOME
   CUARCHFLAGS= -arch=compute_$(CUARCHNUM)
   CUINC       = -I$(CUDA_HOME)/include/
   CULIBFLAGS  = -L$(CUDA_HOME)/lib64/ -lcuda -lcurand
-  CUFLAGS= $(OPTFLAGS) -std=c++14 $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math -lineinfo $(MGONGPU_CONFIG)
+  CUOPTFLAGS  = -lineinfo
+  CUFLAGS     = $(OPTFLAGS) $(CUOPTFLAGS) -std=c++14 $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math $(MGONGPU_CONFIG)
 
   cu_main     = gcheck.exe
   cu_objects  = gCPPProcess.o
@@ -54,9 +55,8 @@ endif
 
 all: ../../src $(cu_main) $(cxx_main) runTest.exe
 
-debug: OPTFLAGS = -g -O0 -DDEBUG2
-debug: CUFLAGS := $(filter-out -lineinfo,$(CUFLAGS))
-debug: CUFLAGS += -G
+debug: OPTFLAGS   = -g -O0 -DDEBUG2
+debug: CUOPTFLAGS = -G
 debug: MAKEDEBUG := debug
 debug: all
 
@@ -105,7 +105,7 @@ check: runTest.exe
 .PHONY: clean
 
 clean:
-	cd ../../src && make clean
+	$(MAKE) -C ../../src clean
 	rm -f *.o *.exe
 
 distclean: clean
@@ -125,7 +125,7 @@ test: force
 info:
 ifdef CUDA_HOME
 	$(NVCC) --version
-	echo ""
+	@echo ""
 endif
 	$(CXX) --version
 

From 2136265faf37a95b83feede882c4b04546fe39c0 Mon Sep 17 00:00:00 2001
From: Stephan Hageboeck <stephan.hageboeck@cern.ch>
Date: Wed, 16 Dec 2020 18:32:24 +0100
Subject: [PATCH 6/6] [CI] Add CI step for debug builds.

---
 .github/workflows/c-cpp.yml | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml
index 68cb15fe5e..d3373d8ee6 100644
--- a/.github/workflows/c-cpp.yml
+++ b/.github/workflows/c-cpp.yml
@@ -14,13 +14,21 @@ jobs:
         working-directory: epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum
     steps:
     - uses: actions/checkout@v2
-    - name: make gtest
-      working-directory: tools
-      run: make
     - name: make
       run: make
     - name: make check
       run: make check
+  epoch1_eemumu_debug:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum
+    steps:
+    - uses: actions/checkout@v2
+    - name: make debug
+      run: make debug
+    - name: make check
+      run: make check
   epoch2_eemumu:
     runs-on: ubuntu-latest
     defaults:
@@ -28,10 +36,18 @@ jobs:
         working-directory: epoch2/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum
     steps:
     - uses: actions/checkout@v2
-    - name: make gtest
-      working-directory: tools
-      run: make
     - name: make
       run: make
     - name: make check
       run: make check
+  epoch2_eemumu_debug:
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: epoch2/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum
+    steps:
+    - uses: actions/checkout@v2
+    - name: make debug
+      run: make debug
+    - name: make check
+      run: make check