Merge pull request #709 from valassi/launch

first systematic 'launch'-like tests (and move to the latest select_color upstream)
madgraph5 · Jun 19, 2023 · 79d8c06 · 79d8c06
2 parents f0e7cde + d89aea8
commit 79d8c06
Show file tree

Hide file tree

Showing 344 changed files with 943,964 additions and 6,571 deletions.
diff --git a/epochX/cudacpp/CODEGEN/MG5aMC_patches/PROD/commit.GIT b/epochX/cudacpp/CODEGEN/MG5aMC_patches/PROD/commit.GIT
@@ -1 +1 @@
-ecf424f8f
+ebcb307fa
diff --git a/epochX/cudacpp/CODEGEN/MG5aMC_patches/PROD/patch.P1 b/epochX/cudacpp/CODEGEN/MG5aMC_patches/PROD/patch.P1
@@ -338,7 +338,7 @@ index beef47998..905badeff 100644
        ENDIF
        ANS=ANS/DBLE(IDEN)
 -
-       CALL SELECT_COLOR(RCOL, JAMP2, CHANNEL,1,  ICOL)
+       CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1,  ICOL)
 -
 +      call counters_smatrix1_stop()
        END

diff --git a/epochX/cudacpp/CODEGEN/generateAndCompare.sh b/epochX/cudacpp/CODEGEN/generateAndCompare.sh
@@ -296,6 +296,7 @@ function usage()
     # NB: all options with $SCRBCK=cudacpp use the 311 branch by default and always disable helicity recycling
     echo "Usage:   $0 [--nobrief] [--cpp|--gpu|--madnovec|--madonly|--mad|--madcpp*|--madgpu] [--nopatch|--upstream] [-c '<cmd>'] <proc>"
     echo "         (*Note: the --madcpp option exists but code generation fails for it)"
+    echo "         (**Note: <proc> will be used as a relative path in ${OUTDIR} and should not contain '/' characters"
     echo "Example: $0 gg_tt --mad"
     echo "Example: $0 gg_bb --mad -c 'generate g g > b b~'"
   fi
@@ -387,6 +388,8 @@ while [ "$1" != "" ]; do
   fi
   shift
 done
+if [ "$proc" == "" ]; then usage; fi
+if [ "${proc/\/}" != "${proc}" ]; then echo "ERROR! <proc> '${proc}' should not contain '/' characters"; usage; fi
 
 echo "SCRDIR=${SCRDIR}"
 echo "OUTDIR=${OUTDIR}"

diff --git a/epochX/cudacpp/README_CODEGEN.txt b/epochX/cudacpp/README_CODEGEN.txt
@@ -0,0 +1,61 @@
+# Copyright (C) 2020-2023 CERN and UCLouvain.
+# Licensed under the GNU Lesser General Public License (version 3 or later).
+# Created by: A. Valassi (Jun 2023) for the MG5aMC CUDACPP plugin.
+# Further modified by: A. Valassi (2023) for the MG5aMC CUDACPP plugin.
+
+[Last updated Mon 19 June 2023]
+
+This README contains **TEMPORARY** instructions for end users on how to generate CUDACPP code for MG5aMC.
+It relies on using both the madgraph4gpu and mg5amcnlo github repositories.
+Eventually, CUDACPP code generation will be fully integrated into the mg5amcnlo github repository.
+NB: gridpack generation is not documented/supported yet.
+
+0. Set up your favorite compilers
+
+Example:
+  echo $CXX
+    /cvmfs/sft.cern.ch/lcg/releases/gcc/11.2.0-ad950/x86_64-centos8/bin/g++
+  echo $FC
+    /cvmfs/sft.cern.ch/lcg/releases/gcc/11.2.0-ad950/x86_64-centos8/bin/gfortran
+  which nvcc
+    /usr/local/cuda-12.0/bin/nvcc
+
+1. Download mg5amcnlo
+
+cd <userdir>
+git clone -b gpucpp --single-branch git@github.com:mg5amcnlo/mg5amcnlo
+
+export MG5AMC_HOME=$(pwd)/mg5amcnlo
+
+2. Download madgraph4gpu
+
+cd <userdir>
+git clone -b master --single-branch git@github.com:madgraph5/madgraph4gpu.git
+
+cd madgraph4gpu/epochX/cudacpp/
+
+3. Generate your favorite process
+
+./CODEGEN/generateAndCompare.sh --mad USER_gg_tt -c 'generate g g > t t~'
+
+cd USER_gg_tt.mad
+
+4a. Launch your process for FORTRAN
+
+sed -i "s/.* = cudacpp_backend/FORTRAN = cudacpp_backend/" Cards/run_card.dat
+echo "r=21" > SubProcesses/randinit
+./bin/generate_events -f
+
+4b. Launch your process for CPP (with AVX2)
+
+sed -i "s/.* = cudacpp_backend/CPP = cudacpp_backend/" Cards/run_card.dat
+echo "r=21" > SubProcesses/randinit
+AVX=avx2 MG5AMC_CARD_PATH=$(pwd)/Cards ./bin/generate_events -f
+
+4c. Launch your process for CUDA
+
+sed -i "s/.* = cudacpp_backend/CUDA = cudacpp_backend/" Cards/run_card.dat
+echo "r=21" > SubProcesses/randinit
+MG5AMC_CARD_PATH=$(pwd)/Cards ./bin/generate_events -f
+
+
diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
@@ -62,7 +62,7 @@ generate e+ e- > mu+ mu-
 No model currently active, so we import the Standard Model
 INFO: load particles 
 INFO: load vertices 
-[1;32mDEBUG: model prefixing  takes 0.0046384334564208984 [0m
+[1;32mDEBUG: model prefixing  takes 0.0048596858978271484 [0m
 INFO: Restrict model sm with file models/sm/restrict_default.dat . 
 [1;32mDEBUG: Simplifying conditional expressions [0m
 [1;32mDEBUG: remove interactions: u s w+ at order: QED=1 [0m
@@ -176,7 +176,7 @@ INFO: Creating files in directory P1_epem_mupmum
 [1;32mDEBUG:  Entering PLUGIN_OneProcessExporter.__init__ [1;30m[model_handling.py at line 1028][0m [0m
 [1;32mDEBUG:  kwargs[prefix] = 0 [1;30m[model_handling.py at line 1029][0m [0m
 [1;32mDEBUG:  proc_id = [0m 1 [1;30m[model_handling.py at line 1034][0m [0m
-[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_SA_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f8f78624ac0> [1;30m[export_v4.py at line 6174][0m [0m
+[1;32mDEBUG:  process_exporter_cpp = [0m <PLUGIN.CUDACPP_SA_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7f4c3bdebca0> [1;30m[export_v4.py at line 6174][0m [0m
 INFO: Creating files in directory . 
 [1;32mDEBUG:  Entering PLUGIN_OneProcessExporter.generate_process_files [1;30m[model_handling.py at line 1286][0m [0m
 [1;32mDEBUG:  self.include_multi_channel is already defined: this is madevent+second_exporter mode [1;30m[model_handling.py at line 1288][0m [0m
@@ -210,19 +210,19 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
 INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 
 INFO: Finding symmetric diagrams for subprocess group epem_mupmum 
 Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
-Wrote files for 8 helas calls in 0.093 s
+Wrote files for 8 helas calls in 0.094 s
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
-ALOHA: aloha creates 3 routines in  0.171 s
+ALOHA: aloha creates 3 routines in  0.173 s
 [1;32mDEBUG:  Entering PLUGIN_ProcessExporter.convert_model (create the model) [1;30m[output.py at line 194][0m [0m
 ALOHA: aloha starts to compute helicity amplitudes
 ALOHA: aloha creates FFV1 routines[0m
 ALOHA: aloha creates FFV2 routines[0m
 ALOHA: aloha creates FFV4 routines[0m
 ALOHA: aloha creates FFV2_4 routines[0m
-ALOHA: aloha creates 7 routines in  0.216 s
+ALOHA: aloha creates 7 routines in  0.223 s
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV1
 <class 'aloha.create_aloha.AbstractRoutine'> FFV2
@@ -260,6 +260,6 @@ Type "launch" to generate events from this process, or see
 Run "open index.html" to see more information about this process.
 quit
 
-real	0m1.767s
-user	0m1.513s
-sys	0m0.232s
+real	0m1.864s
+user	0m1.552s
+sys	0m0.214s
diff --git a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings.f b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings.f
@@ -47,13 +47,13 @@ SUBROUTINE UPDATE_AS_PARAM(VECID)
 
       INCLUDE '../maxparticles.inc'
       INCLUDE '../cuts.inc'
+      INCLUDE '../vector.inc'
       INCLUDE '../run.inc'
 
       DOUBLE PRECISION ALPHAS
       EXTERNAL ALPHAS
 
       INCLUDE 'input.inc'
-      INCLUDE '../vector.inc'
       INCLUDE 'coupl.inc'
       READLHA = .FALSE.
 

diff --git a/epochX/cudacpp/ee_mumu.mad/Source/dsample.f b/epochX/cudacpp/ee_mumu.mad/Source/dsample.f
@@ -666,8 +666,9 @@ subroutine sample_init(p1, p2, p3, p4, p5, VECSIZE_USED)
 c
       include 'genps.inc'
       include 'maxconfigs.inc'
+      include 'vector.inc'      ! defines VECSIZE_MEMMAX
       include 'run.inc'
-      include 'vector.inc' ! defines VECSIZE_MEMMAX
+
 c
 c     Arguments
 c

diff --git a/epochX/cudacpp/ee_mumu.mad/Source/kin_functions.f b/epochX/cudacpp/ee_mumu.mad/Source/kin_functions.f
@@ -110,6 +110,7 @@ DOUBLE PRECISION  FUNCTION rap(p)
 c     Global
 c
       include 'maxparticles.inc'
+      include 'vector.inc'
       include 'run.inc'
 
       double precision cm_rap
@@ -150,6 +151,7 @@ DOUBLE PRECISION  FUNCTION rap2(p)
 c     Global
 c
       include 'maxparticles.inc'
+      include 'vector.inc'
       include 'run.inc'
 c-----
 c  Begin Code
@@ -247,6 +249,7 @@ double precision function DJ(p1,p2)
 c
 
       include 'maxparticles.inc'
+      include 'vector.inc'
       include 'run.inc'
       include 'cuts.inc'
 
@@ -356,6 +359,7 @@ double precision function DJ1(p1,p2)
 c
 
       include 'maxparticles.inc'
+      include 'vector.inc'
       include 'run.inc'
 
       double precision pt1,pt2,ptm1,eta1,eta2,phi1,phi2,p1a,p2a,costh
@@ -409,6 +413,7 @@ double precision function DJB(p1)
 c
       double precision pm1
       include 'maxparticles.inc'
+      include 'vector.inc'
       include 'run.inc'
 
 c-----

diff --git a/epochX/cudacpp/ee_mumu.mad/Source/pawgraphs.f b/epochX/cudacpp/ee_mumu.mad/Source/pawgraphs.f
@@ -53,6 +53,7 @@ subroutine graph_point(p,dwgt)
 c     Global
 c
       include 'maxparticles.inc'
+      include 'vector.inc'
       include 'run.inc'
 
 c

diff --git a/epochX/cudacpp/ee_mumu.mad/Source/readgrid.f b/epochX/cudacpp/ee_mumu.mad/Source/readgrid.f
@@ -6,6 +6,7 @@ subroutine readgrid(lun)
       include 'sudgrid.inc'
       include 'PDF/pdf.inc'
       include 'maxparticles.inc'
+      include 'vector.inc'
       include 'run.inc'
 
 c...arguments

diff --git a/epochX/cudacpp/ee_mumu.mad/Source/run.inc b/epochX/cudacpp/ee_mumu.mad/Source/run.inc
@@ -49,12 +49,12 @@ c     Parameters for systematics variations info
 c
       logical use_syst
 c     Common block for systematics variations
-      DOUBLE PRECISION s_scale
-      INTEGER n_qcd,n_alpsem
-      DOUBLE PRECISION s_qalps(max_particles-2)
-      INTEGER n_pdfrw(2),i_pdgpdf(max_particles-2,2)
-      DOUBLE PRECISION s_xpdf(max_particles-2,2),s_qpdf(max_particles-2,2)
-      DOUBLE PRECISION s_rwfact
+      DOUBLE PRECISION s_scale(VECSIZE_MEMMAX)
+      INTEGER n_qcd(VECSIZE_MEMMAX),n_alpsem(VECSIZE_MEMMAX)
+      DOUBLE PRECISION s_qalps(max_particles-2,VECSIZE_MEMMAX)
+      INTEGER n_pdfrw(2,VECSIZE_MEMMAX),i_pdgpdf(max_particles-2,2,VECSIZE_MEMMAX)
+      DOUBLE PRECISION s_xpdf(max_particles-2,2,VECSIZE_MEMMAX),s_qpdf(max_particles-2,2,VECSIZE_MEMMAX)
+      DOUBLE PRECISION s_rwfact(VECSIZE_MEMMAX)
       COMMON/TO_SYST/use_syst,n_qcd,n_alpsem,n_pdfrw,i_pdgpdf,
      $               s_scale,s_qalps,s_xpdf,s_qpdf,s_rwfact
 c

diff --git a/epochX/cudacpp/ee_mumu.mad/Source/run_printout.f b/epochX/cudacpp/ee_mumu.mad/Source/run_printout.f
@@ -13,6 +13,7 @@ subroutine run_printout
 c
       include 'PDF/pdf.inc'
       include 'maxparticles.inc'
+      include 'vector.inc'
       include 'run.inc'
       include 'alfas.inc'
 c

diff --git a/epochX/cudacpp/ee_mumu.mad/Source/rw_events.f b/epochX/cudacpp/ee_mumu.mad/Source/rw_events.f
@@ -14,6 +14,7 @@ subroutine read_event(lun,P,wgt,nexternal,ic,ievent,sscale,
       implicit none
       include 'maxparticles.inc'
       include 'run_config.inc'
+      include 'vector.inc'
       include 'run.inc'
       double precision pi
       parameter (pi = 3.1415926d0)

diff --git a/epochX/cudacpp/ee_mumu.mad/Source/setrun.f b/epochX/cudacpp/ee_mumu.mad/Source/setrun.f
@@ -13,9 +13,9 @@ subroutine setrun
       include 'genps.inc'
       include 'run_config.inc'
       include 'PDF/pdf.inc'
+      include 'vector.inc'      ! defines VECSIZE_MEMMAX
       include 'run.inc'
       include 'alfas.inc'
-      include 'vector.inc' ! defines VECSIZE_MEMMAX
       include 'MODEL/coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc)
 
       double precision D

diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f
@@ -55,6 +55,7 @@ SUBROUTINE PREPARE_GROUPING_CHOICE(PP, WGT, INIT)
       INTEGER LMAPPED
 
       DOUBLE PRECISION DSIGPROC
+      INCLUDE 'vector.inc'
       INCLUDE 'run.inc'
 C     To limit the number of calls to switchmom, use in DSIGPROC the
 C     cached variable last_iconfig. It is in this subroutine as well
@@ -173,6 +174,7 @@ SUBROUTINE SELECT_GROUPING(IMIRROR, IPROC, ICONF, WGT,
       INTEGER GROUPED_MC_GRID_STATUS
       REAL*8 MC_GROUPED_PROC_JACOBIAN
       INTEGER LMAPPED
+      INCLUDE 'vector.inc'
       INCLUDE 'run.inc'
 C     Perform the selection
       CALL RANMAR(R)

diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/driver.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/driver.f
@@ -48,7 +48,8 @@ Program DRIVER
 c
       character*30 param_card_name
       common/to_param_card_name/param_card_name
-cc
+c     c
+      include 'vector.inc'
       include 'run.inc'
 
       integer           mincfig, maxcfig
@@ -71,7 +72,6 @@ Program DRIVER
 c      integer ncols,ncolflow(maxamps),ncolalt(maxamps),ic
 c      common/to_colstats/ncols,ncolflow,ncolalt,ic
 
-      include 'vector.inc' ! defines VECSIZE_MEMMAX
       include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc)
       INTEGER VECSIZE_USED
 

diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f
@@ -312,7 +312,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL,
         ENDIF
       ENDIF
       ANS=ANS/DBLE(IDEN)
-      CALL SELECT_COLOR(RCOL, JAMP2, CHANNEL,1,  ICOL)
+      CALL SELECT_COLOR(RCOL, JAMP2, ICONFIG,1,  ICOL)
       call counters_smatrix1_stop()
       END
 

diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cluster.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cluster.f
@@ -522,12 +522,11 @@ logical function cluster(p, ivec)
 c**************************************************************************
       implicit none
       include 'genps.inc'
-      include 'run.inc'
       include 'nexternal.inc'
       include 'maxamps.inc'
       include 'cluster.inc'
       include 'message.inc'
-
+      include 'run.inc'
       integer ivec
       real*8 p(0:3,nexternal), pcmsp(0:3), p1(0:3)
       real*8 pi(0:3), nr(0:3), pz(0:3)

diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cuts.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cuts.f
@@ -69,9 +69,10 @@ LOGICAL FUNCTION PASSCUTS(P, VECSIZE_USED)
 C
 C     GLOBAL
 C
+      include '../../Source/vector.inc' ! defines VECSIZE_MEMMAX
       include 'run.inc'
       include 'cuts.inc'
-      include '../../Source/vector.inc' ! defines VECSIZE_MEMMAX
+
 
       double precision ptjet(nexternal)
       double precision ptheavyjet(nexternal)

diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/dummy_fct.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/dummy_fct.f
@@ -42,6 +42,7 @@ logical FUNCTION dummy_cuts(P)
       subroutine get_dummy_x1(sjac, X1, R, pbeam1, pbeam2, stot, shat)
       implicit none
       include 'maxparticles.inc'
+      include 'vector.inc'
       include 'run.inc'
 c      include 'genps.inc'
       double precision sjac ! jacobian. should be updated not reinit
@@ -66,6 +67,7 @@ subroutine get_dummy_x1(sjac, X1, R, pbeam1, pbeam2, stot, shat)
       subroutine get_dummy_x1_x2(sjac, X, R, pbeam1, pbeam2, stot,shat)
       implicit none
       include 'maxparticles.inc'
+      include 'vector.inc'
       include 'run.inc'
 c      include 'genps.inc'
       double precision sjac ! jacobian. should be updated not reinit
@@ -104,6 +106,7 @@ double precision function user_dynamical_scale(P)
       double precision P(0:3, nexternal)
 c     Common block to have access to all variables defined in the run_card
       include 'genps.inc'
+      include 'vector.inc'
       include 'run.inc'
       write(0,*) "dynamical scale set to 0"
       write(0,*) "need to be defined via user_hook method"

diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/genps.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/genps.f
@@ -171,7 +171,8 @@ subroutine gen_mom(iconfig,mincfig,maxcfig,invar,wgt,x,p1)
       integer mothup(2,nexternal)
       integer icolup(2,nexternal,maxflow,maxsproc)
       include 'leshouche.inc'
-
+
+      include 'vector.inc'
       include 'run.inc'
 
 
@@ -605,6 +606,7 @@ subroutine configure_integral(iconfig,mincfig,maxcfig,invar,maxwgt)
       include 'maxconfigs.inc'
       include 'nexternal.inc'
       include 'maxamps.inc'
+      include 'vector.inc'
       include 'run.inc'
 
 c     local
@@ -749,6 +751,7 @@ subroutine one_tree(itree,tstrategy,iconfig,nbranch,P,M,S,X,jac,pswgt)
       double precision stot,m1,m2
       common/to_stot/stot,m1,m2
 
+      include 'vector.inc'
       include 'run.inc'
 
 c-----