Feb 2023: tuning the downstream MLPF model [pyg] (#166)

* fix simulation seed setting * tuning the models * add focal loss, ee jet algo * run sim with PU * tuning * increase dropout
jpata · Feb 6, 2023 · bda3912 · bda3912
1 parent 73254b6
commit bda3912
Show file tree

Hide file tree

Showing 18 changed files with 746 additions and 233 deletions.
diff --git a/fcc/main19.cc b/fcc/main19.cc
@@ -0,0 +1,133 @@
+// main19.cc is a part of the PYTHIA event generator.
+// Copyright (C) 2022 Torbjorn Sjostrand.
+// PYTHIA is licenced under the GNU GPL v2 or later, see COPYING for details.
+// Please respect the MCnet Guidelines, see GUIDELINES for details.
+
+// Modified by Joosep Pata to keep only PU
+//g++ main19.cc -o main -I/home/joosep/pythia8308/include -I/home/joosep/HepMC3/hepmc3-install/include/ -L/home/joosep/HepMC3/hepmc3-install/lib/ -O2 -std=c++11 -pedantic -W -Wall -Wshadow -fPIC -pthread  -L/home/joosep/pythia8308/lib -Wl,-rpath,/home/joosep/pythia8308/lib -lpythia8 -ldl -lHepMC3
+
+#include "Pythia8/Pythia.h"
+#include "Pythia8Plugins/HepMC3.h"
+#include <string>
+using namespace Pythia8;
+
+//==========================================================================
+
+// Draw an integer from a Poisson distribution with mean nAvg.
+//
+// One random number from rndm.flat() is scaled by exp(nAvg) and compared
+// against the running sum of the series terms nAvg^n / n!; the first n for
+// which the sum exceeds the scaled random number is returned.
+// The result is capped at 100 so that the loop always terminates.
+
+int poisson(double nAvg, Rndm& rndm) {
+
+  // Upper bound on the returned value.
+  const int NMAX = 100;
+
+  // Target that the cumulative sum has to exceed.
+  const double target = rndm.flat() * exp(nAvg);
+
+  // Running sum and current series term nAvg^n / n!.
+  double cumulative = 0.;
+  double term       = 1.;
+
+  int n = 0;
+  while (n < NMAX) {
+
+    // Accept n as soon as the cumulative sum passes the target.
+    cumulative += term;
+    if (cumulative > target) return n;
+
+    // Move on to the next term of the series.
+    ++n;
+    term *= nAvg / n;
+  }
+
+  // Cap reached without passing the target.
+  return NMAX;
+}
+
+//==========================================================================
+
+// Apply a command file and the run seed to a generator, then initialize it.
+// Random:setSeed is switched on explicitly so that the seed is honoured even
+// when the command file does not enable it. Returns false on any failure.
+static bool configureGenerator(Pythia& pythia, const std::string& cardFile,
+  const std::string& seedStr) {
+  if (!pythia.readFile(cardFile)) {
+    std::cerr << "Could not read command file " << cardFile << std::endl;
+    return false;
+  }
+  if (!pythia.readString("Random:setSeed = on") || !pythia.readString(seedStr)) {
+    std::cerr << "Could not apply seed setting: " << seedStr << std::endl;
+    return false;
+  }
+  if (!pythia.init()) {
+    std::cerr << "Pythia initialization failed for " << cardFile << std::endl;
+    return false;
+  }
+  return true;
+}
+
+// Generate signal events, overlay a Poisson-distributed number of pileup
+// events on each of them and write the combined events to "pythia.hepmc".
+//
+// Usage: ./main SEED
+//   SEED is passed on to both generators as the value of Random:seed.
+//
+// Returns 0 on success and 1 on a usage error, a generator set-up failure
+// or a failed HepMC conversion.
+int main(int argc, char *argv[]) {
+
+  // Number of signal events to generate.
+  int nEvent = 100;
+
+  if (argc != 2) {
+    std::cerr << "./main SEED" << std::endl;
+    return 1;
+  }
+
+  std::string seedStr = std::string("Random:seed = ").append(std::string(argv[1]));
+
+  // Average number of pileup events per signal event.
+  double nPileupAvg = 10.0;
+
+  // The i-th pileup event of a signal event has its production times shifted
+  // by i * timeDelta to mimic ee overlay.
+  double timeDelta = 0.5;
+
+  Pythia8ToHepMC ToHepMC;
+  ToHepMC.setNewFile("pythia.hepmc");
+
+  // Signal generator instance, configured from card.cmd.
+  Pythia pythiaSignal;
+  if (!configureGenerator(pythiaSignal, "card.cmd", seedStr)) return 1;
+
+  // Pileup generator instance, configured from its own command file.
+  Pythia pythiaPileup;
+  if (!configureGenerator(pythiaPileup, "p8_ee_gg_ecm380.cmd", seedStr)) return 1;
+
+  // One object where all individual events are to be collected.
+  Event sumEvent;
+
+  // Loop over events.
+  for (int iEvent = 0; iEvent < nEvent; ++iEvent) {
+
+    HepMC3::GenEvent geneve;
+
+    // Generate a signal event. Copy this event into sumEvent.
+    if (!pythiaSignal.next()) continue;
+    sumEvent = pythiaSignal.event;
+    bool fill_result = ToHepMC.fill_next_event(pythiaSignal, &geneve);
+    if (!fill_result) {
+      std::cerr << "Error converting to HepMC" << std::endl;
+      return 1;
+    }
+
+    // Select the number of pileup events to generate.
+    int nPileup = poisson(nPileupAvg, pythiaPileup.rndm);
+
+    // Generate a number of pileup events. Add them to sumEvent.
+    for (int iPileup = 0; iPileup < nPileup; ++iPileup) {
+
+      // Do not convert a pileup event whose generation failed.
+      if (!pythiaPileup.next()) {
+        std::cerr << "Pileup generation failed, skipping pileup event" << std::endl;
+        continue;
+      }
+
+      // Shift the production time of every particle before the event is
+      // converted, so that the HepMC record and sumEvent both carry the shift.
+      // NOTE(review): assumes the HepMC converter reads the production
+      // vertices at fill time - confirm against Pythia8Plugins/HepMC3.h.
+      const double timeOffset = iPileup * timeDelta;
+      for (int iPtcl = 0; iPtcl < pythiaPileup.event.size(); ++iPtcl) {
+        auto& ptcl = pythiaPileup.event[iPtcl];
+        ptcl.vProd(ptcl.xProd(), ptcl.yProd(), ptcl.zProd(), ptcl.tProd() + timeOffset);
+      }
+
+      fill_result = ToHepMC.fill_next_event(pythiaPileup, &geneve);
+      if (!fill_result) {
+        std::cerr << "Error converting to HepMC" << std::endl;
+        return 1;
+      }
+      sumEvent += pythiaPileup.event;
+    }
+
+    std::cout << "hepmc=" << geneve.particles().size() << " pythia=" << sumEvent.size() << std::endl;
+    ToHepMC.output().write_event(geneve);
+
+    // List first few events.
+    if (iEvent < 5) {
+      std::cout << "sumEvent" << std::endl;
+      sumEvent.list();
+    }
+
+  // End of event loop
+  }
+
+  // Statistics. Histograms.
+  pythiaSignal.stat();
+  pythiaPileup.stat();
+
+  return 0;
+}
diff --git a/fcc/p8_ee_ZH_Htautau_ecm380.cmd b/fcc/p8_ee_ZH_Htautau_ecm380.cmd
@@ -6,9 +6,9 @@ Main:timesAllowErrors = 5          ! how many aborts before run stops
 Init:showChangedSettings = on      ! list changed settings
 Init:showChangedParticleData = off ! list changed particle data
 Next:numberCount = 100             ! print message every n events
-Next:numberShowInfo = 1            ! print event information n times
-Next:numberShowProcess = 1         ! print process record n times
-Next:numberShowEvent = 0           ! print event record n times
+Next:numberShowInfo = 100          ! print event information n times
+Next:numberShowProcess = 100       ! print process record n times
+Next:numberShowEvent = 100         ! print event record n times
 
 Beams:idA = 11                   ! first beam, e- = 11
 Beams:idB = -11                   ! second beam, e+ = -11

diff --git a/fcc/p8_ee_gg_ecm365.cmd b/fcc/p8_ee_gg_ecm365.cmd
@@ -6,10 +6,10 @@ Main:timesAllowErrors = 5          ! how many aborts before run stops
 ! 2) Settings related to output in init(), next() and stat().
 Init:showChangedSettings = on      ! list changed settings
 Init:showChangedParticleData = off ! list changed particle data
-Next:numberCount = 100            ! print message every n events
-Next:numberShowInfo = 1            ! print event information n times
-Next:numberShowProcess = 1         ! print process record n times
-Next:numberShowEvent = 0           ! print event record n times
+Next:numberCount = 100             ! print message every n events
+Next:numberShowInfo = 100          ! print event information n times
+Next:numberShowProcess = 100       ! print process record n times
+Next:numberShowEvent = 100         ! print event record n times
 Stat:showPartonLevel = off
 
 ! 3) Beam parameter settings. Values below agree with default ones.
@@ -24,4 +24,3 @@ PhotonCollision:gmgm2bbbar = on
 
 PartonLevel:ISR = on               ! initial-state radiation
 PartonLevel:FSR = on               ! final-state radiation
-
diff --git a/fcc/p8_ee_gg_ecm380.cmd b/fcc/p8_ee_gg_ecm380.cmd
@@ -0,0 +1,26 @@
+Random:setSeed = on
+Main:numberOfEvents = 3000         ! number of events to generate
+Main:timesAllowErrors = 5          ! how many aborts before run stops
+
+
+! 2) Settings related to output in init(), next() and stat().
+Init:showChangedSettings = on      ! list changed settings
+Init:showChangedParticleData = off ! list changed particle data
+Next:numberCount = 100             ! print message every n events
+Next:numberShowInfo = 100          ! print event information n times
+Next:numberShowProcess = 100       ! print process record n times
+Next:numberShowEvent = 100         ! print event record n times
+Stat:showPartonLevel = off
+
+! 3) Beam parameter settings. Values below agree with default ones.
+Beams:idA = 11                   ! first beam, e- = 11
+Beams:idB = -11                  ! second beam, e+ = -11
+
+! 4) Hard process : photon collisions at 380
+Beams:eCM = 380
+PhotonCollision:gmgm2qqbar = on
+PhotonCollision:gmgm2ccbar = on
+PhotonCollision:gmgm2bbbar = on
+
+PartonLevel:ISR = on               ! initial-state radiation
+PartonLevel:FSR = on               ! final-state radiation
diff --git a/fcc/run_sim.sh b/fcc/run_sim.sh
@@ -9,7 +9,6 @@ set -x
 env
 df -h
 
-WORKDIR=/scratch/$USER/${SLURM_JOB_ID}
 OUTDIR=`pwd`
 PFDIR=/home/joosep/particleflow
 NEV=100
@@ -20,27 +19,34 @@ NUM=$1
 #SAMPLE=p8_ee_ZZ_fullhad_ecm365
 #SAMPLE=p8_ee_qcd_ecm365
 #SAMPLE=p8_ee_qcd_ecm380
-#SAMPLE=p8_ee_ZH_Htautau_ecm380
-SAMPLE=p8_ee_qcd_ecm380
+SAMPLE=p8_ee_ZH_Htautau_ecm380
+#SAMPLE=p8_ee_qcd_ecm380
+#SAMPLE=p8_ee_gg_ecm365
+
+WORKDIR=/scratch/$USER/${SAMPLE}_${SLURM_JOB_ID}
+FULLOUTDIR=${OUTDIR}/${SAMPLE}_PU10
 
-mkdir -p $OUTDIR/$SAMPLE
+mkdir -p $FULLOUTDIR
 
 mkdir -p $WORKDIR
 cd $WORKDIR
 
-ls -al /cvmfs
-ls -al /cvmfs/sw.hsf.org
-source /cvmfs/sw.hsf.org/spackages6/key4hep-stack/2022-12-23/x86_64-centos7-gcc11.2.0-opt/ll3gi/setup.sh
-
+cp $PFDIR/fcc/main ./
 cp $PFDIR/fcc/${SAMPLE}.cmd card.cmd
+cp $PFDIR/fcc/p8_ee_gg_ecm380.cmd ./
 cp $PFDIR/fcc/pythia.py ./
 cp $PFDIR/fcc/clic_steer.py ./
 cp -R $PFDIR/fcc/PandoraSettings ./
 cp -R $PFDIR/fcc/clicRec_e4h_input.py ./
 
-echo "Random:seed=${NUM}" >> card.cmd
+#k4run $PFDIR/fcc/pythia.py -n $NEV --Dumper.Filename out.hepmc --Pythia8.PythiaInterface.pythiacard card.cmd &> log1
+LD_LIBRARY_PATH=/home/joosep/HepMC3/hepmc3-install/lib/:/home/joosep/pythia8308/lib/ ./main $NUM &> log1
+mv pythia.hepmc out.hepmc
+
+ls -al /cvmfs
+ls -al /cvmfs/sw.hsf.org
+source /cvmfs/sw.hsf.org/spackages6/key4hep-stack/2022-12-23/x86_64-centos7-gcc11.2.0-opt/ll3gi/setup.sh
 
-k4run $PFDIR/fcc/pythia.py -n $NEV --Dumper.Filename out.hepmc --Pythia8.PythiaInterface.pythiacard card.cmd &> log1
 ddsim --compactFile $LCGEO/CLIC/compact/CLIC_o3_v14/CLIC_o3_v14.xml \
       --outputFile out_sim_edm4hep.root \
       --steeringFile clic_steer.py \
@@ -52,6 +58,6 @@ cp out_reco_edm4hep.root reco_${SAMPLE}_${NUM}.root
 #ddsim --steeringFile clic_steer.py --compactFile $LCGEO/CLIC/compact/CLIC_o3_v14/CLIC_o3_v14.xml --enableGun --gun.distribution uniform --gun.particle pi- --gun.energy 10*GeV --outputFile piminus_10GeV_edm4hep.root --numberOfEvents $NEV &> log_step1_piminus.txt
 #k4run clicRec_e4h_input.py -n $NEV --EventDataSvc.input piminus_10GeV_edm4hep.root --PodioOutput.filename piminus_reco.root &> log_step2_piminus.txt
 
-cp reco_${SAMPLE}_${NUM}.root $OUTDIR/$SAMPLE/
+cp reco_${SAMPLE}_${NUM}.root $FULLOUTDIR/
 
-rm -Rf $WORKDIR
+rm -Rf $WORKDIR/*.root
diff --git a/mlpf/pyg_ssl/args.py b/mlpf/pyg_ssl/args.py
@@ -23,9 +23,10 @@ def parse_args():
     parser.add_argument("--ssl", dest="ssl", action="store_true", help="Train ssl-based MLPF")
     parser.add_argument("--native", dest="native", action="store_true", help="Train native")
 
-    parser.add_argument("--prefix_VICReg", type=str, default="VICReg_model", help="directory to hold the VICReg model")
+    parser.add_argument("--prefix_VICReg", type=str, default=None, help="directory to hold the VICReg model")
     parser.add_argument("--prefix_mlpf", type=str, default="MLPF_model", help="directory to hold the mlpf model")
     parser.add_argument("--overwrite", dest="overwrite", action="store_true", help="overwrites the model if True")
+    parser.add_argument("--evaluate", default=False, action="store_true", help="Run model evaluation")
 
     # training hyperparameters
     parser.add_argument("--lmbd", type=float, default=0.01, help="the lambda term in the VICReg loss")
@@ -55,6 +56,8 @@ def parse_args():
 
     # MLPF architecture
     parser.add_argument("--width_mlpf", type=int, default=126, help="hidden dimension of mlpf")
+    parser.add_argument("--num_convs_mlpf", type=int, default=3, help="number of graph layers for mlpf")
+    parser.add_argument("--dropout_mlpf", type=float, default=0.4, help="dropout for MLPF model")
 
     args = parser.parse_args()
 

diff --git a/mlpf/pyg_ssl/evaluate.py b/mlpf/pyg_ssl/evaluate.py
@@ -48,7 +48,7 @@ def particle_array_to_awkward(batch_ids, arr_id, arr_p4):
 
 def evaluate(device, encoder, decoder, mlpf, batch_size_mlpf, mode, outpath, data_, save_as_):
 
-    jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.4)
+    jetdef = fastjet.JetDefinition(fastjet.ee_genkt_algorithm, 0.7, -1.0)
     jet_pt = 5.0
     jet_match_dr = 0.1