Skip to content

Commit

Permalink
MLPF datasets v2.0.0: track pythia-level genjets, genmet in datasets;…
Browse files Browse the repository at this point in the history
… add per-particle ispu flag (jpata#332)

* generate ttbar nopu events

* up

* update postprocessing

* small sample generation

* v3_1 run

* updates for CMSSW 14 generation

* [skip ci] cleanup postprocessing

* [skip ci] update pu gen

* update postprocessing with new truth definition based only on caloparticles

* remove pdb, switch genjet to energy

* [skip ci] prepare for v3_3

* [skip ci] fix flag

* added time and mem limits

* pu files from scratch

* 20240702_cptruthdef submission

* ttbar nopu v2

* up

* added genjet, genmet to clic postprocessing

* remove delphes

* update tests

* add postprocessing jobs

* update torch

* update dataset version

* propagate genjets, genmet

* shared memory error

* training on v2.0.0 for cms

* fix occasional root file load bug

* add jmenano

* fix qq

* clic training

* up
  • Loading branch information
jpata authored and erwulff committed Aug 9, 2024
1 parent 6186d30 commit f823575
Show file tree
Hide file tree
Showing 100 changed files with 1,102 additions and 12,343 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
python-version: "3.10.12"
cache: "pip"
- run: pip install -r requirements.txt
- run: pip3 install torch==2.2.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
- run: pip3 install torch==2.3.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

pyg-unittests:
runs-on: ubuntu-22.04
Expand All @@ -40,7 +40,7 @@ jobs:
python-version: "3.10.12"
cache: "pip"
- run: pip install -r requirements.txt
- run: pip3 install torch==2.2.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
- run: pip3 install torch==2.3.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
- run: PYTHONPATH=. python3 -m unittest tests/test_torch_and_tf.py

pyg-pipeline:
Expand All @@ -53,5 +53,5 @@ jobs:
python-version: "3.10.12"
cache: "pip"
- run: pip install -r requirements.txt
- run: pip3 install torch==2.2.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
- run: pip3 install torch==2.3.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
- run: ./scripts/local_test_pyg.sh
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ repos:
# pre-commit's default_language_version, see
# https://pre-commit.com/#top_level-default_language_version
language_version: python3
args: [--line-length=125]
args: [--line-length=150]

- repo: https://github.com/PyCQA/flake8
rev: 6.0.0
Expand All @@ -45,5 +45,5 @@ repos:

# E203 is not PEP8 compliant
# E402 due to logging.basicConfig in pipeline.py
args: ['--max-line-length=125', # github viewer width
args: ['--max-line-length=150',
'--extend-ignore=E203,E402,W605']
3 changes: 0 additions & 3 deletions mlpf/customizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,6 @@ def customize_pipeline_test(config):
# don't use dynamic batching, as that can result in weird stuff with very few events
config["batching"]["bucket_by_sequence_length"] = False

if "delphes_pf_ttbar" in config["datasets"]:
config["train_test_datasets"]["physical"]["datasets"] = ["delphes_pf_ttbar"]

# for cms, keep only ttbar
if "cms_pf_ttbar" in config["datasets"]:
config["train_test_datasets"]["physical"]["datasets"] = ["cms_pf_ttbar"]
Expand Down
25 changes: 15 additions & 10 deletions mlpf/data_cms/genjob_nopu.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#!/bin/bash
#SBATCH --partition main
#SBATCH --partition short
#SBATCH --cpus-per-task 1
#SBATCH --mem-per-cpu 6G
#SBATCH -o slurm-%x-%j-%N.out
set -e
set -x

OUTDIR=/local/joosep/mlpf/cms/v3/nopu/
CMSSWDIR=/home/joosep/CMSSW_12_3_0_pre6
OUTDIR=/local/joosep/mlpf/cms/20240702_cptruthdef/nopu/
CMSSWDIR=/scratch/persistent/joosep/CMSSW_14_1_0_pre3
MLPF_PATH=/home/joosep/particleflow/

#seed must be greater than 0
Expand All @@ -22,7 +22,7 @@ mkdir -p $OUTDIR
PILEUP=NoPileUp
PILEUP_INPUT=

N=100
N=200

env
source /cvmfs/cms.cern.ch/cmsset_default.sh
Expand All @@ -38,11 +38,11 @@ cd $WORKDIR

#Generate the MC
cmsDriver.py $SAMPLE \
--conditions auto:phase1_2021_realistic \
--conditions auto:phase1_2023_realistic \
-n $N \
--era Run3 \
--era Run3_2023 \
--eventcontent FEVTDEBUGHLT \
-s GEN,SIM,DIGI,L1,DIGI2RAW,HLT \
-s GEN,SIM,DIGI:pdigi_valid,L1,DIGI2RAW,HLT:@relval2023 \
--datatier GEN-SIM \
--geometry DB:Extended \
--pileup $PILEUP \
Expand All @@ -53,8 +53,8 @@ cmsDriver.py $SAMPLE \

#Run the reco sequences
cmsDriver.py step3 \
--conditions auto:phase1_2021_realistic \
--era Run3 \
--conditions auto:phase1_2023_realistic \
--era Run3_2023 \
-n -1 \
--eventcontent FEVTDEBUGHLT \
--runUnscheduled \
Expand All @@ -75,8 +75,13 @@ cmsRun step2_phase1_new.py > /dev/null
cmsRun step3_phase1_new.py > /dev/null
#cmsRun $CMSSWDIR/src/Validation/RecoParticleFlow/test/pfanalysis_ntuple.py
mv pfntuple.root pfntuple_${SEED}.root
python3 ${MLPF_PATH}/mlpf/data_cms/postprocessing2.py --input pfntuple_${SEED}.root --outpath ./ --save-normalized-table
python3 ${MLPF_PATH}/mlpf/data_cms/postprocessing2.py --input pfntuple_${SEED}.root --outpath ./
bzip2 -z pfntuple_${SEED}.pkl
cp *.pkl.bz2 $OUTDIR/$SAMPLE/raw/

#copy ROOT outputs
#cp step2_phase1_new.root $OUTDIR/$SAMPLE/root/step2_${SEED}.root
#cp step3_phase1_new.root $OUTDIR/$SAMPLE/root/step3_${SEED}.root
#cp pfntuple_${SEED}.root $OUTDIR/$SAMPLE/root/

rm -Rf $WORKDIR
27 changes: 18 additions & 9 deletions mlpf/data_cms/genjob_pu55to75.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
#!/bin/bash
#SBATCH --partition main
#SBATCH --time 04:00:00
#SBATCH --cpus-per-task 1
#SBATCH --mem-per-cpu 6G
#SBATCH -o slurm-%x-%j-%N.out
set -e
set -x

OUTDIR=/local/joosep/mlpf/cms/v3/pu55to75/
CMSSWDIR=/home/joosep/CMSSW_12_3_0_pre6
OUTDIR=/local/joosep/mlpf/cms/20240702_cptruthdef/pu55to75/
CMSSWDIR=/scratch/persistent/joosep/CMSSW_14_1_0_pre3
MLPF_PATH=/home/joosep/particleflow/

#seed must be greater than 0
Expand All @@ -22,24 +23,27 @@ mkdir -p $OUTDIR
PILEUP=Run3_Flat55To75_PoissonOOTPU
PILEUP_INPUT=filelist:${MLPF_PATH}/mlpf/data_cms/pu_files_local.txt

N=20
N=50

env
source /cvmfs/cms.cern.ch/cmsset_default.sh

cd $CMSSWDIR
eval `scramv1 runtime -sh`
which python
which python3

env

cd $WORKDIR

#Generate the MC
cmsDriver.py $SAMPLE \
--conditions auto:phase1_2021_realistic \
--conditions auto:phase1_2023_realistic \
-n $N \
--era Run3 \
--era Run3_2023 \
--eventcontent FEVTDEBUGHLT \
-s GEN,SIM,DIGI,L1,DIGI2RAW,HLT \
-s GEN,SIM,DIGI:pdigi_valid,L1,DIGI2RAW,HLT:@relval2023 \
--datatier GEN-SIM \
--geometry DB:Extended \
--pileup $PILEUP \
Expand All @@ -51,8 +55,8 @@ cmsDriver.py $SAMPLE \

#Run the reco sequences
cmsDriver.py step3 \
--conditions auto:phase1_2021_realistic \
--era Run3 \
--conditions auto:phase1_2023_realistic \
--era Run3_2023 \
-n -1 \
--eventcontent FEVTDEBUGHLT \
--runUnscheduled \
Expand All @@ -73,8 +77,13 @@ cmsRun step2_phase1_new.py > /dev/null
cmsRun step3_phase1_new.py > /dev/null
#cmsRun $CMSSWDIR/src/Validation/RecoParticleFlow/test/pfanalysis_ntuple.py
mv pfntuple.root pfntuple_${SEED}.root
python3 ${MLPF_PATH}/mlpf/data_cms/postprocessing2.py --input pfntuple_${SEED}.root --outpath ./ --save-normalized-table
python3 ${MLPF_PATH}/mlpf/data_cms/postprocessing2.py --input pfntuple_${SEED}.root --outpath ./
bzip2 -z pfntuple_${SEED}.pkl
cp *.pkl.bz2 $OUTDIR/$SAMPLE/raw/

#copy ROOT outputs
#cp step2_phase1_new.root $OUTDIR/$SAMPLE/root/step2_${SEED}.root
#cp step3_phase1_new.root $OUTDIR/$SAMPLE/root/step3_${SEED}.root
#cp pfntuple_${SEED}.root $OUTDIR/$SAMPLE/root/

rm -Rf $WORKDIR
Loading

0 comments on commit f823575

Please sign in to comment.