Skip to content

Commit

Permalink
CMS training instructions (jpata#336)
Browse files Browse the repository at this point in the history
* CMS training instructions
  • Loading branch information
jpata authored Jul 17, 2024
1 parent a0f4428 commit c5650aa
Show file tree
Hide file tree
Showing 5 changed files with 204 additions and 26 deletions.
30 changes: 29 additions & 1 deletion mlpf/data_cms/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,4 +80,32 @@ cd particleflow
The MINIAOD output will be in `$CMSSW_BASE/out/QCD_PU_mlpf` and `$CMSSW_BASE/out/QCD_PU_pf`.

## Generating MLPF training samples
TODO.

To regenerate the ML training samples from scratch with CMSSW, see the following scripts:
```
mlpf/data_cms/genjob_nopu.sh
mlpf/data_cms/genjob_pu55to75.sh
```

## PyTorch training

Copy the datasets from EOS (about 500 GB) into a local `tensorflow_datasets` directory:
```
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms ./tensorflow_datasets
```

Download the Singularity image that contains the PyTorch distribution:
```
wget https://jpata.web.cern.ch/jpata/pytorch.simg
```

On a machine with a single GPU, the following command runs a quick test of the training workflow (10 epochs, with `--ntrain`, `--ntest` and `--nvalid` each limited to 1000):
```
singularity exec --env CUDA_VISIBLE_DEVICES=0 -B /scratch/persistent --nv \
--env PYTHONPATH=hep_tfds \
--env KERAS_BACKEND=torch \
pytorch.simg python3.10 mlpf/pyg_pipeline.py --dataset cms --gpus 1 \
--data-dir ./tensorflow_datasets --config parameters/pytorch/pyg-cms.yaml \
--train --test --make-plots --conv-type attention --num-epochs 10 --gpu-batch-multiplier 1 \
--num-workers 4 --prefetch-factor 100 --checkpoint-freq 1 --ntrain 1000 --ntest 1000 --nvalid 1000
```
5 changes: 2 additions & 3 deletions mlpf/data_cms/prepare_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@
outdir = "/local/joosep/mlpf/cms/20240702_cptruthdef"

samples = [
# ("TTbar_14TeV_TuneCUETP8M1_cfi", 100000, 110010, "genjob_pu55to75.sh", outdir + "/pu55to75"),
("TTbar_14TeV_TuneCUETP8M1_cfi", 100000, 110010, "genjob_pu55to75.sh", outdir + "/pu55to75"),
# ("ZTT_All_hadronic_14TeV_TuneCUETP8M1_cfi", 200000, 220010, "genjob_pu55to75.sh", outdir + "/pu55to75"),
# ("QCDForPF_14TeV_TuneCUETP8M1_cfi", 300000, 310010, "genjob_pu55to75.sh", outdir + "/pu55to75"),
# ("QCD_Pt_3000_7000_14TeV_TuneCUETP8M1_cfi", 400000, 420010, "genjob_pu55to75.sh", outdir + "/pu55to75"),
("QCDForPF_14TeV_TuneCUETP8M1_cfi", 300000, 310010, "genjob_pu55to75.sh", outdir + "/pu55to75"),
# ("SMS-T1tttt_mGl-1500_mLSP-100_TuneCP5_14TeV_pythia8_cfi", 500000, 520010, "genjob_pu55to75.sh", outdir + "/pu55to75"),
# ("ZpTT_1500_14TeV_TuneCP5_cfi", 600000, 620010, "genjob_pu55to75.sh", outdir + "/pu55to75"),
# ("VBF_TuneCP5_14TeV_pythia8_cfi", 1700000,1720010, "genjob_pu55to75.sh", outdir + "/pu55to75"),
Expand Down
157 changes: 157 additions & 0 deletions notebooks/cms/cms-dataset.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "0934d053-9a23-4b3b-a615-a6b65ba692e4",
"metadata": {},
"outputs": [],
"source": [
"import tensorflow_datasets as tfds\n",
"import vector\n",
"import awkward\n",
"import numpy as np\n",
"import fastjet\n",
"\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e9bfd5e-57b7-47cb-9eba-3a85608fd689",
"metadata": {},
"outputs": [],
"source": [
"#from mlpf/heptfds/cms_pf/cms_utils.py\n",
"CLASS_LABELS_CMS = [0, 211, 130, 1, 2, 22, 11, 13]\n",
"Y_FEATURES = [\n",
" \"typ_idx\",\n",
" \"charge\",\n",
" \"pt\",\n",
" \"eta\",\n",
" \"sin_phi\",\n",
" \"cos_phi\",\n",
" \"e\",\n",
" \"ispu\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "caa09c76-5700-49c4-985c-0b7e09949b14",
"metadata": {},
"outputs": [],
"source": [
"builder = tfds.builder(\"cms_pf_ttbar\", data_dir=\"/scratch/persistent/joosep/tensorflow_datasets/\")\n",
"ds_train = builder.as_data_source(split=\"train\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d134fd6-18be-4446-93fc-22b37b6c79d6",
"metadata": {},
"outputs": [],
"source": [
"all_genjets = []\n",
"all_genparticles = []\n",
"\n",
"#loop over the first 100 events in the dataset\n",
"for iev in range(100):\n",
" el = ds_train[iev]\n",
" print(len(el[\"X\"]), el.keys())\n",
" \n",
" genjets = vector.awk(awkward.zip({\"pt\": el[\"genjets\"][:, 0], \"eta\": el[\"genjets\"][:, 1], \"phi\": el[\"genjets\"][:, 2], \"e\": el[\"genjets\"][:, 3]}))\n",
" mask_genparticles = el[\"ygen\"][:, 0]!=0\n",
" genparticles = el[\"ygen\"][mask_genparticles]\n",
" \n",
" gp_phi = np.arctan2(genparticles[:, 4], genparticles[:, 5]) #sphi,cphi -> phi\n",
" genparticles_p4 = vector.awk(awkward.zip({\"pt\": genparticles[:, 2], \"eta\": genparticles[:, 3], \"phi\": gp_phi, \"e\": genparticles[:, 6]}))\n",
" gp_ispu = genparticles[:, 7]\n",
" gp_pid = np.array(CLASS_LABELS_CMS)[genparticles[:, 0].astype(np.int64)]\n",
" genparticles = awkward.Record({\n",
" \"pid\": gp_pid,\n",
" \"p4\": genparticles_p4,\n",
" \"ispu\": genparticles[:, 7],\n",
" })\n",
"\n",
" all_genjets.append(genjets)\n",
" all_genparticles.append(genparticles)\n",
"\n",
"all_genjets = awkward.from_iter(all_genjets)\n",
"all_genparticles = awkward.from_iter(all_genparticles)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d6f44505-a012-40f1-9ec3-9193fe386b91",
"metadata": {},
"outputs": [],
"source": [
"p4 = vector.awk(\n",
" awkward.zip(\n",
" {\n",
" \"pt\": all_genparticles.p4.rho,\n",
" \"eta\": all_genparticles.p4.eta,\n",
" \"phi\": all_genparticles.p4.phi,\n",
" \"e\": all_genparticles.p4.t,\n",
" }\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db34fefb-3ddf-42c2-97d4-3feab9f2d1c4",
"metadata": {},
"outputs": [],
"source": [
"jetdef = fastjet.JetDefinition(fastjet.antikt_algorithm, 0.4)\n",
"cluster = fastjet.ClusterSequence(p4.to_xyzt(), jetdef)\n",
"jets = cluster.inclusive_jets(min_pt=10)\n",
"\n",
"cluster = fastjet.ClusterSequence(p4.to_xyzt()[all_genparticles.ispu==0], jetdef)\n",
"jets_nopu = cluster.inclusive_jets(min_pt=10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "32beedbc-1a71-41cf-ac4c-a9cd130017c6",
"metadata": {},
"outputs": [],
"source": [
"b = np.linspace(10,100,100)\n",
"plt.hist(awkward.flatten(all_genjets.rho), bins=b, histtype=\"step\", label=\"genjets\");\n",
"plt.hist(awkward.flatten(jets.pt), bins=b, histtype=\"step\", label=\"all gp jets\");\n",
"plt.hist(awkward.flatten(jets_nopu.pt), bins=b, histtype=\"step\", label=\"ispu=0 gp jets\");\n",
"plt.legend()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
12 changes: 6 additions & 6 deletions scripts/cmssw/validation_job.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ NJOB=$4
PREVDIR=`pwd`

# Change these as needed; the directories need enough space for the job outputs.
#OUTDIR=$CMSSW_BASE/out/
#WORKDIR=$CMSSW_BASE/work_${SAMPLE}_${JOBTYPE}_${NJOB}
OUTDIR=$CMSSW_BASE/out/
WORKDIR=$CMSSW_BASE/work_${SAMPLE}_${JOBTYPE}_${NJOB}

# uncomment the following when running at T2_EE_Estonia
source /cvmfs/cms.cern.ch/cmsset_default.sh
cd /scratch/persistent/joosep/CMSSW_14_1_0_pre3
eval `scram runtime -sh`
cd $PREVDIR
# source /cvmfs/cms.cern.ch/cmsset_default.sh
# cd /scratch/persistent/joosep/CMSSW_14_1_0_pre3
# eval `scram runtime -sh`
# cd $PREVDIR

export OUTDIR=/local/joosep/mlpf/results/cms/${CMSSW_VERSION}_56e13b/
export WORKDIR=/scratch/local/$USER/${SLURM_JOB_ID}
Expand Down
26 changes: 10 additions & 16 deletions scripts/tallinn/submit_validate_cms.sh
Original file line number Diff line number Diff line change
@@ -1,19 +1,13 @@
#!/bin/bash
# Submit CMSSW validation jobs with sbatch.
# Each submission hands scripts/tallinn/cmssw-el8.sh the job script
# scripts/cmssw/validation_job.sh followed by four arguments: a job type
# (mlpf or pf), a text file listing inputs, a sample name, and a number.
# NOTE(review): the number is presumably the 1-based line index of the input
# file inside the list -- confirm against validation_job.sh.
# NOTE(review): this view appears to interleave the removed and added sides of
# a diff (explicit submissions and commented-out loops next to live loops) --
# confirm which lines are meant to be active before running.

# Explicit mlpf submissions for entries 1, 6, 11, 15 and 39 of the QCD_PU list.
sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh mlpf scripts/cmssw/qcd_pu.txt QCD_PU 1
sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh mlpf scripts/cmssw/qcd_pu.txt QCD_PU 6
sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh mlpf scripts/cmssw/qcd_pu.txt QCD_PU 11
sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh mlpf scripts/cmssw/qcd_pu.txt QCD_PU 15
sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh mlpf scripts/cmssw/qcd_pu.txt QCD_PU 39
# END is the number of lines in the QCD_PU list (first field of `wc -l`).
END=`wc -l scripts/cmssw/qcd_pu.txt | cut -f1 -d' '`
# For every index 1..END submit one mlpf job and one pf job on the QCD_PU list.
for ifile in $(seq 1 $END); do
sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh mlpf scripts/cmssw/qcd_pu.txt QCD_PU $ifile
sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh pf scripts/cmssw/qcd_pu.txt QCD_PU $ifile
done

#END=`wc -l scripts/cmssw/qcd_pu.txt | cut -f1 -d' '`
#for ifile in $(seq 1 $END); do
# sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh mlpf scripts/cmssw/qcd_pu.txt QCD_PU $ifile
# sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh pf scripts/cmssw/qcd_pu.txt QCD_PU $ifile
#done
#
#END=`wc -l scripts/cmssw/ttbar_pu.txt | cut -f1 -d' '`
#for ifile in $(seq 1 $END); do
# sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh mlpf scripts/cmssw/ttbar_pu.txt TTbar_PU $ifile
# sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh pf scripts/cmssw/ttbar_pu.txt TTbar_PU $ifile
#done
# END is recomputed as the number of lines in the TTbar_PU list.
END=`wc -l scripts/cmssw/ttbar_pu.txt | cut -f1 -d' '`
# For every index 1..END submit one mlpf job and one pf job on the TTbar_PU list.
for ifile in $(seq 1 $END); do
sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh mlpf scripts/cmssw/ttbar_pu.txt TTbar_PU $ifile
sbatch scripts/tallinn/cmssw-el8.sh scripts/cmssw/validation_job.sh pf scripts/cmssw/ttbar_pu.txt TTbar_PU $ifile
done

0 comments on commit c5650aa

Please sign in to comment.