Skip to content

Commit

Permalink
multi-bin loss in TF, plot fixes (jpata#234)
Browse files Browse the repository at this point in the history
* training with multi-bin loss component
  • Loading branch information
jpata authored and farakiko committed Jan 23, 2024
1 parent 76808a6 commit e8a8203
Show file tree
Hide file tree
Showing 26 changed files with 159 additions and 70 deletions.
2 changes: 1 addition & 1 deletion mlpf/data_cms/prepare_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# "SingleProtonMinusFlatPt0p7To1000_cfi",
# "SingleTauFlatPt1To1000_cfi",
# "MultiParticlePFGun_cfi",
("MultiParticlePFGun50_cfi", 100000, 102050),
("MultiParticlePFGun50_cfi", 100000, 110050),
]

samples_pu = [
Expand Down
3 changes: 1 addition & 2 deletions mlpf/heptfds/clic_pf_edm4hep_hits/qq_10k.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ class ClicEdmQqHitsPf10k(tfds.core.GeneratorBasedBuilder):
MANUAL_DOWNLOAD_INSTRUCTIONS = """
For the raw input files in ROOT EDM4HEP format, please see the citation above.
The processed tensorflow_dataset can also be downloaded from:
FIXME
The processed tensorflow_dataset can also be downloaded from: https://zenodo.org/record/8414225
"""

def __init__(self, *args, **kwargs):
Expand Down
3 changes: 1 addition & 2 deletions mlpf/heptfds/clic_pf_edm4hep_hits/ttbar_10k.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ class ClicEdmTtbarHitsPf10k(tfds.core.GeneratorBasedBuilder):
MANUAL_DOWNLOAD_INSTRUCTIONS = """
For the raw input files in ROOT EDM4HEP format, please see the citation above.
The processed tensorflow_dataset can also be downloaded from:
FIXME
The processed tensorflow dataset can also be downloaded from: https://zenodo.org/record/8414225
"""

def __init__(self, *args, **kwargs):
Expand Down
7 changes: 5 additions & 2 deletions mlpf/heptfds/cms_pf/multiparticlegun.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@
class CmsPfMultiParticleGun(tfds.core.GeneratorBasedBuilder):
"""DatasetBuilder for cms_pf_multi_particle_gun dataset."""

VERSION = tfds.core.Version("1.6.0")
VERSION = tfds.core.Version("1.6.1")
RELEASE_NOTES = {
"1.6.0": "Initial release",
"1.6.1": "Additional stats",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cms/MultiParticlePFGun_cfi data/
rsync -r --progress \
lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_multi_particle_gun \
~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion mlpf/heptfds/cms_pf/qcd.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class CmsPfQcd(tfds.core.GeneratorBasedBuilder):
"1.6.0": "Regenerate with ARRAY_RECORD",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
FIXME
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_qcd ~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion mlpf/heptfds/cms_pf/qcd_high_pt.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ class CmsPfQcdHighPt(tfds.core.GeneratorBasedBuilder):
"1.6.0": "Regenerate with ARRAY_RECORD",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
FIXME
rsync -r --progress \
lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_qcd_high_pt \
~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion mlpf/heptfds/cms_pf/singleele.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ class CmsPfSingleElectron(tfds.core.GeneratorBasedBuilder):
"1.6.0": "Regenerate with ARRAY_RECORD",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cms/SingleElectronFlatPt1To100_pythia8_cfi data/
rsync -r --progress \
lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_single_electron \
~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion mlpf/heptfds/cms_pf/singlegamma.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ class CmsPfSingleGamma(tfds.core.GeneratorBasedBuilder):
"1.6.0": "Regenerate with ARRAY_RECORD",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cms/SingleGammaFlatPt10To100_pythia8_cfi data/
rsync -r --progress \
lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_single_gamma \
~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion mlpf/heptfds/cms_pf/singlemu.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class CmsPfSingleMu(tfds.core.GeneratorBasedBuilder):
"1.6.0": "Regenerate with ARRAY_RECORD",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cms/SingleMuFlatLogPt_100MeVto2TeV_cfi data/
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_single_mu ~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion mlpf/heptfds/cms_pf/singleneutron.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ class CmsPfSingleNeutron(tfds.core.GeneratorBasedBuilder):
"1.6.0": "Regenerate with ARRAY_RECORD",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cms/SingleNeutronFlatPt0p7To1000_cfi/data/
rsync -r --progress \
lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_single_neutron \
~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion mlpf/heptfds/cms_pf/singlepi.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class CmsPfSinglePi(tfds.core.GeneratorBasedBuilder):
"1.6.0": "Regenerate with ARRAY_RECORD",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cms/SinglePiMinusFlatPt0p7To1000_cfi data/
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_single_pi ~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion mlpf/heptfds/cms_pf/singlepi0.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ class CmsPfSinglePi0(tfds.core.GeneratorBasedBuilder):
"1.6.0": "Regenerate with ARRAY_RECORD",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cms/SinglePi0E10_pythia8_cfi data/
rsync -r --progress \
lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_single_pi0 \
~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion mlpf/heptfds/cms_pf/singleproton.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ class CmsPfSingleProton(tfds.core.GeneratorBasedBuilder):
"1.6.0": "Regenerate with ARRAY_RECORD",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cms/SingleProtonMinusFlatPt0p7To1000_cfi/data/
rsync -r --progress \
lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_single_proton \
~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion mlpf/heptfds/cms_pf/singletau.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ class CmsPfSingleTau(tfds.core.GeneratorBasedBuilder):
"1.6.0": "Regenerate with ARRAY_RECORD",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cms/SingleTauFlatPt1To1000_cfi data/
rsync -r --progress \
lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_single_tau \
~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
64 changes: 64 additions & 0 deletions mlpf/heptfds/cms_pf/smst1tttt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""CMS PF TTbar dataset."""
import cms_utils
import tensorflow as tf

import tensorflow_datasets as tfds

# Feature definitions re-exported from cms_utils (presumably sequences of
# feature names -- confirm in cms_utils). The builder below uses their lengths
# for the last tensor dimension and stores them in the dataset metadata.
X_FEATURES = cms_utils.X_FEATURES
Y_FEATURES = cms_utils.Y_FEATURES

# Free-text description handed to tfds.core.DatasetInfo by the builder.
_DESCRIPTION = """
Dataset generated with CMSSW and full detector sim.
SMS-T1tttt events with PU~55 in a Run3 setup.
"""

# TODO(cms_pf): BibTeX citation
# Placeholder: no citation has been filled in yet, so the string is just a newline.
_CITATION = """
"""


class CmsPfSmsT1tttt(tfds.core.GeneratorBasedBuilder):
    """TFDS builder for the CMS particle-flow SMS-T1tttt sample.

    The builder always writes its examples in the ARRAY_RECORD file format and
    delegates file splitting and example generation to ``cms_utils``.
    """

    VERSION = tfds.core.Version("1.6.0")
    RELEASE_NOTES = {
        "1.6.0": "Regenerate with ARRAY_RECORD",
    }
    # The trailing backslashes join the lines below into a single command line
    # inside the string literal.
    MANUAL_DOWNLOAD_INSTRUCTIONS = """
rsync -r --progress \
lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_sms_t1tttt \
~/tensorflow_datasets/
"""

    def __init__(self, *args, **kwargs):
        """Create the builder, overriding any ``file_format`` with ARRAY_RECORD."""
        kwargs.update(file_format=tfds.core.FileFormat.ARRAY_RECORD)
        super().__init__(*args, **kwargs)

    def _info(self) -> tfds.core.DatasetInfo:
        """Return the dataset metadata (features, supervised keys, feature lists)."""
        # The first tensor dimension is left unspecified (None); the second is
        # fixed by the number of entries in the feature lists.
        num_x = len(X_FEATURES)
        num_y = len(Y_FEATURES)
        feature_spec = tfds.features.FeaturesDict(
            {
                "X": tfds.features.Tensor(shape=(None, num_x), dtype=tf.float32),
                "ygen": tfds.features.Tensor(shape=(None, num_y), dtype=tf.float32),
                "ycand": tfds.features.Tensor(shape=(None, num_y), dtype=tf.float32),
            }
        )
        extra_metadata = tfds.core.MetadataDict(x_features=X_FEATURES, y_features=Y_FEATURES)
        return tfds.core.DatasetInfo(
            builder=self,
            description=_DESCRIPTION,
            features=feature_spec,
            supervised_keys=("X", "ycand"),
            homepage="",
            citation=_CITATION,
            metadata=extra_metadata,
        )

    def _split_generators(self, dl_manager: tfds.download.DownloadManager):
        """Return the split generators built from the sample's ``raw`` directory."""
        # Raw inputs are looked up under the manual download directory.
        raw_dir = dl_manager.manual_dir / "SMS-T1tttt_mGl-1500_mLSP-100_TuneCP5_14TeV_pythia8_cfi" / "raw"
        return cms_utils.split_sample(raw_dir)

    def _generate_examples(self, files):
        """Produce examples from ``files`` by delegating to ``cms_utils``."""
        return cms_utils.generate_examples(files)
3 changes: 1 addition & 2 deletions mlpf/heptfds/cms_pf/ttbar.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ class CmsPfTtbar(tfds.core.GeneratorBasedBuilder):
"1.6.0": "Regenerate with ARRAY_RECORD",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
mkdir -p data
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cms/TTbar_14TeV_TuneCUETP8M1_cfi data/
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_ttbar ~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
3 changes: 1 addition & 2 deletions mlpf/heptfds/cms_pf/ztt.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ class CmsPfZtt(tfds.core.GeneratorBasedBuilder):
"1.6.0": "Regenerate with ARRAY_RECORD",
}
MANUAL_DOWNLOAD_INSTRUCTIONS = """
mkdir -p data
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/cms/ZTT_All_hadronic_14TeV_TuneCUETP8M1_cfi data/
rsync -r --progress lxplus.cern.ch:/eos/user/j/jpata/mlpf/tensorflow_datasets/cms/cms_pf_ztt ~/tensorflow_datasets/
"""

def __init__(self, *args, **kwargs):
Expand Down
16 changes: 7 additions & 9 deletions mlpf/plotting/plot_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@

labels = {
"met": "$p_{\mathrm{T}}^{\mathrm{miss}}$ [GeV]",
"gen_met": "$p_{\mathrm{T,gen}}^\text{miss}$ [GeV]",
"gen_met": "$p_{\mathrm{T,gen}}^\mathrm{miss}$ [GeV]",
"gen_mom": "$p_{\mathrm{gen}}$ [GeV]",
"gen_jet": "jet $p_{\mathrm{T,gen}}$ [GeV]",
"gen_jet_eta": "jet $\eta_{\mathrm{gen}}$ [GeV]",
"reco_met": "$p_{\mathrm{T,reco}}^\text{miss}$ [GeV]",
"reco_met": "$p_{\mathrm{T,reco}}^\mathrm{miss}$ [GeV]",
"reco_gen_met_ratio": "$p_{\mathrm{T,reco}}^\mathrm{miss} / p_{\\mathrm{T,gen}}^\mathrm{miss}$",
"reco_gen_mom_ratio": "$p_{\mathrm{reco}} / p_{\\mathrm{gen}}$",
"reco_gen_jet_ratio": "jet $p_{\mathrm{T,reco}} / p_{\\mathrm{T,gen}}$",
Expand Down Expand Up @@ -1131,9 +1131,9 @@ def plot_jet_response_binned(yvals, epoch=None, cp_dir=None, comet_experiment=No
plt.figure()
plt.plot(x_vals, (pf_vals[:, 2] - pf_vals[:, 0]) / pf_vals[:, 1], marker="o", label="PF")
plt.plot(x_vals, (mlpf_vals[:, 2] - mlpf_vals[:, 0]) / mlpf_vals[:, 1], marker="o", label="MLPF")
plt.legend(loc=1, fontsize=16, title=title)
plt.ylabel("Response IQR / median")
plt.xlabel(labels["gen_jet"])

plt.tight_layout()
save_img(
"jet_response_med_iqr.png",
Expand Down Expand Up @@ -1220,6 +1220,7 @@ def plot_jet_response_binned_eta(yvals, epoch=None, cp_dir=None, comet_experimen
plt.ylabel("Response IQR / median")
plt.xlabel(labels["gen_jet_eta"])
plt.tight_layout()
plt.legend(loc=1, fontsize=16, title=title)
save_img(
"jet_response_med_iqr_eta.png",
epoch,
Expand Down Expand Up @@ -1300,11 +1301,8 @@ def plot_met_response_binned(yvals, epoch=None, cp_dir=None, comet_experiment=No
plt.plot(x_vals, (pf_vals[:, 2] - pf_vals[:, 0]) / pf_vals[:, 1], marker="o", label="PF")
plt.plot(x_vals, (mlpf_vals[:, 2] - mlpf_vals[:, 0]) / mlpf_vals[:, 1], marker="o", label="MLPF")
plt.ylabel("Response IQR / median")
plt.legend()
if title:
plt.title(title)
plt.legend(loc=1, fontsize=16, title=title)
plt.xlabel(labels["gen_met"])

plt.tight_layout()
save_img(
"met_response_med_iqr.png",
Expand Down Expand Up @@ -1385,9 +1383,9 @@ def plot_3dmomentum_response_binned(yvals, epoch=None, cp_dir=None, comet_experi
plt.figure()
plt.plot(x_vals, (pf_vals[:, 2] - pf_vals[:, 0]) / pf_vals[:, 1], marker="o", label="PF")
plt.plot(x_vals, (mlpf_vals[:, 2] - mlpf_vals[:, 0]) / mlpf_vals[:, 1], marker="o", label="MLPF")
plt.ylabel("Response IQR")
plt.ylabel("Response IQR / median")
plt.xlabel(labels["gen_mom"])

plt.legend(loc=1, fontsize=16, title=title)
plt.tight_layout()
save_img(
"mom_response_med_iqr.png",
Expand Down
4 changes: 0 additions & 4 deletions mlpf/pyg_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,7 @@
import os.path as osp
import pickle as pkl
from pathlib import Path

import yaml

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import fastjet
import torch
import torch.distributed as dist
Expand Down
11 changes: 11 additions & 0 deletions mlpf/tfmodel/datasets/BaseDatasetFactory.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@ def unpack_target(y, num_output_classes, config):
tf.debugging.assert_less_equal(tf.math.abs(cos_phi), 1e5)
tf.debugging.assert_less_equal(tf.math.abs(energy), 1e5)

energy_bins_edges = tf.cast(tf.experimental.numpy.logspace(-1, 3, 500), dtype=tf.float32)
pt_bins_edges = tf.cast(tf.experimental.numpy.logspace(-1, 3, 500), dtype=tf.float32)

energy_bins = tf.searchsorted(energy_bins_edges, tf.squeeze(energy, axis=-1))
energy_bins = tf.one_hot(energy_bins, energy_bins_edges.shape[0])

pt_bins = tf.searchsorted(pt_bins_edges, tf.squeeze(pt, axis=-1))
pt_bins = tf.one_hot(pt_bins, pt_bins_edges.shape[0])

ret = {
"cls": tf.one_hot(type_as_int, num_output_classes),
"charge": tf.one_hot(charge_as_int, 3),
Expand All @@ -48,6 +57,8 @@ def unpack_target(y, num_output_classes, config):
"sin_phi": sin_phi,
"cos_phi": cos_phi,
"energy": energy,
"energy_bins": energy_bins,
"pt_bins": pt_bins,
}

if config["loss"]["event_loss"] != "none":
Expand Down
Loading

0 comments on commit e8a8203

Please sign in to comment.