normalize loss, reparametrize network (jpata#297)
* trainable configurable

* added additional plots

* switch to 1.7.1, relu

* pin tensorflow

* reduce pipeline net size

* remove TF from pipeline

---------

Co-authored-by: Joosep Pata <joosep.pata@kbfi.ee>
jpata and Joosep Pata authored Mar 22, 2024
1 parent 8d9065c commit e1b439a
Showing 23 changed files with 2,214 additions and 318 deletions.
122 changes: 61 additions & 61 deletions .github/workflows/test.yml
@@ -28,68 +28,68 @@ jobs:
         python-version: "3.10.12"
         cache: "pip"
     - run: pip install -r requirements.txt
-    - run: pip3 install torch==2.2.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-    - run: pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv torch_geometric -f https://data.pyg.org/whl/torch-2.2.0+cpu.html
-
-  tf-unittests:
-    runs-on: ubuntu-22.04
-    needs: [deps]
-    steps:
-    - uses: actions/checkout@v3
-    - uses: actions/setup-python@v4
-      with:
-        python-version: "3.10.12"
-        cache: "pip"
-    - run: pip install -r requirements.txt
-    - run: PYTHONPATH=. python3 -m unittest tests/test_tf.py
+    - run: pip3 install torch==2.2.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+    - run: pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv torch_geometric -f https://data.pyg.org/whl/torch-2.2.1+cpu.html
 
-  tf-clic-pipeline:
-    runs-on: ubuntu-22.04
-    needs: [tf-unittests]
-    steps:
-    - uses: actions/checkout@v3
-    - uses: actions/setup-python@v4
-      with:
-        python-version: "3.10.12"
-        cache: "pip"
-    - run: pip install -r requirements.txt
-    - run: ./scripts/local_test_clic_pipeline.sh
-
-  tf-clic-hits-pipeline:
-    runs-on: ubuntu-22.04
-    needs: [tf-unittests]
-    steps:
-    - uses: actions/checkout@v3
-    - uses: actions/setup-python@v4
-      with:
-        python-version: "3.10.12"
-        cache: "pip"
-    - run: pip install -r requirements.txt
-    - run: ./scripts/local_test_clic_hits_pipeline.sh
-
-  tf-delphes-pipeline:
-    runs-on: ubuntu-22.04
-    needs: [tf-unittests]
-    steps:
-    - uses: actions/checkout@v3
-    - uses: actions/setup-python@v4
-      with:
-        python-version: "3.10.12"
-        cache: "pip"
-    - run: pip install -r requirements.txt
-    - run: ./scripts/local_test_delphes_pipeline.sh
-
-  tf-cms-pipeline:
-    runs-on: ubuntu-22.04
-    needs: [tf-unittests]
-    steps:
-    - uses: actions/checkout@v3
-    - uses: actions/setup-python@v4
-      with:
-        python-version: "3.10.12"
-        cache: "pip"
-    - run: pip install -r requirements.txt
-    - run: ./scripts/local_test_cms_pipeline.sh
+  # tf-unittests:
+  #   runs-on: ubuntu-22.04
+  #   needs: [deps]
+  #   steps:
+  #   - uses: actions/checkout@v3
+  #   - uses: actions/setup-python@v4
+  #     with:
+  #       python-version: "3.10.12"
+  #       cache: "pip"
+  #   - run: pip install -r requirements.txt
+  #   - run: PYTHONPATH=. python3 -m unittest tests/test_tf.py
+  #
+  # tf-clic-pipeline:
+  #   runs-on: ubuntu-22.04
+  #   needs: [tf-unittests]
+  #   steps:
+  #   - uses: actions/checkout@v3
+  #   - uses: actions/setup-python@v4
+  #     with:
+  #       python-version: "3.10.12"
+  #       cache: "pip"
+  #   - run: pip install -r requirements.txt
+  #   - run: ./scripts/local_test_clic_pipeline.sh
+  #
+  # tf-clic-hits-pipeline:
+  #   runs-on: ubuntu-22.04
+  #   needs: [tf-unittests]
+  #   steps:
+  #   - uses: actions/checkout@v3
+  #   - uses: actions/setup-python@v4
+  #     with:
+  #       python-version: "3.10.12"
+  #       cache: "pip"
+  #   - run: pip install -r requirements.txt
+  #   - run: ./scripts/local_test_clic_hits_pipeline.sh
+  #
+  # tf-delphes-pipeline:
+  #   runs-on: ubuntu-22.04
+  #   needs: [tf-unittests]
+  #   steps:
+  #   - uses: actions/checkout@v3
+  #   - uses: actions/setup-python@v4
+  #     with:
+  #       python-version: "3.10.12"
+  #       cache: "pip"
+  #   - run: pip install -r requirements.txt
+  #   - run: ./scripts/local_test_delphes_pipeline.sh
+  #
+  # tf-cms-pipeline:
+  #   runs-on: ubuntu-22.04
+  #   needs: [tf-unittests]
+  #   steps:
+  #   - uses: actions/checkout@v3
+  #   - uses: actions/setup-python@v4
+  #     with:
+  #       python-version: "3.10.12"
+  #       cache: "pip"
+  #   - run: pip install -r requirements.txt
+  #   - run: ./scripts/local_test_cms_pipeline.sh
 
   pyg-unittests:
     runs-on: ubuntu-22.04
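The pin moves from torch 2.2.0 to 2.2.1 on the CPU wheel index, with the matching PyG extension wheels bumped alongside it. A quick sanity check one could run in that environment, as a sketch (the exact version string depends on the installed wheel):

```python
# Minimal check that the pinned CPU-only build was picked up; PyTorch tags
# wheels from the CPU index with a "+cpu" local version suffix.
import torch

print(torch.__version__)          # expected to start with "2.2.1"
print(torch.cuda.is_available())  # False for the CPU-only wheels
```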
2 changes: 1 addition & 1 deletion mlpf/data_cms/genjob_nopu.sh
@@ -14,7 +14,7 @@ MLPF_PATH=/home/joosep/particleflow/
 SAMPLE=$1
 SEED=$2
 
-WORKDIR=/scratch/local/joosep/$SAMPLE/$SEED
+WORKDIR=/scratch/local/joosep/$SLURM_JOBID/$SAMPLE/$SEED
 #WORKDIR=`pwd`/$SAMPLE/$SEED
 mkdir -p $WORKDIR
 mkdir -p $OUTDIR
2 changes: 1 addition & 1 deletion mlpf/data_cms/genjob_pu55to75.sh
@@ -14,7 +14,7 @@ MLPF_PATH=/home/joosep/particleflow/
 SAMPLE=$1
 SEED=$2
 
-WORKDIR=/scratch/local/joosep/$SAMPLE/$SEED
+WORKDIR=/scratch/local/joosep/$SLURM_JOBID/$SAMPLE/$SEED
 #WORKDIR=`pwd`/$SAMPLE/$SEED
 mkdir -p $WORKDIR
 mkdir -p $OUTDIR
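Both generation scripts get the same one-line change: $SLURM_JOBID is inserted into the scratch path, presumably so that concurrent jobs processing the same SAMPLE and SEED cannot collide in a shared work directory. A minimal sketch of the resulting layout (values illustrative):

```python
# SLURM sets SLURM_JOBID uniquely per job, so two concurrent jobs get
# disjoint scratch directories even for identical (sample, seed) inputs.
import os

sample = "TTbar_14TeV_TuneCUETP8M1_cfi"  # illustrative values
seed = "700001"
jobid = os.environ.get("SLURM_JOBID", "local")  # unique per SLURM job
workdir = f"/scratch/local/joosep/{jobid}/{sample}/{seed}"
print(workdir)
```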
2 changes: 1 addition & 1 deletion mlpf/data_cms/prepare_args.py
@@ -14,7 +14,7 @@
     ("ZpTT_1500_14TeV_TuneCP5_cfi", 600000, 605010, "genjob_pu55to75.sh", outdir + "/pu55to75"),
 
     ("TTbar_14TeV_TuneCUETP8M1_cfi", 700000, 701000, "genjob_nopu.sh", outdir + "/nopu"),
-    ("MultiParticlePFGun50_cfi", 800000, 810000, "genjob_nopu.sh", outdir + "/nopu"),
+    ("MultiParticlePFGun50_cfi", 800000, 850000, "genjob_nopu.sh", outdir + "/nopu"),
 
     ("SingleElectronFlatPt1To1000_pythia8_cfi", 900000, 910000, "genjob_nopu.sh", outdir + "/nopu"),
     ("SingleGammaFlatPt1To1000_pythia8_cfi", 1000000, 1010000, "genjob_nopu.sh", outdir + "/nopu"),
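The only change here widens the seed range for the multi-particle gun sample; the (start, end) pair appears to define one generation job per seed, so this is a five-fold increase:

```python
# Quick arithmetic on the MultiParticlePFGun50_cfi range change, assuming
# one generation job per seed in [start, end).
old_jobs = 810000 - 800000
new_jobs = 850000 - 800000
print(old_jobs, new_jobs, new_jobs / old_jobs)  # 10000 50000 5.0
```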
3 changes: 2 additions & 1 deletion mlpf/heptfds/cms_pf/multiparticlegun.py
@@ -21,11 +21,12 @@
 class CmsPfMultiParticleGun(tfds.core.GeneratorBasedBuilder):
     """DatasetBuilder for cms_pf_multi_particle_gun dataset."""
 
-    VERSION = tfds.core.Version("1.7.0")
+    VERSION = tfds.core.Version("1.7.1")
     RELEASE_NOTES = {
         "1.6.0": "Initial release",
         "1.6.1": "Additional stats",
         "1.7.0": "Add cluster shape vars",
+        "1.7.1": "Additional stats",
     }
     MANUAL_DOWNLOAD_INSTRUCTIONS = """
     rsync -r --progress \
1 change: 1 addition & 0 deletions mlpf/jet_utils.py
@@ -1,4 +1,5 @@
 import numpy as np
+
 import numba
 import awkward
 import vector
25 changes: 7 additions & 18 deletions mlpf/pyg/PFDataset.py
@@ -103,19 +103,14 @@ def __init__(
         )
 
 
-def next_power_of_2(x):
-    return 1 if x == 0 else 2 ** (x - 1).bit_length()
-
-
 class Collater:
     """Based on the Collater found on torch_geometric docs we build our own."""
 
-    def __init__(self, keys_to_get, follow_batch=None, exclude_keys=None, pad_3d=True, pad_power_of_two=True):
+    def __init__(self, keys_to_get, follow_batch=None, exclude_keys=None, pad_3d=True):
         self.follow_batch = follow_batch
         self.exclude_keys = exclude_keys
         self.keys_to_get = keys_to_get
         self.pad_3d = pad_3d
-        self.pad_power_of_two = pad_power_of_two
 
     def __call__(self, inputs):
         num_samples_in_batch = len(inputs)
@@ -133,16 +128,7 @@ def __call__(self, inputs):
         if not self.pad_3d:
             return ret
         else:
-            # pad to closest power of two
-            if self.pad_power_of_two:
-                sizes = [next_power_of_2(len(b.X)) for b in batch]
-                max_size = max(sizes)
-            else:
-                max_size = None
-            ret = {
-                k: torch_geometric.utils.to_dense_batch(getattr(ret, k), ret.batch, max_num_nodes=max_size)
-                for k in elem_keys
-            }
+            ret = {k: torch_geometric.utils.to_dense_batch(getattr(ret, k), ret.batch) for k in elem_keys}
 
             ret["mask"] = ret["X"][1]
 
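With the power-of-two padding removed, the collater now pads each batch only to its longest event. A minimal sketch of what torch_geometric.utils.to_dense_batch does with a ragged batch (shapes illustrative):

```python
# to_dense_batch packs per-element features into a dense
# [num_events, max_elements, num_features] tensor plus a validity mask.
import torch
from torch_geometric.utils import to_dense_batch

x = torch.arange(8 * 4, dtype=torch.float32).reshape(8, 4)  # 8 elements, 4 features
batch = torch.tensor([0, 0, 0, 1, 1, 1, 1, 1])  # event index of each element

x_pad, mask = to_dense_batch(x, batch)  # no max_num_nodes: pad to longest event
print(x_pad.shape)  # torch.Size([2, 5, 4])
print(mask)         # mask[0, 3:] is False: those slots are padding
```

Without the max_num_nodes argument, the dense batch is exactly as wide as its largest event, which was previously rounded up to the next power of two.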
@@ -164,6 +150,8 @@ def __init__(self, data_loaders):
         max_loader_size = max([len(dl) for dl in data_loaders])
 
         self.loader_ds_indices = []
+
+        # iterate loaders interleaved
         for i in range(max_loader_size):
             for iloader, loader in enumerate(data_loaders):
                 if i < len(loader):
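The new comment documents the interleaving: batches are drawn round-robin across datasets until the longest loader runs out. A stand-alone sketch of the visitation order this loop produces (plain lists stand in for the real DataLoaders):

```python
# Shorter loaders simply drop out of the round-robin once exhausted,
# so one "epoch" mixes all datasets instead of consuming them in sequence.
data_loaders = [["cms0", "cms1", "cms2"], ["clic0"]]  # stand-ins for DataLoaders
max_loader_size = max(len(dl) for dl in data_loaders)

loader_ds_indices = []
for i in range(max_loader_size):
    for iloader, loader in enumerate(data_loaders):
        if i < len(loader):
            loader_ds_indices.append(iloader)

print(loader_ds_indices)  # [0, 1, 0, 0]: loader 0, then 1, then 0 twice
```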
@@ -198,7 +186,7 @@ def __len__(self):
         return len_
 
 
-def get_interleaved_dataloaders(world_size, rank, config, use_cuda, pad_3d, pad_power_of_two, use_ray):
+def get_interleaved_dataloaders(world_size, rank, config, use_cuda, pad_3d, use_ray):
     loaders = {}
     for split in ["train", "valid"]:  # build train, valid dataset and dataloaders
         loaders[split] = []
@@ -232,12 +220,13 @@ def get_interleaved_dataloaders(world_size, rank, config, use_cuda, pad_3d, pad_power_of_two, use_ray):
             loader = PFDataLoader(
                 dataset,
                 batch_size=batch_size,
-                collate_fn=Collater(["X", "ygen"], pad_3d=pad_3d, pad_power_of_two=pad_power_of_two),
+                collate_fn=Collater(["X", "ygen"], pad_3d=pad_3d),
                 sampler=sampler,
                 num_workers=config["num_workers"],
                 prefetch_factor=config["prefetch_factor"],
                 pin_memory=use_cuda,
                 pin_memory_device="cuda:{}".format(rank) if use_cuda else "",
+                drop_last=True,
             )
 
             if use_ray:
78 changes: 70 additions & 8 deletions mlpf/pyg/inference.py
@@ -14,10 +14,16 @@
     compute_met_and_ratio,
     format_dataset_name,
     load_eval_data,
+    plot_jets,
     plot_jet_ratio,
+    plot_jet_response_binned,
+    plot_jet_response_binned_eta,
+    plot_jet_response_binned_separate,
     plot_met,
     plot_met_ratio,
+    plot_met_response_binned,
     plot_num_elements,
+    plot_particle_multiplicity,
     plot_particles,
     plot_sum_energy,
 )
@@ -29,11 +35,15 @@
 
 
 def predict_one_batch(conv_type, model, i, batch, rank, jetdef, jet_ptcut, jet_match_dr, outpath, dir_name, sample):
+    outfile = f"{outpath}/preds{dir_name}/{sample}/pred_{rank}_{i}.parquet"
+    if os.path.isfile(outfile):
+        return
+
     if conv_type != "gravnet":
         X_pad, mask = torch_geometric.utils.to_dense_batch(batch.X, batch.batch)
         batch_pad = Batch(X=X_pad, mask=mask).to(rank)
         ypred = model(batch_pad.X, batch_pad.mask)
-        ypred = ypred[0][mask], ypred[1][mask], ypred[2][mask]
+        ypred = ypred[0][mask], ypred[1][mask]
     else:
         _batch = batch.to(rank)
         ypred = model(_batch.X, _batch.batch)
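The new early return makes prediction restartable: any batch whose output parquet is already on disk is skipped. A minimal sketch of the pattern (names and paths hypothetical):

```python
# Output existence doubles as a "done" marker, so a killed prediction run
# can be restarted without recomputing finished batches.
import os

def predict_if_missing(i, outdir="preds/sample"):  # hypothetical names
    outfile = os.path.join(outdir, f"pred_0_{i}.parquet")
    if os.path.isfile(outfile):
        return  # finished in an earlier run; skip recomputation
    # ... run the model on batch i, then write outfile ...
```

One caveat of this scheme is that a partially written file from a crashed run also counts as done, so stale outputs may need manual cleanup.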
@@ -125,9 +135,13 @@ def predict_one_batch(conv_type, model, i, batch, rank, jetdef, jet_ptcut, jet_match_dr, outpath, dir_name, sample):
                 "matched_jets": matched_jets,
             }
         ),
-        f"{outpath}/preds{dir_name}/{sample}/pred_{rank}_{i}.parquet",
+        outfile,
     )
-    _logger.info(f"Saved predictions at {outpath}/preds{dir_name}/{sample}/pred_{rank}_{i}.parquet")
+    _logger.info(f"Saved predictions at {outfile}")
+
+
+def predict_one_batch_args(args):
+    predict_one_batch(*args)
 
 
 @torch.no_grad()
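The added predict_one_batch_args is a thin tuple-unpacking wrapper, the usual trick for feeding a multi-argument function to map-style APIs that pass a single object per task (the call site is not shown in the visible diff, so the multiprocessing use below is an assumption); a self-contained sketch:

```python
# pool.map passes one object per task, so a wrapper splats the tuple
# back into positional arguments for the real worker function.
from multiprocessing import Pool

def scale(value, factor):
    return value * factor

def scale_args(args):
    return scale(*args)  # unpack the tuple into positional arguments

if __name__ == "__main__":
    with Pool(2) as pool:
        print(pool.map(scale_args, [(1, 10), (2, 10)]))  # [10, 20]
```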
@@ -165,13 +179,61 @@ def make_plots(outpath, sample, dataset, dir_name=""):
 
     yvals, X, _ = load_eval_data(str(pred_path / "*.parquet"), -1)
 
-    plot_num_elements(X, cp_dir=plots_path, title=format_dataset_name(sample))
-    plot_sum_energy(yvals, CLASS_NAMES[dataset], cp_dir=plots_path, title=format_dataset_name(sample))
+    title = format_dataset_name(sample)
+    plot_num_elements(X, cp_dir=plots_path, title=title)
+    plot_sum_energy(yvals, CLASS_NAMES[dataset], cp_dir=plots_path, title=title)
+    plot_particle_multiplicity(X, yvals, CLASS_NAMES[dataset], cp_dir=plots_path, title=title)
 
-    plot_jet_ratio(yvals, cp_dir=plots_path, title=format_dataset_name(sample))
+    plot_jets(
+        yvals,
+        cp_dir=plots_path,
+        title=title,
+    )
+    plot_jet_ratio(
+        yvals,
+        cp_dir=plots_path,
+        title=title,
+        bins=np.linspace(0, 5, 100),
+        logy=True,
+    )
+    plot_jet_ratio(
+        yvals,
+        cp_dir=plots_path,
+        title=title,
+        bins=np.linspace(0.5, 1.5, 100),
+        logy=False,
+        file_modifier="_bins_0p5_1p5",
+    )
+    plot_jet_response_binned(yvals, cp_dir=plots_path, title=title)
+    plot_jet_response_binned_eta(yvals, cp_dir=plots_path, title=title)
+    plot_jet_response_binned_separate(yvals, cp_dir=plots_path, title=title)
 
     met_data = compute_met_and_ratio(yvals)
-    plot_met(met_data, cp_dir=plots_path, title=format_dataset_name(sample))
-    plot_met_ratio(met_data, cp_dir=plots_path, title=format_dataset_name(sample))
+    plot_met(met_data, cp_dir=plots_path, title=title)
+    plot_met_ratio(met_data, cp_dir=plots_path, title=title)
+    plot_met_ratio(
+        met_data,
+        cp_dir=plots_path,
+        title=title,
+        bins=np.linspace(0, 20, 100),
+        logy=True,
+    )
+    plot_met_ratio(
+        met_data,
+        cp_dir=plots_path,
+        title=title,
+        bins=np.linspace(0, 2, 100),
+        logy=False,
+        file_modifier="_bins_0_2",
+    )
+    plot_met_ratio(
+        met_data,
+        cp_dir=plots_path,
+        title=title,
+        bins=np.linspace(0, 5, 100),
+        logy=False,
+        file_modifier="_bins_0_5",
+    )
+    plot_met_response_binned(met_data, cp_dir=plots_path, title=title)
 
     plot_particles(yvals, cp_dir=plots_path, title=format_dataset_name(sample))
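The bins passed to plot_jet_ratio and plot_met_ratio above are plain numpy arrays of edge values; a sketch of how a response-ratio histogram over such edges is filled (data invented for illustration):

```python
# np.linspace(0.5, 1.5, 100) gives 100 edges and therefore 99 bins, matching
# the "_bins_0p5_1p5" zoom around a perfectly reconstructed ratio of 1.0.
import numpy as np

ratio = np.array([0.8, 0.95, 1.0, 1.02, 1.3, 4.0])  # invented pred/gen ratios
bins = np.linspace(0.5, 1.5, 100)                    # 100 edges, 99 bins
counts, edges = np.histogram(ratio, bins=bins)
print(len(edges), counts.sum())  # 100 5  (the 4.0 entry falls outside the range)
```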
[diff truncated by GitHub: the remaining 15 of the 23 changed files are not shown]
