Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change parallelization strategy #11

Merged
merged 7 commits into from
Sep 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@ yadage-run: yadage-clean
@echo "Launching Yadage..."
@yadage-run $(YADAGE_WORK_DIR) "workflow.yml" \
-p input_file="input.yml" \
-p num_generation_jobs="6" \
-p num_procs_per_job="1" \
-d initdir=$(YADAGE_INPUT_DIR) \
--toplevel $(YADAGE_SPEC_DIR)
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.4.0
0.5.0
223 changes: 223 additions & 0 deletions code/cards/me5_configuration.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
################################################################################
#
# Copyright (c) 2009 The MadGraph5_aMC@NLO Development team and Contributors
#
# This file is a part of the MadGraph5_aMC@NLO project, an application which
# automatically generates Feynman diagrams and matrix elements for arbitrary
# high-energy processes in the Standard Model and beyond.
#
# It is subject to the MadGraph5_aMC@NLO license which should accompany this
# distribution.
#
# For more information, visit madgraph.phys.ucl.ac.be and amcatnlo.web.cern.ch
#
################################################################################
#
# This file contains some configuration variables for MadGraph/MadEvent
#
# Lines starting with #! are comments and should remain commented.
# Lines starting with # should be uncommented if you want to modify the default
# value.
# The current value of every option can be seen by typing "display options"
# after launching either ./bin/mg5_aMC or ./bin/madevent
#
# You can place this file in ~/.mg5/mg5_configuration.txt if you have more than
# one version of MG5.
#
################################################################################
#! Preferred Fortran Compiler
#! If None: try to find g77 or gfortran on the system
#!
# fortran_compiler = None
# f2py_compiler_py2 = None
# f2py_compiler_py3 = None


#! Preferred C++ Compiler
#! If None: try to find g++ or clang on the system
#!
# cpp_compiler = None

#! Preferred Text Editor
#! Default: use the shell default editor
#! or try to find one available on the system
#! Be careful: only shell-based editors are allowed
# text_editor = None

#! Preferred Web Browser
#! If None: try to find one available on the system
# web_browser = None

#! Preferred PS viewer
#! If None: try to find one available on the system
# eps_viewer = None

#! Time allowed to answer question (if no answer takes default value)
#! 0: No time limit
# timeout = 60

#! Pythia8 path.
#! Defines the path to the pythia8 installation directory (i.e. the
#! one containing the lib, bin and include directories).
#! A relative path is resolved starting from the mg5 directory
# pythia8_path = ./HEPTools/pythia8

#! MG5aMC_PY8_interface path
#! Defines the path of the C++ driver file that is used by MG5_aMC to
#! steer the Pythia8 shower.
#! Can be installed directly from within MG5_aMC with the following command:
#! MG5_aMC> install mg5amc_py8_interface
# mg5amc_py8_interface_path = ./HEPTools/MG5aMC_PY8_interface

#! Herwig++/Herwig7 paths
#! specify here the paths also to HepMC and ThePEG
#! define the path to the herwig++, thepeg and hepmc directories.
#! paths can be absolute or relative from mg5 directory
#! WARNING: if Herwig7 has been installed with the bootstrap script,
#! then please set thepeg_path and hepmc_path to the same value as
#! hwpp_path
# hwpp_path =
# thepeg_path =
# hepmc_path =

#! Control when MG5 checks whether it is up-to-date.
#! Enter the number of days between two checks (0 means never)
#! A question is always asked before any update
# auto_update = 7

################################################################################
# INFO FOR MADEVENT / aMC@NLO
################################################################################
# If this file is in a MADEVENT Template. 'main directory' is the directory
# containing the SubProcesses directory. Otherwise this is the MadGraph5_aMC@NLO main
# directory (containing the directories madgraph and Template)

#! Allow/Forbid the automatic opening of the web browser (on the status page)
#! when launching MadEvent [True/False]
# automatic_html_opening = True
#! allow notification of finished job in the notification center (Mac Only)
# notification_center = True


#! Default Running mode
#! 0: single machine/ 1: cluster / 2: multicore
run_mode = 2

#! Cluster Type [pbs|sge|condor|lsf|ge|slurm|htcaas|htcaas2] Use for cluster run only
#! And cluster queue (or partition for slurm)
#! And size of the cluster (some part of the code can adapt splitting accordingly)
# cluster_type = condor
# cluster_queue = madgraph
# cluster_size = 150

#! Path to a node directory to avoid direct writing on the central disk
#! Note that condor clusters avoid direct writing by default (therefore this
#! option does not affect condor clusters)
# cluster_temp_path = None

#! Path to a node directory where local files can be found (typically pdf),
#! to avoid sending them to the node (if cluster_temp_path is set or on condor)
# cluster_local_path = None # example: /cvmfs/cp3.uclouvain.be/madgraph/

#! Cluster waiting time for status update
#! The first number (time in seconds) is used when the number of waiting jobs
#! is higher than the number of running ones. The second number is used otherwise.
# cluster_status_update = 600 30

#! How to deal with failed submission (can occur on cluster mode)
#! 0: crash, -1: print error, hangs the program up to manual instructions, N(>0) retry up to N times.
# cluster_nb_retry = 1

#! How much time to wait for the output file before resubmission/crash (filesystem can be very slow)
# cluster_retry_wait = 300

#! Nb_core to use (None = all). This is used only for multicore runs.
#! This also corresponds to the number of cores used for code compilation in cluster mode
nb_core = None

#! Pythia-PGS Package
#! relative path start from main directory
# pythia-pgs_path = ./pythia-pgs

#! Delphes Package
#! relative path start from main directory
# delphes_path = ./Delphes

#! MadAnalysis4 fortran-based package [for basic analysis]
#! relative path start from main directory
# madanalysis_path = ./MadAnalysis

#! MadAnalysis5 python-based Package [For advanced analysis]
#! relative path start from main directory
# madanalysis5_path = ./HEPTools/madanalysis5/madanalysis5

#! ExRootAnalysis Package
#! relative path start from main directory
# exrootanalysis_path = ./ExRootAnalysis

#! TOPDRAWER PATH
#! Path to the directory containing td executables
#! relative path start from main directory
# td_path = ./td

#! lhapdf-config --can be specified differently depending on your python version
#! If None: try to find one available on the system
# lhapdf_py2 = lhapdf-config
# lhapdf_py3 = lhapdf-config

#! fastjet-config
#! If None: try to find one available on the system
# fastjet = fastjet-config

#! MCatNLO-utilities
#! relative path starting from main directory
# MCatNLO-utilities_path = ./MCatNLO-utilities

#! Set what OLP to use for the loop ME generation
# OLP = MadLoop

#! Set the PJFRy++ directory containing pjfry's library
#! if auto: try to find it automatically on the system (default)
#! if '' or None: disabling pjfry
#! if pjfry=/PATH/TO/pjfry/lib: use that specific installation path for PJFry++
# pjfry = auto

#! Set the Golem95 directory containing golem's library
#! It only supports version higher than 1.3.0
#! if auto: try to find it automatically on the system (default)
#! if '' or None: disabling Golem95
#! if golem=/PATH/TO/golem/lib: use that specific installation path for Golem95
# golem = auto

#! Set the samurai directory containing samurai's library
#! It only supports version higher than 2.0.0
#! if auto: try to find it automatically on the system (default)
#! if '' or None: disabling samurai
#! if samurai=/PATH/TO/samurai/lib: use that specific installation path for samurai
# samurai = None

#! Set the Ninja directory containing ninja's library
#! if '' or None: disabling ninja
#! if ninja=/PATH/TO/ninja/lib: use that specific installation path for ninja
# ninja = ./HEPTools/lib

#! Set the COLLIER directory containing COLLIER's library
#! if '' or None: disabling COLLIER
#! if collier=/PATH/TO/collier/lib: use that specific installation path for COLLIER
# Note that it is necessary that you have generated a static library for COLLIER
# collier = ./HEPTools/lib

#! Set how MadLoop dependencies (such as CutTools) should be handled
#! > external : ML5 places a link to the MG5_aMC-wide libraries
#! > internal : ML5 copies all dependencies in the output so that it is independent
#! > environment_paths : ML5 searches for the dependencies in your environment path
# output_dependencies = external

#! SysCalc PATH
#! Path to the directory containing syscalc executables
#! relative path start from main directory
# syscalc_path = ./SysCalc

#! Absolute paths to config scripts in the bin directories for APPLgrid and aMCFast.
# applgrid = applgrid-config
# amcfast = amcfast-config
40 changes: 19 additions & 21 deletions code/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@
##########################

config_file = str(sys.argv[1])
number_jobs = int(sys.argv[2])
madgraph_dir = str(sys.argv[3])
output_dir = str(sys.argv[4])
madgraph_dir = str(sys.argv[2])
output_dir = str(sys.argv[3])

project_path = Path(__file__).parent.parent
output_path = Path(output_dir)
Expand All @@ -30,7 +29,6 @@
miner.load(config_file)

benchmarks = [str(i) for i in miner.benchmarks]
num_benchmarks = len(benchmarks)


##########################
Expand All @@ -54,46 +52,46 @@ def madminer_run_wrapper(sample_benchmarks, run_type):
raise ValueError("Invalid run type")

miner.run_multiple(
is_background=is_background,
only_prepare_script=True,
sample_benchmarks=sample_benchmarks,
mg_directory=madgraph_dir,
mg_process_directory=f"{proc_dir}/{run_type}",
proc_card_file=f"{card_dir}/proc_card_{run_type}.dat",
param_card_template_file=f"{card_dir}/param_card_template.dat",
run_card_files=[f"{card_dir}/run_card_{run_type}.dat"],
param_card_template_file=f"{card_dir}/param_card_template.dat",
mg_process_directory=f"{proc_dir}/{run_type}",
pythia8_card_file=f"{card_dir}/pythia8_card.dat",
configuration_file=f"{card_dir}/me5_configuration.txt",
log_directory=f"{logs_dir}/{run_type}",
sample_benchmarks=sample_benchmarks,
is_background=is_background,
only_prepare_script=True,
python_executable="python3",
)

# Create files to link benchmark_i to run_i.sh
for i in range(number_jobs):
index = i % num_benchmarks
for i, benchmark in enumerate(benchmarks):
file_path = f"{proc_dir}/{run_type}/madminer/cards/benchmark_{i}.dat"

with open(file_path, "w+") as f:
f.write(benchmarks[index])
f.write(benchmark)

print("generate.py", i, benchmarks[index])
print("Benchmark:", i, benchmark)


###########################
##### Run with signal #####
###########################

# Sample benchmarks from already stablished benchmarks in a democratic way
initial_list = benchmarks[0 : (number_jobs % num_benchmarks)]
others_list = benchmarks * (number_jobs // num_benchmarks)
sample_list = initial_list + others_list

madminer_run_wrapper(sample_benchmarks=sample_list, run_type="signal")
madminer_run_wrapper(
sample_benchmarks=benchmarks,
run_type="signal",
)


###########################
### Run with background ###
###########################

# Currently not used
# sample_list = ['sm' for i in range(number_jobs)]
# madminer_run_wrapper(sample_benchmarks=sample_list, run_type='background')
# madminer_run_wrapper(
# sample_benchmarks=["sm"] * len(benchmarks),
# run_type="background",
# )
24 changes: 21 additions & 3 deletions scripts/2_generate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ while [ "$#" -gt 0 ]; do
-p|--project_path) project_path="$2"; shift ;;
-m|--madgraph_dir) madgraph_dir="$2"; shift ;;
-c|--config_file) config_file="$2"; shift ;;
-j|--number_jobs) number_jobs="$2"; shift ;;
-j|--number_procs) number_procs="$2"; shift ;;
-o|--output_dir) output_dir="$2"; shift ;;
*) echo "Unknown parameter passed: $1"; exit 1 ;;
esac
Expand All @@ -33,10 +33,27 @@ SIGNAL_ABS_PATH="${output_dir}/mg_processes/signal"
### translation file called "py.py" which needs to be written on disk.
(
cd "${output_dir}" && \
python3 "${project_path}/code/generate.py" "${config_file}" "${number_jobs}" "${MADGRAPH_ABS_PATH}" "${output_dir}"
python3 "${project_path}/code/generate.py" "${config_file}" "${MADGRAPH_ABS_PATH}" "${output_dir}"
)

for i in $(seq 0 $((number_jobs-1))); do

# Parametrize number of parallel processes per job
default_spec="nb_core = None"
custom_spec="nb_core = ${number_procs}"


# Count the number of benchmarks
run_cards_path="${SIGNAL_ABS_PATH}/madminer/scripts"
num_benchmarks=$(find "${run_cards_path}" -maxdepth 1 -name "run_*.sh" | wc -l)

for i in $(seq 0 $((num_benchmarks-1))); do

# Inject the number of processes in the configuration
sed -i \
-e "s/${default_spec}/${custom_spec}/" \
"${SIGNAL_ABS_PATH}/madminer/cards/me5_configuration_${i}.txt"

# Create the compressed tar archives (one .tar.gz per benchmark)
tar -czf "${output_dir}/folder_${i}.tar.gz" \
-C "${SIGNAL_ABS_PATH}" \
"bin" \
Expand All @@ -45,6 +62,7 @@ for i in $(seq 0 $((number_jobs-1))); do
"lib" \
"madminer/scripts/run_${i}.sh" \
"madminer/cards/benchmark_${i}.dat" \
"madminer/cards/me5_configuration_${i}.txt" \
"madminer/cards/mg_commands_${i}.dat" \
"madminer/cards/param_card_${i}.dat" \
"madminer/cards/pythia8_card_${i}.dat" \
Expand Down
4 changes: 2 additions & 2 deletions workflow/yadage/steps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
common_env_physics: &common_env_physics
environment_type: 'docker-encapsulated'
image: 'madminertool/madminer-workflow-ph'
imagetag: '0.4.0'
imagetag: '0.5.0'


########################################
Expand All @@ -26,7 +26,7 @@ generate:
environment: *common_env_physics
process:
process_type: string-interpolated-cmd
cmd: /madminer/scripts/2_generate.sh -p /madminer -m {mg5amc_dir} -c {config_file} -j {number_jobs} -o {output_dir}
cmd: /madminer/scripts/2_generate.sh -p /madminer -m {mg5amc_dir} -c {config_file} -j {number_procs} -o {output_dir}
publisher:
publisher_type: 'fromglob-pub'
outputkey: script_files
Expand Down
2 changes: 1 addition & 1 deletion workflow/yadage/workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ stages:
scheduler_type: singlestep-stage
parameters:
config_file: {step: configure, output: config_file}
number_jobs: {step: init, output: num_generation_jobs}
number_procs: {step: init, output: num_procs_per_job}
mg5amc_dir: 'software/MG5_aMC_v2_9_4'
output_dir: '{workdir}'
step: {$ref: 'steps.yml#/generate'}
Expand Down