Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Docker images #29

Merged
merged 6 commits into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .deepsource.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
version = 1

[[analyzers]]
name = "python"

[analyzers.meta]
runtime_version = "3.x.x"
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# KingMaker


[![Analysis Containers](https://github.com/KIT-CMS/kingmaker-images/actions/workflows/deploy-base-images.yml/badge.svg?branch=main)](https://github.com/KIT-CMS/kingmaker-images/actions/workflows/deploy-base-images.yml)

KingMaker is the workflow management for producing ntuples with the [CROWN](https://github.com/KIT-CMS/CROWN) framework. The workflow management is based on [law](https://github.com/riga/law), which uses [luigi](https://github.com/spotify/luigi) as its backend.

Expand Down
4 changes: 0 additions & 4 deletions lawluigi_configs/KingMaker_luigi.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ htcondor_remote_job = True
# htcondor_request_gpus = 1
; for all cores in total
htcondor_universe = docker
;image without GPU libraries
htcondor_docker_image = mschnepf/slc7-condocker:latest
;image with GPU libraries
# htcondor_docker_image = tvoigtlaender/slc7-condocker-cuda-11.5-cudnn8:base
; create log files in htcondor jobs
transfer_logs = True
; set local scheduler
Expand Down
4 changes: 1 addition & 3 deletions lawluigi_configs/ML_train_luigi.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ htcondor_accounting_group = cms.higgs
htcondor_remote_job = True
htcondor_request_cpus = 1
; for all cores in total
htcondor_universe = docker
;image without GPU libraries
htcondor_docker_image = mschnepf/slc7-condocker:latest
htcondor_universe = docker
;image with GPU libraries
# htcondor_docker_image = tvoigtlaender/slc7-condocker-cuda-11.5-cudnn8:base
; create log files in htcondor jobs
Expand Down
64 changes: 62 additions & 2 deletions processor/framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from getpass import getuser
from law.target.collection import flatten_collections
from law.config import Config
import subprocess

law.contrib.load("wlcg")
law.contrib.load("htcondor")
Expand Down Expand Up @@ -269,7 +270,8 @@ class HTCondorWorkflow(Task, law.htcondor.HTCondorWorkflow):
description="Universe to be set in HTCondor job submission."
)
htcondor_docker_image = luigi.Parameter(
description="Docker image to be used in HTCondor job submission."
description="Docker image to be used in HTCondor job submission.",
default="Automatic",
)
htcondor_request_disk = luigi.Parameter(
description="Amount of scratch-space(kB) to be requested in HTCondor job submission."
Expand All @@ -285,6 +287,61 @@ class HTCondorWorkflow(Task, law.htcondor.HTCondorWorkflow):
# Use proxy file located in $X509_USER_PROXY or /tmp/x509up_u$(id) if empty
htcondor_user_proxy = law.wlcg.get_vomsproxy_file()

def get_submission_os(self):
    """Return the container image matching the OS of the submitting host.

    The distribution and release are taken from ``lsb_release`` when it is
    available and from ``/etc/os-release`` otherwise. The result is used as
    the docker image when ``htcondor_docker_image`` is left at "Automatic".

    Returns:
        str: ``ghcr.io/kit-cms/kingmaker-images-<os>-<env>:main`` where
        ``<os>`` is one of centos7, centos8, rhel9, ubuntu2004, ubuntu2204
        and ``<env>`` is the lower-cased ``self.ENV_NAME``.

    Raises:
        Exception: if the distribution or its major version has no
        matching image.
    """

    def _lsb_field(flag):
        # Call lsb_release directly instead of through a shell pipeline:
        # with "lsb_release ... | cut ..." the exit status is the one of
        # `cut`, so a missing lsb_release went unnoticed and the
        # /etc/os-release fallback could never be reached.
        try:
            raw = subprocess.check_output(
                ["lsb_release", flag], stderr=subprocess.DEVNULL
            )
        except (OSError, subprocess.CalledProcessError):
            return ""
        # Output looks like "Distributor ID:<TAB>CentOS"; keep the value.
        return raw.decode().partition(":")[2].strip()

    def _os_release():
        # Parse KEY=VALUE pairs and drop the optional quoting of values.
        entries = {}
        try:
            with open("/etc/os-release") as stream:
                for line in stream:
                    key, sep, value = line.strip().partition("=")
                    if sep:
                        entries[key] = value.strip().strip("\"'")
        except OSError:
            pass
        return entries

    distro = _lsb_field("-i")
    os_version = _lsb_field("-r")
    if not distro or not os_version:
        release_info = _os_release()
        # ID (e.g. "centos", "rhel") is the machine-readable identifier;
        # NAME carries display strings such as "CentOS Linux".
        distro = release_info.get("ID", "")
        os_version = release_info.get("VERSION_ID", "")

    rhel_like = {"8": "centos8", "9": "rhel9"}
    # Keys cover both lsb_release distributor IDs and os-release IDs,
    # compared case-insensitively.
    images_by_distro = {
        "centos": {"7": "centos7"},
        "redhatenterprise": rhel_like,
        "rhel": rhel_like,
        "almalinux": rhel_like,
        "ubuntu": {"20": "ubuntu2004", "22": "ubuntu2204"},
    }

    major_version = os_version.split(".")[0]
    image_name = images_by_distro.get(distro.lower(), {}).get(major_version)
    if image_name is None:
        # Also covers a known distribution with an unsupported release,
        # which previously produced an image called "...-None-...".
        raise Exception(
            f"Unknown OS {distro} {os_version}, CROWN will not run without changes"
        )
    return (
        f"ghcr.io/kit-cms/kingmaker-images-{image_name}"
        f"-{str(self.ENV_NAME).lower()}:main"
    )

def htcondor_create_job_manager(self, **kwargs):
    """Build the HTCondor job manager used by this workflow.

    The keyword arguments are combined with
    ``self.htcondor_job_manager_defaults`` through ``merge_dicts`` and the
    result is forwarded to ``HTCondorJobManager``.
    NOTE(review): presumably the caller's kwargs override the defaults;
    confirm against ``merge_dicts``.
    """
    manager_options = merge_dicts(self.htcondor_job_manager_defaults, kwargs)
    return HTCondorJobManager(**manager_options)
Expand Down Expand Up @@ -339,7 +396,10 @@ def htcondor_job_config(self, config, job_num, branches):
config.custom_content.append(("Requirements", self.htcondor_requirements))
config.custom_content.append(("+RemoteJob", self.htcondor_remote_job))
config.custom_content.append(("universe", self.htcondor_universe))
config.custom_content.append(("docker_image", self.htcondor_docker_image))
if self.htcondor_docker_image != "Automatic":
config.custom_content.append(("docker_image", self.htcondor_docker_image))
else:
config.custom_content.append(("docker_image", self.get_submission_os()))
config.custom_content.append(("+RequestWalltime", self.htcondor_walltime))
config.custom_content.append(("x509userproxy", self.htcondor_user_proxy))
config.custom_content.append(("request_cpus", self.htcondor_request_cpus))
Expand Down
42 changes: 7 additions & 35 deletions processor/setup_law_remote.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/sh
action(){
action() {

_addpy() {
[ ! -z "$1" ] && export PYTHONPATH="$1:${PYTHONPATH}" && echo "Add $1 to PYTHONPATH"
Expand All @@ -14,48 +14,20 @@ action(){
# Set USER as local USER
export USER={{USER}}
export LUIGIPORT={{LUIGIPORT}}
export X509_CERT_DIR=/cvmfs/grid.cern.ch/etc/grid-security/certificates
export X509_VOMS_DIR=/cvmfs/grid.cern.ch/etc/grid-security/vomsdir
echo "------------------------------------------"
echo " | USER = ${USER}"
echo " | HOSTNAME = $(hostname)"
echo " | ANA_NAME = {{ANA_NAME}}"
echo " | ENV_NAME = {{ENV_NAME}}"
echo " | TAG = {{TAG}}"
echo " | USE_CVMFS = {{USE_CVMFS}}"
echo " | TARBALL_PATH = {{TARBALL_PATH}}"

if [[ "{{USE_CVMFS}}" == "True" ]]; then
ENV_PATH=/cvmfs/etp.kit.edu/LAW_envs/conda_envs/miniconda/bin/activate
echo " | ENV_PATH = ${ENV_PATH}"
else
ENV_PATH=${SPAWNPOINT}/miniconda/envs/{{ENV_NAME}}
echo " | ENV_PATH = $ENV_PATH"
echo " | TARBALL_ENV_PATH = {{TARBALL_ENV_PATH}}"
fi
echo "------------------------------------------"

# copy and untar process (and environment if necessary)
if [[ "{{USE_CVMFS}}" == "True" ]]; then
# Activate environment from cvmfs
source ${ENV_PATH} {{ENV_NAME}}
echo "gfal-copy {{TARBALL_PATH}} ${SPAWNPOINT}"
gfal-copy {{TARBALL_PATH}} ${SPAWNPOINT}
else
# Copy tarballs (only works for Centos/RHEL 7/8/9)
(
export X509_CERT_DIR=/cvmfs/grid.cern.ch/etc/grid-security/certificates
export X509_VOMS_DIR=/cvmfs/grid.cern.ch/etc/grid-security/vomsdir
MAJOR_RELEASE="$(. /etc/os-release; echo "${VERSION_ID%.*}")"
source /cvmfs/sft.cern.ch/lcg/views/LCG_102/x86_64-centos${MAJOR_RELEASE}-gcc11-opt/setup.sh
xrdcp {{TARBALL_PATH}} ${SPAWNPOINT}
xrdcp {{TARBALL_ENV_PATH}} ${SPAWNPOINT}
)
mkdir -p ${ENV_PATH}
tar -xzf {{ENV_NAME}}.tar.gz -C ${ENV_PATH} && rm {{ENV_NAME}}.tar.gz
# Activate environment from tarball
source ${ENV_PATH}/bin/activate
conda-unpack
fi

source /opt/conda/etc/profile.d/conda.sh
conda activate env
echo "gfal-copy {{TARBALL_PATH}} ${SPAWNPOINT}"
gfal-copy {{TARBALL_PATH}} ${SPAWNPOINT}
tar -xzf processor.tar.gz && rm processor.tar.gz

# # add law to path
Expand Down
5 changes: 3 additions & 2 deletions processor/tasks/CROWNBase.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,12 @@ def set_sample_data(self, samples):
table.add_column("Era", justify="left")
table.add_column("Sampletype", justify="left")

with open(str(self.dataset_database), "r") as stream:
sample_db = yaml.safe_load(stream)

for nick in samples:
data["details"][nick] = {}
# check if sample exists in datasets.yaml
with open(str(self.dataset_database), "r") as stream:
sample_db = yaml.safe_load(stream)
if nick not in sample_db:
console.log(
"Sample {} not found in {}".format(nick, self.dataset_database)
Expand Down
11 changes: 10 additions & 1 deletion processor/tasks/CROWNRun.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,13 @@ def run(self):
tar = tarfile.open(_tarballpath, "r:gz")
tar.extractall(_workdir)
os.remove(_tempfile)
# test running the source command
console.rule("Testing Source command for CROWN")
self.run_command(
command=["source", "{}/init.sh".format(_workdir)],
silent=False,
)
console.rule("Finished testing Source command for CROWN")
# set environment using env script
my_env = self.set_environment("{}/init.sh".format(_workdir))
_crown_args = [_outputfile] + _inputfiles
Expand All @@ -143,8 +150,10 @@ def run(self):
console.log("inputfile {}".format(_inputfiles))
console.log("outputfile {}".format(_outputfile))
console.log("workdir {}".format(_workdir)) # run CROWN
command = [_executable] + _crown_args
console.log(f"Running command: {command}")
with subprocess.Popen(
[_executable] + _crown_args,
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
bufsize=1,
Expand Down
1 change: 1 addition & 0 deletions processor/tasks/scripts/compile_crown.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ if cmake $CROWNFOLDER \
-DSHIFTS=$SHIFTS \
-DTHREADS=$EXECUTALBE_THREADS \
-DINSTALLDIR=$INSTALLDIR \
-DPRODUCTION=True \
-B$BUILDDIR 2>&1 |tee $BUILDDIR/cmake.log; then
echo "CMake finished successfully"
else
Expand Down
Loading