From d328485be92c291a77da854931e99571540a8d7a Mon Sep 17 00:00:00 2001 From: Tom Viering Date: Fri, 18 Aug 2023 12:59:29 +0200 Subject: [PATCH 1/5] different sampling of hps per dataset option --- publications/2023-neurips/lcdb/cli/_create.py | 7 ++ .../2023-neurips/lcdb/workflow/_util.py | 87 +++++++++++-------- 2 files changed, 56 insertions(+), 38 deletions(-) diff --git a/publications/2023-neurips/lcdb/cli/_create.py b/publications/2023-neurips/lcdb/cli/_create.py index 4fefb0d..09f31bd 100644 --- a/publications/2023-neurips/lcdb/cli/_create.py +++ b/publications/2023-neurips/lcdb/cli/_create.py @@ -43,6 +43,11 @@ def add_subparser(subparsers): subparser.add_argument('--LHS', action='store_true') subparser.add_argument('--no-LHS', dest='LHS', action='store_false') subparser.set_defaults(LHS=True) + + subparser.add_argument('--random_hps_per_dataset', action='store_true') + subparser.add_argument('--no-random_hps_per_dataset', dest='random_hps_per_dataset', action='store_false') + subparser.set_defaults(random_hps_per_dataset=True) + subparser.set_defaults(func=function_to_call) @@ -58,6 +63,7 @@ def main( seed: int, max_num_anchors_per_row: int, LHS: bool, + random_hps_per_dataset: bool, *args, **kwargs ): @@ -72,6 +78,7 @@ def main( seed=seed, max_num_anchors_per_row=max_num_anchors_per_row, LHS=LHS, + random_hps_per_dataset=random_hps_per_dataset, ) # filter experiments diff --git a/publications/2023-neurips/lcdb/workflow/_util.py b/publications/2023-neurips/lcdb/workflow/_util.py index a7b4096..d75b663 100644 --- a/publications/2023-neurips/lcdb/workflow/_util.py +++ b/publications/2023-neurips/lcdb/workflow/_util.py @@ -240,6 +240,7 @@ def get_all_experiments( seed: int, max_num_anchors_per_row: int, LHS: bool, + random_hps_per_dataset: bool, ) -> List[Dict]: """Create a sample of experimental configurations for a given workflow. 
@@ -259,45 +260,55 @@ def get_all_experiments( max_num_anchors_per_row=max_num_anchors_per_row, ) - # import the workflow class - workflow_path = config.get("PY_EXPERIMENTER", "workflow") - workflow_class = import_attr_from_module(workflow_path) - - config_space = workflow_class.get_config_space() - default_config = get_default_config(config_space) - - config_space.seed(seed) + df_experiments_grouped = df_experiments.groupby("openmlid") + + experiments = [] + + for name, group in df_experiments_grouped: + print('working on dataset %d...' % name) + # import the workflow class + workflow_path = config.get("PY_EXPERIMENTER", "workflow") + workflow_class = import_attr_from_module(workflow_path) + + config_space = workflow_class.get_config_space() + default_config = get_default_config(config_space) + + seed_post_processed = seed + if random_hps_per_dataset: + seed_post_processed = seed_post_processed + int(name) + config_space.seed(seed_post_processed) + + if LHS: + print('using LHS with seed %d...' % seed_post_processed) + lhs_generator = LHSGenerator(config_space, n=num_configs, seed=seed) + hp_samples = lhs_generator.generate() + else: + print('using random sampling with seed %d...' 
% seed_post_processed) + hp_samples = config_space.sample_configuration(num_configs) + if num_configs == 1: + hp_samples = [hp_samples] + hp_samples.insert(0, default_config) + + # create all rows for the experiments + experiments = experiments + [ + { + "workflow": workflow_path, + "openmlid": openmlid, + "valid_prop": v_p, + "test_prop": t_p, + "seed_outer": s_o, + "seed_inner": s_i, + "train_sizes": train_sizes, + "maxruntime": maxruntime, + "hyperparameters": dict(hp), + "monotonic": mon, + "measure_memory": measure_memory, + } + for (openmlid, v_p, t_p, s_o, s_i, train_sizes, mon, maxruntime, measure_memory), hp in it.product( + group.values, hp_samples + ) + ] - if LHS: - print('using LHS...') - lhs_generator = LHSGenerator(config_space, n=num_configs, seed=seed) - hp_samples = lhs_generator.generate() - else: - print('using random sampling...') - hp_samples = config_space.sample_configuration(num_configs) - if num_configs == 1: - hp_samples = [hp_samples] - hp_samples.insert(0, default_config) - - # create all rows for the experiments - experiments = [ - { - "workflow": workflow_path, - "openmlid": openmlid, - "valid_prop": v_p, - "test_prop": t_p, - "seed_outer": s_o, - "seed_inner": s_i, - "train_sizes": train_sizes, - "maxruntime": maxruntime, - "hyperparameters": dict(hp), - "monotonic": mon, - "measure_memory": measure_memory, - } - for (openmlid, v_p, t_p, s_o, s_i, train_sizes, mon, maxruntime, measure_memory), hp in it.product( - df_experiments.values, hp_samples - ) - ] return workflow_class, experiments From 2b0b04170ef56cf082d37a9af07ad7be2e14c64e Mon Sep 17 00:00:00 2001 From: Tom Viering Date: Fri, 18 Aug 2023 18:10:29 +0200 Subject: [PATCH 2/5] use 2 cores --- publications/2023-neurips/config/knn_large.cfg | 2 ++ publications/2023-neurips/config/knn_medium.cfg | 2 ++ publications/2023-neurips/config/knn_small.cfg | 2 ++ publications/2023-neurips/config/liblinear_large.cfg | 2 ++ publications/2023-neurips/config/liblinear_medium.cfg | 2 ++ 
publications/2023-neurips/config/liblinear_small.cfg | 2 ++ publications/2023-neurips/config/libsvm_large.cfg | 2 ++ publications/2023-neurips/config/libsvm_medium.cfg | 2 ++ publications/2023-neurips/config/libsvm_small.cfg | 2 ++ 9 files changed, 18 insertions(+) diff --git a/publications/2023-neurips/config/knn_large.cfg b/publications/2023-neurips/config/knn_large.cfg index 8bc759d..08ea93d 100644 --- a/publications/2023-neurips/config/knn_large.cfg +++ b/publications/2023-neurips/config/knn_large.cfg @@ -4,6 +4,8 @@ provider = mysql database = db_lcdb table = jobs_large +n_jobs = 2 + # train_size and hyperparameters are omitted since they are computed automatically keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean, workflow = lcdb.workflow.sklearn.KNNWorkflow diff --git a/publications/2023-neurips/config/knn_medium.cfg b/publications/2023-neurips/config/knn_medium.cfg index dad3abd..c898c44 100644 --- a/publications/2023-neurips/config/knn_medium.cfg +++ b/publications/2023-neurips/config/knn_medium.cfg @@ -4,6 +4,8 @@ provider = mysql database = db_lcdb table = jobs_medium +n_jobs = 2 + # train_size and hyperparameters are omitted since they are computed automatically keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean, workflow = lcdb.workflow.sklearn.KNNWorkflow diff --git a/publications/2023-neurips/config/knn_small.cfg b/publications/2023-neurips/config/knn_small.cfg index 0ad1332..c1fe781 100644 --- a/publications/2023-neurips/config/knn_small.cfg +++ b/publications/2023-neurips/config/knn_small.cfg @@ -4,6 +4,8 @@ provider = mysql database = db_lcdb table = jobs_small +n_jobs = 2 + # train_size and hyperparameters are omitted since they are 
computed automatically keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean, workflow = lcdb.workflow.sklearn.KNNWorkflow diff --git a/publications/2023-neurips/config/liblinear_large.cfg b/publications/2023-neurips/config/liblinear_large.cfg index 292463b..3b5f521 100644 --- a/publications/2023-neurips/config/liblinear_large.cfg +++ b/publications/2023-neurips/config/liblinear_large.cfg @@ -4,6 +4,8 @@ provider = mysql database = db_lcdb table = jobs_large +n_jobs = 2 + # train_size and hyperparameters are omitted since they are computed automatically keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean, workflow = lcdb.workflow.sklearn.LibLinearWorkflow diff --git a/publications/2023-neurips/config/liblinear_medium.cfg b/publications/2023-neurips/config/liblinear_medium.cfg index 0ade862..1b817dc 100644 --- a/publications/2023-neurips/config/liblinear_medium.cfg +++ b/publications/2023-neurips/config/liblinear_medium.cfg @@ -4,6 +4,8 @@ provider = mysql database = db_lcdb table = jobs_medium +n_jobs = 2 + # train_size and hyperparameters are omitted since they are computed automatically keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean, workflow = lcdb.workflow.sklearn.LibLinearWorkflow diff --git a/publications/2023-neurips/config/liblinear_small.cfg b/publications/2023-neurips/config/liblinear_small.cfg index 6aa2405..ce667f9 100644 --- a/publications/2023-neurips/config/liblinear_small.cfg +++ b/publications/2023-neurips/config/liblinear_small.cfg @@ -4,6 +4,8 @@ provider = mysql database = db_lcdb 
table = jobs_small +n_jobs = 2 + # train_size and hyperparameters are omitted since they are computed automatically keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean, workflow = lcdb.workflow.sklearn.LibLinearWorkflow diff --git a/publications/2023-neurips/config/libsvm_large.cfg b/publications/2023-neurips/config/libsvm_large.cfg index 3749fc8..66a35a7 100644 --- a/publications/2023-neurips/config/libsvm_large.cfg +++ b/publications/2023-neurips/config/libsvm_large.cfg @@ -4,6 +4,8 @@ provider = mysql database = db_lcdb table = jobs_large +n_jobs = 2 + # train_size and hyperparameters are omitted since they are computed automatically keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean, workflow = lcdb.workflow.sklearn.LibSVMWorkflow diff --git a/publications/2023-neurips/config/libsvm_medium.cfg b/publications/2023-neurips/config/libsvm_medium.cfg index c19013d..a70e59d 100644 --- a/publications/2023-neurips/config/libsvm_medium.cfg +++ b/publications/2023-neurips/config/libsvm_medium.cfg @@ -4,6 +4,8 @@ provider = mysql database = db_lcdb table = jobs_medium +n_jobs = 2 + # train_size and hyperparameters are omitted since they are computed automatically keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean, workflow = lcdb.workflow.sklearn.LibSVMWorkflow diff --git a/publications/2023-neurips/config/libsvm_small.cfg b/publications/2023-neurips/config/libsvm_small.cfg index b743885..7ad275a 100644 --- a/publications/2023-neurips/config/libsvm_small.cfg +++ 
b/publications/2023-neurips/config/libsvm_small.cfg @@ -4,6 +4,8 @@ provider = mysql database = db_lcdb table = jobs_small +n_jobs = 2 + # train_size and hyperparameters are omitted since they are computed automatically keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean, workflow = lcdb.workflow.sklearn.LibSVMWorkflow From c016aa2edfcfb10694e914aafc1eb1af548b4b40 Mon Sep 17 00:00:00 2001 From: Tom Viering Date: Tue, 15 Aug 2023 17:52:21 +0200 Subject: [PATCH 3/5] Revert "postprocess bug?" This reverts commit 0eaa1ea8d86c471b15405c6ea51ab20249ae12c8. --- publications/2023-neurips/config/knn_large.cfg | 2 +- publications/2023-neurips/config/knn_medium.cfg | 2 +- publications/2023-neurips/config/knn_small.cfg | 2 +- publications/2023-neurips/config/liblinear_large.cfg | 2 +- publications/2023-neurips/config/liblinear_medium.cfg | 2 +- publications/2023-neurips/config/liblinear_small.cfg | 2 +- publications/2023-neurips/config/libsvm_large.cfg | 2 +- publications/2023-neurips/config/libsvm_medium.cfg | 2 +- publications/2023-neurips/config/libsvm_small.cfg | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/publications/2023-neurips/config/knn_large.cfg b/publications/2023-neurips/config/knn_large.cfg index 08ea93d..a510cac 100644 --- a/publications/2023-neurips/config/knn_large.cfg +++ b/publications/2023-neurips/config/knn_large.cfg @@ -21,5 +21,5 @@ monotonic = 1 maxruntime = 1800 measure_memory = 0 -resultfields = result:LONGTEXT +resultfields = result:LONGTEXT, postprocess:boolean resultfields.timestamps = false \ No newline at end of file diff --git a/publications/2023-neurips/config/knn_medium.cfg b/publications/2023-neurips/config/knn_medium.cfg index c898c44..723ab4e 100644 --- a/publications/2023-neurips/config/knn_medium.cfg +++ b/publications/2023-neurips/config/knn_medium.cfg @@ -21,5 
+21,5 @@ monotonic = 1 maxruntime = 1800 measure_memory = 0 -resultfields = result:LONGTEXT +resultfields = result:LONGTEXT, postprocess:boolean resultfields.timestamps = false \ No newline at end of file diff --git a/publications/2023-neurips/config/knn_small.cfg b/publications/2023-neurips/config/knn_small.cfg index c1fe781..a18c459 100644 --- a/publications/2023-neurips/config/knn_small.cfg +++ b/publications/2023-neurips/config/knn_small.cfg @@ -21,5 +21,5 @@ monotonic = 1 maxruntime = 1800 measure_memory = 0 -resultfields = result:LONGTEXT +resultfields = result:LONGTEXT, postprocess:boolean resultfields.timestamps = false \ No newline at end of file diff --git a/publications/2023-neurips/config/liblinear_large.cfg b/publications/2023-neurips/config/liblinear_large.cfg index 3b5f521..e8d73a4 100644 --- a/publications/2023-neurips/config/liblinear_large.cfg +++ b/publications/2023-neurips/config/liblinear_large.cfg @@ -21,5 +21,5 @@ monotonic = 1 maxruntime = 1800 measure_memory = 0 -resultfields = result:LONGTEXT +resultfields = result:LONGTEXT, postprocess:boolean resultfields.timestamps = false \ No newline at end of file diff --git a/publications/2023-neurips/config/liblinear_medium.cfg b/publications/2023-neurips/config/liblinear_medium.cfg index 1b817dc..cded162 100644 --- a/publications/2023-neurips/config/liblinear_medium.cfg +++ b/publications/2023-neurips/config/liblinear_medium.cfg @@ -21,5 +21,5 @@ monotonic = 1 maxruntime = 1800 measure_memory = 0 -resultfields = result:LONGTEXT +resultfields = result:LONGTEXT, postprocess:boolean resultfields.timestamps = false \ No newline at end of file diff --git a/publications/2023-neurips/config/liblinear_small.cfg b/publications/2023-neurips/config/liblinear_small.cfg index ce667f9..3e786a1 100644 --- a/publications/2023-neurips/config/liblinear_small.cfg +++ b/publications/2023-neurips/config/liblinear_small.cfg @@ -21,5 +21,5 @@ monotonic = 1 maxruntime = 1800 measure_memory = 0 -resultfields = 
result:LONGTEXT +resultfields = result:LONGTEXT, postprocess:boolean resultfields.timestamps = false \ No newline at end of file diff --git a/publications/2023-neurips/config/libsvm_large.cfg b/publications/2023-neurips/config/libsvm_large.cfg index 66a35a7..2fca392 100644 --- a/publications/2023-neurips/config/libsvm_large.cfg +++ b/publications/2023-neurips/config/libsvm_large.cfg @@ -21,5 +21,5 @@ monotonic = 1 maxruntime = 1800 measure_memory = 0 -resultfields = result:LONGTEXT +resultfields = result:LONGTEXT, postprocess:boolean resultfields.timestamps = false \ No newline at end of file diff --git a/publications/2023-neurips/config/libsvm_medium.cfg b/publications/2023-neurips/config/libsvm_medium.cfg index a70e59d..d36b4f1 100644 --- a/publications/2023-neurips/config/libsvm_medium.cfg +++ b/publications/2023-neurips/config/libsvm_medium.cfg @@ -21,5 +21,5 @@ monotonic = 1 maxruntime = 1800 measure_memory = 0 -resultfields = result:LONGTEXT +resultfields = result:LONGTEXT, postprocess:boolean resultfields.timestamps = false \ No newline at end of file diff --git a/publications/2023-neurips/config/libsvm_small.cfg b/publications/2023-neurips/config/libsvm_small.cfg index 7ad275a..94e41af 100644 --- a/publications/2023-neurips/config/libsvm_small.cfg +++ b/publications/2023-neurips/config/libsvm_small.cfg @@ -21,5 +21,5 @@ monotonic = 1 maxruntime = 1800 measure_memory = 0 -resultfields = result:LONGTEXT +resultfields = result:LONGTEXT, postprocess:boolean resultfields.timestamps = false \ No newline at end of file From f27bbd93c8d20a0d7e7c04f8291f28021129deda Mon Sep 17 00:00:00 2001 From: Tom Viering Date: Mon, 21 Aug 2023 16:01:49 +0200 Subject: [PATCH 4/5] print process id --- publications/2023-neurips/lcdb/cli/_run.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/publications/2023-neurips/lcdb/cli/_run.py b/publications/2023-neurips/lcdb/cli/_run.py index e05063f..a174490 100644 --- a/publications/2023-neurips/lcdb/cli/_run.py +++ 
b/publications/2023-neurips/lcdb/cli/_run.py @@ -2,6 +2,7 @@ import json import logging +import os from time import time from py_experimenter.result_processor import ResultProcessor @@ -48,7 +49,7 @@ def run_experiment( keyfields: dict, result_processor: ResultProcessor, custom_config: dict ): - print('**** starting experiment ****') + print('**** starting experiment on process id %d ****' % os.getpid()) print(time()) # activate logger ch = logging.StreamHandler() From e105db0abd3dce99fa3e350618cc6b736a7aec0c Mon Sep 17 00:00:00 2001 From: Tom Viering Date: Mon, 21 Aug 2023 17:01:25 +0200 Subject: [PATCH 5/5] scripts for server --- .../cluster/slurm/apptainer/batchjoblarge.sh | 14 ++++ .../cluster/slurm/apptainer/batchjobmedium.sh | 14 ++++ .../cluster/slurm/apptainer/batchjobsmall.sh | 14 ++++ .../slurm/apptainer/ssh_killjobs_server.py | 71 +++++++++++++++++++ 4 files changed, 113 insertions(+) create mode 100644 publications/2023-neurips/cluster/slurm/apptainer/batchjoblarge.sh create mode 100644 publications/2023-neurips/cluster/slurm/apptainer/batchjobmedium.sh create mode 100644 publications/2023-neurips/cluster/slurm/apptainer/batchjobsmall.sh create mode 100644 publications/2023-neurips/cluster/slurm/apptainer/ssh_killjobs_server.py diff --git a/publications/2023-neurips/cluster/slurm/apptainer/batchjoblarge.sh b/publications/2023-neurips/cluster/slurm/apptainer/batchjoblarge.sh new file mode 100644 index 0000000..28b6429 --- /dev/null +++ b/publications/2023-neurips/cluster/slurm/apptainer/batchjoblarge.sh @@ -0,0 +1,14 @@ +#!/bin/sh +#SBATCH --partition=general --qos=long +#SBATCH --time=168:00:00 +#SBATCH --mincpus=2 +#SBATCH --mem=36000 +#SBATCH --job-name=lcdbL +#SBATCH --output=lcdbL%a.txt +#SBATCH --error=lcdbL%a.txt +#SBATCH --array=1-83 +ulimit -n 8000 +cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips/ +rsync openml_cache /tmp/tjviering/ -r -v --ignore-existing +cd 
/tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/ +srun apptainer exec -c --bind /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips:/mnt,/tmp:/tmp test6_re2.sif /bin/bash -c "mkdir -p ~/.config/ && mkdir -p ~/.config/openml/ && echo 'cachedir=/tmp/tjviering/openml_cache/' > ~/.config/openml/config && source activate /opt/conda/envs/lcdb && pip install py_experimenter==1.2 pynisher && mkdir -p /tmp/tjviering/ && mkdir -p /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && rm -rf /tmp/tjviering/${SLURM_ARRAY_TASK_ID}/lcdb && cd /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && git clone https://github.com/fmohr/lcdb.git && source activate /opt/conda/envs/lcdb && cd lcdb/publications/2023-neurips && pip install . && cd /mnt && ~/.local/bin/lcdb run --config config/knn_large.cfg --executor-name B{$SLURM_ARRAY_TASK_ID}" diff --git a/publications/2023-neurips/cluster/slurm/apptainer/batchjobmedium.sh b/publications/2023-neurips/cluster/slurm/apptainer/batchjobmedium.sh new file mode 100644 index 0000000..34c8af3 --- /dev/null +++ b/publications/2023-neurips/cluster/slurm/apptainer/batchjobmedium.sh @@ -0,0 +1,14 @@ +#!/bin/sh +#SBATCH --partition=general --qos=long +#SBATCH --time=168:00:00 +#SBATCH --mincpus=2 +#SBATCH --mem=12000 +#SBATCH --job-name=lcdbM +#SBATCH --output=lcdbM%a.txt +#SBATCH --error=lcdbM%a.txt +#SBATCH --array=1-146 +ulimit -n 8000 +cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips/ +rsync openml_cache /tmp/tjviering/ -r -v --ignore-existing +cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/ +srun apptainer exec -c --bind /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips:/mnt,/tmp:/tmp test6_re2.sif /bin/bash -c "mkdir -p ~/.config/ && mkdir -p ~/.config/openml/ && echo 'cachedir=/tmp/tjviering/openml_cache/' > ~/.config/openml/config && source activate /opt/conda/envs/lcdb && pip install py_experimenter==1.2 
pynisher && mkdir -p /tmp/tjviering/ && mkdir -p /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && rm -rf /tmp/tjviering/${SLURM_ARRAY_TASK_ID}/lcdb && cd /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && git clone https://github.com/fmohr/lcdb.git && source activate /opt/conda/envs/lcdb && cd lcdb/publications/2023-neurips && pip install . && cd /mnt && ~/.local/bin/lcdb run --config config/knn_medium.cfg --executor-name B{$SLURM_ARRAY_TASK_ID}" diff --git a/publications/2023-neurips/cluster/slurm/apptainer/batchjobsmall.sh b/publications/2023-neurips/cluster/slurm/apptainer/batchjobsmall.sh new file mode 100644 index 0000000..65b287d --- /dev/null +++ b/publications/2023-neurips/cluster/slurm/apptainer/batchjobsmall.sh @@ -0,0 +1,14 @@ +#!/bin/sh +#SBATCH --partition=general --qos=long +#SBATCH --time=168:00:00 +#SBATCH --mincpus=2 +#SBATCH --mem=6000 +#SBATCH --job-name=lcdbS +#SBATCH --output=lcdbS%a.txt +#SBATCH --error=lcdbS%a.txt +#SBATCH --array=1-115 +ulimit -n 8000 +cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips/ +rsync openml_cache /tmp/tjviering/ -r -v --ignore-existing +cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/ +srun apptainer exec -c --bind /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips:/mnt,/tmp:/tmp test6_re2.sif /bin/bash -c "mkdir -p ~/.config/ && mkdir -p ~/.config/openml/ && echo 'cachedir=/tmp/tjviering/openml_cache/' > ~/.config/openml/config && source activate /opt/conda/envs/lcdb && pip install py_experimenter==1.2 pynisher && mkdir -p /tmp/tjviering/ && mkdir -p /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && rm -rf /tmp/tjviering/${SLURM_ARRAY_TASK_ID}/lcdb && cd /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && git clone https://github.com/fmohr/lcdb.git && source activate /opt/conda/envs/lcdb && cd lcdb/publications/2023-neurips && pip install . 
import json
import time

import pandas as pd
import pymysql

# NOTE(review): the password was hard-coded in the original script; keep the
# variable but load the real secret from the environment/config in deployment.
pw2 = 'database_password'

# Only these table names may be interpolated into SQL text below.
_ALLOWED_TABLES = ('jobs_small', 'jobs_medium', 'jobs_large')


def postprocess_table(table_name):
    """Skip now-redundant queued jobs for every row of *table_name* flagged postprocess=1.

    For each flagged row, every still-``created`` job sharing the same
    (workflow, openmlid, hyperparameters) whose first train-size anchor is
    larger than the flagged row's first anchor is marked ``skipped``, and the
    row's ``postprocess`` flag is cleared.

    Fixes over the original:
    - values are passed as parameterized query arguments instead of being
      %-formatted into SQL (``hyperparameters`` is JSON and contains quotes);
    - the connection is always closed via ``finally`` (the original leaked it
      when no queries were generated);
    - the cursor is context-managed.

    :param table_name: one of ``_ALLOWED_TABLES``; anything else is rejected.
    :raises ValueError: if *table_name* is not whitelisted.
    """
    if table_name not in _ALLOWED_TABLES:
        raise ValueError('unexpected table name: %r' % table_name)

    cnx = pymysql.connect(host='lcdb_experiments.ewi.tudelft.nl', user='lcdb',
                          passwd=pw2, db='db_lcdb')
    try:
        # table_name is whitelisted above, so interpolating it is safe; all
        # *values* go through %s placeholders handled by the driver.
        to_process = pd.read_sql_query(
            'select * from %s where postprocess=1;' % table_name, cnx)
        print('found %d rows for processing...' % len(to_process))

        queries = []  # (sql, params) pairs, executed in one transaction below

        for i in range(len(to_process)):
            print('working on row %d' % i)
            row = to_process.iloc[i]

            datas = pd.read_sql_query(
                ('select * from %s where workflow=%%s and openmlid=%%s '
                 "and hyperparameters=%%s and status='created';") % table_name,
                cnx,
                params=(row.workflow, int(row.openmlid), row.hyperparameters))

            if len(datas) < 1:
                print('this row doesnt have any jobs remaining... too bad!')
            else:
                # smallest anchor of the flagged row; jobs starting at a larger
                # anchor are redundant and get skipped
                trainsize_small = json.loads(row.train_sizes)[0]
                trainsizes_todo = [
                    ts for ts in datas['train_sizes'].unique()
                    if json.loads(ts)[0] > trainsize_small
                ]
                for trainsize in trainsizes_todo:
                    queries.append((
                        ('update %s set status=%%s where workflow=%%s and '
                         'openmlid=%%s and hyperparameters=%%s and status=%%s '
                         'and train_sizes=%%s;') % table_name,
                        ('skipped', row.workflow, int(row.openmlid),
                         row.hyperparameters, 'created', trainsize)))

            # Clear the flag even when nothing was skipped, so the row is not
            # re-examined forever.
            # NOTE(review): attribute case 'ID' taken verbatim from the
            # original; confirm it matches the column name pymysql returns.
            queries.append((
                'update %s set postprocess=0 where id=%%s' % table_name,
                (int(row.ID),)))

        print('I have to execute %d queries... Lets get to work!'
              % len(queries))

        if queries:
            with cnx.cursor() as cursor:
                for sql, params in queries:
                    print('performing query: %s' % sql)
                    affected = cursor.execute(sql, params)
                    print('rows affected: %d' % affected)
            cnx.commit()
    finally:
        cnx.close()


if __name__ == '__main__':
    # Poll the three job tables forever; any failure is logged and retried on
    # the next cycle. Sleeps between polls regardless of outcome so a healthy
    # run does not hammer the database.
    while True:
        try:
            print('trying small...')
            postprocess_table('jobs_small')
            print('trying medium...')
            postprocess_table('jobs_medium')
            print('trying large...')
            postprocess_table('jobs_large')
        except Exception as e:
            print('failed with error %s' % str(e))
        print('going to sleep for 5 min...')
        time.sleep(60 * 5)