Commit 9c018e8

Merge branch 'main' into deephyper

fmohr authored Nov 18, 2024
2 parents 20cb057 + e105db0 commit 9c018e8

Showing 15 changed files with 189 additions and 18 deletions.
14 changes: 14 additions & 0 deletions publications/2023-neurips/cluster/slurm/apptainer/batchjoblarge.sh
@@ -0,0 +1,14 @@
#!/bin/sh
#SBATCH --partition=general --qos=long
#SBATCH --time=168:00:00
#SBATCH --mincpus=2
#SBATCH --mem=36000
#SBATCH --job-name=lcdbL
#SBATCH --output=lcdbL%a.txt
#SBATCH --error=lcdbL%a.txt
#SBATCH --array=1-83
ulimit -n 8000
cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips/
rsync openml_cache /tmp/tjviering/ -r -v --ignore-existing
cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/
srun apptainer exec -c --bind /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips:/mnt,/tmp:/tmp test6_re2.sif /bin/bash -c "mkdir -p ~/.config/ && mkdir -p ~/.config/openml/ && echo 'cachedir=/tmp/tjviering/openml_cache/' > ~/.config/openml/config && source activate /opt/conda/envs/lcdb && pip install py_experimenter==1.2 pynisher && mkdir -p /tmp/tjviering/ && mkdir -p /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && rm -rf /tmp/tjviering/${SLURM_ARRAY_TASK_ID}/lcdb && cd /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && git clone https://github.com/fmohr/lcdb.git && source activate /opt/conda/envs/lcdb && cd lcdb/publications/2023-neurips && pip install . && cd /mnt && ~/.local/bin/lcdb run --config config/knn_large.cfg --executor-name B{$SLURM_ARRAY_TASK_ID}"
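These scripts are meant to be submitted as SLURM array jobs; a minimal usage sketch, assuming the script names above and standard SLURM tooling (the %a in the --output/--error patterns and $SLURM_ARRAY_TASK_ID both refer to the array index):

sbatch batchjoblarge.sh      # queues array tasks 1-83; task N runs with executor name B{N}
squeue -u "$USER"            # check which tasks are pending or running
tail -f lcdbL1.txt           # per-task log, written via the --output/--error patterns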
14 changes: 14 additions & 0 deletions publications/2023-neurips/cluster/slurm/apptainer/batchjobmedium.sh
@@ -0,0 +1,14 @@
#!/bin/sh
#SBATCH --partition=general --qos=long
#SBATCH --time=168:00:00
#SBATCH --mincpus=2
#SBATCH --mem=12000
#SBATCH --job-name=lcdbM
#SBATCH --output=lcdbM%a.txt
#SBATCH --error=lcdbM%a.txt
#SBATCH --array=1-146
ulimit -n 8000
cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips/
rsync openml_cache /tmp/tjviering/ -r -v --ignore-existing
cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/
srun apptainer exec -c --bind /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips:/mnt,/tmp:/tmp test6_re2.sif /bin/bash -c "mkdir -p ~/.config/ && mkdir -p ~/.config/openml/ && echo 'cachedir=/tmp/tjviering/openml_cache/' > ~/.config/openml/config && source activate /opt/conda/envs/lcdb && pip install py_experimenter==1.2 pynisher && mkdir -p /tmp/tjviering/ && mkdir -p /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && rm -rf /tmp/tjviering/${SLURM_ARRAY_TASK_ID}/lcdb && cd /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && git clone https://github.com/fmohr/lcdb.git && source activate /opt/conda/envs/lcdb && cd lcdb/publications/2023-neurips && pip install . && cd /mnt && ~/.local/bin/lcdb run --config config/knn_medium.cfg --executor-name B{$SLURM_ARRAY_TASK_ID}"
14 changes: 14 additions & 0 deletions publications/2023-neurips/cluster/slurm/apptainer/batchjobsmall.sh
@@ -0,0 +1,14 @@
#!/bin/sh
#SBATCH --partition=general --qos=long
#SBATCH --time=168:00:00
#SBATCH --mincpus=2
#SBATCH --mem=6000
#SBATCH --job-name=lcdbS
#SBATCH --output=lcdbS%a.txt
#SBATCH --error=lcdbS%a.txt
#SBATCH --array=1-115
ulimit -n 8000
cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips/
rsync openml_cache /tmp/tjviering/ -r -v --ignore-existing
cd /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/
srun apptainer exec -c --bind /tudelft.net/staff-bulk/ewi/insy/PRLab/Staff/tjviering/lcdbpyexp/code/publications/2023-neurips:/mnt,/tmp:/tmp test6_re2.sif /bin/bash -c "mkdir -p ~/.config/ && mkdir -p ~/.config/openml/ && echo 'cachedir=/tmp/tjviering/openml_cache/' > ~/.config/openml/config && source activate /opt/conda/envs/lcdb && pip install py_experimenter==1.2 pynisher && mkdir -p /tmp/tjviering/ && mkdir -p /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && rm -rf /tmp/tjviering/${SLURM_ARRAY_TASK_ID}/lcdb && cd /tmp/tjviering/${SLURM_ARRAY_TASK_ID} && git clone https://github.com/fmohr/lcdb.git && source activate /opt/conda/envs/lcdb && cd lcdb/publications/2023-neurips && pip install . && cd /mnt && ~/.local/bin/lcdb run --config config/knn_small.cfg --executor-name B{$SLURM_ARRAY_TASK_ID}"
71 changes: 71 additions & 0 deletions (post-processing script; file path not shown in this view)
@@ -0,0 +1,71 @@
import json
import pymysql
import pandas as pd
import time

pw2 = 'database_password'


def postprocess_table(table_name):
    # fetch all rows that have been flagged for post-processing
    cnx = pymysql.connect(host='lcdb_experiments.ewi.tudelft.nl', user='lcdb', passwd=pw2, db='db_lcdb')
    query = '''select * from %s where postprocess=1;''' % table_name
    to_process = pd.read_sql_query(query, cnx)

    print('found %d rows for processing...' % len(to_process))

    query_list = []

    for i in range(0, len(to_process)):
        print('working on row %d' % i)

        row = to_process.iloc[i]

        # all jobs for the same workflow / dataset / hyperparameter combination
        # that have not started yet
        query = '''select * from %s where workflow='%s' and openmlid=%d and hyperparameters='%s' and status='created';''' % (
            table_name, row.workflow, row.openmlid, row.hyperparameters)

        datas = pd.read_sql_query(query, cnx)
        if len(datas) < 1:
            print('this row doesnt have any jobs remaining... too bad!')
        else:
            # smallest train size of the flagged row; pending jobs whose smallest
            # anchor is larger than this can be skipped
            trainsize_small = json.loads(row.train_sizes)[0]

            trainsizes_todo = []
            for train_size in datas['train_sizes'].unique():
                train_size_ = json.loads(train_size)
                if train_size_[0] > trainsize_small:
                    trainsizes_todo.append(train_size)

            for trainsize in trainsizes_todo:
                query_list.append(
                    '''update %s set status='skipped' where workflow='%s' and openmlid=%d and hyperparameters='%s' and status='created' and train_sizes='%s';''' % (
                        table_name, row.workflow, row.openmlid, row.hyperparameters, trainsize))

        # mark the row as processed so it is not picked up again
        query_list.append('''update %s set postprocess=0 where id=%d''' % (table_name, row.ID))

    print('I have to execute %d queries... Lets get to work!' % len(query_list))

    affected_rows = []
    if len(query_list) > 0:
        cursor = cnx.cursor()
        for query in query_list:
            print('performing query: %s' % query)
            tmp = cursor.execute(query)
            print('rows affected: %d' % tmp)
            affected_rows.append(tmp)
        cursor.close()
        cnx.commit()
    cnx.close()


# poll the three job tables forever, sleeping between passes
while True:
    try:
        print('trying small...')
        postprocess_table('jobs_small')
        print('trying medium...')
        postprocess_table('jobs_medium')
        print('trying large...')
        postprocess_table('jobs_large')
    except Exception as e:
        print('failed with error %s' % str(e))
    print('going to sleep for 5 min...')
    time.sleep(60 * 5)
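The script above builds its SQL by interpolating values directly into the query strings, which breaks if a hyperparameter string ever contains a quote. A minimal hardened sketch of the same status update using pymysql's parameterized queries, reusing the script's variables (table_name, row, trainsize); only the table name still has to be interpolated, since placeholders cover values only:

cursor = cnx.cursor()
# values are passed separately, so quoting inside hyperparameters cannot break the query
sql = ('update %s set status=%%s where workflow=%%s and openmlid=%%s '
       'and hyperparameters=%%s and status=%%s and train_sizes=%%s' % table_name)
cursor.execute(sql, ('skipped', row.workflow, int(row.openmlid),
                     row.hyperparameters, 'created', trainsize))
cnx.commit()
cursor.close()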
4 changes: 3 additions & 1 deletion publications/2023-neurips/config/knn_large.cfg
@@ -4,6 +4,8 @@ provider = mysql
database = db_lcdb
table = jobs_large

n_jobs = 2

# train_size and hyperparameters are omitted since they are computed automatically
keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
workflow = lcdb.workflow.sklearn.KNNWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
maxruntime = 1800
measure_memory = 0

resultfields = result:LONGTEXT
resultfields = result:LONGTEXT, postprocess:boolean
resultfields.timestamps = false
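A quick way to verify the new entries is to parse the file with Python's standard configparser; a small sketch, assuming the section name PY_EXPERIMENTER that lcdb/workflow/_util.py reads from these files:

import configparser

config = configparser.ConfigParser()
config.read('publications/2023-neurips/config/knn_large.cfg')
# both values touched by this commit should now be present:
print(config.get('PY_EXPERIMENTER', 'n_jobs'))        # expected: 2
print(config.get('PY_EXPERIMENTER', 'resultfields'))  # expected: result:LONGTEXT, postprocess:boolean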
4 changes: 3 additions & 1 deletion publications/2023-neurips/config/knn_medium.cfg
@@ -4,6 +4,8 @@ provider = mysql
database = db_lcdb
table = jobs_medium

n_jobs = 2

# train_size and hyperparameters are omitted since they are computed automatically
keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
workflow = lcdb.workflow.sklearn.KNNWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
maxruntime = 1800
measure_memory = 0

resultfields = result:LONGTEXT
resultfields = result:LONGTEXT, postprocess:boolean
resultfields.timestamps = false
4 changes: 3 additions & 1 deletion publications/2023-neurips/config/knn_small.cfg
@@ -4,6 +4,8 @@ provider = mysql
database = db_lcdb
table = jobs_small

n_jobs = 2

# train_size and hyperparameters are omitted since they are computed automatically
keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
workflow = lcdb.workflow.sklearn.KNNWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
maxruntime = 1800
measure_memory = 0

resultfields = result:LONGTEXT
resultfields = result:LONGTEXT, postprocess:boolean
resultfields.timestamps = false
4 changes: 3 additions & 1 deletion publications/2023-neurips/config/liblinear_large.cfg
@@ -4,6 +4,8 @@ provider = mysql
database = db_lcdb
table = jobs_large

n_jobs = 2

# train_size and hyperparameters are omitted since they are computed automatically
keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
workflow = lcdb.workflow.sklearn.LibLinearWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
maxruntime = 1800
measure_memory = 0

resultfields = result:LONGTEXT
resultfields = result:LONGTEXT, postprocess:boolean
resultfields.timestamps = false
4 changes: 3 additions & 1 deletion publications/2023-neurips/config/liblinear_medium.cfg
@@ -4,6 +4,8 @@ provider = mysql
database = db_lcdb
table = jobs_medium

n_jobs = 2

# train_size and hyperparameters are omitted since they are computed automatically
keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
workflow = lcdb.workflow.sklearn.LibLinearWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
maxruntime = 1800
measure_memory = 0

resultfields = result:LONGTEXT
resultfields = result:LONGTEXT, postprocess:boolean
resultfields.timestamps = false
4 changes: 3 additions & 1 deletion publications/2023-neurips/config/liblinear_small.cfg
@@ -4,6 +4,8 @@ provider = mysql
database = db_lcdb
table = jobs_small

n_jobs = 2

# train_size and hyperparameters are omitted since they are computed automatically
keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
workflow = lcdb.workflow.sklearn.LibLinearWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
maxruntime = 1800
measure_memory = 0

resultfields = result:LONGTEXT
resultfields = result:LONGTEXT, postprocess:boolean
resultfields.timestamps = false
4 changes: 3 additions & 1 deletion publications/2023-neurips/config/libsvm_large.cfg
@@ -4,6 +4,8 @@ provider = mysql
database = db_lcdb
table = jobs_large

n_jobs = 2

# train_size and hyperparameters are omitted since they are computed automatically
keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
workflow = lcdb.workflow.sklearn.LibSVMWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
maxruntime = 1800
measure_memory = 0

resultfields = result:LONGTEXT
resultfields = result:LONGTEXT, postprocess:boolean
resultfields.timestamps = false
4 changes: 3 additions & 1 deletion publications/2023-neurips/config/libsvm_medium.cfg
@@ -4,6 +4,8 @@ provider = mysql
database = db_lcdb
table = jobs_medium

n_jobs = 2

# train_size and hyperparameters are omitted since they are computed automatically
keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
workflow = lcdb.workflow.sklearn.LibSVMWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
maxruntime = 1800
measure_memory = 0

resultfields = result:LONGTEXT
resultfields = result:LONGTEXT, postprocess:boolean
resultfields.timestamps = false
4 changes: 3 additions & 1 deletion publications/2023-neurips/config/libsvm_small.cfg
@@ -4,6 +4,8 @@ provider = mysql
database = db_lcdb
table = jobs_small

n_jobs = 2

# train_size and hyperparameters are omitted since they are computed automatically
keyfields = workflow:text, openmlid:int, valid_prop: float, test_prop: float, seed_outer:int, seed_inner:int, train_sizes:text, hyperparameters:text, monotonic:boolean, maxruntime:int, measure_memory:boolean,
workflow = lcdb.workflow.sklearn.LibSVMWorkflow
@@ -19,5 +21,5 @@ monotonic = 1
maxruntime = 1800
measure_memory = 0

resultfields = result:LONGTEXT
resultfields = result:LONGTEXT, postprocess:boolean
resultfields.timestamps = false
1 change: 0 additions & 1 deletion publications/2023-neurips/lcdb/cli/_create.py
@@ -88,6 +88,5 @@ def main(
    pd.DataFrame(configs, columns=skopt_space.dimension_names).to_csv(
        output_file, index=False
    )

    if verbose:
        print(f"Experiments written to {output_file}")
57 changes: 49 additions & 8 deletions publications/2023-neurips/lcdb/workflow/_util.py
@@ -245,6 +245,7 @@ def get_all_experiments(
    seed: int,
    max_num_anchors_per_row: int,
    LHS: bool,
    random_hps_per_dataset: bool,
) -> List[Dict]:
    """Create a sample of experimental configurations for a given workflow.
@@ -264,14 +265,54 @@
        max_num_anchors_per_row=max_num_anchors_per_row,
    )

    # import the workflow class
    workflow_path = config.get("PY_EXPERIMENTER", "workflow")
    workflow_class = import_attr_from_module(workflow_path)

    config_space = workflow_class.get_config_space()
    default_config = get_default_config(config_space)

    config_space.seed(seed)
    df_experiments_grouped = df_experiments.groupby("openmlid")

    experiments = []

    for name, group in df_experiments_grouped:
        print('working on dataset %d...' % name)
        # import the workflow class
        workflow_path = config.get("PY_EXPERIMENTER", "workflow")
        workflow_class = import_attr_from_module(workflow_path)

        config_space = workflow_class.get_config_space()
        default_config = get_default_config(config_space)

        seed_post_processed = seed
        if random_hps_per_dataset:
            seed_post_processed = seed_post_processed + int(name)
        config_space.seed(seed_post_processed)

        if LHS:
            print('using LHS with seed %d...' % seed_post_processed)
            lhs_generator = LHSGenerator(config_space, n=num_configs, seed=seed)
            hp_samples = lhs_generator.generate()
        else:
            print('using random sampling with seed %d...' % seed_post_processed)
            hp_samples = config_space.sample_configuration(num_configs)
            if num_configs == 1:
                hp_samples = [hp_samples]
        hp_samples.insert(0, default_config)

        # create all rows for the experiments
        experiments = experiments + [
            {
                "workflow": workflow_path,
                "openmlid": openmlid,
                "valid_prop": v_p,
                "test_prop": t_p,
                "seed_outer": s_o,
                "seed_inner": s_i,
                "train_sizes": train_sizes,
                "maxruntime": maxruntime,
                "hyperparameters": dict(hp),
                "monotonic": mon,
                "measure_memory": measure_memory,
            }
            for (openmlid, v_p, t_p, s_o, s_i, train_sizes, mon, maxruntime, measure_memory), hp in it.product(
                group.values, hp_samples
            )
        ]

    if LHS:
        print("using LHS...")
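The new random_hps_per_dataset flag offsets the base seed by the OpenML dataset id, so each dataset draws its own reproducible hyperparameter sample. A standalone sketch of the same idea with ConfigSpace (toy search space, not the workflow's real one):

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

def sample_for_dataset(openmlid, seed, num_configs, random_hps_per_dataset=True):
    config_space = ConfigurationSpace()
    config_space.add_hyperparameter(UniformFloatHyperparameter('x', 0.0, 1.0))
    # offset the base seed by the dataset id: reproducible per dataset, different across datasets
    if random_hps_per_dataset:
        seed = seed + int(openmlid)
    config_space.seed(seed)
    samples = config_space.sample_configuration(num_configs)
    # sample_configuration returns a single Configuration when num_configs == 1
    return [samples] if num_configs == 1 else samples

print(sample_for_dataset(openmlid=3, seed=42, num_configs=2))
print(sample_for_dataset(openmlid=6, seed=42, num_configs=2))  # same recipe, different draws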
