From 12b982ecc24e0843562c646c9d9862694dcd030f Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Fri, 26 May 2023 09:41:33 +0200 Subject: [PATCH 1/7] stash changes --- zntrack/project/zntrack_project.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/zntrack/project/zntrack_project.py b/zntrack/project/zntrack_project.py index 3269f13b..f4c0fa93 100644 --- a/zntrack/project/zntrack_project.py +++ b/zntrack/project/zntrack_project.py @@ -9,6 +9,7 @@ import shutil import subprocess import typing +import uuid import git import yaml @@ -228,8 +229,14 @@ def create_experiment(self, name: str = None, queue: bool = True) -> Experiment: exp = Experiment(name, project=self) - yield exp + stash_uuid = uuid.uuid4() + + repo = git.Repo() + dirty = repo.is_dirty() + if dirty: + repo.git.stash("push", "--include-untracked", "-m", str(stash_uuid)) + yield exp for node_uuid in self.graph.get_sorted_nodes(): node: Node = self.graph.nodes[node_uuid]["value"] node.save(results=False) @@ -244,6 +251,12 @@ def create_experiment(self, name: str = None, queue: bool = True) -> Experiment: # "Reproducing", "Experiment", "'exp-name'" exp.name = proc.stdout.decode("utf-8").split()[2].replace("'", "") + repo.git.reset("--hard") + if dirty: + repo.git.stash("apply", f"stash^{{/{stash_uuid}}}") + if not queue: + exp.apply(quiet=True) + def run_exp(self, jobs: int = 1) -> None: """Run all queued experiments.""" run_dvc_cmd(["exp", "run", "--run-all", "--jobs", str(jobs)]) @@ -264,6 +277,10 @@ class Experiment: nodes: dict = dataclasses.field(default_factory=dict, init=False, repr=False) + def apply(self, quiet=False) -> None: + """Apply the experiment.""" + run_dvc_cmd(["exp", "apply", self.name] + ["--quiet"] if quiet else []) + def load(self) -> None: """Load the nodes from this experiment.""" self.nodes = { From 28399195b0858ed36ba11d3b430de7e6606ee772 Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Fri, 26 May 2023 11:08:16 +0200 Subject: [PATCH 2/7] allow changes in the graph --- examples/docs/parameter_optimization.ipynb | 1030 ++++++++++++++++++-- zntrack/core/node.py | 3 + zntrack/fields/zn/__init__.py | 2 +- zntrack/project/zntrack_project.py | 36 +- 4 files changed, 951 insertions(+), 120 deletions(-) diff --git a/examples/docs/parameter_optimization.ipynb b/examples/docs/parameter_optimization.ipynb index 72f69523..b2818ff2 100644 --- a/examples/docs/parameter_optimization.ipynb +++ b/examples/docs/parameter_optimization.ipynb @@ -21,7 +21,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Initialized empty Git repository in /tmp/tmpkl238sx6/.git/\n", + "Initialized empty Git repository in /tmp/tmp_1cbhh6v/.git/\n", "Initialized DVC repository.\n", "\n", "You can now commit the changes to git.\n", @@ -91,6 +91,7 @@ "import sklearn.datasets\n", "import sklearn.ensemble\n", "import sklearn.model_selection\n", + "import sklearn.metrics\n", "\n", "\n", "class HousingDataSet(zntrack.Node):\n", @@ -155,6 +156,20 @@ " self.model.fit(self.train_features, self.train_labels)\n", "\n", "\n", + "class SVR(zntrack.Node):\n", + " \"\"\"Train a SVR model.\"\"\"\n", + "\n", + " train_features = zntrack.zn.deps()\n", + " train_labels = zntrack.zn.deps()\n", + " C = zntrack.zn.params()\n", + "\n", + " model = zntrack.zn.outs()\n", + "\n", + " def run(self) -> None:\n", + " self.model = sklearn.svm.LinearSVR(C=self.C)\n", + " self.model.fit(self.train_features, self.train_labels)\n", + "\n", + "\n", "class Evaluate(zntrack.Node):\n", " \"\"\"Evaluate the model on a test set.\"\"\"\n", "\n", @@ -165,7 +180,8 @@ " score = zntrack.zn.metrics()\n", "\n", " def run(self) -> None:\n", - " self.score = self.model.score(self.test_features, self.test_labels)" + " prediction = self.model.predict(self.test_features)\n", + " self.score = sklearn.metrics.mean_squared_error(self.test_labels, prediction)" ] }, { @@ -219,7 +235,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Running DVC command: 'stage add --name RandomForest --force ...'\n", + "Running DVC command: 'stage add --name model --force ...'\n", "\u0000" ] }, @@ -258,9 +274,16 @@ " data = HousingDataSet()\n", " split = TrainTestSplit(labels=data.labels, features=data.features)\n", " model = RandomForest(\n", - " train_features=split.train_features, train_labels=split.train_labels, max_depth=2\n", + " train_features=split.train_features,\n", + " train_labels=split.train_labels,\n", + " max_depth=2,\n", + " name=\"model\",\n", + " )\n", + " evaluate = Evaluate(\n", + " model=model.model,\n", + " test_features=split.test_features,\n", + " test_labels=split.test_labels,\n", " )\n", - " evaluate = Evaluate(model.model, split.test_features, split.test_labels)\n", "\n", "project.run()" ] @@ -284,12 +307,32 @@ "\u0000" ] }, + { + "data": { + "text/plain": [ + "NodeStatus(loaded=True, results=, remote=None, rev=None)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "RandomForest.from_rev(name=\"model\").state" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[main (root-commit) e3c690e] initial commit\n", - " 20 files changed, 1013 insertions(+)\n", + "[main (root-commit) 2f436cc] initial commit\n", + " 20 files changed, 1271 insertions(+)\n", " create mode 100644 .dvc/.gitignore\n", " create mode 100644 .dvc/config\n", " create mode 100644 .dvcignore\n", @@ -297,8 +340,8 @@ " create mode 100644 dvc.lock\n", " create mode 100644 dvc.yaml\n", " create mode 100644 nodes/Evaluate/score.json\n", - " create mode 100644 nodes/RandomForest/.gitignore\n", " create mode 100644 nodes/TrainTestSplit/.gitignore\n", + " create mode 100644 nodes/model/.gitignore\n", " create mode 100644 parameter_optimization.ipynb\n", " create mode 100644 params.yaml\n", " create mode 100644 src/Evaluate.py\n", @@ -332,119 +375,892 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "[I 2023-05-26 08:45:32,961] A new study created in memory with name: no-name-f3881c32-2572-4052-ab4c-6fa17b5a6de6\n", - "[I 2023-05-26 08:45:50,192] Trial 0 finished with value: 0.8063654034274534 and parameters: {'max_depth': 23}. Best is trial 0 with value: 0.8063654034274534.\n", - "[I 2023-05-26 08:46:06,579] Trial 1 finished with value: 0.8056940421942 and parameters: {'max_depth': 18}. Best is trial 0 with value: 0.8063654034274534.\n", - "[I 2023-05-26 08:46:24,109] Trial 2 finished with value: 0.8057630780610348 and parameters: {'max_depth': 26}. Best is trial 0 with value: 0.8063654034274534.\n", - "[I 2023-05-26 08:46:40,780] Trial 3 finished with value: 0.8057761677289834 and parameters: {'max_depth': 19}. Best is trial 0 with value: 0.8063654034274534.\n", - "[I 2023-05-26 08:46:57,965] Trial 4 finished with value: 0.8062920194093484 and parameters: {'max_depth': 24}. Best is trial 0 with value: 0.8063654034274534.\n", - "[I 2023-05-26 08:47:04,346] Trial 5 finished with value: 0.8062920194093484 and parameters: {'max_depth': 24}. Best is trial 0 with value: 0.8063654034274534.\n", - "[I 2023-05-26 08:47:24,027] Trial 6 finished with value: 0.8065939316861175 and parameters: {'max_depth': 25}. Best is trial 6 with value: 0.8065939316861175.\n", - "[I 2023-05-26 08:47:39,145] Trial 7 finished with value: 0.6885681580040235 and parameters: {'max_depth': 6}. Best is trial 6 with value: 0.8065939316861175.\n", - "[I 2023-05-26 08:47:54,066] Trial 8 finished with value: 0.7881269558646984 and parameters: {'max_depth': 11}. Best is trial 6 with value: 0.8065939316861175.\n", - "[I 2023-05-26 08:48:10,127] Trial 9 finished with value: 0.8051355813484385 and parameters: {'max_depth': 17}. Best is trial 6 with value: 0.8065939316861175.\n" + "[I 2023-05-26 11:04:06,134] A new study created in memory with name: no-name-bc908ca1-9f30-41c2-aee5-0cbc01977124\n", + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n", + "\u0000" ] - } - ], - "source": [ - "def objective(trial):\n", - " with project.create_experiment(queue=False, name=f\"exp-{trial.number}\") as exp:\n", - " model.max_depth = trial.suggest_int(\"max_depth\", 2, 32)\n", - "\n", - " return exp[evaluate].score\n", - "\n", - "\n", - "study = optuna.create_study(direction=\"maximize\")\n", - "study.optimize(objective, n_trials=10)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Evaluate\n", - "\n", - "We can now investigate the best parameters via `study.best_params`.\n", - "Additionally, because we used DVC experiments we can directly access the experiment with the best parameters, by the name we used." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "{'max_depth': 25}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "study.best_params" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, { - "data": { - "text/plain": [ - "25" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "best_model = model.from_rev(rev=f\"exp-{study.best_trial.number}\")\n", - "best_model.max_depth" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", + "\u0000" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Best score: 0.807 compared to initial score: 0.445\n" + "\u0000" ] - } - ], - "source": [ - "# we load split data into memory to compute the score.\n", - "split.load()\n", - "\n", - "best_score = best_model.model.score(split.test_features, split.test_labels)\n", - "# best_score == evaluate.from_rev(rev=f\"exp-{study.best_trial.number}\").score\n", - "initial_score = evaluate.from_rev(rev=\"HEAD\").score\n", - "print(f\"Best score: {best_score:.3f} compared to initial score: {initial_score:.3f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name model --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name Evaluate --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'exp apply exp-0'\n", + "\u0000[I 2023-05-26 11:04:19,287] Trial 0 finished with value: 0.4213089843807455 and parameters: {'classifier': 'RandomForest', 'max_depth': 6}. Best is trial 0 with value: 0.4213089843807455.\n", + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name model --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name Evaluate --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'exp apply exp-1'\n", + "\u0000[I 2023-05-26 11:04:37,128] Trial 1 finished with value: 0.26252015770191933 and parameters: {'classifier': 'RandomForest', 'max_depth': 32}. Best is trial 0 with value: 0.4213089843807455.\n", + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name model --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name Evaluate --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'exp apply exp-2'\n", + "\u0000[I 2023-05-26 11:04:55,013] Trial 2 finished with value: 0.26234815407181117 and parameters: {'classifier': 'RandomForest', 'max_depth': 30}. Best is trial 0 with value: 0.4213089843807455.\n", + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name model --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name Evaluate --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'exp apply exp-3'\n", + "\u0000[I 2023-05-26 11:05:02,292] Trial 3 finished with value: 0.26234815407181117 and parameters: {'classifier': 'RandomForest', 'max_depth': 30}. Best is trial 0 with value: 0.4213089843807455.\n", + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name model --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name Evaluate --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'exp apply exp-4'\n", + "\u0000[I 2023-05-26 11:05:12,452] Trial 4 finished with value: 1.444035767903653 and parameters: {'classifier': 'SVR', 'svr_c': 1.6099878945741007e-07}. Best is trial 4 with value: 1.444035767903653.\n", + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name model --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name Evaluate --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'exp apply exp-5'\n", + "\u0000[I 2023-05-26 11:05:23,430] Trial 5 finished with value: 0.849576283199183 and parameters: {'classifier': 'SVR', 'svr_c': 2.3232051445430775e-05}. Best is trial 4 with value: 1.444035767903653.\n", + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name model --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name Evaluate --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'exp apply exp-6'\n", + "\u0000[I 2023-05-26 11:05:33,832] Trial 6 finished with value: 2.966193426725978 and parameters: {'classifier': 'SVR', 'svr_c': 6.992476753852309e-10}. Best is trial 6 with value: 2.966193426725978.\n", + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name model --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name Evaluate --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'exp apply exp-7'\n", + "\u0000[I 2023-05-26 11:05:44,651] Trial 7 finished with value: 4.757328981717488 and parameters: {'classifier': 'SVR', 'svr_c': 30391.935720981484}. Best is trial 7 with value: 4.757328981717488.\n", + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name model --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name Evaluate --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'exp apply exp-8'\n", + "\u0000[I 2023-05-26 11:06:01,212] Trial 8 finished with value: 0.2663470933213184 and parameters: {'classifier': 'RandomForest', 'max_depth': 15}. Best is trial 7 with value: 4.757328981717488.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name model --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'stage add --name Evaluate --force ...'\n", + "\u0000" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Running DVC command: 'exp apply exp-9'\n", + "\u0000[I 2023-05-26 11:06:11,967] Trial 9 finished with value: 2.7417979663371472 and parameters: {'classifier': 'SVR', 'svr_c': 0.06460090438909487}. Best is trial 7 with value: 4.757328981717488.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u0000" + ] + } + ], + "source": [ + "def objective(trial):\n", + " with project.create_experiment(queue=False, name=f\"exp-{trial.number}\") as exp:\n", + " regressor_name = trial.suggest_categorical(\"classifier\", [\"SVR\", \"RandomForest\"])\n", + "\n", + " # we need to replace the existing model on the graph with a new model.\n", + "\n", + " project.remove(\"model\")\n", + "\n", + " if regressor_name == \"SVR\":\n", + " svr_c = trial.suggest_float(\"svr_c\", 1e-10, 1e10, log=True)\n", + " model = SVR(\n", + " train_features=split.train_features,\n", + " train_labels=split.train_labels,\n", + " C=svr_c,\n", + " name=\"model\",\n", + " )\n", + " else:\n", + " max_depth = trial.suggest_int(\"max_depth\", 2, 32)\n", + " model = RandomForest(\n", + " train_features=split.train_features,\n", + " train_labels=split.train_labels,\n", + " max_depth=max_depth,\n", + " name=\"model\",\n", + " )\n", + "\n", + " # need to let the evaluate node know which model to evaluate\n", + " evaluate.model = model.model\n", + "\n", + " return exp[evaluate].score\n", + "\n", + "\n", + "study = optuna.create_study(direction=\"maximize\")\n", + "study.optimize(objective, n_trials=10)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate\n", + "\n", + "We can now investigate the best parameters via `study.best_params`.\n", + "Additionally, because we used DVC experiments we can directly access the experiment with the best parameters, by the name we used." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'classifier': 'SVR', 'svr_c': 30391.935720981484}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "study.best_params" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<__main__.SVR object at 0x14ff5c5b36d0>" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "exp = project.get_experiment(f\"exp-{study.best_trial.number}\")\n", + "best_model = exp[\"model\"]\n", + "best_model" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/tmp/tmp_1cbhh6v\n" + ] + } + ], + "source": [ + "!pwd" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Best score: 4.757 compared to initial score: 0.750\n" + ] + } + ], + "source": [ + "# we load split data into memory to compute the score.\n", + "split.load()\n", + "\n", + "best_score = evaluate.from_rev(rev=f\"exp-{study.best_trial.number}\").score\n", + "initial_score = evaluate.from_rev(rev=\"HEAD\").score\n", + "print(f\"Best score: {best_score:.3f} compared to initial score: {initial_score:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, "source": [ "temp_dir.cleanup()" ] diff --git a/zntrack/core/node.py b/zntrack/core/node.py index 8a503fc2..306ae759 100644 --- a/zntrack/core/node.py +++ b/zntrack/core/node.py @@ -169,6 +169,9 @@ def nwd(self) -> pathlib.Path: def save(self, parameter: bool = True, results: bool = True) -> None: """Save the node's output to disk.""" # TODO have an option to save and run dvc commit afterwards. + + # TODO: check if there is a difference in saving + # a loaded node vs a new node and why from zntrack.fields import Field, FieldGroup # Jupyter Notebook diff --git a/zntrack/fields/zn/__init__.py b/zntrack/fields/zn/__init__.py index 29bf9126..6714bf52 100644 --- a/zntrack/fields/zn/__init__.py +++ b/zntrack/fields/zn/__init__.py @@ -145,7 +145,7 @@ def get_data(self, instance: "Node") -> any: """Get the value of the field from the file.""" file = self.get_files(instance)[0] params_dict = yaml.safe_load(instance.state.fs.read_text(file)) - value = params_dict[instance.name].get(self.name, None) + value = params_dict[instance.name][self.name] return json.loads(json.dumps(value), cls=znjson.ZnDecoder) def get_stage_add_argument(self, instance: "Node") -> typing.List[tuple]: diff --git a/zntrack/project/zntrack_project.py b/zntrack/project/zntrack_project.py index f4c0fa93..98b593e2 100644 --- a/zntrack/project/zntrack_project.py +++ b/zntrack/project/zntrack_project.py @@ -9,7 +9,6 @@ import shutil import subprocess import typing -import uuid import git import yaml @@ -210,6 +209,14 @@ def get_nodes(self) -> dict[str, znflow.Node]: nodes[node.name] = node return nodes + def remove(self, name): + """Remove all nodes with the given name from the project.""" + # TODO there should never be multiple nodes with the same name + for node_uuid in self.graph.get_sorted_nodes(): + node = self.graph.nodes[node_uuid]["value"] + if node.name == name: + self.graph.remove_node(node_uuid) + @property def nodes(self) -> dict[str, znflow.Node]: """Get the nodes in the project.""" @@ -229,17 +236,17 @@ def create_experiment(self, name: str = None, queue: bool = True) -> Experiment: exp = Experiment(name, project=self) - stash_uuid = uuid.uuid4() - repo = git.Repo() dirty = repo.is_dirty() if dirty: - repo.git.stash("push", "--include-untracked", "-m", str(stash_uuid)) + repo.git.stash("save", "--include-untracked") - yield exp - for node_uuid in self.graph.get_sorted_nodes(): - node: Node = self.graph.nodes[node_uuid]["value"] - node.save(results=False) + force = self.force + self.force = True + with self: + yield exp + self.run(repro=False) # save nodes and update dvc.yaml + self.force = force cmd = ["dvc", "exp", "run"] if queue: @@ -252,10 +259,15 @@ def create_experiment(self, name: str = None, queue: bool = True) -> Experiment: exp.name = proc.stdout.decode("utf-8").split()[2].replace("'", "") repo.git.reset("--hard") + repo.git.clean("-fd") if dirty: - repo.git.stash("apply", f"stash^{{/{stash_uuid}}}") + repo.git.stash("pop") if not queue: - exp.apply(quiet=True) + exp.apply() + + def get_experiment(self, name: str) -> Experiment: + """Get an experiment.""" + return Experiment(name, project=self) def run_exp(self, jobs: int = 1) -> None: """Run all queued experiments.""" @@ -277,9 +289,9 @@ class Experiment: nodes: dict = dataclasses.field(default_factory=dict, init=False, repr=False) - def apply(self, quiet=False) -> None: + def apply(self) -> None: """Apply the experiment.""" - run_dvc_cmd(["exp", "apply", self.name] + ["--quiet"] if quiet else []) + run_dvc_cmd(["exp", "apply", self.name]) def load(self) -> None: """Load the nodes from this experiment.""" From 29972597b2748f70cae69482ce22bd1fc215fa21 Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Fri, 26 May 2023 14:30:55 +0200 Subject: [PATCH 3/7] wait for #628 --- examples/docs/parameter_optimization.ipynb | 82 +++++++++++++--------- zntrack/project/zntrack_project.py | 15 +++- 2 files changed, 61 insertions(+), 36 deletions(-) diff --git a/examples/docs/parameter_optimization.ipynb b/examples/docs/parameter_optimization.ipynb index b2818ff2..048d9194 100644 --- a/examples/docs/parameter_optimization.ipynb +++ b/examples/docs/parameter_optimization.ipynb @@ -21,7 +21,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Initialized empty Git repository in /tmp/tmp_1cbhh6v/.git/\n", + "Initialized empty Git repository in /tmp/tmpexwfpu4k/.git/\n", "Initialized DVC repository.\n", "\n", "You can now commit the changes to git.\n", @@ -68,7 +68,8 @@ "- Train a RandomForest model on the train data\n", "- Evaluate the model on the test data\n", "\n", - "We want to optimize the `max_depth` of the Model and use the `Evaluate` Node to compute a score that Optuna optimizes.\n", + "We want to optimize using two differen Models: RandomForest and LinearSVR with their respective hyperparameters.\n", + "We want to optimize them and use the `Evaluate` Node to compute a RMSE that Optuna optimizes.\n", "We will use DVC [Experiments](https://dvc.org/doc/start/experiments) to track each run.\n", "In combination with Optuna, this allows us not only to optimize the parameters but also easily store and access the trained models afterwards.\n" ] @@ -156,7 +157,7 @@ " self.model.fit(self.train_features, self.train_labels)\n", "\n", "\n", - "class SVR(zntrack.Node):\n", + "class LinearSVR(zntrack.Node):\n", " \"\"\"Train a SVR model.\"\"\"\n", "\n", " train_features = zntrack.zn.deps()\n", @@ -331,8 +332,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "[main (root-commit) 2f436cc] initial commit\n", - " 20 files changed, 1271 insertions(+)\n", + "[main (root-commit) 9a1b65d] initial commit\n", + " 20 files changed, 1994 insertions(+)\n", " create mode 100644 .dvc/.gitignore\n", " create mode 100644 .dvc/config\n", " create mode 100644 .dvcignore\n", @@ -382,7 +383,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[I 2023-05-26 11:04:06,134] A new study created in memory with name: no-name-bc908ca1-9f30-41c2-aee5-0cbc01977124\n", + "[I 2023-05-26 14:19:10,122] A new study created in memory with name: no-name-47e7cf69-b7c1-425e-acf9-396a86518b6a\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n", "\u0000" ] @@ -444,7 +445,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-0'\n", - "\u0000[I 2023-05-26 11:04:19,287] Trial 0 finished with value: 0.4213089843807455 and parameters: {'classifier': 'RandomForest', 'max_depth': 6}. Best is trial 0 with value: 0.4213089843807455.\n", + "\u0000[I 2023-05-26 14:19:21,149] Trial 0 finished with value: 11.100880417561175 and parameters: {'classifier': 'SVR', 'svr_c': 12078.934744589767}. Best is trial 0 with value: 11.100880417561175.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -519,7 +520,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-1'\n", - "\u0000[I 2023-05-26 11:04:37,128] Trial 1 finished with value: 0.26252015770191933 and parameters: {'classifier': 'RandomForest', 'max_depth': 32}. Best is trial 0 with value: 0.4213089843807455.\n", + "\u0000[I 2023-05-26 14:19:31,962] Trial 1 finished with value: 526.9862708662988 and parameters: {'classifier': 'SVR', 'svr_c': 5051.58980837076}. Best is trial 1 with value: 526.9862708662988.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -594,7 +595,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-2'\n", - "\u0000[I 2023-05-26 11:04:55,013] Trial 2 finished with value: 0.26234815407181117 and parameters: {'classifier': 'RandomForest', 'max_depth': 30}. Best is trial 0 with value: 0.4213089843807455.\n", + "\u0000[I 2023-05-26 14:19:47,429] Trial 2 finished with value: 0.2779238523214554 and parameters: {'classifier': 'RandomForest', 'max_depth': 12}. Best is trial 1 with value: 526.9862708662988.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -669,7 +670,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-3'\n", - "\u0000[I 2023-05-26 11:05:02,292] Trial 3 finished with value: 0.26234815407181117 and parameters: {'classifier': 'RandomForest', 'max_depth': 30}. Best is trial 0 with value: 0.4213089843807455.\n", + "\u0000[I 2023-05-26 14:20:05,182] Trial 3 finished with value: 0.2627596918267919 and parameters: {'classifier': 'RandomForest', 'max_depth': 27}. Best is trial 1 with value: 526.9862708662988.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -744,7 +745,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-4'\n", - "\u0000[I 2023-05-26 11:05:12,452] Trial 4 finished with value: 1.444035767903653 and parameters: {'classifier': 'SVR', 'svr_c': 1.6099878945741007e-07}. Best is trial 4 with value: 1.444035767903653.\n", + "\u0000[I 2023-05-26 14:20:21,223] Trial 4 finished with value: 0.2663470933213184 and parameters: {'classifier': 'RandomForest', 'max_depth': 15}. Best is trial 1 with value: 526.9862708662988.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -819,7 +820,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-5'\n", - "\u0000[I 2023-05-26 11:05:23,430] Trial 5 finished with value: 0.849576283199183 and parameters: {'classifier': 'SVR', 'svr_c': 2.3232051445430775e-05}. Best is trial 4 with value: 1.444035767903653.\n", + "\u0000[I 2023-05-26 14:20:31,632] Trial 5 finished with value: 0.9854179960940935 and parameters: {'classifier': 'SVR', 'svr_c': 1.0818236715785393e-05}. Best is trial 1 with value: 526.9862708662988.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -894,7 +895,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-6'\n", - "\u0000[I 2023-05-26 11:05:33,832] Trial 6 finished with value: 2.966193426725978 and parameters: {'classifier': 'SVR', 'svr_c': 6.992476753852309e-10}. Best is trial 6 with value: 2.966193426725978.\n", + "\u0000[I 2023-05-26 14:20:44,455] Trial 6 finished with value: 0.47413665705821306 and parameters: {'classifier': 'RandomForest', 'max_depth': 5}. Best is trial 1 with value: 526.9862708662988.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -969,7 +970,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-7'\n", - "\u0000[I 2023-05-26 11:05:44,651] Trial 7 finished with value: 4.757328981717488 and parameters: {'classifier': 'SVR', 'svr_c': 30391.935720981484}. Best is trial 7 with value: 4.757328981717488.\n", + "\u0000[I 2023-05-26 14:20:54,709] Trial 7 finished with value: 3.12196099352187 and parameters: {'classifier': 'SVR', 'svr_c': 1.0824013441742968e-10}. Best is trial 1 with value: 526.9862708662988.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -1044,7 +1045,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-8'\n", - "\u0000[I 2023-05-26 11:06:01,212] Trial 8 finished with value: 0.2663470933213184 and parameters: {'classifier': 'RandomForest', 'max_depth': 15}. Best is trial 7 with value: 4.757328981717488.\n" + "\u0000[I 2023-05-26 14:21:05,653] Trial 8 finished with value: 5.3304504202911795 and parameters: {'classifier': 'SVR', 'svr_c': 35.29665207527377}. Best is trial 1 with value: 526.9862708662988.\n" ] }, { @@ -1119,7 +1120,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-9'\n", - "\u0000[I 2023-05-26 11:06:11,967] Trial 9 finished with value: 2.7417979663371472 and parameters: {'classifier': 'SVR', 'svr_c': 0.06460090438909487}. Best is trial 7 with value: 4.757328981717488.\n" + "\u0000[I 2023-05-26 14:21:13,172] Trial 9 finished with value: 0.2627596918267919 and parameters: {'classifier': 'RandomForest', 'max_depth': 27}. Best is trial 1 with value: 526.9862708662988.\n" ] }, { @@ -1141,7 +1142,7 @@ "\n", " if regressor_name == \"SVR\":\n", " svr_c = trial.suggest_float(\"svr_c\", 1e-10, 1e10, log=True)\n", - " model = SVR(\n", + " model = LinearSVR(\n", " train_features=split.train_features,\n", " train_labels=split.train_labels,\n", " C=svr_c,\n", @@ -1179,16 +1180,16 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'classifier': 'SVR', 'svr_c': 30391.935720981484}" + "{'classifier': 'SVR', 'svr_c': 5051.58980837076}" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -1199,46 +1200,61 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "<__main__.SVR object at 0x14ff5c5b36d0>" + "dict_keys([None, 'exp-9', 'exp-8', 'exp-7', 'exp-6', 'exp-5', 'exp-4', 'exp-3', 'exp-2', 'exp-1', 'exp-0'])" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "exp = project.get_experiment(f\"exp-{study.best_trial.number}\")\n", - "best_model = exp[\"model\"]\n", - "best_model" + "project.experiments.keys()" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "/tmp/tmp_1cbhh6v\n" + "ename": "NodeNotAvailableError", + "evalue": "Node model is not available.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/core/node.py:219\u001b[0m, in \u001b[0;36mNode.load\u001b[0;34m(self, lazy, results)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[39mcontinue\u001b[39;00m\n\u001b[0;32m--> 219\u001b[0m attr\u001b[39m.\u001b[39;49mload(\u001b[39mself\u001b[39;49m)\n\u001b[1;32m 220\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n", + "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/fields/field.py:87\u001b[0m, in \u001b[0;36mField.load\u001b[0;34m(self, instance, lazy)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 87\u001b[0m instance\u001b[39m.\u001b[39m\u001b[39m__dict__\u001b[39m[\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mname] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mget_data(instance)\n\u001b[1;32m 88\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mFileNotFoundError\u001b[39;00m:\n\u001b[1;32m 89\u001b[0m \u001b[39m# if something was not loaded, we set the loaded state to False\u001b[39;00m\n", + "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/fields/zn/__init__.py:148\u001b[0m, in \u001b[0;36mParams.get_data\u001b[0;34m(self, instance)\u001b[0m\n\u001b[1;32m 147\u001b[0m params_dict \u001b[39m=\u001b[39m yaml\u001b[39m.\u001b[39msafe_load(instance\u001b[39m.\u001b[39mstate\u001b[39m.\u001b[39mfs\u001b[39m.\u001b[39mread_text(file))\n\u001b[0;32m--> 148\u001b[0m value \u001b[39m=\u001b[39m params_dict[instance\u001b[39m.\u001b[39;49mname][\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mname]\n\u001b[1;32m 149\u001b[0m \u001b[39mreturn\u001b[39;00m json\u001b[39m.\u001b[39mloads(json\u001b[39m.\u001b[39mdumps(value), \u001b[39mcls\u001b[39m\u001b[39m=\u001b[39mznjson\u001b[39m.\u001b[39mZnDecoder)\n", + "\u001b[0;31mKeyError\u001b[0m: 'max_depth'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mNodeNotAvailableError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[9], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m exp \u001b[39m=\u001b[39m project\u001b[39m.\u001b[39mexperiments[\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mexp-\u001b[39m\u001b[39m{\u001b[39;00mstudy\u001b[39m.\u001b[39mbest_trial\u001b[39m.\u001b[39mnumber\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m]\n\u001b[0;32m----> 2\u001b[0m best_model \u001b[39m=\u001b[39m exp[\u001b[39m\"\u001b[39;49m\u001b[39mmodel\u001b[39;49m\u001b[39m\"\u001b[39;49m]\n\u001b[1;32m 3\u001b[0m best_model\n", + "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/project/zntrack_project.py:309\u001b[0m, in \u001b[0;36mExperiment.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Get the Node from the experiment.\"\"\"\u001b[39;00m\n\u001b[1;32m 308\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnodes) \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m--> 309\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mload()\n\u001b[1;32m 310\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(key, Node):\n\u001b[1;32m 311\u001b[0m key \u001b[39m=\u001b[39m key\u001b[39m.\u001b[39mname\n", + "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/project/zntrack_project.py:301\u001b[0m, in \u001b[0;36mExperiment.load\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mload\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 300\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Load the nodes from this experiment.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 301\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnodes \u001b[39m=\u001b[39m {\n\u001b[1;32m 302\u001b[0m name: node\u001b[39m.\u001b[39mfrom_rev(name\u001b[39m=\u001b[39mname, rev\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mname)\n\u001b[1;32m 303\u001b[0m \u001b[39mfor\u001b[39;00m name, node \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mproject\u001b[39m.\u001b[39mget_nodes()\u001b[39m.\u001b[39mitems()\n\u001b[1;32m 304\u001b[0m }\n", + "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/project/zntrack_project.py:302\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mload\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 300\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Load the nodes from this experiment.\"\"\"\u001b[39;00m\n\u001b[1;32m 301\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnodes \u001b[39m=\u001b[39m {\n\u001b[0;32m--> 302\u001b[0m name: node\u001b[39m.\u001b[39;49mfrom_rev(name\u001b[39m=\u001b[39;49mname, rev\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mname)\n\u001b[1;32m 303\u001b[0m \u001b[39mfor\u001b[39;00m name, node \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mproject\u001b[39m.\u001b[39mget_nodes()\u001b[39m.\u001b[39mitems()\n\u001b[1;32m 304\u001b[0m }\n", + "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/core/node.py:250\u001b[0m, in \u001b[0;36mNode.from_rev\u001b[0;34m(cls, name, remote, rev, lazy, results)\u001b[0m\n\u001b[1;32m 248\u001b[0m kwargs \u001b[39m=\u001b[39m {} \u001b[39mif\u001b[39;00m lazy \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39melse\u001b[39;00m {\u001b[39m\"\u001b[39m\u001b[39mlazy\u001b[39m\u001b[39m\"\u001b[39m: lazy}\n\u001b[1;32m 249\u001b[0m \u001b[39mwith\u001b[39;00m config\u001b[39m.\u001b[39mupdated_config(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[0;32m--> 250\u001b[0m node\u001b[39m.\u001b[39;49mload(results\u001b[39m=\u001b[39;49mresults)\n\u001b[1;32m 252\u001b[0m \u001b[39mreturn\u001b[39;00m node\n", + "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/core/node.py:221\u001b[0m, in \u001b[0;36mNode.load\u001b[0;34m(self, lazy, results)\u001b[0m\n\u001b[1;32m 219\u001b[0m attr\u001b[39m.\u001b[39mload(\u001b[39mself\u001b[39m)\n\u001b[1;32m 220\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m--> 221\u001b[0m \u001b[39mraise\u001b[39;00m exceptions\u001b[39m.\u001b[39mNodeNotAvailableError(\u001b[39mself\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39merr\u001b[39;00m\n\u001b[1;32m 223\u001b[0m \u001b[39m# TODO: documentation about _post_init and _post_load_ and when they are called\u001b[39;00m\n\u001b[1;32m 224\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_post_load_()\n", + "\u001b[0;31mNodeNotAvailableError\u001b[0m: Node model is not available." ] } ], "source": [ - "!pwd" + "exp = project.experiments[f\"exp-{study.best_trial.number}\"]\n", + "best_model = exp[\"model\"]\n", + "best_model" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [ { diff --git a/zntrack/project/zntrack_project.py b/zntrack/project/zntrack_project.py index 98b593e2..d823033b 100644 --- a/zntrack/project/zntrack_project.py +++ b/zntrack/project/zntrack_project.py @@ -10,6 +10,7 @@ import subprocess import typing +import dvc.api import git import yaml import znflow @@ -265,9 +266,14 @@ def create_experiment(self, name: str = None, queue: bool = True) -> Experiment: if not queue: exp.apply() - def get_experiment(self, name: str) -> Experiment: - """Get an experiment.""" - return Experiment(name, project=self) + @property + def experiments(self, *args, **kwargs) -> dict[str, Experiment]: + """List all experiments.""" + experiments = dvc.api.exp_show(*args, **kwargs) + return { + experiment["Experiment"]: Experiment(experiment["rev"], project=self) + for experiment in experiments + } def run_exp(self, jobs: int = 1) -> None: """Run all queued experiments.""" @@ -286,6 +292,9 @@ class Experiment: name: str project: Project + # TODO the project can not be used. The graph could be different. + # Project must be loaded from rev. + # TODO name / rev / remote ... nodes: dict = dataclasses.field(default_factory=dict, init=False, repr=False) From 2f441e1e81fb39785163d0dffb80781e0c8908cb Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Mon, 12 Jun 2023 13:29:50 +0200 Subject: [PATCH 4/7] update notebook --- examples/docs/parameter_optimization.ipynb | 151 ++++++++++++++------- 1 file changed, 101 insertions(+), 50 deletions(-) diff --git a/examples/docs/parameter_optimization.ipynb b/examples/docs/parameter_optimization.ipynb index 048d9194..81fd92bc 100644 --- a/examples/docs/parameter_optimization.ipynb +++ b/examples/docs/parameter_optimization.ipynb @@ -14,14 +14,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Initialized empty Git repository in /tmp/tmpexwfpu4k/.git/\n", + "Initialized empty Git repository in /tmp/tmphi4dsmja/.git/\n", "Initialized DVC repository.\n", "\n", "You can now commit the changes to git.\n", @@ -79,12 +79,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[![](https://mermaid.ink/img/pako:eNp1jz0PgjAQhv8KuVkG1InBCY2zsFmHCz2wSXslpdUYwn_3YpSwuF2e572vCVqvCUrorH-2dwwxayrFKrLg4qrg-ECbMJKC2w9vBZ99Gg33FUasKa7kTuQFWXt38oHGtdqLagIaboTXgzVruc3zwye0DPqCYmn_B3aKYQOOgkOj5ZVJcZYpiHdycnYppaYOk5V1imeJYoq-fnELZQyJNpAGLR9WBvuADsoO7UjzG6bTY5I?type=png)](https://mermaid.live/edit#pako:eNp1jz0PgjAQhv8KuVkG1InBCY2zsFmHCz2wSXslpdUYwn_3YpSwuF2e572vCVqvCUrorH-2dwwxayrFKrLg4qrg-ECbMJKC2w9vBZ99Gg33FUasKa7kTuQFWXt38oHGtdqLagIaboTXgzVruc3zwye0DPqCYmn_B3aKYQOOgkOj5ZVJcZYpiHdycnYppaYOk5V1imeJYoq-fnELZQyJNpAGLR9WBvuADsoO7UjzG6bTY5I)" + "[![](https://mermaid.ink/img/pako:eNp1j7sOgkAQRX-FTC0FYEVhhYmNFXSuxQQG2GQfZJnVGMK_OzFKaKxmcu6981ig9R1BCb3xz3bEwElTKafYCc5uCs4PNBGZFNx_OBd88XHWbqiQsSbeiYWIV6lmx47CmoDaNTRzPRm9D-RpevqYtglfkG3xf6BQDg5gKVjUnfywKJckCngkK_eW0nbUYzSyTrlVrBjZ1y_XQskh0gHi1MlrlcYhoIWyRzPT-gaiDmCv?type=png)](https://mermaid.live/edit#pako:eNp1j7sOgkAQRX-FTC0FYEVhhYmNFXSuxQQG2GQfZJnVGMK_OzFKaKxmcu6981ig9R1BCb3xz3bEwElTKafYCc5uCs4PNBGZFNx_OBd88XHWbqiQsSbeiYWIV6lmx47CmoDaNTRzPRm9D-RpevqYtglfkG3xf6BQDg5gKVjUnfywKJckCngkK_eW0nbUYzSyTrlVrBjZ1y_XQskh0gHi1MlrlcYhoIWyRzPT-gaiDmCv)" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -197,7 +197,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -291,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -314,7 +314,7 @@ "NodeStatus(loaded=True, results=, remote=None, rev=None)" ] }, - "execution_count": 4, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -325,15 +325,15 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[main (root-commit) 9a1b65d] initial commit\n", - " 20 files changed, 1994 insertions(+)\n", + "[main (root-commit) b5218c7] initial commit\n", + " 20 files changed, 2037 insertions(+)\n", " create mode 100644 .dvc/.gitignore\n", " create mode 100644 .dvc/config\n", " create mode 100644 .dvcignore\n", @@ -376,14 +376,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "[I 2023-05-26 14:19:10,122] A new study created in memory with name: no-name-47e7cf69-b7c1-425e-acf9-396a86518b6a\n", + "[I 2023-05-30 14:28:03,838] A new study created in memory with name: no-name-15e1292e-d549-44d4-9bf0-c84394860091\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n", "\u0000" ] @@ -445,7 +445,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-0'\n", - "\u0000[I 2023-05-26 14:19:21,149] Trial 0 finished with value: 11.100880417561175 and parameters: {'classifier': 'SVR', 'svr_c': 12078.934744589767}. Best is trial 0 with value: 11.100880417561175.\n", + "\u0000[I 2023-05-30 14:28:15,273] Trial 0 finished with value: 3.7564340357379606 and parameters: {'classifier': 'SVR', 'svr_c': 2.08432428140348}. Best is trial 0 with value: 3.7564340357379606.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -520,7 +520,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-1'\n", - "\u0000[I 2023-05-26 14:19:31,962] Trial 1 finished with value: 526.9862708662988 and parameters: {'classifier': 'SVR', 'svr_c': 5051.58980837076}. Best is trial 1 with value: 526.9862708662988.\n", + "\u0000[I 2023-05-30 14:28:33,847] Trial 1 finished with value: 0.2616422061795712 and parameters: {'classifier': 'RandomForest', 'max_depth': 25}. Best is trial 0 with value: 3.7564340357379606.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -595,7 +595,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-2'\n", - "\u0000[I 2023-05-26 14:19:47,429] Trial 2 finished with value: 0.2779238523214554 and parameters: {'classifier': 'RandomForest', 'max_depth': 12}. Best is trial 1 with value: 526.9862708662988.\n", + "\u0000[I 2023-05-30 14:28:44,898] Trial 2 finished with value: 7.4389215152612564 and parameters: {'classifier': 'SVR', 'svr_c': 2546.6335503243536}. Best is trial 2 with value: 7.4389215152612564.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -670,7 +670,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-3'\n", - "\u0000[I 2023-05-26 14:20:05,182] Trial 3 finished with value: 0.2627596918267919 and parameters: {'classifier': 'RandomForest', 'max_depth': 27}. Best is trial 1 with value: 526.9862708662988.\n", + "\u0000[I 2023-05-30 14:28:55,985] Trial 3 finished with value: 1.6959671890954449 and parameters: {'classifier': 'SVR', 'svr_c': 0.001856702030839791}. Best is trial 2 with value: 7.4389215152612564.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -745,7 +745,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-4'\n", - "\u0000[I 2023-05-26 14:20:21,223] Trial 4 finished with value: 0.2663470933213184 and parameters: {'classifier': 'RandomForest', 'max_depth': 15}. Best is trial 1 with value: 526.9862708662988.\n", + "\u0000[I 2023-05-30 14:29:07,179] Trial 4 finished with value: 11.490470659940513 and parameters: {'classifier': 'SVR', 'svr_c': 166.57184086434526}. Best is trial 4 with value: 11.490470659940513.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -820,7 +820,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-5'\n", - "\u0000[I 2023-05-26 14:20:31,632] Trial 5 finished with value: 0.9854179960940935 and parameters: {'classifier': 'SVR', 'svr_c': 1.0818236715785393e-05}. Best is trial 1 with value: 526.9862708662988.\n", + "\u0000[I 2023-05-30 14:29:18,690] Trial 5 finished with value: 1.7831062788089902 and parameters: {'classifier': 'SVR', 'svr_c': 36616.95363147719}. Best is trial 4 with value: 11.490470659940513.\n", "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, @@ -895,8 +895,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-6'\n", - "\u0000[I 2023-05-26 14:20:44,455] Trial 6 finished with value: 0.47413665705821306 and parameters: {'classifier': 'RandomForest', 'max_depth': 5}. Best is trial 1 with value: 526.9862708662988.\n", - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" + "\u0000" ] }, { @@ -910,6 +909,8 @@ "name": "stderr", "output_type": "stream", "text": [ + "[I 2023-05-30 14:29:36,954] Trial 6 finished with value: 0.2620241680440442 and parameters: {'classifier': 'RandomForest', 'max_depth': 22}. Best is trial 4 with value: 11.490470659940513.\n", + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n", "\u0000" ] }, @@ -970,8 +971,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-7'\n", - "\u0000[I 2023-05-26 14:20:54,709] Trial 7 finished with value: 3.12196099352187 and parameters: {'classifier': 'SVR', 'svr_c': 1.0824013441742968e-10}. Best is trial 1 with value: 526.9862708662988.\n", - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" + "\u0000[I 2023-05-30 14:29:53,483] Trial 7 finished with value: 0.2681221506501657 and parameters: {'classifier': 'RandomForest', 'max_depth': 14}. Best is trial 4 with value: 11.490470659940513.\n" ] }, { @@ -985,6 +985,7 @@ "name": "stderr", "output_type": "stream", "text": [ + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n", "\u0000" ] }, @@ -1045,7 +1046,8 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-8'\n", - "\u0000[I 2023-05-26 14:21:05,653] Trial 8 finished with value: 5.3304504202911795 and parameters: {'classifier': 'SVR', 'svr_c': 35.29665207527377}. Best is trial 1 with value: 526.9862708662988.\n" + "\u0000[I 2023-05-30 14:30:12,684] Trial 8 finished with value: 0.26234815407181117 and parameters: {'classifier': 'RandomForest', 'max_depth': 30}. Best is trial 4 with value: 11.490470659940513.\n", + "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" ] }, { @@ -1059,7 +1061,6 @@ "name": "stderr", "output_type": "stream", "text": [ - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n", "\u0000" ] }, @@ -1120,7 +1121,7 @@ "output_type": "stream", "text": [ "Running DVC command: 'exp apply exp-9'\n", - "\u0000[I 2023-05-26 14:21:13,172] Trial 9 finished with value: 0.2627596918267919 and parameters: {'classifier': 'RandomForest', 'max_depth': 27}. Best is trial 1 with value: 526.9862708662988.\n" + "\u0000[I 2023-05-30 14:30:24,324] Trial 9 finished with value: 3.1497504942833037 and parameters: {'classifier': 'SVR', 'svr_c': 275021882.81023085}. Best is trial 4 with value: 11.490470659940513.\n" ] }, { @@ -1180,16 +1181,16 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'classifier': 'SVR', 'svr_c': 5051.58980837076}" + "{'classifier': 'SVR', 'svr_c': 166.57184086434526}" ] }, - "execution_count": 7, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -1200,7 +1201,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -1209,7 +1210,7 @@ "dict_keys([None, 'exp-9', 'exp-8', 'exp-7', 'exp-6', 'exp-5', 'exp-4', 'exp-3', 'exp-2', 'exp-1', 'exp-0'])" ] }, - "execution_count": 8, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1218,50 +1219,100 @@ "project.experiments.keys()" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can either load the Node via the experiment or by it's name using `zntrack.from_rev()`.\n", + "The node should not be loaded via `model.load()` because the `model` instance could be `RandomForest` and the best model would be `LinearSVR` or *vice versa*." + ] + }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 16, "metadata": {}, "outputs": [ { - "ename": "NodeNotAvailableError", - "evalue": "Node model is not available.", + "ename": "ModuleNotFoundError", + "evalue": "No module named 'src'. The package might be available via 'pip install src' or from the remote via 'pip install git+.'.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/core/node.py:219\u001b[0m, in \u001b[0;36mNode.load\u001b[0;34m(self, lazy, results)\u001b[0m\n\u001b[1;32m 218\u001b[0m \u001b[39mcontinue\u001b[39;00m\n\u001b[0;32m--> 219\u001b[0m attr\u001b[39m.\u001b[39;49mload(\u001b[39mself\u001b[39;49m)\n\u001b[1;32m 220\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n", - "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/fields/field.py:87\u001b[0m, in \u001b[0;36mField.load\u001b[0;34m(self, instance, lazy)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 87\u001b[0m instance\u001b[39m.\u001b[39m\u001b[39m__dict__\u001b[39m[\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mname] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mget_data(instance)\n\u001b[1;32m 88\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mFileNotFoundError\u001b[39;00m:\n\u001b[1;32m 89\u001b[0m \u001b[39m# if something was not loaded, we set the loaded state to False\u001b[39;00m\n", - "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/fields/zn/__init__.py:148\u001b[0m, in \u001b[0;36mParams.get_data\u001b[0;34m(self, instance)\u001b[0m\n\u001b[1;32m 147\u001b[0m params_dict \u001b[39m=\u001b[39m yaml\u001b[39m.\u001b[39msafe_load(instance\u001b[39m.\u001b[39mstate\u001b[39m.\u001b[39mfs\u001b[39m.\u001b[39mread_text(file))\n\u001b[0;32m--> 148\u001b[0m value \u001b[39m=\u001b[39m params_dict[instance\u001b[39m.\u001b[39;49mname][\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mname]\n\u001b[1;32m 149\u001b[0m \u001b[39mreturn\u001b[39;00m json\u001b[39m.\u001b[39mloads(json\u001b[39m.\u001b[39mdumps(value), \u001b[39mcls\u001b[39m\u001b[39m=\u001b[39mznjson\u001b[39m.\u001b[39mZnDecoder)\n", - "\u001b[0;31mKeyError\u001b[0m: 'max_depth'", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mNodeNotAvailableError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m exp \u001b[39m=\u001b[39m project\u001b[39m.\u001b[39mexperiments[\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mexp-\u001b[39m\u001b[39m{\u001b[39;00mstudy\u001b[39m.\u001b[39mbest_trial\u001b[39m.\u001b[39mnumber\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m]\n\u001b[0;32m----> 2\u001b[0m best_model \u001b[39m=\u001b[39m exp[\u001b[39m\"\u001b[39;49m\u001b[39mmodel\u001b[39;49m\u001b[39m\"\u001b[39;49m]\n\u001b[1;32m 3\u001b[0m best_model\n", - "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/project/zntrack_project.py:309\u001b[0m, in \u001b[0;36mExperiment.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Get the Node from the experiment.\"\"\"\u001b[39;00m\n\u001b[1;32m 308\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnodes) \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m--> 309\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mload()\n\u001b[1;32m 310\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(key, Node):\n\u001b[1;32m 311\u001b[0m key \u001b[39m=\u001b[39m key\u001b[39m.\u001b[39mname\n", - "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/project/zntrack_project.py:301\u001b[0m, in \u001b[0;36mExperiment.load\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mload\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 300\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Load the nodes from this experiment.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 301\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnodes \u001b[39m=\u001b[39m {\n\u001b[1;32m 302\u001b[0m name: node\u001b[39m.\u001b[39mfrom_rev(name\u001b[39m=\u001b[39mname, rev\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mname)\n\u001b[1;32m 303\u001b[0m \u001b[39mfor\u001b[39;00m name, node \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mproject\u001b[39m.\u001b[39mget_nodes()\u001b[39m.\u001b[39mitems()\n\u001b[1;32m 304\u001b[0m }\n", - "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/project/zntrack_project.py:302\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mload\u001b[39m(\u001b[39mself\u001b[39m) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 300\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Load the nodes from this experiment.\"\"\"\u001b[39;00m\n\u001b[1;32m 301\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnodes \u001b[39m=\u001b[39m {\n\u001b[0;32m--> 302\u001b[0m name: node\u001b[39m.\u001b[39;49mfrom_rev(name\u001b[39m=\u001b[39;49mname, rev\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mname)\n\u001b[1;32m 303\u001b[0m \u001b[39mfor\u001b[39;00m name, node \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mproject\u001b[39m.\u001b[39mget_nodes()\u001b[39m.\u001b[39mitems()\n\u001b[1;32m 304\u001b[0m }\n", - "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/core/node.py:250\u001b[0m, in \u001b[0;36mNode.from_rev\u001b[0;34m(cls, name, remote, rev, lazy, results)\u001b[0m\n\u001b[1;32m 248\u001b[0m kwargs \u001b[39m=\u001b[39m {} \u001b[39mif\u001b[39;00m lazy \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m \u001b[39melse\u001b[39;00m {\u001b[39m\"\u001b[39m\u001b[39mlazy\u001b[39m\u001b[39m\"\u001b[39m: lazy}\n\u001b[1;32m 249\u001b[0m \u001b[39mwith\u001b[39;00m config\u001b[39m.\u001b[39mupdated_config(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[0;32m--> 250\u001b[0m node\u001b[39m.\u001b[39;49mload(results\u001b[39m=\u001b[39;49mresults)\n\u001b[1;32m 252\u001b[0m \u001b[39mreturn\u001b[39;00m node\n", - "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/core/node.py:221\u001b[0m, in \u001b[0;36mNode.load\u001b[0;34m(self, lazy, results)\u001b[0m\n\u001b[1;32m 219\u001b[0m attr\u001b[39m.\u001b[39mload(\u001b[39mself\u001b[39m)\n\u001b[1;32m 220\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m--> 221\u001b[0m \u001b[39mraise\u001b[39;00m exceptions\u001b[39m.\u001b[39mNodeNotAvailableError(\u001b[39mself\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39merr\u001b[39;00m\n\u001b[1;32m 223\u001b[0m \u001b[39m# TODO: documentation about _post_init and _post_load_ and when they are called\u001b[39;00m\n\u001b[1;32m 224\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_post_load_()\n", - "\u001b[0;31mNodeNotAvailableError\u001b[0m: Node model is not available." + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[16], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m exp \u001b[39m=\u001b[39m project\u001b[39m.\u001b[39mexperiments[\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mexp-\u001b[39m\u001b[39m{\u001b[39;00mstudy\u001b[39m.\u001b[39mbest_trial\u001b[39m.\u001b[39mnumber\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 2\u001b[0m best_model \u001b[39m=\u001b[39m exp[\u001b[39m\"\u001b[39m\u001b[39mmodel\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m----> 3\u001b[0m best_model \u001b[39m=\u001b[39m zntrack\u001b[39m.\u001b[39;49mfrom_rev(\u001b[39m\"\u001b[39;49m\u001b[39mmodel\u001b[39;49m\u001b[39m\"\u001b[39;49m, rev\u001b[39m=\u001b[39;49m\u001b[39mf\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mexp-\u001b[39;49m\u001b[39m{\u001b[39;49;00mstudy\u001b[39m.\u001b[39;49mbest_trial\u001b[39m.\u001b[39;49mnumber\u001b[39m}\u001b[39;49;00m\u001b[39m\"\u001b[39;49m)\n", + "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/core/load.py:110\u001b[0m, in \u001b[0;36mfrom_rev\u001b[0;34m(name, remote, rev, **kwargs)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[39mif\u001b[39;00m module \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 109\u001b[0m module_name \u001b[39m=\u001b[39m package_and_module\u001b[39m.\u001b[39msplit(\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m)[\u001b[39m0\u001b[39m]\n\u001b[0;32m--> 110\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mModuleNotFoundError\u001b[39;00m(\n\u001b[1;32m 111\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNo module named \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mmodule_name\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m. The package might be available via \u001b[39m\u001b[39m'\u001b[39m\u001b[39mpip\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 112\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m install \u001b[39m\u001b[39m{\u001b[39;00mmodule_name\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m or from the remote via \u001b[39m\u001b[39m'\u001b[39m\u001b[39mpip install git+\u001b[39m\u001b[39m{\u001b[39;00mremote\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 113\u001b[0m )\n\u001b[1;32m 115\u001b[0m \u001b[39mcls\u001b[39m \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(module, cls_name)\n\u001b[1;32m 117\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39mfrom_rev(name, remote, rev, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'src'. The package might be available via 'pip install src' or from the remote via 'pip install git+.'." ] } ], "source": [ "exp = project.experiments[f\"exp-{study.best_trial.number}\"]\n", "best_model = exp[\"model\"]\n", - "best_model" + "best_model = zntrack.from_rev(\"model\", rev=f\"exp-{study.best_trial.number}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'exp-4'" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f\"exp-{study.best_trial.number}\"" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "import zntrack" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'src'. The package might be available via 'pip install src' or from the remote via 'pip install git+.'.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[18], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m zntrack\u001b[39m.\u001b[39;49mfrom_rev(\u001b[39m\"\u001b[39;49m\u001b[39mmodel\u001b[39;49m\u001b[39m\"\u001b[39;49m, rev\u001b[39m=\u001b[39;49m\u001b[39mf\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mexp-4\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n", + "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/core/load.py:110\u001b[0m, in \u001b[0;36mfrom_rev\u001b[0;34m(name, remote, rev, **kwargs)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[39mif\u001b[39;00m module \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 109\u001b[0m module_name \u001b[39m=\u001b[39m package_and_module\u001b[39m.\u001b[39msplit(\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m)[\u001b[39m0\u001b[39m]\n\u001b[0;32m--> 110\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mModuleNotFoundError\u001b[39;00m(\n\u001b[1;32m 111\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNo module named \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mmodule_name\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m. The package might be available via \u001b[39m\u001b[39m'\u001b[39m\u001b[39mpip\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 112\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m install \u001b[39m\u001b[39m{\u001b[39;00mmodule_name\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m or from the remote via \u001b[39m\u001b[39m'\u001b[39m\u001b[39mpip install git+\u001b[39m\u001b[39m{\u001b[39;00mremote\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 113\u001b[0m )\n\u001b[1;32m 115\u001b[0m \u001b[39mcls\u001b[39m \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(module, cls_name)\n\u001b[1;32m 117\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39mfrom_rev(name, remote, rev, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'src'. The package might be available via 'pip install src' or from the remote via 'pip install git+.'." + ] + } + ], + "source": [ + "zntrack.from_rev(\"model\", rev=f\"exp-4\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Best score: 4.757 compared to initial score: 0.750\n" + "Best score: 9.748 compared to initial score: 0.750\n" ] } ], From db8cac977737795e207413f71eb6bae140a887e9 Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Thu, 15 Jun 2023 13:51:46 +0200 Subject: [PATCH 5/7] update tests --- tests/integration/test_project.py | 24 ++++++++++++++++++++++++ zntrack/project/__init__.py | 4 ++-- zntrack/project/zntrack_project.py | 1 + 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_project.py b/tests/integration/test_project.py index 7172b067..b6a1bbdb 100644 --- a/tests/integration/test_project.py +++ b/tests/integration/test_project.py @@ -1,6 +1,9 @@ +import pathlib + import pytest import zntrack +from zntrack.project import Experiment class WriteIO(zntrack.Node): @@ -22,15 +25,25 @@ def test_WriteIO(tmp_path_2, assert_before_exp): if assert_before_exp: assert node.outputs == "Hello World" + # write a non-tracked file using pathlib + pathlib.Path("test.txt").write_text("Hello World") + with project.create_experiment(name="exp1") as exp1: node.inputs = "Hello World" + # check that the file is still there + assert pathlib.Path("test.txt").read_text() == "Hello World" + with project.create_experiment(name="exp2") as exp2: node.inputs = "Lorem Ipsum" assert exp1.name == "exp1" assert exp2.name == "exp2" + assert project.experiments.keys() == {"exp1", "exp2"} + + assert isinstance(project.experiments["exp1"], Experiment) + project.run_exp() assert node.from_rev(rev="exp1").inputs == "Hello World" assert node.from_rev(rev="exp1").outputs == "Hello World" @@ -38,6 +51,17 @@ def test_WriteIO(tmp_path_2, assert_before_exp): assert node.from_rev(rev="exp2").inputs == "Lorem Ipsum" assert node.from_rev(rev="exp2").outputs == "Lorem Ipsum" + exp2.apply() + assert ( + zntrack.from_rev("WriteIO").inputs + == zntrack.from_rev("WriteIO", rev=exp2.name).inputs + ) + exp1.apply() + assert ( + zntrack.from_rev("WriteIO").inputs + == zntrack.from_rev("WriteIO", rev=exp1.name).inputs + ) + @pytest.mark.parametrize("assert_before_exp", [True, False]) def test_WriteIO_no_name(tmp_path_2, assert_before_exp): diff --git a/zntrack/project/__init__.py b/zntrack/project/__init__.py index c72467a1..37020012 100644 --- a/zntrack/project/__init__.py +++ b/zntrack/project/__init__.py @@ -1,4 +1,4 @@ """Providing a project class that can run experiments.""" -from zntrack.project.zntrack_project import Project +from zntrack.project.zntrack_project import Experiment, Project -__all__ = ["Project"] +__all__ = ["Project", "Experiment"] diff --git a/zntrack/project/zntrack_project.py b/zntrack/project/zntrack_project.py index d823033b..785b2634 100644 --- a/zntrack/project/zntrack_project.py +++ b/zntrack/project/zntrack_project.py @@ -273,6 +273,7 @@ def experiments(self, *args, **kwargs) -> dict[str, Experiment]: return { experiment["Experiment"]: Experiment(experiment["rev"], project=self) for experiment in experiments + if experiment["Experiment"] is not None } def run_exp(self, jobs: int = 1) -> None: From ec3008a9b50118f4a227ea92885a79188079bbe8 Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Thu, 15 Jun 2023 13:53:19 +0200 Subject: [PATCH 6/7] use finally --- zntrack/project/zntrack_project.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/zntrack/project/zntrack_project.py b/zntrack/project/zntrack_project.py index 785b2634..b0d4a3f4 100644 --- a/zntrack/project/zntrack_project.py +++ b/zntrack/project/zntrack_project.py @@ -254,15 +254,15 @@ def create_experiment(self, name: str = None, queue: bool = True) -> Experiment: cmd.append("--queue") if name is not None: cmd.extend(["--name", name]) - - proc = subprocess.run(cmd, capture_output=True, check=True) - # "Reproducing", "Experiment", "'exp-name'" - exp.name = proc.stdout.decode("utf-8").split()[2].replace("'", "") - - repo.git.reset("--hard") - repo.git.clean("-fd") - if dirty: - repo.git.stash("pop") + try: + proc = subprocess.run(cmd, capture_output=True, check=True) + # "Reproducing", "Experiment", "'exp-name'" + exp.name = proc.stdout.decode("utf-8").split()[2].replace("'", "") + finally: + repo.git.reset("--hard") + repo.git.clean("-fd") + if dirty: + repo.git.stash("pop") if not queue: exp.apply() From 8dad175cccd335a6877ee5ee1e226c35df027252 Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Thu, 15 Jun 2023 15:06:35 +0200 Subject: [PATCH 7/7] update notebook --- examples/docs/parameter_optimization.ipynb | 1033 +------------------- 1 file changed, 23 insertions(+), 1010 deletions(-) diff --git a/examples/docs/parameter_optimization.ipynb b/examples/docs/parameter_optimization.ipynb index 81fd92bc..6b2f3d30 100644 --- a/examples/docs/parameter_optimization.ipynb +++ b/examples/docs/parameter_optimization.ipynb @@ -14,34 +14,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Initialized empty Git repository in /tmp/tmphi4dsmja/.git/\n", - "Initialized DVC repository.\n", - "\n", - "You can now commit the changes to git.\n", - "\n", - "+---------------------------------------------------------------------+\n", - "| |\n", - "| DVC has enabled anonymous aggregate usage analytics. |\n", - "| Read the analytics documentation (and how to opt-out) here: |\n", - "| |\n", - "| |\n", - "+---------------------------------------------------------------------+\n", - "\n", - "What's next?\n", - "------------\n", - "- Check out the documentation: \n", - "- Get help and share ideas: \n", - "- Star us on GitHub: \n" - ] - } - ], + "outputs": [], "source": [ "# Setup temporary directory and initialize git and dvc\n", "from zntrack import config\n", @@ -84,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -197,79 +172,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n", - "Jupyter support is an experimental feature! Please save your notebook before running this command!\n", - "Submit issues to https://github.com/zincware/ZnTrack.\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name model --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name Evaluate --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'repro'\n" - ] - } - ], + "outputs": [], "source": [ "with zntrack.Project() as project:\n", " data = HousingDataSet()\n", @@ -291,72 +196,18 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "data": { - "text/plain": [ - "NodeStatus(loaded=True, results=, remote=None, rev=None)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "RandomForest.from_rev(name=\"model\").state" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[main (root-commit) b5218c7] initial commit\n", - " 20 files changed, 2037 insertions(+)\n", - " create mode 100644 .dvc/.gitignore\n", - " create mode 100644 .dvc/config\n", - " create mode 100644 .dvcignore\n", - " create mode 100644 .gitignore\n", - " create mode 100644 dvc.lock\n", - " create mode 100644 dvc.yaml\n", - " create mode 100644 nodes/Evaluate/score.json\n", - " create mode 100644 nodes/TrainTestSplit/.gitignore\n", - " create mode 100644 nodes/model/.gitignore\n", - " create mode 100644 parameter_optimization.ipynb\n", - " create mode 100644 params.yaml\n", - " create mode 100644 src/Evaluate.py\n", - " create mode 100644 src/HousingDataSet.py\n", - " create mode 100644 src/RandomForest.py\n", - " create mode 100644 src/TrainTestSplit.py\n", - " create mode 100644 src/__pycache__/Evaluate.cpython-310.pyc\n", - " create mode 100644 src/__pycache__/HousingDataSet.cpython-310.pyc\n", - " create mode 100644 src/__pycache__/RandomForest.cpython-310.pyc\n", - " create mode 100644 src/__pycache__/TrainTestSplit.cpython-310.pyc\n", - " create mode 100644 zntrack.json\n" - ] - } - ], + "outputs": [], "source": [ "!git add .\n", "!git commit -m \"initial commit\"" @@ -376,762 +227,9 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[I 2023-05-30 14:28:03,838] A new study created in memory with name: no-name-15e1292e-d549-44d4-9bf0-c84394860091\n", - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name model --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name Evaluate --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'exp apply exp-0'\n", - "\u0000[I 2023-05-30 14:28:15,273] Trial 0 finished with value: 3.7564340357379606 and parameters: {'classifier': 'SVR', 'svr_c': 2.08432428140348}. Best is trial 0 with value: 3.7564340357379606.\n", - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name model --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name Evaluate --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'exp apply exp-1'\n", - "\u0000[I 2023-05-30 14:28:33,847] Trial 1 finished with value: 0.2616422061795712 and parameters: {'classifier': 'RandomForest', 'max_depth': 25}. Best is trial 0 with value: 3.7564340357379606.\n", - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name model --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name Evaluate --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'exp apply exp-2'\n", - "\u0000[I 2023-05-30 14:28:44,898] Trial 2 finished with value: 7.4389215152612564 and parameters: {'classifier': 'SVR', 'svr_c': 2546.6335503243536}. Best is trial 2 with value: 7.4389215152612564.\n", - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name model --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name Evaluate --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'exp apply exp-3'\n", - "\u0000[I 2023-05-30 14:28:55,985] Trial 3 finished with value: 1.6959671890954449 and parameters: {'classifier': 'SVR', 'svr_c': 0.001856702030839791}. Best is trial 2 with value: 7.4389215152612564.\n", - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name model --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name Evaluate --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'exp apply exp-4'\n", - "\u0000[I 2023-05-30 14:29:07,179] Trial 4 finished with value: 11.490470659940513 and parameters: {'classifier': 'SVR', 'svr_c': 166.57184086434526}. Best is trial 4 with value: 11.490470659940513.\n", - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name model --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name Evaluate --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'exp apply exp-5'\n", - "\u0000[I 2023-05-30 14:29:18,690] Trial 5 finished with value: 1.7831062788089902 and parameters: {'classifier': 'SVR', 'svr_c': 36616.95363147719}. Best is trial 4 with value: 11.490470659940513.\n", - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name model --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name Evaluate --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'exp apply exp-6'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[I 2023-05-30 14:29:36,954] Trial 6 finished with value: 0.2620241680440442 and parameters: {'classifier': 'RandomForest', 'max_depth': 22}. Best is trial 4 with value: 11.490470659940513.\n", - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name model --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name Evaluate --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'exp apply exp-7'\n", - "\u0000[I 2023-05-30 14:29:53,483] Trial 7 finished with value: 0.2681221506501657 and parameters: {'classifier': 'RandomForest', 'max_depth': 14}. Best is trial 4 with value: 11.490470659940513.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name model --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name Evaluate --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'exp apply exp-8'\n", - "\u0000[I 2023-05-30 14:30:12,684] Trial 8 finished with value: 0.26234815407181117 and parameters: {'classifier': 'RandomForest', 'max_depth': 30}. Best is trial 4 with value: 11.490470659940513.\n", - "Running DVC command: 'stage add --name HousingDataSet --force ...'\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name TrainTestSplit --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name model --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'stage add --name Evaluate --force ...'\n", - "\u0000" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Running DVC command: 'exp apply exp-9'\n", - "\u0000[I 2023-05-30 14:30:24,324] Trial 9 finished with value: 3.1497504942833037 and parameters: {'classifier': 'SVR', 'svr_c': 275021882.81023085}. Best is trial 4 with value: 11.490470659940513.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u0000" - ] - } - ], + "outputs": [], "source": [ "def objective(trial):\n", " with project.create_experiment(queue=False, name=f\"exp-{trial.number}\") as exp:\n", @@ -1165,7 +263,7 @@ "\n", "\n", "study = optuna.create_study(direction=\"maximize\")\n", - "study.optimize(objective, n_trials=10)" + "study.optimize(objective, n_trials=3)" ] }, { @@ -1181,40 +279,18 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'classifier': 'SVR', 'svr_c': 166.57184086434526}" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "study.best_params" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys([None, 'exp-9', 'exp-8', 'exp-7', 'exp-6', 'exp-5', 'exp-4', 'exp-3', 'exp-2', 'exp-1', 'exp-0'])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "project.experiments.keys()" ] @@ -1230,92 +306,28 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'src'. The package might be available via 'pip install src' or from the remote via 'pip install git+.'.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[16], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m exp \u001b[39m=\u001b[39m project\u001b[39m.\u001b[39mexperiments[\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mexp-\u001b[39m\u001b[39m{\u001b[39;00mstudy\u001b[39m.\u001b[39mbest_trial\u001b[39m.\u001b[39mnumber\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m]\n\u001b[1;32m 2\u001b[0m best_model \u001b[39m=\u001b[39m exp[\u001b[39m\"\u001b[39m\u001b[39mmodel\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m----> 3\u001b[0m best_model \u001b[39m=\u001b[39m zntrack\u001b[39m.\u001b[39;49mfrom_rev(\u001b[39m\"\u001b[39;49m\u001b[39mmodel\u001b[39;49m\u001b[39m\"\u001b[39;49m, rev\u001b[39m=\u001b[39;49m\u001b[39mf\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mexp-\u001b[39;49m\u001b[39m{\u001b[39;49;00mstudy\u001b[39m.\u001b[39;49mbest_trial\u001b[39m.\u001b[39;49mnumber\u001b[39m}\u001b[39;49;00m\u001b[39m\"\u001b[39;49m)\n", - "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/core/load.py:110\u001b[0m, in \u001b[0;36mfrom_rev\u001b[0;34m(name, remote, rev, **kwargs)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[39mif\u001b[39;00m module \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 109\u001b[0m module_name \u001b[39m=\u001b[39m package_and_module\u001b[39m.\u001b[39msplit(\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m)[\u001b[39m0\u001b[39m]\n\u001b[0;32m--> 110\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mModuleNotFoundError\u001b[39;00m(\n\u001b[1;32m 111\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNo module named \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mmodule_name\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m. The package might be available via \u001b[39m\u001b[39m'\u001b[39m\u001b[39mpip\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 112\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m install \u001b[39m\u001b[39m{\u001b[39;00mmodule_name\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m or from the remote via \u001b[39m\u001b[39m'\u001b[39m\u001b[39mpip install git+\u001b[39m\u001b[39m{\u001b[39;00mremote\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 113\u001b[0m )\n\u001b[1;32m 115\u001b[0m \u001b[39mcls\u001b[39m \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(module, cls_name)\n\u001b[1;32m 117\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39mfrom_rev(name, remote, rev, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'src'. The package might be available via 'pip install src' or from the remote via 'pip install git+.'." - ] - } - ], + "outputs": [], "source": [ "exp = project.experiments[f\"exp-{study.best_trial.number}\"]\n", - "best_model = exp[\"model\"]\n", - "best_model = zntrack.from_rev(\"model\", rev=f\"exp-{study.best_trial.number}\")" + "best_model = exp[\"model\"]" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'exp-4'" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "f\"exp-{study.best_trial.number}\"" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import zntrack" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'src'. The package might be available via 'pip install src' or from the remote via 'pip install git+.'.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[18], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m zntrack\u001b[39m.\u001b[39;49mfrom_rev(\u001b[39m\"\u001b[39;49m\u001b[39mmodel\u001b[39;49m\u001b[39m\"\u001b[39;49m, rev\u001b[39m=\u001b[39;49m\u001b[39mf\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39mexp-4\u001b[39;49m\u001b[39m\"\u001b[39;49m)\n", - "File \u001b[0;32m/data/fzills/tools/zntrack/zntrack/core/load.py:110\u001b[0m, in \u001b[0;36mfrom_rev\u001b[0;34m(name, remote, rev, **kwargs)\u001b[0m\n\u001b[1;32m 108\u001b[0m \u001b[39mif\u001b[39;00m module \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m 109\u001b[0m module_name \u001b[39m=\u001b[39m package_and_module\u001b[39m.\u001b[39msplit(\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m)[\u001b[39m0\u001b[39m]\n\u001b[0;32m--> 110\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mModuleNotFoundError\u001b[39;00m(\n\u001b[1;32m 111\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mNo module named \u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00mmodule_name\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m. The package might be available via \u001b[39m\u001b[39m'\u001b[39m\u001b[39mpip\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 112\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m install \u001b[39m\u001b[39m{\u001b[39;00mmodule_name\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m or from the remote via \u001b[39m\u001b[39m'\u001b[39m\u001b[39mpip install git+\u001b[39m\u001b[39m{\u001b[39;00mremote\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 113\u001b[0m )\n\u001b[1;32m 115\u001b[0m \u001b[39mcls\u001b[39m \u001b[39m=\u001b[39m \u001b[39mgetattr\u001b[39m(module, cls_name)\n\u001b[1;32m 117\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mcls\u001b[39m\u001b[39m.\u001b[39mfrom_rev(name, remote, rev, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'src'. The package might be available via 'pip install src' or from the remote via 'pip install git+.'." - ] - } - ], - "source": [ - "zntrack.from_rev(\"model\", rev=f\"exp-4\")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Best score: 9.748 compared to initial score: 0.750\n" - ] - } - ], "source": [ "# we load split data into memory to compute the score.\n", "split.load()\n", @@ -1326,6 +338,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [