From 28d14ba025a05e1b1b8796a07936a18d26060125 Mon Sep 17 00:00:00 2001
From: Michele Milesi <74559684+michele-milesi@users.noreply.github.com>
Date: Mon, 20 May 2024 15:41:04 +0200
Subject: [PATCH] fix: dv3-imagination notebook (#290)

* fix: dv3-imagination notebook

* remove notebook outputs

* remove jupyter clear output cell hook

* remove nbconvert from dependencies

* fix: set `MUJOCO_GL` to `egl` in the dv3 imagination notebook
---
 examples/model_manager.ipynb           | 322 ++-----------------------
 notebooks/dreamer_v3_imagination.ipynb |  41 +++-
 2 files changed, 43 insertions(+), 320 deletions(-)
diff --git a/examples/model_manager.ipynb b/examples/model_manager.ipynb
index 73b72014..049d2446 100644
--- a/examples/model_manager.ipynb
+++ b/examples/model_manager.ipynb
@@ -81,133 +81,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Experiment: <Experiment: artifact_location='mlflow-artifacts:/242317125620601262', creation_time=1701949559261, experiment_id='242317125620601262', last_update_time=1701949559261, lifecycle_stage='active', name='mlflow_example', tags={}>\n",
-      "Experiment (242317125620601262) runs:\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>run_id</th>\n",
-       "      <th>experiment_id</th>\n",
-       "      <th>status</th>\n",
-       "      <th>artifact_uri</th>\n",
-       "      <th>start_time</th>\n",
-       "      <th>end_time</th>\n",
-       "      <th>metrics.Loss/entropy_loss</th>\n",
-       "      <th>metrics.Test/cumulative_reward</th>\n",
-       "      <th>metrics.Info/ent_coef</th>\n",
-       "      <th>metrics.Info/learning_rate</th>\n",
-       "      <th>...</th>\n",
-       "      <th>params.algo/gae_lambda</th>\n",
-       "      <th>params.env/action_repeat</th>\n",
-       "      <th>params.env/grayscale</th>\n",
-       "      <th>params.metric/aggregator/metrics/Loss/policy_loss/sync_on_compute</th>\n",
-       "      <th>params.metric/log_level</th>\n",
-       "      <th>tags.mlflow.user</th>\n",
-       "      <th>tags.mlflow.source.type</th>\n",
-       "      <th>tags.mlflow.runName</th>\n",
-       "      <th>tags.mlflow.source.name</th>\n",
-       "      <th>tags.mlflow.log-model.history</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1e453cf2114d43f28410803df985598a</td>\n",
-       "      <td>242317125620601262</td>\n",
-       "      <td>FINISHED</td>\n",
-       "      <td>mlflow-artifacts:/242317125620601262/1e453cf21...</td>\n",
-       "      <td>2023-12-07 11:45:59.641000+00:00</td>\n",
-       "      <td>2023-12-07 11:46:10.350000+00:00</td>\n",
-       "      <td>-0.687031</td>\n",
-       "      <td>48.0</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>0.001</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0.95</td>\n",
-       "      <td>1</td>\n",
-       "      <td>False</td>\n",
-       "      <td>False</td>\n",
-       "      <td>1</td>\n",
-       "      <td>mmilesi</td>\n",
-       "      <td>LOCAL</td>\n",
-       "      <td>ppo_CartPole-v1_2023-12-07_12-45-58</td>\n",
-       "      <td>/home/mmilesi/miniconda3/envs/sheeprl/lib/pyth...</td>\n",
-       "      <td>[{\"run_id\": \"1e453cf2114d43f28410803df985598a\"...</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>1 rows × 130 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                             run_id       experiment_id    status  \\\n",
-       "0  1e453cf2114d43f28410803df985598a  242317125620601262  FINISHED   \n",
-       "\n",
-       "                                        artifact_uri  \\\n",
-       "0  mlflow-artifacts:/242317125620601262/1e453cf21...   \n",
-       "\n",
-       "                        start_time                         end_time  \\\n",
-       "0 2023-12-07 11:45:59.641000+00:00 2023-12-07 11:46:10.350000+00:00   \n",
-       "\n",
-       "   metrics.Loss/entropy_loss  metrics.Test/cumulative_reward  \\\n",
-       "0                  -0.687031                            48.0   \n",
-       "\n",
-       "   metrics.Info/ent_coef  metrics.Info/learning_rate  ...  \\\n",
-       "0                    0.0                       0.001  ...   \n",
-       "\n",
-       "   params.algo/gae_lambda  params.env/action_repeat  params.env/grayscale  \\\n",
-       "0                    0.95                         1                 False   \n",
-       "\n",
-       "   params.metric/aggregator/metrics/Loss/policy_loss/sync_on_compute  \\\n",
-       "0                                              False                   \n",
-       "\n",
-       "   params.metric/log_level  tags.mlflow.user  tags.mlflow.source.type  \\\n",
-       "0                        1           mmilesi                    LOCAL   \n",
-       "\n",
-       "                   tags.mlflow.runName  \\\n",
-       "0  ppo_CartPole-v1_2023-12-07_12-45-58   \n",
-       "\n",
-       "                             tags.mlflow.source.name  \\\n",
-       "0  /home/mmilesi/miniconda3/envs/sheeprl/lib/pyth...   \n",
-       "\n",
-       "                       tags.mlflow.log-model.history  \n",
-       "0  [{\"run_id\": \"1e453cf2114d43f28410803df985598a\"...  \n",
-       "\n",
-       "[1 rows x 130 columns]"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "import mlflow\n",
     "\n",
@@ -229,26 +105,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Name: mlflow_example_agent\n",
-      "Description: # MODEL CHANGELOG\n",
-      "## **Version 1**\n",
-      "### Author: mmilesi\n",
-      "### Date: 07/12/2023 12:46:10 CET\n",
-      "### Description: \n",
-      "PPO Agent in CartPole-v1 Environment\n",
-      "\n",
-      "Tags: {}\n",
-      "Latest Version: 1\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from sheeprl.utils.mlflow import MlflowModelManager\n",
     "from lightning import Fabric\n",
@@ -315,33 +174,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2023/12/07 12:47:03 WARNING mlflow.utils.requirements_utils: The following packages were not found in the public PyPI package index as of 2023-10-28; if these packages are not present in the public PyPI index, you must install them manually before loading your model: {'sheeprl'}\n",
-      "Registered model 'mlflow_example_agent' already exists. Creating a new version of this model...\n",
-      "2023/12/07 12:47:04 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: mlflow_example_agent, version 2\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Registered model mlflow_example_agent with version 2\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Created version '2' of model 'mlflow_example_agent'.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from sheeprl.cli import registration\n",
     "\n",
@@ -374,31 +209,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Name: mlflow_example_agent\n",
-      "Description: # MODEL CHANGELOG\n",
-      "## **Version 1**\n",
-      "### Author: mmilesi\n",
-      "### Date: 07/12/2023 12:46:10 CET\n",
-      "### Description: \n",
-      "PPO Agent in CartPole-v1 Environment\n",
-      "## **Version 2**\n",
-      "### Author: mmilesi\n",
-      "### Date: 07/12/2023 12:47:04 CET\n",
-      "### Description: \n",
-      "New PPO Agent version trained in CartPole-v1 environment\n",
-      "\n",
-      "Tags: {}\n",
-      "Latest Version: 2\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "model_info = mlflow.search_registered_models(filter_string=f\"name='{model_name}'\")[-1]\n",
     "print(\"Name:\", model_info.name)\n",
@@ -418,32 +231,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Transitioning model mlflow_example_agent version 2 from None to staging\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "<ModelVersion: aliases=[], creation_timestamp=1701949624027, current_stage='Staging', description=('# MODEL CHANGELOG\\n'\n",
-       " '## **Version 2**\\n'\n",
-       " '### Author: mmilesi\\n'\n",
-       " '### Date: 07/12/2023 12:47:04 CET\\n'\n",
-       " '### Description: \\n'\n",
-       " 'New PPO Agent version trained in CartPole-v1 environment\\n'), last_updated_timestamp=1701949660778, name='mlflow_example_agent', run_id='eefbe09e8815463eaa83c6542cbc36c7', run_link='', source='mlflow-artifacts:/242317125620601262/eefbe09e8815463eaa83c6542cbc36c7/artifacts/agent', status='READY', status_message='', tags={}, user_id='', version='2'>"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "model_manager.transition_model(\n",
     "    model_name=\"mlflow_example_agent\",\n",
@@ -489,42 +279,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Successfully registered model 'ppo_agent_cartpole_best_reward'.\n",
-      "2023/12/07 12:47:55 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ppo_agent_cartpole_best_reward, version 1\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Registered model ppo_agent_cartpole_best_reward with version 1\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Created version '1' of model 'ppo_agent_cartpole_best_reward'.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{'agent': <ModelVersion: aliases=[], creation_timestamp=1701949675859, current_stage='None', description='', last_updated_timestamp=1701949675859, name='ppo_agent_cartpole_best_reward', run_id='eefbe09e8815463eaa83c6542cbc36c7', run_link='', source='mlflow-artifacts:/242317125620601262/eefbe09e8815463eaa83c6542cbc36c7/artifacts/agent', status='READY', status_message='', tags={}, user_id='', version='1'>}"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "models_info = {\n",
     "    \"agent\": {\n",
@@ -547,60 +304,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Model named mlflow_example_agent with version 1 does not exist\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "<RegisteredModel: aliases={}, creation_timestamp=1701949570369, description=('# MODEL CHANGELOG\\n'\n",
-       " '## **Version 1**\\n'\n",
-       " '### Author: mmilesi\\n'\n",
-       " '### Date: 07/12/2023 12:46:10 CET\\n'\n",
-       " '### Description: \\n'\n",
-       " 'PPO Agent in CartPole-v1 Environment\\n'\n",
-       " '## **Version 2**\\n'\n",
-       " '### Author: mmilesi\\n'\n",
-       " '### Date: 07/12/2023 12:47:04 CET\\n'\n",
-       " '### Description: \\n'\n",
-       " 'New PPO Agent version trained in CartPole-v1 environment\\n'\n",
-       " '## **Transition:**\\n'\n",
-       " '### Version 2 from None to Staging\\n'\n",
-       " '### Author: mmilesi\\n'\n",
-       " '### Date: 07/12/2023 12:47:40 CET\\n'\n",
-       " '### Description: \\n'\n",
-       " 'Staging Model for demo\\n'\n",
-       " '## **Deletion:**\\n'\n",
-       " '### Version 1 from stage: None\\n'\n",
-       " '### Author: mmilesi\\n'\n",
-       " '### Date: 07/12/2023 12:48:36 CET\\n'\n",
-       " '### Description: \\n'\n",
-       " 'Delete model version 1\\n'), last_updated_timestamp=1701949716092, latest_versions=[<ModelVersion: aliases=[], creation_timestamp=1701949624027, current_stage='Staging', description=('# MODEL CHANGELOG\\n'\n",
-       " '## **Version 2**\\n'\n",
-       " '### Author: mmilesi\\n'\n",
-       " '### Date: 07/12/2023 12:47:04 CET\\n'\n",
-       " '### Description: \\n'\n",
-       " 'New PPO Agent version trained in CartPole-v1 environment\\n'\n",
-       " '## **Transition:**\\n'\n",
-       " '### Version 2 from None to Staging\\n'\n",
-       " '### Author: mmilesi\\n'\n",
-       " '### Date: 07/12/2023 12:47:40 CET\\n'\n",
-       " '### Description: \\n'\n",
-       " 'Staging Model for demo\\n'), last_updated_timestamp=1701949660803, name='mlflow_example_agent', run_id='eefbe09e8815463eaa83c6542cbc36c7', run_link='', source='mlflow-artifacts:/242317125620601262/eefbe09e8815463eaa83c6542cbc36c7/artifacts/agent', status='READY', status_message='', tags={}, user_id='', version='2'>], name='mlflow_example_agent', tags={}>"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
     "model_manager.delete_model(\n",
     "    model_name, int(latest_version.version) - 1, f\"Delete model version {int(latest_version.version)-1}\"\n",
diff --git a/notebooks/dreamer_v3_imagination.ipynb b/notebooks/dreamer_v3_imagination.ipynb
index e03e5b04..e451b62c 100644
--- a/notebooks/dreamer_v3_imagination.ipynb
+++ b/notebooks/dreamer_v3_imagination.ipynb
@@ -34,6 +34,17 @@
     "!pip install torchvision"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"MUJOCO_GL\"] = \"egl\""
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -86,6 +97,7 @@
    "source": [
     "seed = 5\n",
     "fabric = Fabric(accelerator=\"cuda\", devices=1)\n",
+    "fabric.launch()\n",
     "state = fabric.load(ckpt_path)\n",
     "cfg = dotdict(OmegaConf.to_container(OmegaConf.load(ckpt_path.parent.parent / \"config.yaml\"), resolve=True))\n",
     "\n",
@@ -99,7 +111,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "envs = gym.vector.AsyncVectorEnv(\n",
+    "envs = gym.vector.SyncVectorEnv(\n",
     "    [\n",
     "        make_env(\n",
     "            cfg,\n",
@@ -187,9 +199,7 @@
     "    step_data[k] = obs[k][np.newaxis]\n",
     "step_data[\"dones\"] = np.zeros((1, cfg.env.num_envs, 1))\n",
     "step_data[\"rewards\"] = np.zeros((1, cfg.env.num_envs, 1))\n",
-    "step_data[\"is_first\"] = np.ones_like(step_data[\"dones\"])\n",
-    "step_data[\"stochastic_state\"] = player.stochastic_state.detach().cpu().numpy()\n",
-    "step_data[\"recurrent_state\"] = player.recurrent_state.detach().cpu().numpy()"
+    "step_data[\"is_first\"] = np.ones_like(step_data[\"dones\"])"
    ]
   },
   {
@@ -215,7 +225,7 @@
     "        for k, v in obs.items():\n",
     "            preprocessed_obs[k] = torch.as_tensor(v[np.newaxis], dtype=torch.float32, device=fabric.device)\n",
     "            if k in cfg.algo.cnn_keys.encoder:\n",
-    "                preprocessed_obs[k] = preprocessed_obs[k] / 255.0\n",
+    "                preprocessed_obs[k] = preprocessed_obs[k] / 255.0 - 0.5\n",
     "        mask = {k: v for k, v in preprocessed_obs.items() if k.startswith(\"mask\")}\n",
     "        if len(mask) == 0:\n",
     "            mask = None\n",
@@ -226,6 +236,8 @@
     "        else:\n",
     "            real_actions = torch.stack([real_act.argmax(dim=-1) for real_act in real_actions], dim=-1).cpu().numpy()\n",
     "\n",
+    "    step_data[\"stochastic_state\"] = player.stochastic_state.detach().cpu().numpy()\n",
+    "    step_data[\"recurrent_state\"] = player.recurrent_state.detach().cpu().numpy()\n",
     "    step_data[\"actions\"] = actions.reshape((1, cfg.env.num_envs, -1))\n",
     "    rb_initial.add(step_data, validate_args=cfg.buffer.validate_args)\n",
     "\n",
@@ -262,8 +274,6 @@
     "    step_data[\"dones\"] = dones.reshape((1, cfg.env.num_envs, -1))\n",
     "    step_data[\"rewards\"] = clip_rewards_fn(rewards)\n",
     "    step_data[\"rewards\"] = clip_rewards_fn(rewards)\n",
-    "    step_data[\"stochastic_state\"] = player.stochastic_state.detach().cpu().numpy()\n",
-    "    step_data[\"recurrent_state\"] = player.recurrent_state.detach().cpu().numpy()\n",
     "    dones_idxes = dones.nonzero()[0].tolist()\n",
     "    reset_envs = len(dones_idxes)\n",
     "    if reset_envs > 0:\n",
@@ -283,7 +293,7 @@
     "        player.init_states(dones_idxes)\n",
     "\n",
     "    ## Save the recurrent and stochastic latent states for the imagination phase\n",
-    "    if i == initial_steps - imagination_steps - 1:\n",
+    "    if i == initial_steps - imagination_steps:\n",
     "        stochastic_state = player.stochastic_state.clone()\n",
     "        recurrent_state = player.recurrent_state.clone()"
    ]
@@ -338,10 +348,11 @@
     "\n",
     "        # imagination step\n",
     "        stochastic_state, recurrent_state = world_model.rssm.imagination(stochastic_state, recurrent_state, actions)\n",
+    "        stochastic_state = stochastic_state.view(1, 1, -1)\n",
     "        # update current state\n",
-    "        imagined_latent_states = torch.cat((stochastic_state.view(1, 1, -1), recurrent_state), -1)\n",
+    "        imagined_latent_states = torch.cat((stochastic_state, recurrent_state), -1)\n",
     "        rec_obs = world_model.observation_model(imagined_latent_states)\n",
-    "        step_data[\"rgb\"] = rec_obs[\"rgb\"].unsqueeze(0).detach().cpu().numpy()\n",
+    "        step_data[\"rgb\"] = rec_obs[\"rgb\"].unsqueeze(0).detach().cpu().numpy() + 0.5\n",
     "        step_data[\"actions\"] = actions.unsqueeze(0).detach().cpu().numpy()\n",
     "        rb_imagination.add(step_data)\n",
     "\n",
@@ -408,6 +419,13 @@
     "frame_one = frames[0]\n",
     "frame_one.save(\"real_obs.gif\", format=\"GIF\", append_images=frames, save_all=True, duration=100, loop=0)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
@@ -427,8 +445,7 @@
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.10.13"
-  },
-  "orig_nbformat": 4
+  }
  },
  "nbformat": 4,
  "nbformat_minor": 2