diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ea970c265..b45ae3192 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -32,8 +32,6 @@ jobs: pip install .[extra,tests,docs] # Use headless version pip install opencv-python-headless - # Tmp fix: ROM missing in the newest atari-py version - pip install atari-py==0.2.5 - name: Build the doc run: | make doc diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 63f9eafa0..45ca8f56a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -7,8 +7,6 @@ type-check: pytest: script: - python --version - # Fix to get atari ROMs - - pip install atari-py==0.2.5 # MKL_THREADING_LAYER=GNU to avoid MKL_THREADING_LAYER=INTEL incompatibility error - MKL_THREADING_LAYER=GNU make pytest diff --git a/README.md b/README.md index 54577e452..2a0701c1a 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ Documentation is available online: [https://sb3-contrib.readthedocs.io/](https:/ **Note:** Stable-Baselines3 supports PyTorch >= 1.8.1. ### Prerequisites -Stable Baselines3 requires python 3.7+. +Stable Baselines3 requires Python 3.7+. #### Windows 10 diff --git a/docs/guide/callbacks.rst b/docs/guide/callbacks.rst index 279664171..19bccb22c 100644 --- a/docs/guide/callbacks.rst +++ b/docs/guide/callbacks.rst @@ -174,7 +174,7 @@ and optionally a prefix for the checkpoints (``rl_model`` by default). checkpoint_callback = CheckpointCallback(save_freq=1000, save_path='./logs/', name_prefix='rl_model') - model = SAC('MlpPolicy', 'Pendulum-v0') + model = SAC('MlpPolicy', 'Pendulum-v1') model.learn(2000, callback=checkpoint_callback) @@ -206,13 +206,13 @@ It will save the best model if ``best_model_save_path`` folder is specified and from stable_baselines3.common.callbacks import EvalCallback # Separate evaluation env - eval_env = gym.make('Pendulum-v0') + eval_env = gym.make('Pendulum-v1') # Use deterministic actions for evaluation eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=500, deterministic=True, render=False) - model = SAC('MlpPolicy', 'Pendulum-v0') + model = SAC('MlpPolicy', 'Pendulum-v1') model.learn(5000, callback=eval_callback) @@ -234,13 +234,13 @@ Alternatively, you can pass directly a list of callbacks to the ``learn()`` meth checkpoint_callback = CheckpointCallback(save_freq=1000, save_path='./logs/') # Separate evaluation env - eval_env = gym.make('Pendulum-v0') + eval_env = gym.make('Pendulum-v1') eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/best_model', log_path='./logs/results', eval_freq=500) # Create the callback list callback = CallbackList([checkpoint_callback, eval_callback]) - model = SAC('MlpPolicy', 'Pendulum-v0') + model = SAC('MlpPolicy', 'Pendulum-v1') # Equivalent to: # model.learn(5000, callback=[checkpoint_callback, eval_callback]) model.learn(5000, callback=callback) @@ -263,12 +263,12 @@ It must be used with the :ref:`EvalCallback` and use the event triggered by a ne from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold # Separate evaluation env - eval_env = gym.make('Pendulum-v0') + eval_env = gym.make('Pendulum-v1') # Stop training when the model reaches the reward threshold callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=-200, verbose=1) eval_callback = EvalCallback(eval_env, callback_on_new_best=callback_on_best, verbose=1) - model = SAC('MlpPolicy', 'Pendulum-v0', verbose=1) + model = SAC('MlpPolicy', 'Pendulum-v1', verbose=1) # Almost infinite number of 
timesteps, but the training will stop # early as soon as the reward threshold is reached model.learn(int(1e10), callback=eval_callback) @@ -299,7 +299,7 @@ An :ref:`EventCallback` that will trigger its child callback every ``n_steps`` t checkpoint_on_event = CheckpointCallback(save_freq=1, save_path='./logs/') event_callback = EveryNTimesteps(n_steps=500, callback=checkpoint_on_event) - model = PPO('MlpPolicy', 'Pendulum-v0', verbose=1) + model = PPO('MlpPolicy', 'Pendulum-v1', verbose=1) model.learn(int(2e4), callback=event_callback) @@ -328,7 +328,7 @@ and in total for ``max_episodes * n_envs`` episodes. # Stops training when the model reaches the maximum number of episodes callback_max_episodes = StopTrainingOnMaxEpisodes(max_episodes=5, verbose=1) - model = A2C('MlpPolicy', 'Pendulum-v0', verbose=1) + model = A2C('MlpPolicy', 'Pendulum-v1', verbose=1) # Almost infinite number of timesteps, but the training will stop # early as soon as the max number of episodes is reached model.learn(int(1e10), callback=callback_max_episodes) diff --git a/docs/guide/custom_policy.rst b/docs/guide/custom_policy.rst index d17f913c5..1b8f9fb7f 100644 --- a/docs/guide/custom_policy.rst +++ b/docs/guide/custom_policy.rst @@ -407,5 +407,5 @@ you only need to specify ``net_arch=[256, 256]`` (here, two hidden layers of 256 # Custom critic architecture with two layers of 400 and 300 units policy_kwargs = dict(net_arch=dict(pi=[64, 64], qf=[400, 300])) # Create the agent - model = SAC("MlpPolicy", "Pendulum-v0", policy_kwargs=policy_kwargs, verbose=1) + model = SAC("MlpPolicy", "Pendulum-v1", policy_kwargs=policy_kwargs, verbose=1) model.learn(5000) diff --git a/docs/guide/examples.rst b/docs/guide/examples.rst index 733279bc3..a5b56b249 100644 --- a/docs/guide/examples.rst +++ b/docs/guide/examples.rst @@ -321,7 +321,7 @@ Atari Games Training a RL agent on Atari games is straightforward thanks to ``make_atari_env`` helper function. It will do `all the preprocessing `_ -and multiprocessing for you. +and multiprocessing for you. To install the Atari environments and ROMs, run ``pip install gym[atari,accept-rom-license]``, or install Stable Baselines3 with ``pip install stable-baselines3[extra]``, which pulls in these and other optional dependencies. .. image:: ../_static/img/colab-badge.svg :target: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/sb3/atari_games.ipynb @@ -564,7 +564,7 @@ Behind the scene, SB3 uses an :ref:`EvalCallback `. # Create the model, the training environment # and the test environment (for evaluation) - model = SAC('MlpPolicy', 'Pendulum-v0', verbose=1, + model = SAC('MlpPolicy', 'Pendulum-v1', verbose=1, learning_rate=1e-3, create_eval_env=True) # Evaluate the model every 1000 steps on 5 test episodes diff --git a/docs/guide/export.rst b/docs/guide/export.rst index d6fe72a78..b6884c19d 100644 --- a/docs/guide/export.rst +++ b/docs/guide/export.rst @@ -62,7 +62,7 @@ For PPO, assuming a shared feature extactor.
action_hidden, value_hidden = self.extractor(observation) return self.action_net(action_hidden), self.value_net(value_hidden) - # Example: model = PPO("MlpPolicy", "Pendulum-v0") + # Example: model = PPO("MlpPolicy", "Pendulum-v1") model = PPO.load("PathToTrainedModel.zip") model.policy.to("cpu") onnxable_model = OnnxablePolicy(model.policy.mlp_extractor, model.policy.action_net, model.policy.value_net) diff --git a/docs/guide/tensorboard.rst b/docs/guide/tensorboard.rst index 0929b9eef..18f1cebc4 100644 --- a/docs/guide/tensorboard.rst +++ b/docs/guide/tensorboard.rst @@ -61,7 +61,7 @@ Here is a simple example on how to log both additional tensor or arbitrary scala from stable_baselines3 import SAC from stable_baselines3.common.callbacks import BaseCallback - model = SAC("MlpPolicy", "Pendulum-v0", tensorboard_log="/tmp/sac/", verbose=1) + model = SAC("MlpPolicy", "Pendulum-v1", tensorboard_log="/tmp/sac/", verbose=1) class TensorboardCallback(BaseCallback): @@ -104,7 +104,7 @@ Here is an example of how to render an image to TensorBoard at regular intervals from stable_baselines3.common.callbacks import BaseCallback from stable_baselines3.common.logger import Image - model = SAC("MlpPolicy", "Pendulum-v0", tensorboard_log="/tmp/sac/", verbose=1) + model = SAC("MlpPolicy", "Pendulum-v1", tensorboard_log="/tmp/sac/", verbose=1) class ImageRecorderCallback(BaseCallback): @@ -141,7 +141,7 @@ Here is an example of how to store a plot in TensorBoard at regular intervals: from stable_baselines3.common.callbacks import BaseCallback from stable_baselines3.common.logger import Figure - model = SAC("MlpPolicy", "Pendulum-v0", tensorboard_log="/tmp/sac/", verbose=1) + model = SAC("MlpPolicy", "Pendulum-v1", tensorboard_log="/tmp/sac/", verbose=1) class FigureRecorderCallback(BaseCallback): @@ -251,7 +251,7 @@ can get direct access to the underlying SummaryWriter in a callback: - model = SAC("MlpPolicy", "Pendulum-v0", tensorboard_log="/tmp/sac/", verbose=1) + model = SAC("MlpPolicy", "Pendulum-v1", tensorboard_log="/tmp/sac/", verbose=1) class SummaryWriterCallback(BaseCallback): diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst index 39830590d..07a0cf339 100644 --- a/docs/misc/changelog.rst +++ b/docs/misc/changelog.rst @@ -119,6 +119,7 @@ Release 1.3.0 (2021-10-23) Breaking Changes: ^^^^^^^^^^^^^^^^^ +- Support for Python 3.6 was removed. - ``sde_net_arch`` argument in policies is deprecated and will be removed in a future version. - ``_get_latent`` (``ActorCriticPolicy``) was removed - All logging keys now use underscores instead of spaces (@timokau). Concretely this changes: @@ -127,6 +128,7 @@ Breaking Changes: - ``rollout/exploration rate`` to ``rollout/exploration_rate`` and - ``rollout/success rate`` to ``rollout/success_rate``. + New Features: ^^^^^^^^^^^^^ - Added methods ``get_distribution`` and ``predict_values`` for ``ActorCriticPolicy`` for A2C/PPO/TRPO (@cyprienc) @@ -145,6 +147,7 @@ Bug Fixes: Deprecations: ^^^^^^^^^^^^^ +- Switched minimum Gym version to 0.21.0. 
Others: ^^^^^^^ diff --git a/docs/modules/ddpg.rst b/docs/modules/ddpg.rst index 24d265f00..c484a1c93 100644 --- a/docs/modules/ddpg.rst +++ b/docs/modules/ddpg.rst @@ -67,7 +67,7 @@ This example is only to demonstrate the use of the library and its functions, an from stable_baselines3 import DDPG from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise - env = gym.make("Pendulum-v0") + env = gym.make("Pendulum-v1") # The noise objects for DDPG n_actions = env.action_space.shape[-1] diff --git a/docs/modules/sac.rst b/docs/modules/sac.rst index a1156fd8c..e7f9057d5 100644 --- a/docs/modules/sac.rst +++ b/docs/modules/sac.rst @@ -73,7 +73,7 @@ This example is only to demonstrate the use of the library and its functions, an from stable_baselines3 import SAC - env = gym.make("Pendulum-v0") + env = gym.make("Pendulum-v1") model = SAC("MlpPolicy", env, verbose=1) model.learn(total_timesteps=10000, log_interval=4) diff --git a/docs/modules/td3.rst b/docs/modules/td3.rst index 3bc93d7a9..d039ae71c 100644 --- a/docs/modules/td3.rst +++ b/docs/modules/td3.rst @@ -67,7 +67,7 @@ This example is only to demonstrate the use of the library and its functions, an from stable_baselines3 import TD3 from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise - env = gym.make("Pendulum-v0") + env = gym.make("Pendulum-v1") # The noise objects for TD3 n_actions = env.action_space.shape[-1] diff --git a/setup.py b/setup.py index 3e0f788af..eabf30c66 100644 --- a/setup.py +++ b/setup.py @@ -73,7 +73,7 @@ packages=[package for package in find_packages() if package.startswith("stable_baselines3")], package_data={"stable_baselines3": ["py.typed", "version.txt"]}, install_requires=[ - "gym>=0.17,<0.20", # gym 0.20 breaks atari-py behavior + "gym>=0.21", # Remember to also update gym version in "extra" below when this changes "numpy", "torch>=1.8.1", # For saving models @@ -116,7 +116,7 @@ # For render "opencv-python", # For atari games, - "atari_py==0.2.6", + "gym[atari,accept-rom-license]>=0.21", "pillow", # Tensorboard support "tensorboard>=2.2.0", diff --git a/tests/test_callbacks.py b/tests/test_callbacks.py index 56fc14109..e1f6d3869 100644 --- a/tests/test_callbacks.py +++ b/tests/test_callbacks.py @@ -75,7 +75,7 @@ def test_callbacks(tmp_path, model_class): if model_class in [A2C, PPO]: max_episodes = 1 n_envs = 2 - # Pendulum-v0 has a timelimit of 200 timesteps + # Pendulum-v1 has a timelimit of 200 timesteps max_episode_length = 200 envs = make_vec_env(env_name, n_envs=n_envs, seed=0) @@ -99,7 +99,7 @@ def select_env(model_class) -> str: if model_class is DQN: return "CartPole-v0" else: - return "Pendulum-v0" + return "Pendulum-v1" def test_eval_callback_vec_env(): diff --git a/tests/test_custom_policy.py b/tests/test_custom_policy.py index 02fa7b4a0..e2d98fbd5 100644 --- a/tests/test_custom_policy.py +++ b/tests/test_custom_policy.py @@ -25,7 +25,7 @@ def test_flexible_mlp(model_class, net_arch): @pytest.mark.parametrize("net_arch", [[], [4], [4, 4], dict(qf=[8], pi=[8, 4])]) @pytest.mark.parametrize("model_class", [SAC, TD3]) def test_custom_offpolicy(model_class, net_arch): - _ = model_class("MlpPolicy", "Pendulum-v0", policy_kwargs=dict(net_arch=net_arch), learning_starts=100).learn(300) + _ = model_class("MlpPolicy", "Pendulum-v1", policy_kwargs=dict(net_arch=net_arch), learning_starts=100).learn(300) @pytest.mark.parametrize("model_class", [A2C, PPO, SAC, TD3]) @@ -38,12 +38,12 @@ def test_custom_optimizer(model_class, optimizer_kwargs): 
kwargs = dict(n_steps=64) policy_kwargs = dict(optimizer_class=th.optim.AdamW, optimizer_kwargs=optimizer_kwargs, net_arch=[32]) - _ = model_class("MlpPolicy", "Pendulum-v0", policy_kwargs=policy_kwargs, **kwargs).learn(300) + _ = model_class("MlpPolicy", "Pendulum-v1", policy_kwargs=policy_kwargs, **kwargs).learn(300) def test_tf_like_rmsprop_optimizer(): policy_kwargs = dict(optimizer_class=RMSpropTFLike, net_arch=[32]) - _ = A2C("MlpPolicy", "Pendulum-v0", policy_kwargs=policy_kwargs).learn(500) + _ = A2C("MlpPolicy", "Pendulum-v1", policy_kwargs=policy_kwargs).learn(500) def test_dqn_custom_policy(): diff --git a/tests/test_deterministic.py b/tests/test_deterministic.py index 3712fc21a..4c92d269f 100644 --- a/tests/test_deterministic.py +++ b/tests/test_deterministic.py @@ -13,7 +13,7 @@ def test_deterministic_training_common(algo): rewards = [[], []] # Smaller network kwargs = {"policy_kwargs": dict(net_arch=[64])} - env_id = "Pendulum-v0" + env_id = "Pendulum-v1" if algo in [TD3, SAC]: kwargs.update({"action_noise": NormalActionNoise(0.0, 0.1), "learning_starts": 100, "train_freq": 4}) else: diff --git a/tests/test_distributions.py b/tests/test_distributions.py index b894dd478..3652b1850 100644 --- a/tests/test_distributions.py +++ b/tests/test_distributions.py @@ -43,7 +43,7 @@ def test_squashed_gaussian(model_class): """ Test run with squashed Gaussian (notably entropy computation) """ - model = model_class("MlpPolicy", "Pendulum-v0", use_sde=True, n_steps=64, policy_kwargs=dict(squash_output=True)) + model = model_class("MlpPolicy", "Pendulum-v1", use_sde=True, n_steps=64, policy_kwargs=dict(squash_output=True)) model.learn(500) gaussian_mean = th.rand(N_SAMPLES, N_ACTIONS) @@ -57,10 +57,10 @@ def test_squashed_gaussian(model_class): @pytest.fixture() def dummy_model_distribution_obs_and_actions() -> Tuple[A2C, np.array, np.array]: """ - Fixture creating a Pendulum-v0 gym env, an A2C model and sampling 10 random observations and actions from the env + Fixture creating a Pendulum-v1 gym env, an A2C model and sampling 10 random observations and actions from the env :return: A2C model, random observations, random actions """ - env = gym.make("Pendulum-v0") + env = gym.make("Pendulum-v1") model = A2C("MlpPolicy", env, seed=23) random_obs = np.array([env.observation_space.sample() for _ in range(10)]) random_actions = np.array([env.action_space.sample() for _ in range(10)]) diff --git a/tests/test_env_checker.py b/tests/test_env_checker.py index 6364bd4ba..0b0a82d8f 100644 --- a/tests/test_env_checker.py +++ b/tests/test_env_checker.py @@ -11,14 +11,14 @@ class ActionDictTestEnv(gym.Env): observation_space = Box(low=-1.0, high=2.0, shape=(3,), dtype=np.float32) def step(self, action): - observation = np.array([1.0, 1.5, 0.5]) + observation = np.array([1.0, 1.5, 0.5], dtype=self.observation_space.dtype) reward = 1 done = True info = {} return observation, reward, done, info def reset(self): - return np.array([1.0, 1.5, 0.5]) + return np.array([1.0, 1.5, 0.5], dtype=self.observation_space.dtype) def render(self, mode="human"): pass diff --git a/tests/test_envs.py b/tests/test_envs.py index d0434773a..b859ed703 100644 --- a/tests/test_envs.py +++ b/tests/test_envs.py @@ -27,7 +27,7 @@ ] -@pytest.mark.parametrize("env_id", ["CartPole-v0", "Pendulum-v0"]) +@pytest.mark.parametrize("env_id", ["CartPole-v0", "Pendulum-v1"]) def test_env(env_id): """ Check that environmnent integrated in Gym pass the test. 
@@ -38,9 +38,9 @@ def test_env(env_id): with pytest.warns(None) as record: check_env(env) - # Pendulum-v0 will produce a warning because the action space is + # Pendulum-v1 will produce a warning because the action space is # in [-2, 2] and not [-1, 1] - if env_id == "Pendulum-v0": + if env_id == "Pendulum-v1": assert len(record) == 1 else: # The other environments must pass without warning diff --git a/tests/test_predict.py b/tests/test_predict.py index 436547b83..853f4d11d 100644 --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -43,7 +43,7 @@ def test_auto_wrap(model_class): if model_class is DQN: env_name = "CartPole-v0" else: - env_name = "Pendulum-v0" + env_name = "Pendulum-v1" env = gym.make(env_name) eval_env = gym.make(env_name) model = model_class("MlpPolicy", env) @@ -51,7 +51,7 @@ def test_auto_wrap(model_class): @pytest.mark.parametrize("model_class", MODEL_LIST) -@pytest.mark.parametrize("env_id", ["Pendulum-v0", "CartPole-v1"]) +@pytest.mark.parametrize("env_id", ["Pendulum-v1", "CartPole-v1"]) @pytest.mark.parametrize("device", ["cpu", "cuda", "auto"]) def test_predict(model_class, env_id, device): if device == "cuda" and not th.cuda.is_available(): diff --git a/tests/test_run.py b/tests/test_run.py index 67b31c482..223776dfb 100644 --- a/tests/test_run.py +++ b/tests/test_run.py @@ -17,7 +17,7 @@ def test_deterministic_pg(model_class, action_noise): """ model = model_class( "MlpPolicy", - "Pendulum-v0", + "Pendulum-v1", policy_kwargs=dict(net_arch=[64, 64]), learning_starts=100, verbose=1, @@ -28,13 +28,13 @@ def test_deterministic_pg(model_class, action_noise): model.learn(total_timesteps=300, eval_freq=250) -@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v0"]) +@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"]) def test_a2c(env_id): model = A2C("MlpPolicy", env_id, seed=0, policy_kwargs=dict(net_arch=[16]), verbose=1, create_eval_env=True) model.learn(total_timesteps=1000, eval_freq=500) -@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v0"]) +@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"]) @pytest.mark.parametrize("clip_range_vf", [None, 0.2, -0.2]) def test_ppo(env_id, clip_range_vf): if clip_range_vf is not None and clip_range_vf < 0: @@ -67,7 +67,7 @@ def test_ppo(env_id, clip_range_vf): def test_sac(ent_coef): model = SAC( "MlpPolicy", - "Pendulum-v0", + "Pendulum-v1", policy_kwargs=dict(net_arch=[64, 64]), learning_starts=100, verbose=1, @@ -84,7 +84,7 @@ def test_n_critics(n_critics): # Test SAC with different number of critics, for TD3, n_critics=1 corresponds to DDPG model = SAC( "MlpPolicy", - "Pendulum-v0", + "Pendulum-v1", policy_kwargs=dict(net_arch=[64, 64], n_critics=n_critics), learning_starts=100, buffer_size=10000, @@ -112,7 +112,7 @@ def test_train_freq(tmp_path, train_freq): model = SAC( "MlpPolicy", - "Pendulum-v0", + "Pendulum-v1", policy_kwargs=dict(net_arch=[64, 64], n_critics=1), learning_starts=100, buffer_size=10000, @@ -133,7 +133,7 @@ def test_train_freq_fail(train_freq): with pytest.raises(ValueError): model = SAC( "MlpPolicy", - "Pendulum-v0", + "Pendulum-v1", policy_kwargs=dict(net_arch=[64, 64], n_critics=1), learning_starts=100, buffer_size=10000, @@ -147,7 +147,7 @@ def test_train_freq_fail(train_freq): def test_offpolicy_multi_env(model_class): kwargs = {} if model_class in [SAC, TD3, DDPG]: - env_id = "Pendulum-v0" + env_id = "Pendulum-v1" policy_kwargs = dict(net_arch=[64], n_critics=1) # Check auto-conversion to VectorizedActionNoise kwargs = 
dict(action_noise=NormalActionNoise(np.zeros(1), 0.1 * np.ones(1))) diff --git a/tests/test_save_load.py b/tests/test_save_load.py index 69a3f4816..7d810c70e 100644 --- a/tests/test_save_load.py +++ b/tests/test_save_load.py @@ -269,7 +269,7 @@ def test_exclude_include_saved_params(tmp_path, model_class): def test_save_load_pytorch_var(tmp_path): - model = SAC("MlpPolicy", "Pendulum-v0", seed=3, policy_kwargs=dict(net_arch=[64], n_critics=1)) + model = SAC("MlpPolicy", "Pendulum-v1", seed=3, policy_kwargs=dict(net_arch=[64], n_critics=1)) model.learn(200) save_path = str(tmp_path / "sac_pendulum") model.save(save_path) @@ -286,7 +286,7 @@ def test_save_load_pytorch_var(tmp_path): assert not th.allclose(log_ent_coef_before, log_ent_coef_after) # With a fixed entropy coef - model = SAC("MlpPolicy", "Pendulum-v0", seed=3, ent_coef=0.01, policy_kwargs=dict(net_arch=[64], n_critics=1)) + model = SAC("MlpPolicy", "Pendulum-v1", seed=3, ent_coef=0.01, policy_kwargs=dict(net_arch=[64], n_critics=1)) model.learn(200) save_path = str(tmp_path / "sac_pendulum") model.save(save_path) diff --git a/tests/test_sde.py b/tests/test_sde.py index 17ac1501d..0a650a57c 100644 --- a/tests/test_sde.py +++ b/tests/test_sde.py @@ -65,7 +65,7 @@ def test_state_dependent_noise(model_class, use_expln): kwargs = {"learning_starts": 0} if model_class == SAC else {"n_steps": 64} model = model_class( "MlpPolicy", - "Pendulum-v0", + "Pendulum-v1", use_sde=True, seed=None, create_eval_env=True, diff --git a/tests/test_spaces.py b/tests/test_spaces.py index deb09c4e4..54994b2b5 100644 --- a/tests/test_spaces.py +++ b/tests/test_spaces.py @@ -53,10 +53,10 @@ def test_identity_spaces(model_class, env): @pytest.mark.parametrize("model_class", [A2C, DDPG, DQN, PPO, SAC, TD3]) -@pytest.mark.parametrize("env", ["Pendulum-v0", "CartPole-v1"]) +@pytest.mark.parametrize("env", ["Pendulum-v1", "CartPole-v1"]) def test_action_spaces(model_class, env): if model_class in [SAC, DDPG, TD3]: - supported_action_space = env == "Pendulum-v0" + supported_action_space = env == "Pendulum-v1" elif model_class == DQN: supported_action_space = env == "CartPole-v1" elif model_class in [A2C, PPO]: diff --git a/tests/test_tensorboard.py b/tests/test_tensorboard.py index 3f755a7aa..20f58b912 100644 --- a/tests/test_tensorboard.py +++ b/tests/test_tensorboard.py @@ -7,8 +7,8 @@ MODEL_DICT = { "a2c": (A2C, "CartPole-v1"), "ppo": (PPO, "CartPole-v1"), - "sac": (SAC, "Pendulum-v0"), - "td3": (TD3, "Pendulum-v0"), + "sac": (SAC, "Pendulum-v1"), + "td3": (TD3, "Pendulum-v1"), } N_STEPS = 100 diff --git a/tests/test_train_eval_mode.py b/tests/test_train_eval_mode.py index c5eb283b7..1ea2efe67 100644 --- a/tests/test_train_eval_mode.py +++ b/tests/test_train_eval_mode.py @@ -172,7 +172,7 @@ def test_dqn_train_with_batch_norm(): def test_td3_train_with_batch_norm(): model = TD3( "MlpPolicy", - "Pendulum-v0", + "Pendulum-v1", policy_kwargs=dict(net_arch=[16, 16], features_extractor_class=FlattenBatchNormDropoutExtractor), learning_starts=0, tau=0, # do not copy the target @@ -219,7 +219,7 @@ def test_td3_train_with_batch_norm(): def test_sac_train_with_batch_norm(): model = SAC( "MlpPolicy", - "Pendulum-v0", + "Pendulum-v1", policy_kwargs=dict(net_arch=[16, 16], features_extractor_class=FlattenBatchNormDropoutExtractor), learning_starts=0, tau=0, # do not copy the target @@ -257,7 +257,7 @@ def test_sac_train_with_batch_norm(): @pytest.mark.parametrize("model_class", [A2C, PPO]) -@pytest.mark.parametrize("env_id", ["Pendulum-v0", "CartPole-v1"]) 
+@pytest.mark.parametrize("env_id", ["Pendulum-v1", "CartPole-v1"]) def test_a2c_ppo_train_with_batch_norm(model_class, env_id): model = model_class( "MlpPolicy", @@ -281,7 +281,7 @@ def test_offpolicy_collect_rollout_batch_norm(model_class): if model_class in [DQN]: env_id = "CartPole-v1" else: - env_id = "Pendulum-v0" + env_id = "Pendulum-v1" clone_helper = CLONE_HELPERS[model_class] @@ -308,7 +308,7 @@ def test_offpolicy_collect_rollout_batch_norm(model_class): @pytest.mark.parametrize("model_class", [A2C, PPO]) -@pytest.mark.parametrize("env_id", ["Pendulum-v0", "CartPole-v1"]) +@pytest.mark.parametrize("env_id", ["Pendulum-v1", "CartPole-v1"]) def test_a2c_ppo_collect_rollouts_with_batch_norm(model_class, env_id): model = model_class( "MlpPolicy", @@ -332,7 +332,7 @@ def test_a2c_ppo_collect_rollouts_with_batch_norm(model_class, env_id): @pytest.mark.parametrize("model_class", MODEL_LIST) -@pytest.mark.parametrize("env_id", ["Pendulum-v0", "CartPole-v1"]) +@pytest.mark.parametrize("env_id", ["Pendulum-v1", "CartPole-v1"]) def test_predict_with_dropout_batch_norm(model_class, env_id): if env_id == "CartPole-v1": if model_class in [SAC, TD3]: diff --git a/tests/test_utils.py b/tests/test_utils.py index ea497140e..b07bbe931 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -43,7 +43,6 @@ def test_make_vec_env(env_id, n_envs, vec_env_cls, wrapper_class): @pytest.mark.parametrize("n_envs", [1, 2]) @pytest.mark.parametrize("wrapper_kwargs", [None, dict(clip_reward=False, screen_size=60)]) def test_make_atari_env(env_id, n_envs, wrapper_kwargs): - env_id = "BreakoutNoFrameskip-v4" env = make_atari_env(env_id, n_envs, wrapper_kwargs=wrapper_kwargs, monitor_dir=None, seed=0) assert env.num_envs == n_envs @@ -97,7 +96,7 @@ def test_vec_env_monitor_kwargs(): def test_env_auto_monitor_wrap(): - env = gym.make("Pendulum-v0") + env = gym.make("Pendulum-v1") model = A2C("MlpPolicy", env) assert model.env.env_is_wrapped(Monitor)[0] is True @@ -105,7 +104,7 @@ def test_env_auto_monitor_wrap(): model = A2C("MlpPolicy", env) assert model.env.env_is_wrapped(Monitor)[0] is True - model = A2C("MlpPolicy", "Pendulum-v0") + model = A2C("MlpPolicy", "Pendulum-v1") assert model.env.env_is_wrapped(Monitor)[0] is True @@ -137,7 +136,7 @@ def test_custom_vec_env(tmp_path): def test_evaluate_policy(): - model = A2C("MlpPolicy", "Pendulum-v0", seed=0) + model = A2C("MlpPolicy", "Pendulum-v1", seed=0) n_steps_per_episode, n_eval_episodes = 200, 2 model.n_callback_calls = 0 @@ -167,7 +166,7 @@ def dummy_callback(locals_, _globals): assert len(episode_rewards) == n_eval_episodes # Test that warning is given about no monitor - eval_env = gym.make("Pendulum-v0") + eval_env = gym.make("Pendulum-v1") with pytest.warns(UserWarning): _ = evaluate_policy(model, eval_env, n_eval_episodes) @@ -356,7 +355,7 @@ def test_zip_strict(): def test_is_wrapped(): """Test that is_wrapped correctly detects wraps""" - env = gym.make("Pendulum-v0") + env = gym.make("Pendulum-v1") env = gym.Wrapper(env) assert not is_wrapped(env, Monitor) monitor_env = Monitor(env) @@ -373,11 +372,11 @@ def test_ppo_warnings(): # Only 1 step: advantage normalization will return NaN with pytest.raises(AssertionError): - PPO("MlpPolicy", "Pendulum-v0", n_steps=1) + PPO("MlpPolicy", "Pendulum-v1", n_steps=1) # Truncated mini-batch with pytest.warns(UserWarning): - PPO("MlpPolicy", "Pendulum-v0", n_steps=6, batch_size=8) + PPO("MlpPolicy", "Pendulum-v1", n_steps=6, batch_size=8) def test_get_system_info(): diff --git a/tests/test_vec_normalize.py 
b/tests/test_vec_normalize.py index 0136c2657..c3d1d3065 100644 --- a/tests/test_vec_normalize.py +++ b/tests/test_vec_normalize.py @@ -16,7 +16,7 @@ unwrap_vec_normalize, ) -ENV_ID = "Pendulum-v0" +ENV_ID = "Pendulum-v1" class DummyRewardEnv(gym.Env):
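
A minimal sketch of how the pieces above fit together after the migration, assuming ``stable-baselines3[extra]`` (or ``gym>=0.21`` together with ``gym[atari,accept-rom-license]``) is installed; ``BreakoutNoFrameskip-v4`` is used only as a representative Atari ID:

    import gym

    from stable_baselines3 import SAC
    from stable_baselines3.common.env_util import make_atari_env

    # The patch replaces every Pendulum-v0 reference with Pendulum-v1,
    # the ID available in Gym >= 0.21.
    env = gym.make("Pendulum-v1")

    # A few training steps are enough to confirm the algorithm and the
    # environment still work together (mirrors the smoke tests above).
    model = SAC("MlpPolicy", env, buffer_size=10000, learning_starts=50, verbose=0)
    model.learn(total_timesteps=200)

    # The gym[atari,accept-rom-license] extra replaces the old atari-py pin,
    # so make_atari_env can find the ROMs without a manual download.
    vec_env = make_atari_env("BreakoutNoFrameskip-v4", n_envs=1, seed=0)
    print(env.spec.id, vec_env.num_envs)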