Enhanced with New Checkpoints, Apple Silicon Support, and Improved Logging #1

Open · wants to merge 6 commits into base: master
5 changes: 5 additions & 0 deletions .gitignore
```diff
@@ -0,0 +1,5 @@
+.DS_Store
+__pycache__
+PPO_logs
+PPO_figs
+rocket
```
103 changes: 52 additions & 51 deletions PPO.py
```diff
@@ -5,14 +5,18 @@

 ################################## set device ##################################
 print("============================================================================================")
-# set device to cpu or cuda
-device = torch.device('cpu')
-if(torch.cuda.is_available()):
-    device = torch.device('cuda:0')
+if torch.cuda.is_available():
+    device = torch.device("cuda:0")
     torch.cuda.empty_cache()
-    print("Device set to : " + str(torch.cuda.get_device_name(device)))
+    print("Device set to:", torch.cuda.get_device_name(device))
+elif torch.backends.mps.is_available():
+    device = torch.device("mps")
+    print("Device set to: MPS (Apple Silicon)")
 else:
-    print("Device set to : cpu")
+    device = torch.device("cpu")
+    print("Device set to: CPU")

 print("============================================================================================")
```

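The new selection order (CUDA, then MPS, then CPU) can be sanity-checked outside the training script. A minimal sketch, assuming PyTorch 1.12+ (the first release that ships `torch.backends.mps`):

```python
import torch

# Prefer CUDA, then Apple Silicon's MPS backend, then fall back to CPU,
# mirroring the selection logic added in this PR.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Any tensor created with `device=` lands on whichever backend was picked.
x = torch.randn(4, 4, device=device)
print(f"{device}: {x.sum().item():.4f}")
```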

```diff
@@ -47,31 +51,31 @@ def __init__(self, state_dim, action_dim, has_continuous_action_space, action_st
         # actor
         if has_continuous_action_space :
             self.actor = nn.Sequential(
-                            nn.Linear(state_dim, 64),
-                            nn.Tanh(),
-                            nn.Linear(64, 64),
-                            nn.Tanh(),
-                            nn.Linear(64, action_dim),
-                            nn.Tanh()
-                        )
+                nn.Linear(state_dim, 64),
+                nn.Tanh(),
+                nn.Linear(64, 64),
+                nn.Tanh(),
+                nn.Linear(64, action_dim),
+                nn.Tanh()
+            )
         else:
             self.actor = nn.Sequential(
-                            nn.Linear(state_dim, 64),
-                            nn.Tanh(),
-                            nn.Linear(64, 64),
-                            nn.Tanh(),
-                            nn.Linear(64, action_dim),
-                            nn.Softmax(dim=-1)
-                        )
+                nn.Linear(state_dim, 64),
+                nn.Tanh(),
+                nn.Linear(64, 64),
+                nn.Tanh(),
+                nn.Linear(64, action_dim),
+                nn.Softmax(dim=-1)
+            )
         # critic
         self.critic = nn.Sequential(
-                        nn.Linear(state_dim, 64),
-                        nn.Tanh(),
-                        nn.Linear(64, 64),
-                        nn.Tanh(),
-                        nn.Linear(64, 1)
-                    )
+            nn.Linear(state_dim, 64),
+            nn.Tanh(),
+            nn.Linear(64, 64),
+            nn.Tanh(),
+            nn.Linear(64, 1)
+        )

     def set_action_std(self, new_action_std):
         if self.has_continuous_action_space:
             self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)
```
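To confirm the reindented networks still wire up correctly, a quick shape check is easy to run in isolation. A sketch with illustrative dimensions (`state_dim=8` and `action_dim=3` are placeholders, not the rocket environment's actual sizes):

```python
import torch
import torch.nn as nn

state_dim, action_dim = 8, 3  # placeholders, not the rocket env's real sizes

# Discrete-action actor: Softmax over action logits, as in the diff above.
actor = nn.Sequential(
    nn.Linear(state_dim, 64), nn.Tanh(),
    nn.Linear(64, 64), nn.Tanh(),
    nn.Linear(64, action_dim), nn.Softmax(dim=-1),
)
# Critic: a scalar state-value estimate.
critic = nn.Sequential(
    nn.Linear(state_dim, 64), nn.Tanh(),
    nn.Linear(64, 64), nn.Tanh(),
    nn.Linear(64, 1),
)

s = torch.randn(1, state_dim)
print(actor(s).shape)   # torch.Size([1, 3]); rows sum to 1
print(critic(s).shape)  # torch.Size([1, 1])
```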
```diff
@@ -137,9 +141,9 @@ def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs,

         self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
         self.optimizer = torch.optim.Adam([
-                        {'params': self.policy.actor.parameters(), 'lr': lr_actor},
-                        {'params': self.policy.critic.parameters(), 'lr': lr_critic}
-                    ])
+            {'params': self.policy.actor.parameters(), 'lr': lr_actor},
+            {'params': self.policy.critic.parameters(), 'lr': lr_critic}
+        ])

         self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
         self.policy_old.load_state_dict(self.policy.state_dict())
```
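The optimizer keeps two parameter groups so the actor and critic can learn at different rates. A standalone sketch of the same pattern (the `3e-4`/`1e-3` values are illustrative, not necessarily what this repo's training script passes in):

```python
import torch

actor = torch.nn.Linear(8, 2)   # stand-ins for the real actor/critic networks
critic = torch.nn.Linear(8, 1)

optimizer = torch.optim.Adam([
    {'params': actor.parameters(), 'lr': 3e-4},   # illustrative lr_actor
    {'params': critic.parameters(), 'lr': 1e-3},  # illustrative lr_critic
])

# Each group keeps its own learning rate through every optimizer.step().
for i, group in enumerate(optimizer.param_groups):
    print(f"group {i}: lr={group['lr']}")
```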
```diff
@@ -173,29 +177,24 @@ def decay_action_std(self, action_std_decay_rate, min_action_std):
         print("--------------------------------------------------------------------------------------------")

     def select_action(self, state):
-
         if self.has_continuous_action_space:
             with torch.no_grad():
                 state = torch.FloatTensor(state).to(device)
                 action, action_logprob, state_val = self.policy_old.act(state)
-
-            self.buffer.states.append(state)
-            self.buffer.actions.append(action)
-            self.buffer.logprobs.append(action_logprob)
-            self.buffer.state_values.append(state_val)
-
-            return action.detach().cpu().numpy().flatten()
+            self.buffer.states.append(state)
+            self.buffer.actions.append(action)
+            self.buffer.logprobs.append(action_logprob)
+            self.buffer.state_values.append(state_val)
+            return action.detach().cpu().numpy().flatten()
         else:
             with torch.no_grad():
                 state = torch.FloatTensor(state).to(device)
                 action, action_logprob, state_val = self.policy_old.act(state)
-
-            self.buffer.states.append(state)
-            self.buffer.actions.append(action)
-            self.buffer.logprobs.append(action_logprob)
-            self.buffer.state_values.append(state_val)
-
-            return action.item()
+            self.buffer.states.append(state)
+            self.buffer.actions.append(action)
+            self.buffer.logprobs.append(action_logprob)
+            self.buffer.state_values.append(state_val)
+            return action.item()

     def update(self):
         # Monte Carlo estimate of returns
```
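In use, the tightened `select_action` is driven from a rollout loop that also records rewards and terminal flags into the buffer. A hypothetical sketch, assuming a Gym-style `env` and a constructed `ppo_agent` (both placeholders here):

```python
# Hypothetical rollout loop; `env`, `ppo_agent`, and `max_ep_len` are placeholders.
state = env.reset()
for t in range(max_ep_len):
    action = ppo_agent.select_action(state)    # also appends state/action/logprob to the buffer
    state, reward, done, _ = env.step(action)
    ppo_agent.buffer.rewards.append(reward)    # assumes the buffer exposes these lists
    ppo_agent.buffer.is_terminals.append(done)
    if done:
        break
ppo_agent.update()  # consume the buffer and run K epochs of PPO
```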
```diff
@@ -252,11 +251,13 @@ def update(self):

     def save(self, checkpoint_path):
-        torch.save(self.policy_old.state_dict(), checkpoint_path)
+        torch.save(self.policy.state_dict(), checkpoint_path)

     def load(self, checkpoint_path):
-        self.policy_old.load_state_dict(torch.load(checkpoint_path, map_location=lambda storage, loc: storage))
-        self.policy.load_state_dict(torch.load(checkpoint_path, map_location=lambda storage, loc: storage))
+        self.policy_old.load_state_dict(
+            torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
+        )
+        self.policy.load_state_dict(
+            torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
+        )
```

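A checkpoint round-trip with the reworked `save`/`load`, as a sketch: `save` writes a single `state_dict` and `load` restores it into both `policy` and `policy_old`. The constructor arguments and path below are placeholders, assuming the upstream PPO-PyTorch constructor signature:

```python
# Placeholders throughout: dimensions, hyperparameters, and the checkpoint path.
agent = PPO(state_dim=8, action_dim=3, lr_actor=3e-4, lr_critic=1e-3,
            gamma=0.99, K_epochs=40, eps_clip=0.2,
            has_continuous_action_space=False, action_std_init=0.6)

agent.save("PPO_preTrained/rocket_checkpoint.pth")
agent.load("PPO_preTrained/rocket_checkpoint.pth")
```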
Binary file not shown.
22 changes: 13 additions & 9 deletions README.md
```diff
@@ -2,7 +2,7 @@

 The goal is to train a reinforcement learning agent to control a rocket to either hover or land safely using the PPO algorithm. The environment simulates physics for the rocket, and the agent learns to make decisions based on the state observations to achieve the task.

-https://github.com/user-attachments/assets/2bc71416-0043-4e8d-8f00-cd0d85a834ec
+https://github.com/user-attachments/assets/d1977412-2de8-49c3-b0d1-f602dc28bb61

 ![RewardsChart](images/rewards-timesteps.png)
```

````diff
@@ -90,22 +90,26 @@ These states provide the necessary information for the agent to understand the r
    source venv/bin/activate  # On Windows use venv\Scripts\activate
    ```

-3. **Install Dependencies**
+3. [**Install Dependencies**](requirements.txt)

    ```bash
-   pip install torch numpy matplotlib
+   pip install -r requirements.txt
    ```

-4. **Ensure CUDA Availability (Optional)**
+4. **Ensure GPU Availability (Optional)**

-   If you have a CUDA-compatible GPU and want to utilize it:
+   If you have a CUDA-compatible GPU or an Apple Silicon chip and want to utilize it:

    - Install the appropriate CUDA toolkit version compatible with your PyTorch installation.
-   - Verify CUDA availability in PyTorch:
-
+   - Verify GPU availability in PyTorch:
     ```python
-    import torch
-    torch.cuda.is_available()
+    import torch
+    if torch.cuda.is_available():
+        device = torch.device("cuda:0")
+        print("Device set to:", torch.cuda.get_device_name(device))
+    elif torch.backends.mps.is_available():
+        device = torch.device("mps")
+        print("Device set to: MPS (Apple Silicon)")
     ```

 ---
````