Enhanced with New Checkpoints, Apple Silicon Support, and Improved Logging #1

Open · wants to merge 6 commits into base: master
5 changes: 5 additions & 0 deletions .gitignore
```diff
@@ -0,0 +1,5 @@
+.DS_Store
+__pycache__
+PPO_logs
+PPO_figs
+rocket
```
103 changes: 52 additions & 51 deletions PPO.py
```diff
@@ -5,14 +5,18 @@

 ################################## set device ##################################
 print("============================================================================================")
-# set device to cpu or cuda
-device = torch.device('cpu')
-if(torch.cuda.is_available()):
-    device = torch.device('cuda:0')
+if torch.cuda.is_available():
+    device = torch.device("cuda:0")
     torch.cuda.empty_cache()
-    print("Device set to : " + str(torch.cuda.get_device_name(device)))
+    print("Device set to:", torch.cuda.get_device_name(device))
+elif torch.backends.mps.is_available():
+    device = torch.device("mps")
+    print("Device set to: MPS (Apple Silicon)")
 else:
-    print("Device set to : cpu")
+    device = torch.device("cpu")
+    print("Device set to: CPU")

 print("============================================================================================")
```

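The new selection order (CUDA, then MPS, then CPU) can be sanity-checked outside the training script. A minimal sketch, assuming PyTorch 1.12+ (the first release that ships `torch.backends.mps`):

```python
import torch

# Prefer CUDA, then Apple Silicon's MPS backend, then fall back to CPU,
# mirroring the selection logic added in this PR.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

# Any tensor created with `device=` lands on whichever backend was picked.
x = torch.randn(4, 4, device=device)
print(f"{device}: {x.sum().item():.4f}")
```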

```diff
@@ -47,31 +51,31 @@ def __init__(self, state_dim, action_dim, has_continuous_action_space, action_st
         # actor
         if has_continuous_action_space :
             self.actor = nn.Sequential(
-                            nn.Linear(state_dim, 64),
-                            nn.Tanh(),
-                            nn.Linear(64, 64),
-                            nn.Tanh(),
-                            nn.Linear(64, action_dim),
-                            nn.Tanh()
-                        )
+                nn.Linear(state_dim, 64),
+                nn.Tanh(),
+                nn.Linear(64, 64),
+                nn.Tanh(),
+                nn.Linear(64, action_dim),
+                nn.Tanh()
+            )
         else:
             self.actor = nn.Sequential(
-                            nn.Linear(state_dim, 64),
-                            nn.Tanh(),
-                            nn.Linear(64, 64),
-                            nn.Tanh(),
-                            nn.Linear(64, action_dim),
-                            nn.Softmax(dim=-1)
-                        )
+                nn.Linear(state_dim, 64),
+                nn.Tanh(),
+                nn.Linear(64, 64),
+                nn.Tanh(),
+                nn.Linear(64, action_dim),
+                nn.Softmax(dim=-1)
+            )
         # critic
         self.critic = nn.Sequential(
-                        nn.Linear(state_dim, 64),
-                        nn.Tanh(),
-                        nn.Linear(64, 64),
-                        nn.Tanh(),
-                        nn.Linear(64, 1)
-                    )
+            nn.Linear(state_dim, 64),
+            nn.Tanh(),
+            nn.Linear(64, 64),
+            nn.Tanh(),
+            nn.Linear(64, 1)
+        )

     def set_action_std(self, new_action_std):
         if self.has_continuous_action_space:
             self.action_var = torch.full((self.action_dim,), new_action_std * new_action_std).to(device)
```
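To confirm the reindented networks still wire up correctly, a quick shape check is easy to run in isolation. A sketch with illustrative dimensions (`state_dim=8` and `action_dim=3` are placeholders, not the rocket environment's actual sizes):

```python
import torch
import torch.nn as nn

state_dim, action_dim = 8, 3  # placeholders, not the rocket env's real sizes

# Discrete-action actor: Softmax over action logits, as in the diff above.
actor = nn.Sequential(
    nn.Linear(state_dim, 64), nn.Tanh(),
    nn.Linear(64, 64), nn.Tanh(),
    nn.Linear(64, action_dim), nn.Softmax(dim=-1),
)
# Critic: a scalar state-value estimate.
critic = nn.Sequential(
    nn.Linear(state_dim, 64), nn.Tanh(),
    nn.Linear(64, 64), nn.Tanh(),
    nn.Linear(64, 1),
)

s = torch.randn(1, state_dim)
print(actor(s).shape)   # torch.Size([1, 3]); rows sum to 1
print(critic(s).shape)  # torch.Size([1, 1])
```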
```diff
@@ -137,9 +141,9 @@ def __init__(self, state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs,

         self.policy = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
         self.optimizer = torch.optim.Adam([
-                        {'params': self.policy.actor.parameters(), 'lr': lr_actor},
-                        {'params': self.policy.critic.parameters(), 'lr': lr_critic}
-                    ])
+            {'params': self.policy.actor.parameters(), 'lr': lr_actor},
+            {'params': self.policy.critic.parameters(), 'lr': lr_critic}
+        ])

         self.policy_old = ActorCritic(state_dim, action_dim, has_continuous_action_space, action_std_init).to(device)
         self.policy_old.load_state_dict(self.policy.state_dict())
```
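The optimizer keeps two parameter groups so the actor and critic can learn at different rates. A standalone sketch of the same pattern (the `3e-4`/`1e-3` values are illustrative, not necessarily what this repo's training script passes in):

```python
import torch

actor = torch.nn.Linear(8, 2)   # stand-ins for the real actor/critic networks
critic = torch.nn.Linear(8, 1)

optimizer = torch.optim.Adam([
    {'params': actor.parameters(), 'lr': 3e-4},   # illustrative lr_actor
    {'params': critic.parameters(), 'lr': 1e-3},  # illustrative lr_critic
])

# Each group keeps its own learning rate through every optimizer.step().
for i, group in enumerate(optimizer.param_groups):
    print(f"group {i}: lr={group['lr']}")
```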
```diff
@@ -173,29 +177,24 @@ def decay_action_std(self, action_std_decay_rate, min_action_std):
         print("--------------------------------------------------------------------------------------------")

     def select_action(self, state):
-
         if self.has_continuous_action_space:
             with torch.no_grad():
                 state = torch.FloatTensor(state).to(device)
                 action, action_logprob, state_val = self.policy_old.act(state)
-
-            self.buffer.states.append(state)
-            self.buffer.actions.append(action)
-            self.buffer.logprobs.append(action_logprob)
-            self.buffer.state_values.append(state_val)
-
-            return action.detach().cpu().numpy().flatten()
+            self.buffer.states.append(state)
+            self.buffer.actions.append(action)
+            self.buffer.logprobs.append(action_logprob)
+            self.buffer.state_values.append(state_val)
+            return action.detach().cpu().numpy().flatten()
         else:
             with torch.no_grad():
                 state = torch.FloatTensor(state).to(device)
                 action, action_logprob, state_val = self.policy_old.act(state)
-
-            self.buffer.states.append(state)
-            self.buffer.actions.append(action)
-            self.buffer.logprobs.append(action_logprob)
-            self.buffer.state_values.append(state_val)
-
-            return action.item()
+            self.buffer.states.append(state)
+            self.buffer.actions.append(action)
+            self.buffer.logprobs.append(action_logprob)
+            self.buffer.state_values.append(state_val)
+            return action.item()

     def update(self):
         # Monte Carlo estimate of returns
```
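In use, the tightened `select_action` is driven from a rollout loop that also records rewards and terminal flags into the buffer. A hypothetical sketch, assuming a Gym-style `env` and a constructed `ppo_agent` (both placeholders here):

```python
# Hypothetical rollout loop; `env`, `ppo_agent`, and `max_ep_len` are placeholders.
state = env.reset()
for t in range(max_ep_len):
    action = ppo_agent.select_action(state)    # also appends state/action/logprob to the buffer
    state, reward, done, _ = env.step(action)
    ppo_agent.buffer.rewards.append(reward)    # assumes the buffer exposes these lists
    ppo_agent.buffer.is_terminals.append(done)
    if done:
        break
ppo_agent.update()  # consume the buffer and run K epochs of PPO
```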
```diff
@@ -252,11 +251,13 @@ def update(self):

     def save(self, checkpoint_path):
-        torch.save(self.policy_old.state_dict(), checkpoint_path)
+        torch.save(self.policy.state_dict(), checkpoint_path)

     def load(self, checkpoint_path):
-        self.policy_old.load_state_dict(torch.load(checkpoint_path, map_location=lambda storage, loc: storage))
-        self.policy.load_state_dict(torch.load(checkpoint_path, map_location=lambda storage, loc: storage))
+        self.policy_old.load_state_dict(
+            torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
+        )
+        self.policy.load_state_dict(
+            torch.load(checkpoint_path, map_location=lambda storage, loc: storage)
+        )
```

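A checkpoint round-trip with the reworked `save`/`load`, as a sketch: `save` writes a single `state_dict` and `load` restores it into both `policy` and `policy_old`. The constructor arguments and path below are placeholders, assuming the upstream PPO-PyTorch constructor signature:

```python
# Placeholders throughout: dimensions, hyperparameters, and the checkpoint path.
agent = PPO(state_dim=8, action_dim=3, lr_actor=3e-4, lr_critic=1e-3,
            gamma=0.99, K_epochs=40, eps_clip=0.2,
            has_continuous_action_space=False, action_std_init=0.6)

agent.save("PPO_preTrained/rocket_checkpoint.pth")
agent.load("PPO_preTrained/rocket_checkpoint.pth")
```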
Binary file not shown.
22 changes: 13 additions & 9 deletions README.md
```diff
@@ -2,7 +2,7 @@

 The goal is to train a reinforcement learning agent to control a rocket to either hover or land safely using the PPO algorithm. The environment simulates physics for the rocket, and the agent learns to make decisions based on the state observations to achieve the task.

-https://github.com/user-attachments/assets/2bc71416-0043-4e8d-8f00-cd0d85a834ec
+https://github.com/user-attachments/assets/d1977412-2de8-49c3-b0d1-f602dc28bb61

 ![RewardsChart](images/rewards-timesteps.png)
```

````diff
@@ -90,22 +90,26 @@ These states provide the necessary information for the agent to understand the r
    source venv/bin/activate  # On Windows use venv\Scripts\activate
    ```

-3. **Install Dependencies**
+3. [**Install Dependencies**](requirements.txt)

    ```bash
-   pip install torch numpy matplotlib
+   pip install -r requirements.txt
    ```

-4. **Ensure CUDA Availability (Optional)**
+4. **Ensure GPU Availability (Optional)**

-   If you have a CUDA-compatible GPU and want to utilize it:
+   If you have a CUDA-compatible GPU or an Apple Silicon chip and want to utilize it:

    - Install the appropriate CUDA toolkit version compatible with your PyTorch installation.
-   - Verify CUDA availability in PyTorch:
-
+   - Verify GPU availability in PyTorch:
     ```python
-    import torch
-    torch.cuda.is_available()
+    import torch
+    if torch.cuda.is_available():
+        device = torch.device("cuda:0")
+        print("Device set to:", torch.cuda.get_device_name(device))
+    elif torch.backends.mps.is_available():
+        device = torch.device("mps")
+        print("Device set to: MPS (Apple Silicon)")
     ```

 ---
````