Lightning-AI · awaelchli · Jun 21, 2024 · Jun 17, 2024 · Jun 17, 2024 · Jun 21, 2024
@@ -146,7 +146,9 @@ def main(args: argparse.Namespace):
  # Single environment step
  next_obs, reward, done, truncated, info = envs.step(action.cpu().numpy())
  done = torch.logical_or(torch.tensor(done), torch.tensor(truncated))
- rewards[step] = torch.tensor(reward, device=device).view(-1)
+ rewards[step] = torch.tensor(
+ reward, device=device, dtype=torch.float32 if device.type == "mps" else None
+ ).view(-1)
  next_obs, next_done = torch.tensor(next_obs, device=device), done.to(device)
 
  if "final_info" in info: