[BugFix] Fix discrete SAC log-prob (#1750)

Co-authored-by: Matteo Bettini <55539777+matteobettini@users.noreply.github.com>
pytorch · Dec 17, 2023 · 0e02132
1 parent 08f0bed
commit 0e02132
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 7 deletions.
diff --git a/test/test_tensordictmodules.py b/test/test_tensordictmodules.py
@@ -1892,9 +1892,7 @@ def call(data, params):
             with params.to_module(training_model):
                 return training_model(data)
 
-        assert vmap(call, (None, 0))(data, params).shape == torch.Size(
-            (2, 50, 11)
-        )
+        assert vmap(call, (None, 0))(data, params).shape == torch.Size((2, 50, 11))
 
 
 class TestGRUModule:
@@ -2221,9 +2219,7 @@ def call(data, params):
             with params.to_module(training_model):
                 return training_model(data)
 
-        assert vmap(call, (None, 0))(data, params).shape == torch.Size(
-            (2, 50, 11)
-        )
+        assert vmap(call, (None, 0))(data, params).shape == torch.Size((2, 50, 11))
 
 
 def test_safe_specs():

diff --git a/torchrl/objectives/sac.py b/torchrl/objectives/sac.py
@@ -1202,7 +1202,7 @@ def _actor_loss(
         with self.actor_network_params.to_module(self.actor_network):
             dist = self.actor_network.get_dist(tensordict.clone(False))
         prob = dist.probs
-        log_prob = prob.clamp_min(torch.finfo(prob.dtype).resolution)
+        log_prob = dist.logits
 
         td_q = tensordict.select(*self.qvalue_network.in_keys)