
Commit ed656a1
[BugFix] Fix missing min/max alpha clamps in losses (#2684)
vmoens authored Jan 9, 2025
1 parent f672c70 commit ed656a1
Showing 4 changed files with 5 additions and 5 deletions.
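
All four modules share the same one-line fix: the clamp on log_alpha used to run only when min_log_alpha was set, so configuring only max_log_alpha silently skipped the bound. A minimal standalone sketch of the failure mode (a hypothetical repro, not code from this commit; Tensor.clamp_ accepts None for a missing bound):

import torch

log_alpha = torch.tensor(5.0)
min_log_alpha, max_log_alpha = None, 2.0  # only an upper bound is configured

# Old guard: the clamp is skipped entirely when min_log_alpha is None.
if min_log_alpha is not None:
    log_alpha.data.clamp_(min_log_alpha, max_log_alpha)
print(log_alpha)  # tensor(5.) -- the max bound was ignored

# Fixed guard: clamp whenever either bound is set.
if min_log_alpha is not None or max_log_alpha is not None:
    log_alpha.data.clamp_(min_log_alpha, max_log_alpha)
print(log_alpha)  # tensor(2.)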
torchrl/objectives/cql.py (2 changes: 1 addition & 1 deletion)

@@ -892,7 +892,7 @@ def alpha_loss(self, tensordict: TensorDictBase) -> Tensor:
 
     @property
     def _alpha(self):
-        if self.min_log_alpha is not None:
+        if self.min_log_alpha is not None or self.max_log_alpha is not None:
             self.log_alpha.data.clamp_(self.min_log_alpha, self.max_log_alpha)
         alpha = self.log_alpha.data.exp()
         return alpha
torchrl/objectives/crossq.py (2 changes: 1 addition & 1 deletion)

@@ -677,7 +677,7 @@ def alpha_loss(self, log_prob: Tensor) -> Tensor:
 
     @property
     def _alpha(self):
-        if self.min_log_alpha is not None:
+        if self.min_log_alpha is not None or self.max_log_alpha is not None:
             self.log_alpha.data.clamp_(self.min_log_alpha, self.max_log_alpha)
         with torch.no_grad():
             alpha = self.log_alpha.exp()
torchrl/objectives/decision_transformer.py (2 changes: 1 addition & 1 deletion)

@@ -171,7 +171,7 @@ def _forward_value_estimator_keys(self, **kwargs):
 
     @property
     def alpha(self):
-        if self.min_log_alpha is not None:
+        if self.min_log_alpha is not None or self.max_log_alpha is not None:
             self.log_alpha.data.clamp_(self.min_log_alpha, self.max_log_alpha)
         with torch.no_grad():
             alpha = self.log_alpha.exp()
torchrl/objectives/sac.py (4 changes: 2 additions & 2 deletions)

@@ -846,7 +846,7 @@ def _alpha_loss(self, log_prob: Tensor) -> Tensor:
 
     @property
     def _alpha(self):
-        if self.min_log_alpha is not None:
+        if self.min_log_alpha is not None or self.max_log_alpha is not None:
             self.log_alpha.data.clamp_(self.min_log_alpha, self.max_log_alpha)
         with torch.no_grad():
             alpha = self.log_alpha.exp()

@@ -1374,7 +1374,7 @@ def _alpha_loss(self, log_prob: Tensor) -> Tensor:
 
     @property
     def _alpha(self):
-        if self.min_log_alpha is not None:
+        if self.min_log_alpha is not None or self.max_log_alpha is not None:
             self.log_alpha.data = self.log_alpha.data.clamp(
                 self.min_log_alpha, self.max_log_alpha
             )
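
One stylistic difference worth noting: the second sac.py hunk bounds log_alpha with an out-of-place clamp followed by reassigning .data, whereas the other sites use the in-place clamp_. Both leave log_alpha.data bounded; the in-place form just avoids allocating a fresh tensor. A small equivalence sketch (hypothetical, not from the commit):

import torch

a = torch.nn.Parameter(torch.tensor(5.0))
b = torch.nn.Parameter(torch.tensor(5.0))

a.data = a.data.clamp(None, 2.0)  # out-of-place, as in the second sac.py hunk
b.data.clamp_(None, 2.0)          # in-place, as in the other hunks

assert torch.equal(a.data, b.data)  # both end up at tensor(2.)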
