
Dont recompute masks #163

Merged Feb 27, 2024 (23 commits)
Changes from 3 commits

Commits
6811a82
added ising example
josephdviviano Feb 20, 2024
89027dd
function to stack a list of states
josephdviviano Feb 22, 2024
2afb00e
added notes for bug
josephdviviano Feb 22, 2024
be2fee1
NOT WORKING: this commit contains trajectories_states_b which is the …
josephdviviano Feb 22, 2024
7b536a2
using stack_states to prevent recomputation of masks
josephdviviano Feb 24, 2024
77e7e1b
stack_states now ignores masks for non-discrete states, and fixed bug…
josephdviviano Feb 24, 2024
4e364d3
black
josephdviviano Feb 24, 2024
26dda4b
isort
josephdviviano Feb 24, 2024
1a6e768
removed comment
josephdviviano Feb 24, 2024
45d9893
black
josephdviviano Feb 24, 2024
1e72273
default value reduced for grid size
josephdviviano Feb 24, 2024
c8cf89c
typo
josephdviviano Feb 24, 2024
687136c
black
josephdviviano Feb 24, 2024
1846da1
black upgrade
josephdviviano Feb 24, 2024
552e010
upgrade black
josephdviviano Feb 24, 2024
21b845d
black
josephdviviano Feb 24, 2024
6e2daee
Merge branch 'train_ising.py' of github.com:GFNOrg/torchgfn into dont…
josephdviviano Feb 24, 2024
1a54615
black upgrade
josephdviviano Feb 24, 2024
6aa1659
black formatting update
josephdviviano Feb 24, 2024
f1a5c7f
extended excludes
josephdviviano Feb 24, 2024
ccfa959
Merge branch 'train_ising.py' of github.com:GFNOrg/torchgfn into dont…
josephdviviano Feb 24, 2024
1a5ad2c
checks whether user-defined function returns the expected type
josephdviviano Feb 24, 2024
7996b37
Merge pull request #165 from GFNOrg/step_type_checking
saleml Feb 25, 2024
4 changes: 2 additions & 2 deletions src/gfn/gflownet/base.py

@@ -153,8 +153,8 @@ def get_pfs_and_pbs(
         if self.off_policy:
             # We re-use the values calculated in .sample_trajectories().
             if trajectories.estimator_outputs is not None:
-                estimator_outputs = trajectories.estimator_outputs[
-                    ~trajectories.actions.is_dummy
+                estimator_outputs = trajectories.estimator_outputs[  # TODO: This contains `inf` when we use the new `stack_states` method in `samplers.py`!
+                    ~trajectories.actions.is_dummy  # And this causes later failures (p_f is not finite).
                 ]
             else:
                 raise Exception(
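The TODO in the hunk above reports that the masked estimator outputs can contain `inf` once the new `stack_states` path is used, which later makes p_f non-finite. A minimal sketch of an early finiteness check that would localize that failure, assuming `trajectories.estimator_outputs` and `trajectories.actions.is_dummy` are tensors as in the diff (the helper name is hypothetical, not part of the PR):

import torch

def select_valid_estimator_outputs(trajectories):
    # Keep only the outputs that correspond to real (non-dummy) actions.
    valid = trajectories.estimator_outputs[~trajectories.actions.is_dummy]
    # Fail early if any selected output is non-finite, rather than much later
    # when p_f turns out not to be finite.
    assert torch.isfinite(valid).all(), "estimator_outputs contain non-finite values"
    return valid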
17 changes: 14 additions & 3 deletions src/gfn/samplers.py

@@ -7,7 +7,7 @@
 from gfn.containers import Trajectories
 from gfn.env import Env
 from gfn.modules import GFNModule
-from gfn.states import States
+from gfn.states import States, stack_states


 class Sampler:

@@ -140,6 +140,8 @@ def sample_trajectories(
             else states.is_sink_state
         )

+        trajectories_states_b: List[States] = [states]
+
         trajectories_states: List[TT["n_trajectories", "state_shape", torch.float]] = [
             states.tensor
         ]

@@ -220,9 +222,18 @@ def sample_trajectories(
             dones = dones | new_dones

             trajectories_states += [states.tensor]
+            trajectories_states_b += [states]

+        # New Method
+        trajectories_states_b = stack_states(trajectories_states_b)
+
+        # Old Method
+        trajectories_states = env.states_from_tensor(
+            torch.stack(trajectories_states, dim=0))
+
+        assert (trajectories_states_b.tensor == trajectories_states.tensor).sum() == trajectories_states.tensor.numel()
+        assert (trajectories_states_b.forward_masks == trajectories_states.forward_masks).sum() == trajectories_states.forward_masks.numel()
+
-        trajectories_states = torch.stack(trajectories_states, dim=0)
-        trajectories_states = env.states_from_tensor(trajectories_states)
         trajectories_actions = env.Actions.stack(trajectories_actions)
         trajectories_logprobs = torch.stack(trajectories_logprobs, dim=0)
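For reference, the two paths that the assertions above compare reduce to the following sketch (the helper names are illustrative, not part of the PR): the old path rebuilds States from stacked raw tensors via the environment, which recomputes the masks, while the new path stacks the States objects collected during sampling and reuses the masks computed at each step.

import torch

from gfn.states import stack_states


def stack_via_env(env, state_tensors):
    # Old method: stack raw tensors, then rebuild States through the env,
    # which recomputes forward/backward masks from scratch.
    return env.states_from_tensor(torch.stack(state_tensors, dim=0))


def stack_via_states(states_list):
    # New method: stack the States objects themselves, carrying over the
    # masks already computed during sampling.
    return stack_states(states_list)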
19 changes: 18 additions & 1 deletion src/gfn/states.py

@@ -3,7 +3,7 @@
 from abc import ABC, abstractmethod
 from copy import deepcopy
 from math import prod
-from typing import Callable, ClassVar, Optional, Sequence, cast
+from typing import Callable, ClassVar, Optional, Sequence, List, cast

 import torch
 from torchtyping import TensorType as TT

@@ -446,3 +446,20 @@ def init_forward_masks(self, set_ones: bool = True):
             self.forward_masks = torch.ones(shape).bool()
         else:
             self.forward_masks = torch.zeros(shape).bool()
+
+
+def stack_states(states: List[States]):
+    """Given a list of states, stacks them along a new dimension (0)."""
+    state_example = states[0]  # We assume all elems of `states` are the same.
+
+    stacked_states = state_example.from_batch_shape((0, 0))  # Empty.
+    stacked_states.tensor = torch.stack([s.tensor for s in states], dim=0)
+    if state_example._log_rewards:
+        stacked_states._log_rewards = torch.stack([s._log_rewards for s in states], dim=0)
+    stacked_states.forward_masks = torch.stack([s.forward_masks for s in states], dim=0)
+    stacked_states.backward_masks = torch.stack([s.backward_masks for s in states], dim=0)
Collaborator (review comment on the mask-stacking lines above): this should only be implemented for DiscreteStates, not all States

+
+    # Adds the trajectory dimension.
+    stacked_states.batch_shape = (stacked_states.tensor.shape[0],) + state_example.batch_shape
+
+    return stacked_states
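The review comment above (and commit 77e7e1b, "stack_states now ignores masks for non-discrete states") suggests stacking masks only for discrete states, since plain States carry no forward/backward masks. A minimal sketch of such a guard, assuming DiscreteStates is exposed by gfn.states; this is not the merged implementation:

from typing import List

import torch

from gfn.states import DiscreteStates, States


def stack_states_sketch(states: List[States]) -> States:
    """Stack a list of States along a new leading (trajectory) dimension."""
    state_example = states[0]  # Assumes all elements share class and batch shape.

    stacked = state_example.from_batch_shape((0, 0))  # Empty container.
    stacked.tensor = torch.stack([s.tensor for s in states], dim=0)

    if isinstance(state_example, DiscreteStates):
        # Masks exist only for discrete states; reuse them instead of recomputing.
        stacked.forward_masks = torch.stack([s.forward_masks for s in states], dim=0)
        stacked.backward_masks = torch.stack([s.backward_masks for s in states], dim=0)

    # Add the trajectory dimension to the batch shape.
    stacked.batch_shape = (stacked.tensor.shape[0],) + state_example.batch_shape
    return stacked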