Updates from conformer #338

Open · wants to merge 7 commits into base: main
1 change: 0 additions & 1 deletion .gitignore
@@ -10,7 +10,6 @@ sh/
*.txt
.vscode/
external/
playground/
!requirements.txt
!docs/requirements-docs.txt
.DS_Store
1 change: 1 addition & 0 deletions config/evaluator/base.yaml
@@ -1,5 +1,6 @@
_target_: gflownet.evaluator.base.BaseEvaluator

reward_sampling_method: rejection
# config formerly from logger.test
first_it: True
period: 100
11 changes: 10 additions & 1 deletion gflownet/envs/README.md
@@ -14,7 +14,7 @@ Note that the mask of invalid actions indeed flags _invalid_ actions, as opposed

## Buffer, train data and test data

A train and a test set can be created at the beginning of training. The train set may be used to sample offline (backward) trajectories. The test set may be used to compute metrics during and after training. These sets may be created in different ways, specified by the configuration variables `env.buffer.train.type` and `env.buffer.test.type`. Options for the data set `type` are
A train and a test set can be created at the beginning of training. The train set may be used to sample offline (backward) trajectories. The test set may be used to compute metrics during and after training (e.g. JSD, correlation). These sets may be created in different ways, specified by the configuration variables `env.buffer.train.type` and `env.buffer.test.type`. Options for the data set `type` are

- `all`: all terminating states in the output space $\mathcal{X}$ will be added - Convenient but only feasible for small, synthetic environments like the hyper-grid.
- `grid`: a grid of points in the output space $\mathcal{X}$ - Only available in certain environments where obtaining a grid of points is meaningful. This mode also requires specifying the number of points via `env.buffer.<train/test>.n`.
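
For illustration, a buffer configuration combining these options might look like the sketch below (the key layout is assumed from the `env.buffer.*` variables above; the values are arbitrary):

```yaml
env:
  buffer:
    train:
      type: grid  # offline trajectories sampled from a grid of points
      n: 1000     # number of grid points, required when type is grid
    test:
      type: all   # all terminating states; feasible only for small envs
```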
@@ -52,3 +52,12 @@ To use the replay buffer (once enabled) for backward sampling, one can specify `
:::{tip}
You can use [MyST](https://myst-parser.readthedocs.io/en/latest/syntax/admonitions.html) in the documentation. This is expected to fail on GitHub.
:::

## Evaluator
The evaluator's parameters define which method is used for sampling from the reward (`nested` or `rejection` sampling) and how many points will be sampled (`evaluator.n`). `evaluator.n_grid` is used only for plotting and defines, if applicable, the number of grid points for visualizing KDEs.
```yaml
evaluator:
reward_sampling_method: nested
n_grid: 1000 # number of grid points to visualize KDEs
n: 1000 # number of samples from reward and from gfn
```
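
Of the two methods, `rejection` is the default (see `config/evaluator/base.yaml` above). As a rough, generic sketch of what rejection sampling from an unnormalized reward involves (not the repository's actual `sample_from_reward` implementation; `reward_fn`, `propose` and `reward_max` are hypothetical stand-ins):

```python
import numpy as np

def rejection_sample(reward_fn, propose, reward_max, n_samples):
    """Draw samples with density proportional to reward_fn.

    Assuming propose() draws uniformly from the sample space, each
    candidate is accepted with probability reward_fn(x) / reward_max,
    so accepted points follow the normalized reward distribution.
    """
    samples = []
    while len(samples) < n_samples:
        x = propose()  # candidate from the uniform proposal
        if np.random.uniform() < reward_fn(x) / reward_max:
            samples.append(x)
    return np.stack(samples)
```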
12 changes: 5 additions & 7 deletions gflownet/envs/alaninedipeptide.py
@@ -7,9 +7,9 @@
from torchtyping import TensorType

from gflownet.envs.ctorus import ContinuousTorus
from gflownet.utils.molecule import constants
from gflownet.utils.molecule.atom_positions_dataset import AtomPositionsDataset
from gflownet.utils.molecule.conformer_base import ConformerBase
from gflownet.utils.molecule.constants import AD_FREE_TAS, AD_SMILES
from gflownet.utils.molecule.datasets import AtomPositionsDataset
from gflownet.utils.molecule.rdkit_conformer import RDKitConformer


class AlanineDipeptide(ContinuousTorus):
@@ -26,9 +26,7 @@ def __init__(
path_to_dataset, url_to_dataset
)
atom_positions = self.atom_positions_dataset.sample()
self.conformer = ConformerBase(
atom_positions, constants.ad_smiles, constants.ad_free_tas
)
self.conformer = RDKitConformer(atom_positions, AD_SMILES, AD_FREE_TAS)
n_dim = len(self.conformer.freely_rotatable_tas)
super().__init__(**kwargs)
self.sync_conformer_with_state()
@@ -62,7 +60,7 @@ def states2proxy(
-------
A numpy array containing all the states in the batch.
"""
return super().states2proxy(states).numpy()
return super().states2proxy(states).detach().cpu().numpy()


if __name__ == "__main__":
34 changes: 31 additions & 3 deletions gflownet/envs/base.py
@@ -16,7 +16,14 @@
from torch.distributions import Categorical
from torchtyping import TensorType

from gflownet.utils.common import copy, set_device, set_float_precision, tbool, tfloat
from gflownet.utils.common import (
copy,
set_device,
set_float_precision,
tbool,
tfloat,
torch2np,
)

CMAP = mpl.colormaps["cividis"]
"""
@@ -48,7 +55,7 @@ def __init__(
# Call reset() to set initial state, done, n_actions
self.reset()
# Device
self.device = set_device(device)
self.set_device(set_device(device))

[Review comment] Why this change? Is this correct? :/

# Float precision
self.float = set_float_precision(float_precision)
# Flag to skip checking if action is valid (computing mask) before step
@@ -72,6 +79,17 @@
self.policy_output_dim = len(self.fixed_policy_output)
self.policy_input_dim = len(self.state2policy())

def set_device(self, device: torch.device):
"""
Set the device of the environment.

Parameters
----------
device : torch.device
The device to set the environment to.
"""
self.device = device

@abstractmethod
def get_action_space(self):
"""
@@ -757,6 +775,15 @@ def traj2readable(self, traj=None):
"""
return str(traj).replace("(", "[").replace(")", "]").replace(",", "")

def states2kde(
self, states: Union[List, TensorType["batch", "state_dim"]]
) -> Union[List, npt.NDArray, TensorType["batch", "kde_dim"]]:

[Review comment] I believe the return type is always npt.NDArray isn't it?

"""
Converts a batch of states into a batch of states suitable for the KDE computations.
"""
states_kde = self.states2proxy(states)
return torch2np(states_kde)
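
As context for the review thread above, a sketch of how `states2kde` is consumed downstream (mirroring the evaluator changes later in this diff; `env`, `batch` and the kernel settings are illustrative stand-ins):

```python
# states2kde applies states2proxy and then torch2np, so KDE utilities
# such as fit_kde receive numpy arrays and need no conversion of their own.
x_sampled = env.states2kde(batch.get_terminating_states())
kde_pred = env.fit_kde(x_sampled, kernel="gaussian", bandwidth=0.1)
```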

def reset(self, env_id: Union[int, str] = None):
"""
Resets the environment.
@@ -1249,6 +1276,7 @@ def top_k_metrics_and_plots(

return metrics, figs, fig_names

@torch.no_grad()
def plot_reward_distribution(
self, states=None, scores=None, ax=None, title=None, proxy=None, **kwargs
):
@@ -1269,7 +1297,7 @@
states_proxy = self.states2proxy(states)
scores = self.proxy(states_proxy)
if isinstance(scores, TensorType):
scores = scores.cpu().detach().numpy()
scores = scores.detach().cpu().numpy()
ax.hist(scores)
ax.set_title(title)
ax.set_ylabel("Number of Samples")
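A pattern recurring throughout this PR is normalizing tensor-to-numpy conversion to `tensor.detach().cpu().numpy()`. A minimal, standalone illustration of why the order matters (generic PyTorch, not repository code):

```python
import torch

x = torch.randn(3, requires_grad=True)
y = x * 2

# y.numpy() would raise a RuntimeError: tensors that require grad must
# be detached first. detach() drops the autograd graph, cpu() moves the
# data to host memory (a no-op for CPU tensors), and numpy() then
# returns an ndarray sharing that memory.
arr = y.detach().cpu().numpy()
print(arr)
```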
4 changes: 0 additions & 4 deletions gflownet/envs/cube.py
@@ -1449,7 +1449,6 @@ def fit_kde(
bandwidth : float
The bandwidth of the kernel.
"""
samples = torch2np(samples)
[Review comment, Collaborator] Is this change because sklearn supports tensor types when fitting the KernelDensity?

[Reply, Collaborator Author] This is because `env.states2kde(states)` is always called before calling `fit_kde`, and `torch2np` happens there.
return KernelDensity(kernel=kernel, bandwidth=bandwidth).fit(samples)

def plot_reward_samples(
@@ -1489,8 +1488,6 @@ def plot_reward_samples(
"""
if self.n_dim != 2:
return None
samples = torch2np(samples)
samples_reward = torch2np(samples_reward)
rewards = torch2np(rewards)
# Create mesh grid from samples_reward
n_per_dim = int(np.sqrt(samples_reward.shape[0]))
@@ -1543,7 +1540,6 @@ def plot_kde(
"""
if self.n_dim != 2:
return None
samples = torch2np(samples)
# Create mesh grid from samples
n_per_dim = int(np.sqrt(samples.shape[0]))
assert n_per_dim**2 == samples.shape[0]
7 changes: 4 additions & 3 deletions gflownet/envs/htorus.py
@@ -54,6 +54,7 @@ def __init__(
"vonmises_mean": 0.0,
"vonmises_concentration": 0.001,
},
reward_sampling_method="rejection",
**kwargs,
):
assert n_dim > 0
@@ -74,6 +75,9 @@
self.source = self.source_angles + [0]
# End-of-sequence action: (n_dim, 0)
self.eos = (self.n_dim, 0)

self.reward_sampling_method = reward_sampling_method

# Base class init
super().__init__(
fixed_distr_params=fixed_distr_params,
@@ -556,7 +560,6 @@ def fit_kde(
bandwidth : float
The bandwidth of the kernel.
"""
samples = torch2np(samples)
samples_aug = self.augment_samples(samples)
kde = KernelDensity(kernel=kernel, bandwidth=bandwidth).fit(samples_aug)
return kde
@@ -606,7 +609,6 @@ def plot_reward_samples(
"""
if self.n_dim != 2:
return None
samples = torch2np(samples)
rewards = torch2np(rewards)
n_per_dim = int(np.sqrt(rewards.shape[0]))
assert n_per_dim**2 == rewards.shape[0]
@@ -677,7 +679,6 @@ def plot_kde(
"""
if self.n_dim != 2:
return None
samples = torch2np(samples)
# Create mesh grid from samples
n_per_dim = int(np.sqrt(samples.shape[0]))
assert n_per_dim**2 == samples.shape[0]
4 changes: 2 additions & 2 deletions gflownet/envs/tetris.py
@@ -317,7 +317,7 @@ def state2readable(self, state: Optional[TensorType["height", "width"]] = None):
if isinstance(state, tuple):
readable = str(np.stack(state))
else:
readable = str(state.cpu().numpy())
readable = str(state.detach().cpu().numpy())
readable = readable.replace("[[", "[").replace("]]", "]").replace("\n ", "\n")
return readable

@@ -581,7 +581,7 @@ def _plot_board(board, ax: Axes, cellsize: int = 20, linewidth: int = 2):
linewidth : int
The width of the separation between cells, in pixels.
"""
board = board.clone().numpy()
board = board.detach().clone().numpy()
height = board.shape[0] * cellsize
width = board.shape[1] * cellsize
board_img = 128 * np.ones(
2 changes: 1 addition & 1 deletion gflownet/envs/tree.py
@@ -882,7 +882,7 @@ def state2readable(self, state=None):
"""
if state is None:
state = self.state.clone().detach()
state = state.cpu().numpy()
state = state.detach().cpu().numpy()
readable = ""
for idx in range(self.n_nodes):
attributes = self._attributes_to_readable(state[idx])
32 changes: 21 additions & 11 deletions gflownet/evaluator/base.py
@@ -34,8 +34,7 @@ class methods to instantiate an evaluator.


class BaseEvaluator(AbstractEvaluator):

def __init__(self, gfn_agent=None, **config):
def __init__(self, gfn_agent=None, reward_sampling_method="rejection", **config):
"""
Base evaluator class for GFlowNetAgent.

@@ -56,6 +55,7 @@ def __init__(self, gfn_agent=None, **config):
details about other methods and attributes, including the
:meth:`~gflownet.evaluator.abstract.AbstractEvaluator.__init__`.
"""
self.reward_sampling_method = reward_sampling_method
super().__init__(gfn_agent, **config)

def define_new_metrics(self):
@@ -223,6 +223,7 @@ def eval_top_k(self, it, gfn_states=None, random_states=None):
"summary": summary,
}

@torch.no_grad()
def compute_log_prob_metrics(self, x_tt, metrics=None):
"""
Compute log-probability metrics for the given test data.
@@ -275,10 +276,12 @@ def compute_log_prob_metrics(self, x_tt, metrics=None):

if "reward_batch" in reqs:
rewards_x_tt = self.gfn.proxy.rewards(self.gfn.env.states2proxy(x_tt))
if torch.is_tensor(rewards_x_tt):
rewards_x_tt = rewards_x_tt.detach().cpu().numpy()

if "corr_prob_traj_rewards" in metrics:
lp_metrics["corr_prob_traj_rewards"] = np.corrcoef(
np.exp(logprobs_x_tt.cpu().numpy()), rewards_x_tt
np.exp(logprobs_x_tt.detach().cpu().numpy()), rewards_x_tt
)[0, 1]

if "var_logrewards_logp" in metrics:
@@ -304,6 +307,7 @@ def compute_log_prob_metrics(self, x_tt, metrics=None):
"metrics": lp_metrics,
}

@torch.no_grad()
def compute_density_metrics(self, x_tt, dict_tt, metrics=None):
"""
Compute density metrics for the given test data.
@@ -371,9 +375,9 @@
elif self.gfn.continuous and hasattr(self.gfn.env, "fit_kde"):
batch, _ = self.gfn.sample_batch(n_forward=self.config.n, train=False)
assert batch.is_valid()
x_sampled = batch.get_terminating_states(proxy=True)
x_sampled = self.gfn.env.states2kde(batch.get_terminating_states())
# TODO make it work with conditional env
x_tt = torch2np(self.gfn.env.states2proxy(x_tt))
x_tt = self.gfn.env.states2kde(x_tt)
kde_pred = self.gfn.env.fit_kde(
x_sampled,
kernel=self.config.kde.kernel,
@@ -384,8 +388,10 @@
kde_true = dict_tt["kde_true"]
else:
# Sample from reward via rejection sampling
x_from_reward = self.gfn.env.states2proxy(
self.gfn.sample_from_reward(n_samples=self.config.n)
x_from_reward = self.gfn.env.states2kde(
self.gfn.sample_from_reward(
n_samples=self.config.n, method=self.reward_sampling_method
)
)
# Fit KDE with samples from reward
kde_true = self.gfn.env.fit_kde(
@@ -444,6 +450,7 @@
"data": density_data,
}

@torch.no_grad()
def eval(self, metrics=None, **plot_kwargs):
"""
Evaluate the GFlowNetAgent and compute metrics and plots.
@@ -560,9 +567,11 @@ def plot(self, x_sampled, kde_pred, kde_true, plot_kwargs, **kwargs):
fig_kde_pred = fig_kde_true = fig_reward_samples = fig_samples_topk = None

if hasattr(self.gfn.env, "plot_reward_samples") and x_sampled is not None:
(sample_space_batch, rewards_sample_space) = (
self.gfn.get_sample_space_and_reward()
)
(
sample_space_batch,
rewards_sample_space,
) = self.gfn.get_sample_space_and_reward(return_states_proxy=False)
sample_space_batch = self.gfn.env.states2kde(sample_space_batch)
fig_reward_samples = self.gfn.env.plot_reward_samples(
x_sampled,
sample_space_batch,
Expand All @@ -571,7 +580,8 @@ def plot(self, x_sampled, kde_pred, kde_true, plot_kwargs, **kwargs):
)

if hasattr(self.gfn.env, "plot_kde"):
sample_space_batch, _ = self.gfn.get_sample_space_and_reward()
sample_space_batch = self.gfn.get_sample_space()
sample_space_batch = self.gfn.env.states2kde(sample_space_batch)
if kde_pred is not None:
fig_kde_pred = self.gfn.env.plot_kde(
sample_space_batch, kde_pred, **plot_kwargs