diff --git a/sbibm/algorithms/sbi/mcabc.py b/sbibm/algorithms/sbi/mcabc.py
index d019ba29..a8b15156 100644
--- a/sbibm/algorithms/sbi/mcabc.py
+++ b/sbibm/algorithms/sbi/mcabc.py
@@ -1,32 +1,16 @@
-from typing import Optional, Tuple
+import pickle
+from typing import Optional, Tuple, Union
 
 import torch
 from sbi.inference import MCABC
+from sbi.utils import KDEWrapper
+from torch import Tensor
 
 import sbibm
 from sbibm.tasks.task import Task
 from sbibm.utils.io import save_tensor_to_csv
 
-
-def run(
-    task: Task,
-    num_samples: int,
-    num_simulations: int,
-    num_observation: Optional[int] = None,
-    observation: Optional[torch.Tensor] = None,
-    num_top_samples: Optional[int] = 100,
-    quantile: Optional[float] = None,
-    eps: Optional[float] = None,
-    distance: str = "l2",
-    batch_size: int = 1000,
-    save_distances: bool = False,
-    kde_bandwidth: Optional[str] = "cv",
-    sass: bool = False,
-    sass_fraction: float = 0.5,
-    sass_feature_expansion_degree: int = 3,
-    lra: bool = False,
-) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
-    """Runs REJ-ABC from `sbi`
+__DOCSTRING__ = """Runs REJ-ABC from `sbi`
 
     Choose one of `num_top_samples`, `quantile`, `eps`.
 
@@ -45,14 +29,44 @@ def run(
     kde_bandwidth: If not None, will resample using KDE when necessary, set
         e.g. to "cv" for cross-validated bandwidth selection
     sass: If True, summary statistics are learned as in
-        Fearnhead & Prangle 2012.
+        Fearnhead & Prangle 2012,
+        https://doi.org/10.1111/j.1467-9868.2011.01010.x
     sass_fraction: Fraction of simulation budget to use for sass.
     sass_feature_expansion_degree: Degree of polynomial expansion of the summary
         statistics.
     lra: If True, posterior samples are adjusted with
-        linear regression as in Beaumont et al. 2002.
+        linear regression as in Beaumont et al. 2002,
+        https://doi.org/10.1093/genetics/162.4.2025
+
+    """
+
+
+def build_posterior(
+    task: Task,
+    num_samples: int,
+    num_simulations: int,
+    num_observation: Optional[int] = None,
+    observation: Optional[torch.Tensor] = None,
+    num_top_samples: Optional[int] = 100,
+    quantile: Optional[float] = None,
+    eps: Optional[float] = None,
+    distance: str = "l2",
+    batch_size: int = 1000,
+    save_distances: bool = False,
+    kde_bandwidth: Optional[str] = "cv",
+    sass: bool = False,
+    sass_fraction: float = 0.5,
+    sass_feature_expansion_degree: int = 3,
+    lra: bool = False,
+) -> Tuple[
+    Union[Tuple[Tensor, dict], Tuple[KDEWrapper, dict], Tensor, KDEWrapper], dict
+]:
+    f"""
+    Builds the inferred posterior object.
+    {__DOCSTRING__}
+
     Returns:
-        Samples from posterior, number of simulator calls, log probability of true params if computable
+        Posterior wrapper and summary dictionary
     """
     assert not (num_observation is None and observation is None)
     assert not (num_observation is not None and observation is not None)
 
     assert not (num_top_samples is None and quantile is None and eps is None)
 
     log = sbibm.get_logger(__name__)
-    log.info(f"Running REJ-ABC")
+    log.info("Building REJ-ABC posterior")
 
     prior = task.get_prior_dist()
     simulator = task.get_simulator(max_calls=num_simulations)
@@ -103,6 +117,59 @@ def run(
     if save_distances:
         save_tensor_to_csv("distances.csv", summary["distances"])
 
+    return output, summary
+
+
+def run(
+    task: Task,
+    num_samples: int,
+    num_simulations: int,
+    num_observation: Optional[int] = None,
+    observation: Optional[torch.Tensor] = None,
+    num_top_samples: Optional[int] = 100,
+    quantile: Optional[float] = None,
+    eps: Optional[float] = None,
+    distance: str = "l2",
+    batch_size: int = 1000,
+    save_distances: bool = False,
+    kde_bandwidth: Optional[str] = "cv",
+    sass: bool = False,
+    sass_fraction: float = 0.5,
+    sass_feature_expansion_degree: int = 3,
+    lra: bool = False,
+    posterior_path: Optional[str] = "",
+) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
+    f"""
+    {__DOCSTRING__}
+    posterior_path: filesystem location at which to store the pickled
+        posterior (if empty or None, the posterior is not saved)
+
+    Returns:
+        Samples from posterior, number of simulator calls, log probability of true params if computable
+    """
+    assert not (num_observation is None and observation is None)
+    assert not (num_observation is not None and observation is not None)
+    assert not (num_top_samples is None and quantile is None and eps is None)
+
+    inkwargs = {k: v for k, v in locals().items() if "posterior_path" not in k}
+
+    log = sbibm.get_logger(__name__)
+    log.info("Running REJ-ABC")
+    simulator = task.get_simulator(max_calls=num_simulations)
+
+    output, summary = build_posterior(**inkwargs)
+    kde = kde_bandwidth is not None
+
+    if posterior_path:
+        if not kde:
+            log.info(
+                f"unable to save posterior as none was created (kde_bandwidth={kde_bandwidth})"
+            )
+        else:
+            log.info(f"storing posterior at {posterior_path}")
+            with open(posterior_path, "wb") as ofile:
+                pickle.dump(output, ofile)
 
     if kde:
         kde_posterior = output
         samples = kde_posterior.sample(num_samples)
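
For context, a minimal usage sketch of the `posterior_path` round trip added above (not part of the patch; the file name, observation number, and budgets are arbitrary choices). With the default `kde_bandwidth="cv"`, `run` fits a KDE and pickles the resulting `KDEWrapper`:

```python
# Hypothetical usage of sbibm.algorithms.sbi.mcabc.run with posterior_path;
# "posterior.pkl" and the budgets below are illustrative, not from the patch.
import pickle

import sbibm
from sbibm.algorithms.sbi.mcabc import run as run_mcabc

task = sbibm.get_task("gaussian_linear")

samples, num_sims, _ = run_mcabc(
    task=task,
    num_samples=100,
    num_simulations=1_000,
    num_observation=1,
    posterior_path="posterior.pkl",  # saved because kde_bandwidth defaults to "cv"
)

# Reload the pickled KDEWrapper and draw fresh samples from it.
with open("posterior.pkl", "rb") as f:
    kde_posterior = pickle.load(f)

fresh = kde_posterior.sample(100)  # KDEWrapper.sample takes an int
assert fresh.shape == samples.shape
```
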
diff --git a/sbibm/algorithms/sbi/smcabc.py b/sbibm/algorithms/sbi/smcabc.py
index 6c804b6d..bc9515f4 100644
--- a/sbibm/algorithms/sbi/smcabc.py
+++ b/sbibm/algorithms/sbi/smcabc.py
@@ -1,43 +1,18 @@
-from typing import Optional, Tuple
+import pickle
+from typing import Optional, Tuple, Union
 
 import pandas as pd
 import torch
 from sbi.inference import SMCABC
-from sklearn.linear_model import LinearRegression
+from sbi.utils import KDEWrapper
+from torch import Tensor
 
 import sbibm
 from sbibm.tasks.task import Task
 
 from .utils import clip_int
 
-
-def run(
-    task: Task,
-    num_samples: int,
-    num_simulations: int,
-    num_observation: Optional[int] = None,
-    observation: Optional[torch.Tensor] = None,
-    population_size: Optional[int] = None,
-    distance: str = "l2",
-    epsilon_decay: float = 0.2,
-    distance_based_decay: bool = True,
-    ess_min: Optional[float] = None,
-    initial_round_factor: int = 5,
-    batch_size: int = 1000,
-    kernel: str = "gaussian",
-    kernel_variance_scale: float = 0.5,
-    use_last_pop_samples: bool = True,
-    algorithm_variant: str = "C",
-    save_summary: bool = False,
-    sass: bool = False,
-    sass_fraction: float = 0.5,
-    sass_feature_expansion_degree: int = 3,
-    lra: bool = False,
-    lra_sample_weights: bool = True,
-    kde_bandwidth: Optional[str] = "cv",
-    kde_sample_weights: bool = False,
-) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
-    """Runs SMC-ABC from `sbi`
+__DOCSTRING__ = """Runs SMC-ABC from `sbi`
 
     SMC-ABC supports two different ways of scheduling epsilon:
     1) Exponential decay: eps_t+1 = epsilon_decay * eps_t
@@ -75,22 +50,56 @@ def run(
     sass_feature_expansion_degree: Degree of polynomial expansion of the summary
         statistics.
     lra: If True, posterior samples are adjusted with
-        linear regression as in Beaumont et al. 2002.
+        linear regression as in Beaumont et al. 2002,
+        https://doi.org/10.1093/genetics/162.4.2025
     lra_sample_weights: Whether to weigh LRA samples
     kde_bandwidth: If not None, will resample using KDE when necessary, set
         e.g. to "cv" for cross-validated bandwidth selection
     kde_sample_weights: Whether to weigh KDE samples
+
+    """
+
+
+def build_posterior(
+    task: Task,
+    num_samples: int,
+    num_simulations: int,
+    num_observation: Optional[int] = None,
+    observation: Optional[torch.Tensor] = None,
+    population_size: Optional[int] = None,
+    distance: str = "l2",
+    epsilon_decay: float = 0.2,
+    distance_based_decay: bool = True,
+    ess_min: Optional[float] = None,
+    initial_round_factor: int = 5,
+    batch_size: int = 1000,
+    kernel: str = "gaussian",
+    kernel_variance_scale: float = 0.5,
+    use_last_pop_samples: bool = True,
+    algorithm_variant: str = "C",
+    save_summary: bool = False,
+    sass: bool = False,
+    sass_fraction: float = 0.5,
+    sass_feature_expansion_degree: int = 3,
+    lra: bool = False,
+    lra_sample_weights: bool = True,
+    kde_bandwidth: Optional[str] = "cv",
+    kde_sample_weights: bool = False,
+) -> Tuple[
+    Union[Tuple[Tensor, dict], Tuple[KDEWrapper, dict], Tensor, KDEWrapper], dict
+]:
+    f"""
+    Builds the inferred posterior object.
+    {__DOCSTRING__}
+
     Returns:
-        Samples from posterior, number of simulator calls, log probability of true params if computable
+        Posterior wrapper and summary dictionary
     """
     assert not (num_observation is None and observation is None)
     assert not (num_observation is not None and observation is not None)
 
     log = sbibm.get_logger(__name__)
 
-    smc_papers = dict(A="Toni 2010", B="Sisson et al. 2007", C="Beaumont et al. 2009")
-    log.info(f"Running SMC-ABC as in {smc_papers[algorithm_variant]}.")
2009") - log.info(f"Running SMC-ABC as in {smc_papers[algorithm_variant]}.") prior = task.get_prior_dist() simulator = task.get_simulator(max_calls=num_simulations) @@ -151,6 +160,66 @@ def run( assert simulator.num_simulations == num_simulations + return output, summary + + +def run( + task: Task, + num_samples: int, + num_simulations: int, + num_observation: Optional[int] = None, + observation: Optional[torch.Tensor] = None, + population_size: Optional[int] = None, + distance: str = "l2", + epsilon_decay: float = 0.2, + distance_based_decay: bool = True, + ess_min: Optional[float] = None, + initial_round_factor: int = 5, + batch_size: int = 1000, + kernel: str = "gaussian", + kernel_variance_scale: float = 0.5, + use_last_pop_samples: bool = True, + algorithm_variant: str = "C", + save_summary: bool = False, + sass: bool = False, + sass_fraction: float = 0.5, + sass_feature_expansion_degree: int = 3, + lra: bool = False, + lra_sample_weights: bool = True, + kde_bandwidth: Optional[str] = "cv", + kde_sample_weights: bool = False, + posterior_path: Optional[str] = "", +) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]: + f""" + {__DOCSTRING__} + posterior_path: filesystem location where to store the posterior under + (if None, posterior is not saved) + + Returns: + Samples from posterior, number of simulator calls, log probability of true params if computable + """ + assert not (num_observation is None and observation is None) + assert not (num_observation is not None and observation is not None) + + inkwargs = {k: v for k, v in locals().items() if "posterior_path" not in k} + + log = sbibm.get_logger(__name__) + smc_papers = dict(A="Toni 2010", B="Sisson et al. 2007", C="Beaumont et al. 2009") + log.info(f"Building SMC-ABC Posterior as in {smc_papers[algorithm_variant]}.") + + simulator = task.get_simulator(max_calls=num_simulations) + kde = kde_bandwidth is not None + output, summary = build_posterior(**inkwargs) + if posterior_path: + if not kde: + log.info( + f"unable to save posterior as non was created, kde = {kde, kde_bandwidth}" + ) + elif posterior_path is not None: + log.info(f"storing posterior at {posterior_path}") + with open(posterior_path, "wb") as ofile: + pickle.dump(output, ofile) + # Return samples from kde or raw samples. 
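
The module now exposes posterior construction separately from sampling. A sketch of the two-step interface (task name and budgets are arbitrary; per the patch, `output` is a `KDEWrapper` when `kde_bandwidth` is not None, and `summary` is a non-empty dict):

```python
# Hypothetical two-step usage of sbibm.algorithms.sbi.smcabc.build_posterior.
import sbibm
from sbibm.algorithms.sbi.smcabc import build_posterior

task = sbibm.get_task("two_moons")

# With the default kde_bandwidth="cv", output is a KDEWrapper; with
# kde_bandwidth=None it would be the raw SMC-ABC output instead.
output, summary = build_posterior(
    task=task,
    num_samples=100,
    num_simulations=1_000,
    num_observation=1,
)

print(sorted(summary.keys()))        # diagnostics collected during inference
theta = output.sample(10)            # KDEWrapper exposes sample and log_prob
print(output.log_prob(theta).shape)
```
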
diff --git a/sbibm/algorithms/sbi/snle.py b/sbibm/algorithms/sbi/snle.py
index 122fd872..2338579b 100644
--- a/sbibm/algorithms/sbi/snle.py
+++ b/sbibm/algorithms/sbi/snle.py
@@ -1,5 +1,6 @@
 import logging
 import math
+import pickle
 from typing import Any, Dict, Optional, Tuple
 
 import torch
@@ -13,8 +14,30 @@
 )
 from sbibm.tasks.task import Task
 
+__DOCSTRING__ = """
+    Runs (S)NLE from `sbi`
 
-def run(
+    Args:
+        task: Task instance
+        num_observation: Observation number to load, alternative to `observation`
+        observation: Observation, alternative to `num_observation`
+        num_samples: Number of samples to generate from posterior
+        num_simulations: Simulation budget
+        num_rounds: Number of rounds
+        neural_net: Neural network to use, one of maf / mdn / made / nsf
+        hidden_features: Number of hidden features in network
+        simulation_batch_size: Batch size for simulator
+        training_batch_size: Batch size for training network
+        automatic_transforms_enabled: Whether to enable automatic transforms
+        mcmc_method: MCMC method
+        mcmc_parameters: MCMC parameters
+        z_score_x: Whether to z-score x
+        z_score_theta: Whether to z-score theta
+        max_num_epochs: Maximum number of epochs
+"""
+
+
+def build_posterior(
     task: Task,
     num_samples: int,
     num_simulations: int,
@@ -38,26 +61,11 @@
     z_score_x: bool = True,
     z_score_theta: bool = True,
     max_num_epochs: Optional[int] = None,
-) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
-    """Runs (S)NLE from `sbi`
-
-    Args:
-        task: Task instance
-        num_observation: Observation number to load, alternative to `observation`
-        observation: Observation, alternative to `num_observation`
-        num_samples: Number of samples to generate from posterior
-        num_simulations: Simulation budget
-        num_rounds: Number of rounds
-        neural_net: Neural network to use, one of maf / mdn / made / nsf
-        hidden_features: Number of hidden features in network
-        simulation_batch_size: Batch size for simulator
-        training_batch_size: Batch size for training network
-        automatic_transforms_enabled: Whether to enable automatic transforms
-        mcmc_method: MCMC method
-        mcmc_parameters: MCMC parameters
-        z_score_x: Whether to z-score x
-        z_score_theta: Whether to z-score theta
-        max_num_epochs: Maximum number of epochs
+):
+    f"""
+    Builds the inferred posterior object.
+    {__DOCSTRING__}
 
     Returns:
-        Samples from posterior, number of simulator calls, log probability of true params if computable
+        Trained posterior
     """
@@ -107,7 +115,9 @@
     posteriors = []
     proposal = prior
     mcmc_parameters["warmup_steps"] = 25
-    mcmc_parameters["enable_transform"] = False  # NOTE: Disable `sbi` auto-transforms, since `sbibm` does its own
+    mcmc_parameters[
+        "enable_transform"
+    ] = False  # NOTE: Disable `sbi` auto-transforms, since `sbibm` does its own
 
     for r in range(num_rounds):
         theta, x = inference.simulate_for_sbi(
@@ -141,6 +151,57 @@
 
     assert simulator.num_simulations == num_simulations
 
+    return posterior
+
+
+def run(
+    task: Task,
+    num_samples: int,
+    num_simulations: int,
+    num_observation: Optional[int] = None,
+    observation: Optional[torch.Tensor] = None,
+    num_rounds: int = 10,
+    neural_net: str = "maf",
+    hidden_features: int = 50,
+    simulation_batch_size: int = 1000,
+    training_batch_size: int = 10000,
+    automatic_transforms_enabled: bool = True,
+    mcmc_method: str = "slice_np_vectorized",
+    mcmc_parameters: Dict[str, Any] = {
+        "num_chains": 100,
+        "thin": 10,
+        "warmup_steps": 100,
+        "init_strategy": "sir",
+        "sir_batch_size": 1000,
+        "sir_num_batches": 100,
+    },
+    z_score_x: bool = True,
+    z_score_theta: bool = True,
+    max_num_epochs: Optional[int] = None,
+    posterior_path: Optional[str] = "",
+) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
+    f"""
+    {__DOCSTRING__}
+    posterior_path: filesystem location at which to store the pickled
+        posterior (if empty or None, the posterior is not saved)
+
+    Returns:
+        Samples from posterior, number of simulator calls, log probability of true params if computable
+    """
+    assert not (num_observation is None and observation is None)
+    assert not (num_observation is not None and observation is not None)
+
+    inkwargs = {k: v for k, v in locals().items() if "posterior_path" not in k}
+    log = logging.getLogger(__name__)
+    simulator = task.get_simulator(max_calls=num_simulations)
+
+    posterior = build_posterior(**inkwargs)
+
+    if posterior_path:
+        log.info(f"storing posterior at {posterior_path}")
+        with open(posterior_path, "wb") as ofile:
+            pickle.dump(posterior, ofile)
 
     samples = posterior.sample((num_samples,)).detach()
 
     return samples, simulator.num_simulations, None
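
A round-trip sketch for the (S)NLE changes, mirroring tests/algorithms/sbi/test_snle_posterior.py further below (the temp file and small budgets are illustrative). Note the pickled object is the wrapped `sbi` posterior, so sampling takes the usual tuple-shaped argument, unlike `KDEWrapper.sample`:

```python
# Hypothetical save/reload cycle for sbibm.algorithms.sbi.snle.run.
import pickle
import tempfile
from pathlib import Path

import sbibm
from sbibm.algorithms.sbi.snle import run as run_snle

task = sbibm.get_task("gaussian_linear")
_, path = tempfile.mkstemp(".pkl")

samples, _, _ = run_snle(
    task=task,
    num_observation=1,
    num_simulations=2_000,
    num_samples=100,
    num_rounds=1,
    max_num_epochs=30,
    posterior_path=path,
)

# Reload the pickled posterior and draw fresh samples from it.
with open(path, "rb") as f:
    posterior = pickle.load(f)

fresh = posterior.sample((100,))  # tuple-shaped sample size, as in the tests
assert fresh.shape == samples.shape
Path(path).unlink()  # clean up the temp file
```
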
"init_strategy": "sir", + "sir_batch_size": 1000, + "sir_num_batches": 100, + }, + z_score_x: bool = True, + z_score_theta: bool = True, + max_num_epochs: Optional[int] = None, + posterior_path: Optional[str] = "", +) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]: + f""" + {__DOCSTRING__} + posterior_path: filesystem location where to store the posterior under + (if None, posterior is not saved) + + Returns: + Samples from posterior, number of simulator calls, log probability of true params if computable + """ + assert not (num_observation is None and observation is None) + assert not (num_observation is not None and observation is not None) + + inkwargs = {k: v for k, v in locals().items() if "posterior_path" not in k} + log = logging.getLogger(__name__) + simulator = task.get_simulator(max_calls=num_simulations) + + posterior = build_posterior(**inkwargs) + + if posterior_path: + log.info(f"storing posterior at {posterior_path}") + with open(posterior_path, "wb") as ofile: + pickle.dump(posterior, ofile) + samples = posterior.sample((num_samples,)).detach() return samples, simulator.num_simulations, None diff --git a/sbibm/algorithms/sbi/snpe.py b/sbibm/algorithms/sbi/snpe.py index 7dc42036..e20b1d1d 100644 --- a/sbibm/algorithms/sbi/snpe.py +++ b/sbibm/algorithms/sbi/snpe.py @@ -1,11 +1,13 @@ import logging import math +import pickle from typing import Optional, Tuple import torch from sbi import inference as inference from sbi.utils.get_nn_models import posterior_nn +import sbibm from sbibm.algorithms.sbi.utils import ( wrap_posterior, wrap_prior_dist, @@ -13,25 +15,7 @@ ) from sbibm.tasks.task import Task - -def run( - task: Task, - num_samples: int, - num_simulations: int, - num_observation: Optional[int] = None, - observation: Optional[torch.Tensor] = None, - num_rounds: int = 10, - neural_net: str = "nsf", - hidden_features: int = 50, - simulation_batch_size: int = 1000, - training_batch_size: int = 10000, - num_atoms: int = 10, - automatic_transforms_enabled: bool = False, - z_score_x: bool = True, - z_score_theta: bool = True, - max_num_epochs: Optional[int] = None, - ) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]: - """Runs (S)NPE from `sbi` +__DOCSTRING__ = """Runs (S)NPE from `sbi` Args: task: Task instance @@ -49,9 +33,32 @@ def run( z_score_x: Whether to z-score x z_score_theta: Whether to z-score theta max_num_epochs: Maximum number of epochs + """ + + +def build_posterior( + task: Task, + num_samples: int, + num_simulations: int, + num_observation: Optional[int] = None, + observation: Optional[torch.Tensor] = None, + num_rounds: int = 10, + neural_net: str = "nsf", + hidden_features: int = 50, + simulation_batch_size: int = 1000, + training_batch_size: int = 10000, + num_atoms: int = 10, + automatic_transforms_enabled: bool = False, + z_score_x: bool = True, + z_score_theta: bool = True, + max_num_epochs: Optional[int] = None, +) -> Tuple[torch.Tensor, None]: + f""" + build_posterior method creating the inferred posterior object + {__DOCSTRING__} Returns: - Samples from posterior, number of simulator calls, log probability of true params if computable + Trained posterior """ assert not (num_observation is None and observation is None) assert not (num_observation is not None and observation is not None) @@ -125,7 +132,49 @@ def run( assert simulator.num_simulations == num_simulations + return posterior, None + + +def run( + task: Task, + num_samples: int, + num_simulations: int, + num_observation: Optional[int] = None, + observation: Optional[torch.Tensor] = 
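
A sketch of the new `snpe.build_posterior`, which per the patch returns `(posterior, None)`; the task, net, and budget choices are taken from the tests further below. Unlike the MCMC-based (S)NLE/(S)NRE posteriors, the SNPE posterior evaluates `log_prob` directly:

```python
# Hypothetical usage of sbibm.algorithms.sbi.snpe.build_posterior.
import sbibm
from sbibm.algorithms.sbi.snpe import build_posterior

task = sbibm.get_task("gaussian_linear")

posterior, _ = build_posterior(
    task=task,
    num_observation=1,
    num_simulations=2_000,
    num_samples=100,
    num_rounds=1,
    neural_net="mdn",
    max_num_epochs=30,
)

# Score the ground-truth parameters under the amortized posterior.
theta_true = task.get_true_parameters(num_observation=1)
print(posterior.log_prob(theta_true))
```
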
diff --git a/sbibm/algorithms/sbi/snre.py b/sbibm/algorithms/sbi/snre.py
index adfdfa6b..65215b93 100644
--- a/sbibm/algorithms/sbi/snre.py
+++ b/sbibm/algorithms/sbi/snre.py
@@ -1,11 +1,13 @@
 import logging
 import math
+import pickle
 from typing import Any, Dict, Optional, Tuple
 
 import torch
 from sbi import inference as inference
 from sbi.utils.get_nn_models import classifier_nn
 
+import sbibm
 from sbibm.algorithms.sbi.utils import (
     wrap_posterior,
     wrap_prior_dist,
@@ -13,8 +15,32 @@
 )
 from sbibm.tasks.task import Task
 
+__DOCSTRING__ = """
+    Runs (S)NRE from `sbi`
 
-def run(
+    Args:
+        task: Task instance
+        num_samples: Number of samples to generate from posterior
+        num_observation: Observation number to load, alternative to `observation`
+        observation: Observation, alternative to `num_observation`
+        num_simulations: Simulation budget
+        num_rounds: Number of rounds
+        neural_net: Neural network to use, one of linear / mlp / resnet
+        hidden_features: Number of hidden features in network
+        simulation_batch_size: Batch size for simulator
+        training_batch_size: Batch size for training network
+        num_atoms: Number of atoms, -1 means same as `training_batch_size`
+        automatic_transforms_enabled: Whether to enable automatic transforms
+        mcmc_method: MCMC method
+        mcmc_parameters: MCMC parameters
+        z_score_x: Whether to z-score x
+        z_score_theta: Whether to z-score theta
+        variant: Can be used to switch between SNRE-A (AALR) and -B (SRE)
+        max_num_epochs: Maximum number of epochs
+"""
+
+
+def build_posterior(
     task: Task,
     num_samples: int,
     num_simulations: int,
@@ -40,28 +66,12 @@
     z_score_theta: bool = True,
     variant: str = "B",
     max_num_epochs: Optional[int] = None,
-) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
-    """Runs (S)NRE from `sbi`
-
-    Args:
-        task: Task instance
-        num_samples: Number of samples to generate from posterior
-        num_observation: Observation number to load, alternative to `observation`
-        observation: Observation, alternative to `num_observation`
-        num_simulations: Simulation budget
-        num_rounds: Number of rounds
-        neural_net: Neural network to use, one of linear / mlp / resnet
-        hidden_features: Number of hidden features in network
-        simulation_batch_size: Batch size for simulator
-        training_batch_size: Batch size for training network
-        num_atoms: Number of atoms, -1 means same as `training_batch_size`
-        automatic_transforms_enabled: Whether to enable automatic transforms
-        mcmc_method: MCMC method
-        mcmc_parameters: MCMC parameters
-        z_score_x: Whether to z-score x
-        z_score_theta: Whether to z-score theta
-        variant: Can be used to switch between SNRE-A (AALR) and -B (SRE)
-        max_num_epochs: Maximum number of epochs
+):
+    f"""
+    Builds the inferred posterior object.
+    {__DOCSTRING__}
 
     Returns:
-        Samples from posterior, number of simulator calls, log probability of true params if computable
+        Trained posterior
     """
@@ -117,7 +127,9 @@
     posteriors = []
     proposal = prior
     mcmc_parameters["warmup_steps"] = 25
-    mcmc_parameters["enable_transform"] = False  # NOTE: Disable `sbi` auto-transforms, since `sbibm` does its own
+    mcmc_parameters[
+        "enable_transform"
+    ] = False  # NOTE: Disable `sbi` auto-transforms, since `sbibm` does its own
 
     for r in range(num_rounds):
         theta, x = inference.simulate_for_sbi(
@@ -152,6 +164,59 @@
 
     assert simulator.num_simulations == num_simulations
 
+    return posterior, None
+
+
+def run(
+    task: Task,
+    num_samples: int,
+    num_simulations: int,
+    num_observation: Optional[int] = None,
+    observation: Optional[torch.Tensor] = None,
+    num_rounds: int = 10,
+    neural_net: str = "resnet",
+    hidden_features: int = 50,
+    simulation_batch_size: int = 1000,
+    training_batch_size: int = 10000,
+    num_atoms: int = 10,
+    automatic_transforms_enabled: bool = True,
+    mcmc_method: str = "slice_np_vectorized",
+    mcmc_parameters: Dict[str, Any] = {
+        "num_chains": 100,
+        "thin": 10,
+        "warmup_steps": 100,
+        "init_strategy": "sir",
+        "sir_batch_size": 1000,
+        "sir_num_batches": 100,
+    },
+    z_score_x: bool = True,
+    z_score_theta: bool = True,
+    variant: str = "B",
+    max_num_epochs: Optional[int] = None,
+    posterior_path: Optional[str] = "",
+) -> Tuple[torch.Tensor, int, Optional[torch.Tensor]]:
+    f"""
+    {__DOCSTRING__}
+    posterior_path: filesystem location at which to store the pickled
+        posterior (if empty or None, the posterior is not saved)
+
+    Returns:
+        Samples from posterior, number of simulator calls, log probability of true params if computable
+    """
+    assert not (num_observation is None and observation is None)
+    assert not (num_observation is not None and observation is not None)
+    inkwargs = {k: v for k, v in locals().items() if "posterior_path" not in k}
+
+    simulator = task.get_simulator(max_calls=num_simulations)
+    log = sbibm.get_logger(__name__)
+
+    posterior, _ = build_posterior(**inkwargs)
+
+    if posterior_path:
+        log.info(f"storing posterior at {posterior_path}")
+        with open(posterior_path, "wb") as ofile:
+            pickle.dump(posterior, ofile)
+
     samples = posterior.sample((num_samples,)).detach()
 
     return samples, simulator.num_simulations, None
diff --git a/tests/algorithms/sbi/test_abc_run_methods.py b/tests/algorithms/sbi/test_abc_run_methods.py
index a0dc861c..835e0cb6 100644
--- a/tests/algorithms/sbi/test_abc_run_methods.py
+++ b/tests/algorithms/sbi/test_abc_run_methods.py
@@ -1,12 +1,55 @@
+import pickle
+import tempfile
+from pathlib import Path
+
 import pytest
 import torch
 
 import sbibm
+from sbibm.algorithms.sbi.mcabc import build_posterior as build_posterior_mcabc
 from sbibm.algorithms.sbi.mcabc import run as run_mcabc
+from sbibm.algorithms.sbi.smcabc import build_posterior as build_posterior_smcabc
 from sbibm.algorithms.sbi.smcabc import run as run_smcabc
 from sbibm.metrics.c2st import c2st
 
 
+@pytest.mark.parametrize("task_name", ("gaussian_linear", "two_moons"))
+@pytest.mark.parametrize(
+    "build_method", (build_posterior_mcabc, build_posterior_smcabc)
+)
+def test_build_posterior_interface(
+    task_name,
+    build_method,
+    num_simulations=100,
+    num_samples=5,
+):
+    task = sbibm.get_task(task_name)
+    nobs = 3
+    post, summary = build_method(
+        task=task,
+        num_simulations=num_simulations,
+        num_observation=nobs,
+        num_samples=num_samples,
+    )
+
+    assert len(summary) > 0
+
+    assert hasattr(post, "sample")
+    assert hasattr(post, "log_prob")
+
+    tp_exp = task.get_true_parameters(num_observation=nobs)
+    tp_obs = task.get_true_parameters(num_observation=nobs + 1)
+
+    assert not torch.allclose(tp_exp, tp_obs)
+
+    logprob_exp = post.log_prob(tp_exp)
+    logprob_obs = post.log_prob(tp_obs)
+
+    assert logprob_exp.shape == logprob_obs.shape
+    assert not torch.allclose(logprob_exp, logprob_obs)
+
+
 @pytest.mark.parametrize("task_name", ("gaussian_linear", "two_moons"))
 @pytest.mark.parametrize("run_method", (run_mcabc, run_smcabc))
 def test_run_posterior_interface(
@@ -26,3 +69,43 @@
 
     # we are not interested in testing for correctness
     assert samples.shape == torch.Size([num_samples, task.dim_parameters])
+
+
+@pytest.mark.parametrize("task_name", ("gaussian_linear",))
+@pytest.mark.parametrize("run_method", (run_mcabc, run_smcabc))
+def test_check_stored_posterior(
+    task_name,
+    run_method,
+    num_simulations=100,
+    num_samples=50,
+):
+    task = sbibm.get_task(task_name)
+
+    th, tfile_ = tempfile.mkstemp(".pkl")
+
+    tfile = Path(tfile_)
+    nobs = 3
+
+    samples, _, _ = run_method(
+        task=task,
+        num_simulations=num_simulations,
+        num_observation=nobs,
+        num_samples=num_samples,
+        posterior_path=tfile_,
+    )
+
+    # we are not interested in testing for correctness
+    assert samples.shape == torch.Size([num_samples, task.dim_parameters])
+    assert tfile.exists()
+    assert tfile.stat().st_size > 0
+
+    # reload the pickled KDEWrapper and draw fresh samples from it
+    with tfile.open("rb") as rfile:
+        kdewrapped = pickle.load(rfile)
+        obs = kdewrapped.sample(samples.shape[0])
+        assert obs.shape == samples.shape
+
+    # fresh draws should not coincide exactly with the returned samples
+    assert not torch.allclose(samples.mean(axis=0), obs.mean(axis=0))
+
+    # clean up after ourselves
+    tfile.unlink()
diff --git a/tests/algorithms/sbi/test_snle_posterior.py b/tests/algorithms/sbi/test_snle_posterior.py
index fe262d79..b3cd8f01 100644
--- a/tests/algorithms/sbi/test_snle_posterior.py
+++ b/tests/algorithms/sbi/test_snle_posterior.py
@@ -1,5 +1,8 @@
+import pickle
+import tempfile
+from pathlib import Path
+
 import pytest
-import torch
 
 import sbibm
 from sbibm.algorithms.sbi.snle import run as run_posterior
@@ -43,5 +46,53 @@
     acc = c2st(predicted, expected)
 
     assert acc > 0.5
-    assert acc < 1.0
+    assert acc <= 1.0
+    assert acc > 0.6
+
+
+@pytest.mark.parametrize("task_name", ("gaussian_linear",))
+def test_stored_nle_posterior(
+    task_name, num_observation=3, num_simulations=2_000, num_samples=100
+):
+    task = sbibm.get_task(task_name)
+    th, tfile_ = tempfile.mkstemp(".pkl")
+    tfile = Path(tfile_)
+
+    predicted, _, _ = run_posterior(
+        task=task,
+        num_observation=num_observation,
+        num_simulations=num_simulations,
+        num_samples=num_samples,
+        num_rounds=1,
+        max_num_epochs=30,
+        posterior_path=tfile_,
+    )
+
+    reference_samples = task.get_reference_posterior_samples(
+        num_observation=num_observation
+    )
+
+    expected = reference_samples[:num_samples, :]
+
+    assert expected.shape == predicted.shape
+
+    acc = c2st(predicted, expected)
+
+    assert acc > 0.5
+    assert acc <= 1.0
+    assert acc > 0.6
+
+    assert tfile.exists()
+    assert tfile.stat().st_size > 0
+
+    # reload the pickled posterior and compare fresh samples to the reference
+    with tfile.open("rb") as rfile:
+        rposterior = pickle.load(rfile)
+        obs = rposterior.sample((num_samples,))
+        assert obs.shape == expected.shape
+        acc = c2st(obs, expected)
+        assert acc > 0.8
+        print(f"reloaded posterior samples versus expected samples, c2st score = {acc}")
+
+    # clean up after ourselves
+    tfile.unlink()
diff --git a/tests/algorithms/sbi/test_snpe_posterior.py b/tests/algorithms/sbi/test_snpe_posterior.py
index eda7f3ea..77346a97 100644
--- a/tests/algorithms/sbi/test_snpe_posterior.py
+++ b/tests/algorithms/sbi/test_snpe_posterior.py
@@ -1,11 +1,58 @@
+import pickle
+import tempfile
+from pathlib import Path
+
 import pytest
 import torch
 
 import sbibm
+from sbibm.algorithms.sbi.snpe import build_posterior
 from sbibm.algorithms.sbi.snpe import run as run_posterior
 from sbibm.metrics.c2st import c2st
 
 
+# a fast test
+@pytest.mark.parametrize(
+    "task_name,num_observation",
+    [
+        (task_name, num_observation)
+        for task_name in [
+            "gaussian_linear",
+            "gaussian_linear_uniform",
+        ]
+        for num_observation in [1, 3]
+    ],
+)
+def test_build_posterior_interface(
+    task_name, num_observation, num_simulations=2_000, num_samples=100
+):
+    task = sbibm.get_task(task_name)
+
+    post, summary = build_posterior(
+        task=task,
+        num_observation=num_observation,
+        num_simulations=num_simulations,
+        num_samples=num_samples,
+        num_rounds=1,
+        neural_net="mdn",
+        max_num_epochs=30,
+    )
+
+    assert hasattr(post, "sample")
+    assert hasattr(post, "log_prob")
+
+    tp_exp = task.get_true_parameters(num_observation=num_observation)
+    tp_obs = task.get_true_parameters(num_observation=num_observation + 1)
+
+    assert not torch.allclose(tp_exp, tp_obs)
+
+    logprob_exp = post.log_prob(tp_exp)
+    logprob_obs = post.log_prob(tp_obs)
+
+    assert logprob_exp.shape == logprob_obs.shape
+    assert not torch.allclose(logprob_exp, logprob_obs)
+
+
 # a fast test
 @pytest.mark.parametrize(
     "task_name,num_observation",
@@ -44,5 +91,53 @@
     acc = c2st(predicted, expected)
 
     assert acc > 0.5
-    assert acc < 1.0
+    assert acc <= 1.0
+    assert acc > 0.6
+
+
+@pytest.mark.parametrize("task_name", ("gaussian_linear",))
+def test_stored_npe_posterior(
+    task_name, num_observation=3, num_simulations=2_000, num_samples=100
+):
+    task = sbibm.get_task(task_name)
+    th, tfile_ = tempfile.mkstemp(".pkl")
+    tfile = Path(tfile_)
+
+    predicted, _, _ = run_posterior(
+        task=task,
+        num_observation=num_observation,
+        num_simulations=num_simulations,
+        num_samples=num_samples,
+        num_rounds=1,
+        max_num_epochs=30,
+        posterior_path=tfile_,
+    )
+
+    reference_samples = task.get_reference_posterior_samples(
+        num_observation=num_observation
+    )
+
+    expected = reference_samples[:num_samples, :]
+
+    assert expected.shape == predicted.shape
+
+    acc = c2st(predicted, expected)
+
+    assert acc > 0.5
+    assert acc <= 1.0
+    assert acc > 0.6
+
+    assert tfile.exists()
+    assert tfile.stat().st_size > 0
+
+    # reload the pickled posterior and compare fresh samples to the reference
+    with tfile.open("rb") as rfile:
+        rposterior = pickle.load(rfile)
+        obs = rposterior.sample((num_samples,))
+        assert obs.shape == expected.shape
+        acc = c2st(obs, expected)
+        assert acc > 0.8
+        print(f"reloaded posterior samples versus expected samples, c2st score = {acc}")
+
+    # clean up after ourselves
+    tfile.unlink()
diff --git a/tests/algorithms/sbi/test_snre_posterior.py b/tests/algorithms/sbi/test_snre_posterior.py
index 44f03c81..c6e2c92e 100644
--- a/tests/algorithms/sbi/test_snre_posterior.py
+++ b/tests/algorithms/sbi/test_snre_posterior.py
@@ -1,5 +1,8 @@
+import pickle
+import tempfile
+from pathlib import Path
+
 import pytest
-import torch
 
 import sbibm
 from sbibm.algorithms.sbi.snre import run as run_posterior
@@ -43,5 +46,53 @@
     acc = c2st(predicted, expected)
 
     assert acc > 0.5
-    assert acc < 1.0
+    assert acc <= 1.0
+    assert acc > 0.6
+
+
+@pytest.mark.parametrize("task_name", ("gaussian_linear",))
+def test_stored_nre_posterior(
+    task_name, num_observation=4, num_simulations=2_000, num_samples=100
+):
+    task = sbibm.get_task(task_name)
+    th, tfile_ = tempfile.mkstemp(".pkl")
+    tfile = Path(tfile_)
+
+    predicted, _, _ = run_posterior(
+        task=task,
+        num_observation=num_observation,
+        num_simulations=num_simulations,
+        num_samples=num_samples,
+        num_rounds=1,
+        max_num_epochs=30,
+        posterior_path=tfile_,
+    )
+
+    reference_samples = task.get_reference_posterior_samples(
+        num_observation=num_observation
+    )
+
+    expected = reference_samples[:num_samples, :]
+
+    assert expected.shape == predicted.shape
+
+    acc = c2st(predicted, expected)
+
+    assert acc > 0.5
+    assert acc <= 1.0
+    assert acc > 0.6
+
+    assert tfile.exists()
+    assert tfile.stat().st_size > 0
+
+    # reload the pickled posterior and compare fresh samples to the reference
+    with tfile.open("rb") as rfile:
+        rposterior = pickle.load(rfile)
+        obs = rposterior.sample((num_samples,))
+        assert obs.shape == expected.shape
+        acc = c2st(obs, expected)
+        assert acc > 0.8
+        print(f"reloaded posterior samples versus expected samples, c2st score = {acc}")
+
+    # clean up after ourselves
+    tfile.unlink()