diff --git a/sbi/inference/posteriors/base_posterior.py b/sbi/inference/posteriors/base_posterior.py index d85b560f8..97ef95087 100644 --- a/sbi/inference/posteriors/base_posterior.py +++ b/sbi/inference/posteriors/base_posterior.py @@ -29,6 +29,7 @@ check_dist_class, check_warn_and_setstate, optimize_potential_fn, + rejection_sample, ) from sbi.utils.torchutils import ( BoxUniform, @@ -66,7 +67,7 @@ def __init__( neural_net: A classifier for SNRE, a density estimator for SNPE and SNL. prior: Prior distribution with `.log_prob()` and `.sample()`. x_shape: Shape of the simulator data. - sample_with: Method to use for sampling from the posterior. Must be in + sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy @@ -80,13 +81,15 @@ def __init__( will draw init locations from prior, whereas `sir` will use Sequential- Importance-Resampling. Init strategies may have their own keywords which can also be set from `mcmc_parameters`. - rejection_sampling_parameters: Dictionary overriding the default parameters for - rejection sampling. The following parameters are supported: - `proposal`, as the proposal distribtution. `num_samples_to_find_max` - as the number of samples that are used to find the maximum of the - `potential_fn / proposal` ratio. `m` as multiplier to that ratio. - `sampling_batch_size` as the batchsize of samples being drawn from - the proposal at every iteration. + rejection_sampling_parameters: Dictionary overriding the default parameters + for rejection sampling. The following parameters are supported: + `proposal` as the proposal distribution. + `max_sampling_batch_size` as the batchsize of samples being drawn from + the proposal at every iteration. `num_samples_to_find_max` as the + number of samples that are used to find the maximum of the + `potential_fn / proposal` ratio. `num_iter_to_find_max` as the number + of gradient ascent iterations to find the maximum of that ratio. `m` as + multiplier to that ratio. device: Training device, e.g., cpu or cuda. """ if method_family in ("snpe", "snle", "snre_a", "snre_b"): @@ -152,7 +155,7 @@ def set_default_x(self, x: Tensor) -> "NeuralPosterior": return self @property - def sample_with(self) -> bool: + def sample_with(self) -> str: """ - Return `True` if NeuralPosterior instance should use MCMC in `.sample()`. + Return the method with which this `NeuralPosterior` samples, `mcmc` or `rejection`. """ @@ -164,7 +167,7 @@ def sample_with(self, value: str) -> None: self.set_sample_with(value) def set_sample_with(self, sample_with: str) -> "NeuralPosterior": - """Turns MCMC sampling on or off and returns `NeuralPosterior`. + """Set the sampling method for the `NeuralPosterior`. Args: sample_with: The method to sample with. @@ -176,6 +179,10 @@ def set_sample_with(self, sample_with: str) -> "NeuralPosterior": ValueError: on attempt to turn off MCMC sampling for family of methods that do not support rejection sampling. """ + if sample_with not in ("mcmc", "rejection"): + raise NameError( + "The only implemented sampling methods are `mcmc` and `rejection`."
+ ) self._sample_with = sample_with return self @@ -235,7 +242,7 @@ def set_mcmc_parameters(self, parameters: Dict[str, Any]) -> "NeuralPosterior": @property def rejection_sampling_parameters(self) -> dict: - """Returns rejection sampling parameter.""" + """Returns rejection sampling parameters.""" if self._rejection_sampling_parameters is None: return {} else: @@ -254,9 +261,11 @@ def set_rejection_sampling_parameters( Args: parameters: Dictonary overriding the default parameters for rejection sampling. The following parameters are supported: - `max_sampling_batch_size` to the set the batch size for drawing new - samples from the candidate distribution, e.g., the posterior. Larger - batch size speeds up sampling. + `proposal` as the proposal distribtution. `num_samples_to_find_max` + as the number of samples that are used to find the maximum of the + `potential_fn / proposal` ratio. `m` as multiplier to that ratio. + `sampling_batch_size` as the batchsize of samples being drawn from + the proposal at every iteration. Returns: `NeuralPosterior for chainable calls. @@ -357,7 +366,7 @@ def _prepare_for_sample( sample_shape: Optional[Tensor], ) -> Tuple[Tensor, int]: r""" - Return checked and correctly shaped values for `x` and `sample_shape`. + Return checked, reshaped, potentially default values for `x` and `sample_shape`. Args: sample_shape: Desired shape of samples that are drawn from posterior. If @@ -367,8 +376,8 @@ def _prepare_for_sample( fall back onto `x_o` if previously provided for multiround training, or to a set default (see `set_default_x()` method). - Returns: Single (potentially default) $x$ with batch dimension; an integer - number of samples + Returns: Single (default) $x$ with batch dimension; an integer number of + samples. """ x = atleast_2d_float32_tensor(self._x_else_default_x(x)) @@ -397,7 +406,7 @@ def _potentially_replace_mcmc_parameters( Importance-Resampling using `init_strategy_num_candidates` to find init locations. - Returns: A (potentially default) mcmc method and (potentially + Returns: A (default) mcmc method and (potentially default) mcmc parameters. """ mcmc_method = mcmc_method if mcmc_method is not None else self.mcmc_method @@ -413,11 +422,13 @@ def _potentially_replace_rejection_parameters( Return potentially default values to rejection sample the posterior. Args: - rejection_sampling_parameters: Dictionary overriding the default - parameters for rejection sampling. The following parameters are - supported: `m` as multiplier to the maximum ratio between - potential function and the proposal. `proposal`, as the proposal - distribtution. + rejection_sampling_parameters: Dictionary overriding the default + parameters for rejection sampling. The following parameters are + supported: `proposal` as the proposal distribtution. + `num_samples_to_find_max` as the number of samples that are used to + find the maximum of the `potential_fn / proposal` ratio. `m` as + multiplier to that ratio. `sampling_batch_size` as the batchsize of + samples being drawn from the proposal at every iteration. Returns: Potentially default rejection sampling parameters. 
""" @@ -624,9 +635,11 @@ def sample_conditional( condition: Tensor, dims_to_sample: List[int], x: Optional[Tensor] = None, + sample_with: str = "mcmc", show_progress_bars: bool = True, mcmc_method: Optional[str] = None, mcmc_parameters: Optional[Dict[str, Any]] = None, + rejection_sampling_parameters: Optional[Dict[str, Any]] = None, ) -> Tensor: r""" Return samples from conditional posterior $p(\theta_i|\theta_j, x)$. @@ -635,7 +648,7 @@ def sample_conditional( from a few parameter dimensions while the other parameter dimensions are kept fixed at values specified in `condition`. - Samples are obtained with MCMC. + Samples are obtained with MCMC or rejection sampling. Args: potential_fn_provider: Returns the potential function for the unconditional @@ -653,6 +666,9 @@ def sample_conditional( x: Conditioning context for posterior $p(\theta|x)$. If not provided, fall back onto `x_o` if previously provided for multiround training, or to a set default (see `set_default_x()` method). + sample_with: Method to use for sampling from the posterior. Must be one of + [`mcmc` | `rejection`]. In this method, the value of + `self.sample_with` will be ignored. show_progress_bars: Whether to show sampling progress monitor. mcmc_method: Optional parameter to override `self.mcmc_method`. mcmc_parameters: Dictionary overriding the default parameters for MCMC. @@ -663,38 +679,71 @@ def sample_conditional( will draw init locations from prior, whereas `sir` will use Sequential- Importance-Resampling using `init_strategy_num_candidates` to find init locations. + rejection_sampling_parameters: Dictionary overriding the default parameters + for rejection sampling. The following parameters are supported: + `proposal` as the proposal distribtution (default is the prior). + `max_sampling_batch_size` as the batchsize of samples being drawn from + the proposal at every iteration. `num_samples_to_find_max` as the + number of samples that are used to find the maximum of the + `potential_fn / proposal` ratio. `num_iter_to_find_max` as the number + of gradient ascent iterations to find the maximum of that ratio. `m` as + multiplier to that ratio. Returns: Samples from conditional posterior. """ - x, num_samples, mcmc_method, mcmc_parameters = self._prepare_for_sample( - x, sample_shape, mcmc_method, mcmc_parameters - ) - self.net.eval() + x, num_samples = self._prepare_for_sample(x, sample_shape) + cond_potential_fn_provider = ConditionalPotentialFunctionProvider( potential_fn_provider, condition, dims_to_sample ) - samples = self._sample_posterior_mcmc( - num_samples=num_samples, - potential_fn=cond_potential_fn_provider( - self._prior, self.net, x, mcmc_method - ), - init_fn=self._build_mcmc_init_fn( - # Restrict prior to sample only free dimensions. - RestrictedPriorForConditional(self._prior, dims_to_sample), - cond_potential_fn_provider(self._prior, self.net, x, "slice_np"), + if sample_with == "mcmc": + mcmc_method, mcmc_parameters = self._potentially_replace_mcmc_parameters( + mcmc_method, mcmc_parameters + ) + samples = self._sample_posterior_mcmc( + num_samples=num_samples, + potential_fn=cond_potential_fn_provider( + self._prior, self.net, x, mcmc_method + ), + init_fn=self._build_mcmc_init_fn( + # Restrict prior to sample only free dimensions. 
+ RestrictedPriorForConditional(self._prior, dims_to_sample), + cond_potential_fn_provider(self._prior, self.net, x, "slice_np"), + **mcmc_parameters, + ), + mcmc_method=mcmc_method, + condition=condition, + dims_to_sample=dims_to_sample, + show_progress_bars=show_progress_bars, **mcmc_parameters, - ), - mcmc_method=mcmc_method, - condition=condition, - dims_to_sample=dims_to_sample, - show_progress_bars=show_progress_bars, - **mcmc_parameters, - ) + ) + elif sample_with == "rejection": + rejection_sampling_parameters = ( + self._potentially_replace_rejection_parameters( + rejection_sampling_parameters + ) + ) + if "proposal" not in rejection_sampling_parameters: + rejection_sampling_parameters[ + "proposal" + ] = RestrictedPriorForConditional(self._prior, dims_to_sample) + + samples, _ = rejection_sample( + potential_fn=cond_potential_fn_provider( + self._prior, self.net, x, "rejection" + ), + num_samples=num_samples, + **rejection_sampling_parameters, + ) + else: + raise NameError( + "The only implemented sampling methods are `mcmc` and `rejection`." + ) self.net.train(True) @@ -1071,18 +1120,43 @@ def __init__( self.condition = ensure_theta_batched(condition) self.dims_to_sample = dims_to_sample - def __call__(self, prior, net: nn.Module, x: Tensor, mcmc_method: str) -> Callable: + def __call__(self, prior, net: nn.Module, x: Tensor, method: str) -> Callable: """Return potential function. - Switch on numpy or pyro potential function based on `mcmc_method`. + Switch on numpy or pyro potential function based on `method`. """ # Set prior, net, and x as attributes of unconditional potential_fn_provider. - _ = self.potential_fn_provider.__call__(prior, net, x, mcmc_method) + _ = self.potential_fn_provider.__call__(prior, net, x, method) - if mcmc_method in ("slice", "hmc", "nuts"): + if method in ("slice", "hmc", "nuts"): return self.pyro_potential - else: + elif "slice_np" in method: return self.np_potential + elif method == "rejection": + return self.rejection_potential + else: + NotImplementedError + + def rejection_potential(self, theta: np.ndarray) -> ScalarFloat: + r""" + Return conditional posterior log-probability or $-\infty$ if outside prior. + + The only differences to the `np_potential` is that it tracks the gradients and + does not return a `numpy` array. + + Args: + theta: Free parameters $\theta_i$, batch dimension 1. + + Returns: + Conditional posterior log-probability $\log(p(\theta_i|\theta_j, x))$, + masked outside of prior. 
+ """ + theta = torch.as_tensor(theta, dtype=torch.float32) + + theta_condition = deepcopy(self.condition) + theta_condition[:, self.dims_to_sample] = theta + + return self.potential_fn_provider.rejection_potential(theta_condition) def np_potential(self, theta: np.ndarray) -> ScalarFloat: r""" diff --git a/sbi/inference/posteriors/direct_posterior.py b/sbi/inference/posteriors/direct_posterior.py index 642d61b5a..932659abf 100644 --- a/sbi/inference/posteriors/direct_posterior.py +++ b/sbi/inference/posteriors/direct_posterior.py @@ -12,7 +12,7 @@ from sbi import utils as utils from sbi.inference.posteriors.base_posterior import NeuralPosterior from sbi.types import ScalarFloat, Shape -from sbi.utils import del_entries, within_support +from sbi.utils import del_entries, rejection_sample, within_support from sbi.utils.torchutils import ( atleast_2d, batched_first_of_batch, @@ -42,10 +42,10 @@ def __init__( neural_net: nn.Module, prior, x_shape: torch.Size, - rejection_sampling_parameters: Optional[Dict[str, Any]] = None, - sample_with_mcmc: bool = True, + sample_with: str = "rejection", mcmc_method: str = "slice_np", mcmc_parameters: Optional[Dict[str, Any]] = None, + rejection_sampling_parameters: Optional[Dict[str, Any]] = None, device: str = "cpu", ): """ @@ -54,14 +54,10 @@ def __init__( neural_net: A classifier for SNRE, a density estimator for SNPE and SNL. prior: Prior distribution with `.log_prob()` and `.sample()`. x_shape: Shape of a single simulator output. - rejection_sampling_parameters: Dictonary overriding the default parameters - for rejection sampling. The following parameters are supported: - `max_sampling_batch_size` to set the batch size for drawing new - samples from the candidate distribution, e.g., the posterior. Larger - batch size speeds up sampling. - sample_with_mcmc: Whether to sample with MCMC. Will always be `True` for SRE - and SNL, but can also be set to `True` for SNPE if MCMC is preferred to - deal with leakage over rejection sampling. + sample_with: Method to use for sampling from the posterior. Must be one of + [`mcmc` | `rejection`]. With default parameters, `rejection` samples + from the posterior estimated by the neural net and rejects only if the + samples are outside of the prior support. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for @@ -74,45 +70,51 @@ def __init__( will draw init locations from prior, whereas `sir` will use Sequential- Importance-Resampling using `init_strategy_num_candidates` to find init locations. + rejection_sampling_parameters: Dictionary overriding the default parameters + for rejection sampling. The following parameters are supported: + `proposal` as the proposal distribtution (default is the trained + neural net). `max_sampling_batch_size` as the batchsize of samples + being drawn from the proposal at every iteration. + `num_samples_to_find_max` as the number of samples that are used to + find the maximum of the `potential_fn / proposal` ratio. + `num_iter_to_find_max` as the number of gradient ascent iterations to + find the maximum of that ratio. `m` as multiplier to that ratio. 
device: Training device, e.g., cpu or cuda:0 """ kwargs = del_entries( locals(), - entries=( - "self", - "__class__", - "sample_with_mcmc", - "rejection_sampling_parameters", - ), + entries=("self", "__class__"), ) super().__init__(**kwargs) - self.set_sample_with_mcmc(sample_with_mcmc) - self.set_rejection_sampling_parameters(rejection_sampling_parameters) self._purpose = ( "It allows to .sample() and .log_prob() the posterior and wraps the " "output of the .net to avoid leakage into regions with 0 prior probability." ) @property - def sample_with_mcmc(self) -> bool: + def _sample_with_mcmc(self) -> bool: """ - Deprecated, will be removed. + Deprecated, will be removed in future versions of `sbi`. + Return `True` if NeuralPosterior instance should use MCMC in `.sample()`. """ - warn("Deprecated") return self._sample_with_mcmc - @sample_with_mcmc.setter - def sample_with_mcmc(self, value: bool) -> None: - """See `set_sample_with_mcmc`.""" - warn("Deprecated") - # XXX call `.sample_with("mcmc")` - self.set_sample_with_mcmc(value) + @_sample_with_mcmc.setter + def _sample_with_mcmc(self, value: bool) -> None: + """ + Deprecated, will be removed in future versions of `sbi`. - def set_sample_with_mcmc(self, use_mcmc: bool) -> "NeuralPosterior": - """Turns MCMC sampling on or off and returns `NeuralPosterior`. + See `set_sample_with_mcmc`.""" + self._set_sample_with_mcmc(value) + + def _set_sample_with_mcmc(self, use_mcmc: bool) -> "NeuralPosterior": + """ + Deprecated, will be removed in future versions of `sbi`. + + Turns MCMC sampling on or off and returns `NeuralPosterior`. Args: use_mcmc: Flag to set whether or not MCMC sampling is used. @@ -124,7 +126,15 @@ def set_sample_with_mcmc(self, use_mcmc: bool) -> "NeuralPosterior": ValueError: on attempt to turn off MCMC sampling for family of methods that do not support rejection sampling. """ - warn("Deprecated") + warn( + f"You set `sample_with_mcmc={use_mcmc}`. This is deprecated " + "since `sbi v0.17.0` and will lead to an error in future versions. " + "Please use `sample_with='mcmc'` instead." + ) + if use_mcmc: + self.set_sample_with("mcmc") + else: + self.set_sample_with("rejection") self._sample_with_mcmc = use_mcmc return self @@ -162,7 +172,6 @@ def log_prob( Returns: `(len(θ),)`-shaped log posterior probability $\log p(\theta|x)$ for θ in the support of the prior, -∞ (corresponding to 0 probability) outside. - """ # TODO Train exited here, entered after sampling? @@ -234,12 +243,13 @@ def leakage_correction( """ def acceptance_at(x: Tensor) -> Tensor: - return utils.sample_posterior_within_prior( - self.net, - self._prior, - x.to(self._device), - num_rejection_samples, - show_progress_bars, + + return utils.rejection_sample_posterior_within_prior( + posterior_nn=self.net, + prior=self._prior, + x=x.to(self._device), + num_samples=num_rejection_samples, + show_progress_bars=show_progress_bars, sample_for_correction_factor=True, max_sampling_batch_size=rejection_sampling_batch_size, )[1] @@ -263,10 +273,11 @@ def sample( sample_shape: Shape = torch.Size(), x: Optional[Tensor] = None, show_progress_bars: bool = True, - sample_with_mcmc: Optional[bool] = None, + sample_with: Optional[str] = None, mcmc_method: Optional[str] = None, mcmc_parameters: Optional[Dict[str, Any]] = None, rejection_sampling_parameters: Optional[Dict[str, Any]] = None, + sample_with_mcmc: Optional[bool] = None, ) -> Tensor: r""" Return samples from posterior distribution $p(\theta|x)$. 
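# --- Illustrative usage of the reworked `DirectPosterior.sample()` API above (editor's
# sketch, not part of the patch; `posterior` stands for a trained `DirectPosterior`,
# `x_o` for an observation):
samples = posterior.sample((1_000,), x=x_o, sample_with="rejection")
# MCMC can still be requested per call:
samples = posterior.sample((1_000,), x=x_o, sample_with="mcmc", mcmc_method="slice_np")
# The deprecated flag keeps working for now, but warns and is mapped onto `sample_with`:
samples = posterior.sample((1_000,), x=x_o, sample_with_mcmc=True)  # same as sample_with="mcmc"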
@@ -283,7 +294,10 @@ def sample( x: Conditioning context for posterior $p(\theta|x)$. If not provided, fall back onto `x` passed to `set_default_x()`. show_progress_bars: Whether to show sampling progress monitor. - sample_with_mcmc: Optional parameter to override `self.sample_with_mcmc`. + sample_with: Method to use for sampling from the posterior. Must be one of + [`mcmc` | `rejection`]. With default parameters, `rejection` samples + from the posterior estimated by the neural net and rejects only if the + samples are outside of the prior support. mcmc_method: Optional parameter to override `self.mcmc_method`. mcmc_parameters: Dictionary overriding the default parameters for MCMC. The following parameters are supported: `thin` to set the thinning @@ -295,25 +309,40 @@ def sample( locations. rejection_sampling_parameters: Dictionary overriding the default parameters for rejection sampling. The following parameters are supported: - `max_sampling_batch_size` to set the batch size for drawing new - samples from the candidate distribution, e.g., the posterior. Larger - batch size speeds up sampling. + `proposal` as the proposal distribtution (default is the trained + neural net). `max_sampling_batch_size` as the batchsize of samples + being drawn from the proposal at every iteration. + `num_samples_to_find_max` as the number of samples that are used to + find the maximum of the `potential_fn / proposal` ratio. + `num_iter_to_find_max` as the number of gradient ascent iterations to + find the maximum of that ratio. `m` as multiplier to that ratio. + sample_with_mcmc: Deprecated since `sbi v0.17.0`. Use `sample_with=mcmc` + instead. + Returns: Samples from posterior. """ - x, num_samples, mcmc_method, mcmc_parameters = self._prepare_for_sample( - x, sample_shape, mcmc_method, mcmc_parameters - ) - - sample_with_mcmc = ( - sample_with_mcmc if sample_with_mcmc is not None else self.sample_with_mcmc - ) + if sample_with_mcmc is not None: + warn( + f"You set `sample_with_mcmc={sample_with_mcmc}`. This is deprecated " + "since `sbi v0.17.0` and will lead to an error in future versions. " + "Please use `sample_with='mcmc'` instead." + ) + if sample_with_mcmc: + sample_with = "mcmc" self.net.eval() - if sample_with_mcmc: - potential_fn_provider = PotentialFunctionProvider() + sample_with = sample_with if sample_with is not None else self._sample_with + + x, num_samples = self._prepare_for_sample(x, sample_shape) + + potential_fn_provider = PotentialFunctionProvider() + if sample_with == "mcmc": + mcmc_method, mcmc_parameters = self._potentially_replace_mcmc_parameters( + mcmc_method, mcmc_parameters + ) samples = self._sample_posterior_mcmc( num_samples=num_samples, potential_fn=potential_fn_provider( @@ -328,17 +357,42 @@ def sample( show_progress_bars=show_progress_bars, **mcmc_parameters, ) + elif sample_with == "rejection": + rejection_sampling_parameters = ( + self._potentially_replace_rejection_parameters( + rejection_sampling_parameters + ) + ) + if "proposal" not in rejection_sampling_parameters: + assert ( + not self.net.training + ), "Posterior nn must be in eval mode for sampling." + + # If the user does not explictly pass a `proposal`, we sample from the + # neural net estimating the posterior and reject only those samples + # that are outside of the prior support. This can be considered as + # rejection sampling with a very good proposal. 
+ samples = utils.rejection_sample_posterior_within_prior( + posterior_nn=self.net, + prior=self._prior, + x=x.to(self._device), + num_samples=num_samples, + show_progress_bars=show_progress_bars, + sample_for_correction_factor=True, + **rejection_sampling_parameters, + )[0] + else: + samples, _ = rejection_sample( + potential_fn=potential_fn_provider( + self._prior, self.net, x, "rejection" + ), + num_samples=num_samples, + show_progress_bars=show_progress_bars, + **rejection_sampling_parameters, + ) else: - # Rejection sampling. - samples, _ = utils.sample_posterior_within_prior( - self.net, - self._prior, - x, - num_samples=num_samples, - show_progress_bars=show_progress_bars, - **rejection_sampling_parameters - if (rejection_sampling_parameters is not None) - else self.rejection_sampling_parameters, + raise NameError( + "The only implemented sampling methods are `mcmc` and `rejection`." ) self.net.train(True) @@ -351,9 +405,11 @@ def sample_conditional( condition: Tensor, dims_to_sample: List[int], x: Optional[Tensor] = None, + sample_with: str = "mcmc", show_progress_bars: bool = True, mcmc_method: Optional[str] = None, mcmc_parameters: Optional[Dict[str, Any]] = None, + rejection_sampling_parameters: Optional[Dict[str, Any]] = None, ) -> Tensor: r""" Return samples from conditional posterior $p(\theta_i|\theta_j, x)$. @@ -377,6 +433,9 @@ def sample_conditional( `condition`. x: Conditioning context for posterior $p(\theta|x)$. If not provided, fall back onto `x` passed to `set_default_x()`. + sample_with: Method to use for sampling from the posterior. Must be one of + [`mcmc` | `rejection`]. In this method, the value of + `self.sample_with` will be ignored. show_progress_bars: Whether to show sampling progress monitor. mcmc_method: Optional parameter to override `self.mcmc_method`. mcmc_parameters: Dictionary overriding the default parameters for MCMC. @@ -387,6 +446,15 @@ def sample_conditional( will draw init locations from prior, whereas `sir` will use Sequential- Importance-Resampling using `init_strategy_num_candidates` to find init locations. + rejection_sampling_parameters: Dictionary overriding the default parameters + for rejection sampling. The following parameters are supported: + `proposal` as the proposal distribtution (default is the prior). + `max_sampling_batch_size` as the batchsize of samples being drawn from + the proposal at every iteration. `num_samples_to_find_max` as the + number of samples that are used to find the maximum of the + `potential_fn / proposal` ratio. `num_iter_to_find_max` as the number + of gradient ascent iterations to find the maximum of that ratio. `m` as + multiplier to that ratio. Returns: Samples from conditional posterior. @@ -398,9 +466,11 @@ def sample_conditional( condition, dims_to_sample, x, + sample_with, show_progress_bars, mcmc_method, mcmc_parameters, + rejection_sampling_parameters, ) def map( @@ -465,52 +535,6 @@ def map( log_prob_kwargs={"norm_posterior": False}, ) - @torch.no_grad() - def sample_posterior_within_prior( - self, - posterior_nn: nn.Module, - prior, - x: Tensor, - num_samples: int = 1, - show_progress_bars: bool = False, - warn_acceptance: float = 0.01, - sample_for_correction_factor: bool = False, - max_sampling_batch_size: int = 10_000, - ) -> Tuple[Tensor, Tensor]: - - assert ( - not posterior_nn.training - ), "Posterior nn must be in eval mode for sampling." 
- - def potential_fn(theta): - are_within_prior = within_support(prior, theta) - probs = posterior_nn.log_prob(theta, context=x) - probs[~are_within_prior] = float("-inf") - return probs - - class Proposal: - def __init__(self, posterior_nn: Any): - self.posterior_nn = posterior_nn - - def sample(self, sample_shape, **kwargs): - return self.posterior_nn.sample(sample_shape.numel(), **kwargs) - - def log_prob(self, theta, **kwargs): - return self.posterior_nn.log_prob(theta, context=x) - - proposal = Proposal(posterior_nn) - - samples = rejection_sample_raw( - potential_fn=potential_fn, - proposal=proposal, - num_samples=num_samples, - show_progress_bars=show_progress_bars, - warn_acceptance=warn_acceptance, - sample_for_correction_factor=sample_for_correction_factor, - max_sampling_batch_size=max_sampling_batch_size, - ) - return samples - class PotentialFunctionProvider: """ @@ -536,25 +560,31 @@ def __call__( prior, posterior_nn: nn.Module, x: Tensor, - mcmc_method: str, + method: str, ) -> Callable: """Return potential function. - Switch on numpy or pyro potential function based on `mcmc_method`. + Switch on numpy or pyro potential function based on `method`. """ self.posterior_nn = posterior_nn self.prior = prior self.device = next(posterior_nn.parameters()).device self.x = atleast_2d(x).to(self.device) - if mcmc_method == "slice": + if method == "slice": return partial(self.pyro_potential, track_gradients=False) - elif mcmc_method in ("hmc", "nuts"): + elif method in ("hmc", "nuts"): return partial(self.pyro_potential, track_gradients=True) + elif "slice_np" in method: + return partial(self.posterior_potential, track_gradients=False) + elif method == "rejection": + return partial(self.posterior_potential, track_gradients=True) else: - return self.np_potential + NotImplementedError - def np_potential(self, theta: np.ndarray) -> ScalarFloat: + def posterior_potential( + self, theta: np.ndarray, track_gradients: bool = False + ) -> ScalarFloat: r"""Return posterior theta log prob. $p(\theta|x)$, $-\infty$ if outside prior." Args: @@ -570,7 +600,7 @@ def np_potential(self, theta: np.ndarray) -> ScalarFloat: # Repeat x over batch dim to match theta batch, accounting for multi-D x. x_repeated = self.x.repeat(num_batch, *(1 for _ in range(self.x.ndim - 1))) - with torch.set_grad_enabled(False): + with torch.set_grad_enabled(track_gradients): target_log_prob = self.posterior_nn.log_prob( inputs=theta.to(self.device), context=x_repeated, @@ -595,7 +625,7 @@ def pyro_potential( theta = next(iter(theta.values())) with torch.set_grad_enabled(track_gradients): - # Notice opposite sign to numpy. + # Notice opposite sign to `posterior_potential`. # Move theta to device for evaluation. log_prob_posterior = -self.posterior_nn.log_prob( inputs=theta.to(self.device), diff --git a/sbi/inference/posteriors/likelihood_based_posterior.py b/sbi/inference/posteriors/likelihood_based_posterior.py index 6fbd7421f..4797d5b49 100644 --- a/sbi/inference/posteriors/likelihood_based_posterior.py +++ b/sbi/inference/posteriors/likelihood_based_posterior.py @@ -10,9 +10,8 @@ from torch import Tensor, nn from sbi.inference.posteriors.base_posterior import NeuralPosterior -from sbi.mcmc import rejection_sample from sbi.types import Shape -from sbi.utils import del_entries +from sbi.utils import del_entries, optimize_potential_fn, rejection_sample from sbi.utils.torchutils import ScalarFloat, atleast_2d, ensure_theta_batched @@ -49,7 +48,7 @@ def __init__( independent and identically distributed data / trials. 
I.e., the data is assumed to be generated based on the same (unknown) model parameters or experimental condations. - sample_with: Method to use for sampling from the posterior. Must be in + sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy @@ -63,13 +62,15 @@ def __init__( will draw init locations from prior, whereas `sir` will use Sequential- Importance-Resampling using `init_strategy_num_candidates` to find init locations. - rejection_sampling_parameters: Dictionary overriding the default parameters for - rejection sampling. The following parameters are supported: - `proposal`, as the proposal distribtution. `num_samples_to_find_max` - as the number of samples that are used to find the maximum of the - `potential_fn / proposal` ratio. `m` as multiplier to that ratio. - `sampling_batch_size` as the batchsize of samples being drawn from - the proposal at every iteration. + rejection_sampling_parameters: Dictionary overriding the default parameters + for rejection sampling. The following parameters are supported: + `proposal` as the proposal distribtution (default is the prior). + `max_sampling_batch_size` as the batchsize of samples being drawn from + the proposal at every iteration. `num_samples_to_find_max` as the + number of samples that are used to find the maximum of the + `potential_fn / proposal` ratio. `num_iter_to_find_max` as the number + of gradient ascent iterations to find the maximum of that ratio. `m` as + multiplier to that ratio. device: Training device, e.g., cpu or cuda:0. """ @@ -126,7 +127,7 @@ def sample( sample_shape: Shape = torch.Size(), x: Optional[Tensor] = None, show_progress_bars: bool = True, - sample_with: str = "mcmc", + sample_with: Optional[str] = None, mcmc_method: Optional[str] = None, mcmc_parameters: Optional[Dict[str, Any]] = None, rejection_sampling_parameters: Optional[Dict[str, Any]] = None, @@ -141,7 +142,7 @@ def sample( x: Conditioning context for posterior $p(\theta|x)$. If not provided, fall back onto `x` passed to `set_default_x()`. show_progress_bars: Whether to show sampling progress monitor. - sample_with: Method to use for sampling from the posterior. Must be in + sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection`]. mcmc_method: Optional parameter to override `self.mcmc_method`. mcmc_parameters: Dictionary overriding the default parameters for MCMC. @@ -154,11 +155,13 @@ def sample( locations. rejection_sampling_parameters: Dictionary overriding the default parameters for rejection sampling. The following parameters are supported: - `proposal`, as the proposal distribtution. `num_samples_to_find_max` - as the number of samples that are used to find the maximum of the - `potential_fn / proposal` ratio. `m` as multiplier to that ratio. - `sampling_batch_size` as the batchsize of samples being drawn from - the proposal at every iteration. + `proposal` as the proposal distribtution (default is the prior). + `max_sampling_batch_size` as the batchsize of samples being drawn from + the proposal at every iteration. `num_samples_to_find_max` as the + number of samples that are used to find the maximum of the + `potential_fn / proposal` ratio. `num_iter_to_find_max` as the number + of gradient ascent iterations to find the maximum of that ratio. `m` as + multiplier to that ratio. Returns: Samples from posterior. 
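# --- Editor's sketch (not part of the patch) of overriding the rejection-sampling
# defaults documented above for a likelihood-based (SNLE) posterior; `posterior`,
# `prior`, and `x_o` are assumed to exist, and every key mirrors the docstring:
samples = posterior.sample(
    (500,),
    x=x_o,
    sample_with="rejection",
    rejection_sampling_parameters={
        "proposal": prior,  # the prior is also the default proposal
        "max_sampling_batch_size": 10_000,
        "num_samples_to_find_max": 10_000,
        "num_iter_to_find_max": 100,
        "m": 1.2,
    },
)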
@@ -166,6 +169,7 @@ def sample( self.net.eval() + sample_with = sample_with if sample_with is not None else self._sample_with x, num_samples = self._prepare_for_sample(x, sample_shape) potential_fn_provider = PotentialFunctionProvider() @@ -198,11 +202,11 @@ def sample( if "proposal" not in rejection_sampling_parameters: rejection_sampling_parameters["proposal"] = self._prior - samples = self._sample_posterior_rejection( - num_samples=num_samples, + samples, _ = rejection_sample( potential_fn=potential_fn_provider( self._prior, self.net, x, "rejection" ), + num_samples=num_samples, **rejection_sampling_parameters, ) else: @@ -220,9 +224,11 @@ def sample_conditional( condition: Tensor, dims_to_sample: List[int], x: Optional[Tensor] = None, + sample_with: str = "mcmc", show_progress_bars: bool = True, mcmc_method: Optional[str] = None, mcmc_parameters: Optional[Dict[str, Any]] = None, + rejection_sampling_parameters: Optional[Dict[str, Any]] = None, ) -> Tensor: r""" Return samples from conditional posterior $p(\theta_i|\theta_j, x)$. @@ -246,6 +252,9 @@ def sample_conditional( `condition`. x: Conditioning context for posterior $p(\theta|x)$. If not provided, fall back onto `x` passed to `set_default_x()`. + sample_with: Method to use for sampling from the posterior. Must be one of + [`mcmc` | `rejection`]. In this method, the value of + `self._sample_with` will be ignored. show_progress_bars: Whether to show sampling progress monitor. mcmc_method: Optional parameter to override `self.mcmc_method`. mcmc_parameters: Dictionary overriding the default parameters for MCMC. @@ -256,6 +265,15 @@ def sample_conditional( will draw init locations from prior, whereas `sir` will use Sequential- Importance-Resampling using `init_strategy_num_candidates` to find init locations. + rejection_sampling_parameters: Dictionary overriding the default parameters + for rejection sampling. The following parameters are supported: + `proposal` as the proposal distribtution (default is the prior). + `max_sampling_batch_size` as the batchsize of samples being drawn from + the proposal at every iteration. `num_samples_to_find_max` as the + number of samples that are used to find the maximum of the + `potential_fn / proposal` ratio. `num_iter_to_find_max` as the number + of gradient ascent iterations to find the maximum of that ratio. `m` as + multiplier to that ratio. Returns: Samples from conditional posterior. @@ -267,9 +285,11 @@ def sample_conditional( condition, dims_to_sample, x, + sample_with, show_progress_bars, mcmc_method, mcmc_parameters, + rejection_sampling_parameters, ) def map( @@ -382,95 +402,6 @@ def _log_likelihoods_over_trials( return log_likelihood_trial_sum - def _sample_posterior_rejection( - self, - num_samples: torch.Size, - potential_fn: Any, - proposal: Any, - num_samples_to_find_max: int = 10_000, - m: float = 1.2, - sampling_batch_size: int = 10_000, - ): - r""" - Return samples from a distribution via rejection sampling. - - This function is used in any case by SNLE and SNRE, but can also be used by SNPE - in order to deal with strong leakage. Depending on the inference method, a - different potential function for the rejection sampler is required. - - Args: - num_samples: Desired number of samples. - potential_fn: Potential function used for rejection sampling. - proposal: Proposal distribution for rejection sampling. - num_samples_to_find_max: Number of samples that are used to find the maximum - of the `potential_fn / proposal` ratio. 
- m: Multiplier to the maximum ratio between potential function and the - proposal. A higher value will ensure that the samples are indeed from - the posterior, but will increase the rejection ratio and thus - computation time. - sampling_batch_size: Batchsize of samples being drawn from - the proposal at every iteration. - - Returns: - Tensor of shape (num_samples, shape_of_single_theta). - """ - - find_max = proposal.sample((num_samples_to_find_max,)) - - # Define a potential as the ratio between target distribution and proposal. - def potential_over_proposal(theta): - return torch.squeeze(potential_fn(theta)) - proposal.log_prob(theta) - - # Search for the maximum of the ratio. - _, max_log_ratio = optimize_potential_fn( - potential_fn=potential_over_proposal, - inits=find_max, - dist_specifying_bounds=proposal, - num_iter=100, - learning_rate=0.01, - num_to_optimize=max(1, int(num_samples_to_find_max / 10)), - show_progress_bars=False, - ) - - if m < 1.0: - warn("A value of m < 1.0 will lead to systematically wrong results.") - - class ScaledProposal: - def __init__(self, proposal: Any, max_log_ratio: float, log_m: float): - self.proposal = proposal - self.max_log_ratio = max_log_ratio - self.log_m = log_m - - def sample(self, sample_shape, **kwargs): - return self.proposal.sample((sample_shape,), **kwargs) - - def log_prob(self, theta, **kwargs): - return self.proposal.log_prob(theta) + self.max_log_ratio + self.log_m - - scaled_proposal = ScaledProposal( - proposal, max_log_ratio, torch.log(torch.as_tensor(m)) - ) - - samples, _ = rejection_sample_raw( - potential_fn, scaled_proposal, num_samples=num_samples - ) - return samples - - num_accepted = 0 - all_ = [] - while num_accepted < num_samples: - candidates = proposal.sample((sampling_batch_size,)) - probs = potential_fn(candidates) - target_log_probs = potential_fn(candidates) - proposal_log_probs = proposal.log_prob(candidates) + max_ratio - target_proposal_ratio = exp(target_log_probs - proposal_log_probs) - acceptance = rand(target_proposal_ratio.shape) - accepted = candidates[target_proposal_ratio > acceptance] - num_accepted += accepted.shape[0] - all_.append(accepted) - samples = torch.cat(all_)[:num_samples] - return samples - class PotentialFunctionProvider: """ @@ -500,14 +431,14 @@ def __call__( ) -> Callable: r"""Return potential function for posterior $p(\theta|x)$. - Switch on numpy or pyro potential function based on mcmc_method. + Switch on numpy or pyro potential function based on `method`. Args: prior: Prior distribution that can be evaluated. likelihood_nn: Neural likelihood estimator that can be evaluated. x: Conditioning variable for posterior $p(\theta|x)$. Can be a batch of iid x. - mcmc_method: One of `slice_np`, `slice`, `hmc` or `nuts`. + method: One of `slice_np`, `slice`, `hmc` or `nuts`, `rejection`. Returns: Potential function for sampler. @@ -522,9 +453,9 @@ def __call__( elif method in ("hmc", "nuts"): return partial(self.pyro_potential, track_gradients=True) elif "slice_np" in method: - return self.np_potential + return partial(self.posterior_potential, track_gradients=False) elif method == "rejection": - return self.rejection_potential + return partial(self.posterior_potential, track_gradients=True) else: NotImplementedError @@ -540,25 +471,9 @@ def log_likelihood(self, theta: Tensor, track_gradients: bool = False) -> Tensor return log_likelihoods - def rejection_potential(self, theta: np.array) -> ScalarFloat: - r"""Return posterior log prob. 
of theta $p(\theta|x)$" - - The only difference to the `np_potential` is that it tracks the gradients. - - Args: - theta: Parameters $\theta$, batch dimension 1. - - Returns: - Posterior log probability of the theta, $-\infty$ if impossible under prior. - """ - theta = torch.as_tensor(theta, dtype=torch.float32) - - # Notice opposite sign to pyro potential. - return self.log_likelihood( - theta, track_gradients=True - ).cpu() + self.prior.log_prob(theta) - - def np_potential(self, theta: np.array) -> ScalarFloat: + def posterior_potential( + self, theta: np.array, track_gradients: bool = False + ) -> ScalarFloat: r"""Return posterior log prob. of theta $p(\theta|x)$" Args: @@ -571,7 +486,7 @@ def np_potential(self, theta: np.array) -> ScalarFloat: # Notice opposite sign to pyro potential. return self.log_likelihood( - theta, track_gradients=False + theta, track_gradients=track_gradients ).cpu() + self.prior.log_prob(theta) def pyro_potential( diff --git a/sbi/inference/posteriors/ratio_based_posterior.py b/sbi/inference/posteriors/ratio_based_posterior.py index 4c17a936d..dc99214b8 100644 --- a/sbi/inference/posteriors/ratio_based_posterior.py +++ b/sbi/inference/posteriors/ratio_based_posterior.py @@ -11,7 +11,7 @@ from sbi.inference.posteriors.base_posterior import NeuralPosterior from sbi.types import Shape -from sbi.utils import del_entries +from sbi.utils import del_entries, optimize_potential_fn, rejection_sample from sbi.utils.torchutils import ScalarFloat, atleast_2d, ensure_theta_batched @@ -35,8 +35,10 @@ def __init__( neural_net: nn.Module, prior, x_shape: torch.Size, + sample_with: str = "mcmc", mcmc_method: str = "slice_np", mcmc_parameters: Optional[Dict[str, Any]] = None, + rejection_sampling_parameters: Optional[Dict[str, Any]] = None, device: str = "cpu", ): """ @@ -44,7 +46,14 @@ def __init__( method_family: One of snpe, snl, snre_a or snre_b. neural_net: A classifier for SNRE, a density estimator for SNPE and SNL. prior: Prior distribution with `.log_prob()` and `.sample()`. - x_shape: Shape of a single simulator output. + x_shape: Shape of the simulated data. It can differ from the + observed data the posterior is conditioned on later in the batch + dimension. If it differs, the additional entries are interpreted as + independent and identically distributed data / trials. I.e., the data is + assumed to be generated based on the same (unknown) model parameters or + experimental condations. + sample_with: Method to use for sampling from the posterior. Must be one of + [`mcmc` | `rejection`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for @@ -57,6 +66,15 @@ def __init__( will draw init locations from prior, whereas `sir` will use Sequential- Importance-Resampling using `init_strategy_num_candidates` to find init locations. + rejection_sampling_parameters: Dictionary overriding the default parameters + for rejection sampling. The following parameters are supported: + `proposal` as the proposal distribtution (default is the prior). + `max_sampling_batch_size` as the batchsize of samples being drawn from + the proposal at every iteration. `num_samples_to_find_max` as the + number of samples that are used to find the maximum of the + `potential_fn / proposal` ratio. `num_iter_to_find_max` as the number + of gradient ascent iterations to find the maximum of that ratio. `m` as + multiplier to that ratio. 
device: Training device, e.g., cpu or cuda:0. """ kwargs = del_entries(locals(), entries=("self", "__class__")) @@ -119,10 +137,10 @@ def sample( sample_shape: Shape = torch.Size(), x: Optional[Tensor] = None, show_progress_bars: bool = True, - sample_with: str = "mcmc", + sample_with: Optional[str] = None, mcmc_method: Optional[str] = None, mcmc_parameters: Optional[Dict[str, Any]] = None, - rejection_parameters: Optional[Dict[str, Any]] = None, + rejection_sampling_parameters: Optional[Dict[str, Any]] = None, ) -> Tensor: r""" Return samples from posterior distribution $p(\theta|x)$ with MCMC. @@ -134,8 +152,8 @@ def sample( x: Conditioning context for posterior $p(\theta|x)$. If not provided, fall back onto `x` passed to `set_default_x()`. show_progress_bars: Whether to show sampling progress monitor. - sample_with: Method to use for sampling from the posterior. Must be in - [`mcmc` | `rejection` | `vi`]. + sample_with: Method to use for sampling from the posterior. Must be one of + [`mcmc` | `rejection`]. mcmc_method: Optional parameter to override `self.mcmc_method`. mcmc_parameters: Dictionary overriding the default parameters for MCMC. The following parameters are supported: `thin` to set the thinning @@ -145,10 +163,15 @@ def sample( will draw init locations from prior, whereas `sir` will use Sequential- Importance-Resampling using `init_strategy_num_candidates` to find init locations. - rejection_parameters: Dictionary overriding the default parameters for - rejection sampling. The following parameters are supported: `m` as - multiplier to the maximum ratio between potential function and the - proposal. `proposal`, as the proposal distribtution. + rejection_sampling_parameters: Dictionary overriding the default parameters + for rejection sampling. The following parameters are supported: + `proposal` as the proposal distribtution (default is the prior). + `max_sampling_batch_size` as the batchsize of samples being drawn from + the proposal at every iteration. `num_samples_to_find_max` as the + number of samples that are used to find the maximum of the + `potential_fn / proposal` ratio. `num_iter_to_find_max` as the number + of gradient ascent iterations to find the maximum of that ratio. `m` as + multiplier to that ratio. Returns: Samples from posterior. 
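# --- Editor's sketch of the accept/reject scheme behind `sbi.utils.rejection_sample`
# and the parameters documented above (simplified, not the library implementation;
# `potential_log_prob` and `proposal` are placeholders for a log-potential callable
# and a proposal distribution with `.sample()` / `.log_prob()`):
import torch

def rejection_sample_sketch(potential_log_prob, proposal, num_samples, m=1.2, batch_size=10_000):
    # Crude estimate of max(potential / proposal) from proposal samples; the patch
    # refines this estimate by gradient ascent (`optimize_potential_fn`), controlled
    # by `num_samples_to_find_max` and `num_iter_to_find_max`.
    candidates = proposal.sample((batch_size,))
    max_log_ratio = (potential_log_prob(candidates) - proposal.log_prob(candidates)).max()

    accepted, num_accepted = [], 0
    while num_accepted < num_samples:
        candidates = proposal.sample((batch_size,))
        # Accept with probability potential / (m * exp(max_log_ratio) * proposal).
        log_acceptance = (
            potential_log_prob(candidates)
            - proposal.log_prob(candidates)
            - max_log_ratio
            - torch.log(torch.as_tensor(m))
        )
        keep = torch.rand(batch_size).log() < log_acceptance
        accepted.append(candidates[keep])
        num_accepted += int(keep.sum())
    return torch.cat(accepted)[:num_samples]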
@@ -156,14 +179,19 @@ def sample( self.net.eval() + sample_with = sample_with if sample_with is not None else self._sample_with + x, num_samples = self._prepare_for_sample(x, sample_shape) + potential_fn_provider = PotentialFunctionProvider() if sample_with == "mcmc": - x, num_samples, mcmc_method, mcmc_parameters = self._prepare_for_sample( - x, sample_shape, mcmc_method, mcmc_parameters + mcmc_method, mcmc_parameters = self._potentially_replace_mcmc_parameters( + mcmc_method, mcmc_parameters ) samples = self._sample_posterior_mcmc( num_samples=num_samples, - potential_fn=potential_fn_provider(self._prior, self.net, x, mcmc_method), + potential_fn=potential_fn_provider( + self._prior, self.net, x, mcmc_method + ), init_fn=self._build_mcmc_init_fn( self._prior, potential_fn_provider(self._prior, self.net, x, "slice_np"), @@ -174,16 +202,25 @@ def sample( **mcmc_parameters, ) elif sample_with == "rejection": - samples = rejection_sample( - num_samples=num_samples, + rejection_sampling_parameters = ( + self._potentially_replace_rejection_parameters( + rejection_sampling_parameters + ) + ) + if "proposal" not in rejection_sampling_parameters: + rejection_sampling_parameters["proposal"] = self._prior + + samples, _ = rejection_sample( potential_fn=potential_fn_provider( - self._prior, self.net, x, "slice_np" + self._prior, self.net, x, "rejection" ), - proposal=self._prior, - m=1.0, + num_samples=num_samples, + **rejection_sampling_parameters, ) else: - raise NameError("The only implemented sampling methods are `mcmc` and `rejection`.") + raise NameError( + "The only implemented sampling methods are `mcmc` and `rejection`." + ) self.net.train(True) @@ -195,9 +232,11 @@ def sample_conditional( condition: Tensor, dims_to_sample: List[int], x: Optional[Tensor] = None, + sample_with: str = "mcmc", show_progress_bars: bool = True, mcmc_method: Optional[str] = None, mcmc_parameters: Optional[Dict[str, Any]] = None, + rejection_sampling_parameters: Optional[Dict[str, Any]] = None, ) -> Tensor: r""" Return samples from conditional posterior $p(\theta_i|\theta_j, x)$. @@ -221,6 +260,9 @@ def sample_conditional( `condition`. x: Conditioning context for posterior $p(\theta|x)$. If not provided, fall back onto `x` passed to `set_default_x()`. + sample_with: Method to use for sampling from the posterior. Must be one of + [`mcmc` | `rejection`]. In this method, the value of + `self._sample_with` will be ignored. show_progress_bars: Whether to show sampling progress monitor. mcmc_method: Optional parameter to override `self.mcmc_method`. mcmc_parameters: Dictionary overriding the default parameters for MCMC. @@ -231,6 +273,15 @@ def sample_conditional( will draw init locations from prior, whereas `sir` will use Sequential- Importance-Resampling using `init_strategy_num_candidates` to find init locations. + rejection_sampling_parameters: Dictionary overriding the default parameters + for rejection sampling. The following parameters are supported: + `proposal` as the proposal distribtution (default is the prior). + `max_sampling_batch_size` as the batchsize of samples being drawn from + the proposal at every iteration. `num_samples_to_find_max` as the + number of samples that are used to find the maximum of the + `potential_fn / proposal` ratio. `num_iter_to_find_max` as the number + of gradient ascent iterations to find the maximum of that ratio. `m` as + multiplier to that ratio. Returns: Samples from conditional posterior. 
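# --- Editor's sketch of a conditional call with the new arguments (not part of the
# patch): dimensions 0 and 2 are sampled while dimension 1 stays fixed at the value
# given in `condition`; `posterior` and `x_o` are assumed to exist. When no `proposal`
# is passed, the patch falls back to the prior restricted to the free dimensions.
import torch

condition = torch.as_tensor([[0.0, 1.5, -0.3]])
conditional_samples = posterior.sample_conditional(
    (200,),
    condition=condition,
    dims_to_sample=[0, 2],
    x=x_o,
    sample_with="rejection",
)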
@@ -242,9 +293,11 @@ def sample_conditional( condition, dims_to_sample, x, + sample_with, show_progress_bars, mcmc_method, mcmc_parameters, + rejection_sampling_parameters, ) def map( @@ -277,10 +330,6 @@ def map( fall back onto `x` passed to `set_default_x()`. num_iter: Maximum Number of optimization steps that the algorithm takes to find the MAP. - early_stop_at: If `None`, it will optimize for `max_num_iter` iterations. - If `float`, the optimization will stop as soon as the steps taken by - the optimizer are smaller than `early_stop_at` times the standard - deviation of the initial guesses. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples @@ -400,7 +449,7 @@ class PotentialFunctionProvider: most current trained posterior neural net. Returns: - Potential function for use by either numpy or pyro sampler + Potential function for use by either numpy, pyro, or rejection sampler. """ def __call__( @@ -408,18 +457,18 @@ def __call__( prior, classifier: nn.Module, x: Tensor, - mcmc_method: str, + method: str, ) -> Callable: r"""Return potential function for posterior $p(\theta|x)$. - Switch on numpy or pyro potential function based on `mcmc_method`. + Switch on numpy or pyro potential function based on `method`. Args: prior: Prior distribution that can be evaluated. classifier: Binary classifier approximating the likelihood up to a constant. x: Conditioning variable for posterior $p(\theta|x)$. - mcmc_method: One of `slice_np`, `slice`, `hmc` or `nuts`. + method: One of `slice_np`, `slice`, `hmc` or `nuts`, `rejection`. Returns: Potential function for sampler. @@ -430,17 +479,23 @@ def __call__( self.device = next(classifier.parameters()).device self.x = atleast_2d(x).to(self.device) - if mcmc_method == "slice": + if method == "slice": return partial(self.pyro_potential, track_gradients=False) - elif mcmc_method in ("hmc", "nuts"): + elif method in ("hmc", "nuts"): return partial(self.pyro_potential, track_gradients=True) + elif "slice_np" in method: + return partial(self.posterior_potential, track_gradients=False) + elif method == "rejection": + return partial(self.posterior_potential, track_gradients=True) else: - return self.np_potential + NotImplementedError - def np_potential(self, theta: np.array) -> ScalarFloat: - """Return potential for Numpy slice sampler." + def posterior_potential( + self, theta: np.array, track_gradients: bool = False + ) -> ScalarFloat: + """Returns the unnormalized posterior log-probability. - For numpy MCMC samplers this is the unnormalized posterior log prob. + This is the potential used in the numpy slice sampler and in rejection sampling. Args: theta: Parameters $\theta$, batch dimension 1. @@ -452,7 +507,10 @@ def np_potential(self, theta: np.array) -> ScalarFloat: theta = ensure_theta_batched(theta) log_ratio = RatioBasedPosterior._log_ratios_over_trials( - self.x, theta.to(self.device), self.classifier, track_gradients=False + self.x, + theta.to(self.device), + self.classifier, + track_gradients=track_gradients, ) # Notice opposite sign to pyro potential. diff --git a/sbi/inference/snle/snle_base.py b/sbi/inference/snle/snle_base.py index 7a82570af..bf3f7a168 100644 --- a/sbi/inference/snle/snle_base.py +++ b/sbi/inference/snle/snle_base.py @@ -276,7 +276,7 @@ def build_posterior( Args: density_estimator: The density estimator that the posterior is based on. 
If `None`, use the latest neural density estimator that was trained. - sample_with: Method to use for sampling from the posterior. Must be in + sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy @@ -290,13 +290,15 @@ def build_posterior( draw init locations from prior, whereas `sir` will use Sequential-Importance-Resampling using `init_strategy_num_candidates` to find init locations. - rejection_sampling_parameters: Dictionary overriding the default parameters for - rejection sampling. The following parameters are supported: - `proposal`, as the proposal distribtution. `num_samples_to_find_max` - as the number of samples that are used to find the maximum of the - `potential_fn / proposal` ratio. `m` as multiplier to that ratio. - `sampling_batch_size` as the batchsize of samples being drawn from - the proposal at every iteration. + rejection_sampling_parameters: Dictionary overriding the default parameters + for rejection sampling. The following parameters are supported: + `proposal` as the proposal distribtution (default is the prior). + `max_sampling_batch_size` as the batchsize of samples being drawn from + the proposal at every iteration. `num_samples_to_find_max` as the + number of samples that are used to find the maximum of the + `potential_fn / proposal` ratio. `num_iter_to_find_max` as the number + of gradient ascent iterations to find the maximum of that ratio. `m` as + multiplier to that ratio. Returns: Posterior $p(\theta|x)$ with `.sample()` and `.log_prob()` methods diff --git a/sbi/inference/snpe/snpe_a.py b/sbi/inference/snpe/snpe_a.py index 39ef6f660..54420f9a8 100644 --- a/sbi/inference/snpe/snpe_a.py +++ b/sbi/inference/snpe/snpe_a.py @@ -298,8 +298,8 @@ def build_posterior( neural_net=wrapped_density_estimator, prior=self._prior, x_shape=self._x_shape, + sample_with="rejection", rejection_sampling_parameters=rejection_sampling_parameters, - sample_with_mcmc=False, device=device, ) diff --git a/sbi/inference/snpe/snpe_base.py b/sbi/inference/snpe/snpe_base.py index 98faf6c58..ede638171 100644 --- a/sbi/inference/snpe/snpe_base.py +++ b/sbi/inference/snpe/snpe_base.py @@ -347,10 +347,11 @@ def train( def build_posterior( self, density_estimator: Optional[TorchModule] = None, - rejection_sampling_parameters: Optional[Dict[str, Any]] = None, - sample_with_mcmc: bool = False, + sample_with: str = "rejection", mcmc_method: str = "slice_np", mcmc_parameters: Optional[Dict[str, Any]] = None, + rejection_sampling_parameters: Optional[Dict[str, Any]] = None, + sample_with_mcmc: Optional[bool] = None, ) -> DirectPosterior: r""" Build posterior from the neural density estimator. @@ -367,13 +368,10 @@ def build_posterior( Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. - rejection_sampling_parameters: Dictionary overriding the default parameters - for rejection sampling. The following parameters are supported: - `max_sampling_batch_size` to set the batch size for drawing new - samples from the candidate distribution, e.g., the posterior. Larger - batch size speeds up sampling. - sample_with_mcmc: Whether to sample with MCMC. MCMC can be used to deal - with high leakage. + sample_with: Method to use for sampling from the posterior. Must be one of + [`rejection` | `mcmc`]. 
With default parameters, `rejection` samples + from the posterior estimated by the neural net and rejects only if the + samples are outside of the prior support. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for @@ -386,10 +384,29 @@ def build_posterior( draw init locations from prior, whereas `sir` will use Sequential-Importance-Resampling using `init_strategy_num_candidates` to find init locations. + rejection_sampling_parameters: Dictionary overriding the default parameters + for rejection sampling. The following parameters are supported: + `proposal` as the proposal distribtution (default is the trained + neural net). `max_sampling_batch_size` as the batchsize of samples + being drawn from the proposal at every iteration. + `num_samples_to_find_max` as the number of samples that are used to + find the maximum of the `potential_fn / proposal` ratio. + `num_iter_to_find_max` as the number of gradient ascent iterations to + find the maximum of that ratio. `m` as multiplier to that ratio. + sample_with_mcmc: Deprecated since `sbi v0.16.0`. Use `sample_with=mcmc` + instead. Returns: Posterior $p(\theta|x)$ with `.sample()` and `.log_prob()` methods. """ + if sample_with_mcmc is not None: + warn( + f"You set `sample_with_mcmc={sample_with_mcmc}`. This is deprecated " + "since `sbi v0.17.0` and will lead to an error in future versions. " + "Please use `sample_with='mcmc'` instead." + ) + if sample_with_mcmc: + sample_with = "mcmc" if density_estimator is None: density_estimator = self._neural_net @@ -404,10 +421,10 @@ def build_posterior( neural_net=density_estimator, prior=self._prior, x_shape=self._x_shape, - rejection_sampling_parameters=rejection_sampling_parameters, - sample_with_mcmc=sample_with_mcmc, + sample_with=sample_with, mcmc_method=mcmc_method, mcmc_parameters=mcmc_parameters, + rejection_sampling_parameters=rejection_sampling_parameters, device=device, ) diff --git a/sbi/inference/snre/snre_base.py b/sbi/inference/snre/snre_base.py index 40f0f317b..7e4818d63 100644 --- a/sbi/inference/snre/snre_base.py +++ b/sbi/inference/snre/snre_base.py @@ -267,8 +267,10 @@ def train( def build_posterior( self, density_estimator: Optional[TorchModule] = None, + sample_with: str = "mcmc", mcmc_method: str = "slice_np", mcmc_parameters: Optional[Dict[str, Any]] = None, + rejection_sampling_parameters: Optional[Dict[str, Any]] = None, ) -> RatioBasedPosterior: r""" Build posterior from the neural density estimator. @@ -285,6 +287,8 @@ def build_posterior( Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. + sample_with: Method to use for sampling from the posterior. Must be one of + [`mcmc` | `rejection`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for @@ -297,6 +301,15 @@ def build_posterior( draw init locations from prior, whereas `sir` will use Sequential-Importance-Resampling using `init_strategy_num_candidates` to find init locations. + rejection_sampling_parameters: Dictionary overriding the default parameters + for rejection sampling. The following parameters are supported: + `proposal` as the proposal distribtution (default is the prior). 
+ `max_sampling_batch_size` as the batch size of samples being drawn from + the proposal at every iteration. `num_samples_to_find_max` as the + number of samples that are used to find the maximum of the + `potential_fn / proposal` ratio. `num_iter_to_find_max` as the number + of gradient ascent iterations to find the maximum of that ratio. `m` as + multiplier to that ratio. Returns: Posterior $p(\theta|x)$ with `.sample()` and `.log_prob()` methods @@ -316,8 +329,10 @@ def build_posterior( neural_net=density_estimator, prior=self._prior, x_shape=self._x_shape, + sample_with=sample_with, mcmc_method=mcmc_method, mcmc_parameters=mcmc_parameters, + rejection_sampling_parameters=rejection_sampling_parameters, device=device, ) diff --git a/sbi/mcmc/__init__.py b/sbi/mcmc/__init__.py index 83a1e2b89..7c59fb054 100644 --- a/sbi/mcmc/__init__.py +++ b/sbi/mcmc/__init__.py @@ -1,5 +1,4 @@ from sbi.mcmc.init_strategy import IterateParameters, prior_init, sir -from sbi.mcmc.rejection_sampling import rejection_sample from sbi.mcmc.slice import Slice from sbi.mcmc.slice_numpy import SliceSampler from sbi.mcmc.slice_numpy_vectorized import SliceSamplerVectorized diff --git a/sbi/mcmc/rejection_sampling.py b/sbi/mcmc/rejection_sampling.py deleted file mode 100644 index 9121e1772..000000000 --- a/sbi/mcmc/rejection_sampling.py +++ /dev/null @@ -1,11 +0,0 @@ -from typing import Any -from warnings import warn - -import torch -from torch import Tensor, exp, log, rand - -from sbi.utils import ( - optimize_potential_fn, - rejection_sample_raw, - sample_posterior_within_prior, -) diff --git a/sbi/utils/__init__.py b/sbi/utils/__init__.py index 91175b34d..a77d5fce1 100644 --- a/sbi/utils/__init__.py +++ b/sbi/utils/__init__.py @@ -21,8 +21,8 @@ mask_sims_from_prior, mog_log_prob, optimize_potential_fn, - rejection_sample_raw, - sample_posterior_within_prior, + rejection_sample, + rejection_sample_posterior_within_prior, standardizing_net, standardizing_transform, warn_if_zscoring_changes_data, diff --git a/sbi/utils/sbiutils.py b/sbi/utils/sbiutils.py index b86265145..e67f73f4d 100644 --- a/sbi/utils/sbiutils.py +++ b/sbi/utils/sbiutils.py @@ -146,19 +146,26 @@ def standardizing_net(batch_t: Tensor, min_std: float = 1e-7) -> nn.Module: return Standardize(t_mean, t_std) -def rejection_sample_raw( - potential_fn: nn.Module, - proposal, - num_samples: int = 1, +@torch.no_grad() +def rejection_sample_posterior_within_prior( + posterior_nn: Any, + prior: Callable, + x: Tensor, + num_samples: int, show_progress_bars: bool = False, warn_acceptance: float = 0.01, sample_for_correction_factor: bool = False, max_sampling_batch_size: int = 10_000, -): + **kwargs, +) -> Tuple[Tensor, Tensor]: r"""Return samples from a posterior $p(\theta|x)$ only within the prior support. This is relevant for snpe methods and flows for which the posterior tends to have - mass outside the prior boundaries. + mass outside the prior support. + + This function could in principle be integrated into `rejection_sample()`. However, + to keep the warnings clean and to avoid extra integration code and confusing + if-cases, we decided to keep two separate functions. This function uses rejection sampling with samples from posterior in order to 1) obtain posterior samples within the prior support, and 2) calculate the fraction of accepted samples as a proxy for correcting the density during evaluation of the posterior. Args: posterior_nn: Neural net representing the posterior. prior: Distribution-like object that evaluates probabilities with `log_prob`. + x: Conditioning variable $x$ for the posterior $p(\theta|x)$.
num_samples: Desired number of samples. show_progress_bars: Whether to show a progressbar during sampling. warn_acceptance: A minimum acceptance rate under which to warn about slowness. sample_for_correction_factor: True if this function was called by `leakage_correction()`. False otherwise. Will be used to adapt the leakage - warning. + warning and to decide whether we have to search for the maximum. max_sampling_batch_size: Batch size for drawing samples from the posterior. Takes effect only in the second iteration of the loop below, i.e., in case of leakage or `num_samples>max_sampling_batch_size`. Larger batch size speeds up sampling. + kwargs: Absorb additional unused arguments that can be passed to + `rejection_sample()`. Warn if not empty. Returns: Accepted samples and acceptance rate as scalar Tensor. """ - # Progress bar can be skipped, e.g. when sampling after each round just for logging. + if kwargs: + logging.warning( + f"You passed arguments to `rejection_sampling_parameters` that " + f"are unused when you do not specify a `proposal` in the same " + f"dictionary. The unused arguments are: {kwargs}" + ) + + # Progress bar can be skipped, e.g. when sampling after each round just for + # logging. pbar = tqdm( disable=not show_progress_bars, total=num_samples, @@ -200,18 +218,14 @@ def rejection_sample_raw( # Sample and reject. candidates = ( - proposal.sample(sampling_batch_size) + posterior_nn.sample(sampling_batch_size, context=x) .reshape(sampling_batch_size, -1) - .cpu() # Move to cpu to evaluate under prior. + .cpu() ) - # are_within_prior = within_support(prior, candidates) - # samples = candidates[are_within_prior] - target_proposal_ratio = torch.exp( - potential_fn(candidates) - proposal.log_prob(candidates) - ) - uniform_rand = torch.rand(target_proposal_ratio.shape) - samples = candidates[target_proposal_ratio > uniform_rand] + # SNPE-style rejection-sampling when the proposal is the neural net. + are_within_prior = within_support(prior, candidates) + samples = candidates[are_within_prior] accepted.append(samples) @@ -235,24 +249,26 @@ def rejection_sample_raw( if sample_for_correction_factor: logging.warning( f"""Drawing samples from posterior to estimate the normalizing - constant for `log_prob()`. However, only {acceptance_rate:.0%} - posterior samples are within the prior support. It may take a - long time to collect the remaining {num_remaining} samples. - Consider interrupting (Ctrl-C) and either basing the estimate - of the normalizing constant on fewer samples (by calling - `posterior.leakage_correction(x_o, num_rejection_samples=N)`, - where `N` is the number of samples you want to base the + constant for `log_prob()`. However, only + {acceptance_rate:.0%} posterior samples are within the + prior support. It may take a long time to collect the + remaining {num_remaining} samples. + Consider interrupting (Ctrl-C) and either basing the + estimate of the normalizing constant on fewer samples (by + calling `posterior.leakage_correction(x_o, + num_rejection_samples=N)`, where `N` is the number of + samples you want to base the estimate on (default N=10000), or not estimating the normalizing constant at all - (`log_prob(..., norm_posterior=False)`. The latter will result - in an unnormalized `log_prob()`.""" + (`log_prob(..., norm_posterior=False)`. The latter will + result in an unnormalized `log_prob()`.""" ) else: logging.warning( f"""Only {acceptance_rate:.0%} posterior samples are within the - prior support.
It may take a long time to collect the remaining - {num_remaining} samples. Consider interrupting (Ctrl-C) - and switching to `sample_with_mcmc=True`.""" + prior support. It may take a long time to collect the + remaining {num_remaining} samples. Consider interrupting + (Ctrl-C) and switching to `sample_with='mcmc'`.""" ) leakage_warning_raised = True # Ensure warning is raised just once. @@ -267,6 +283,168 @@ def rejection_sample_raw( return samples, as_tensor(acceptance_rate) +def rejection_sample( + potential_fn: Callable, + proposal: Any, + num_samples: int = 1, + show_progress_bars: bool = False, + warn_acceptance: float = 0.01, + max_sampling_batch_size: int = 10_000, + num_samples_to_find_max: int = 10_000, + num_iter_to_find_max: int = 100, + m: float = 1.2, +) -> Tuple[Tensor, Tensor]: + r"""Return samples from a `potential_fn` obtained via rejection sampling. + + This function uses rejection sampling with samples drawn from the `proposal` in + order to 1) obtain samples from the distribution defined by `potential_fn`, and + 2) calculate the fraction of accepted samples as a proxy for the acceptance + rate. + + Args: + potential_fn: The potential to sample from. The potential should be passed as + the logarithm of the desired distribution. + proposal: The proposal from which to draw candidate samples. Must have a + `sample()` and a `log_prob()` method. + num_samples: Desired number of samples. + show_progress_bars: Whether to show a progressbar during sampling. + warn_acceptance: A minimum acceptance rate under which to warn about slowness. + max_sampling_batch_size: Batch size for drawing samples from the proposal. + Takes effect only in the second iteration of the loop below, i.e., in case + of leakage or `num_samples>max_sampling_batch_size`. Larger batch size + speeds up sampling. + num_samples_to_find_max: Number of samples that are used to find the maximum + of the `potential_fn / proposal` ratio. + num_iter_to_find_max: Number of gradient ascent iterations to find the maximum + of the `potential_fn / proposal` ratio. + m: Multiplier to the maximum ratio between potential function and the + proposal. This factor is applied after already having scaled the proposal + with the maximum of the `potential_fn / proposal` ratio. A higher + value will ensure that the samples are indeed from the correct + distribution, but will increase the fraction of rejected samples and thus + computation time. + + Returns: + Accepted samples and acceptance rate as scalar Tensor. + """ + + samples_to_find_max = proposal.sample((num_samples_to_find_max,)) + + # Define a potential as the log-ratio between target distribution and proposal. + def potential_over_proposal(theta): + return potential_fn(theta) - proposal.log_prob(theta).cpu() + + # Search for the maximum of the ratio. + _, max_log_ratio = optimize_potential_fn( + potential_fn=potential_over_proposal, + inits=samples_to_find_max, + dist_specifying_bounds=proposal, + num_iter=num_iter_to_find_max, + learning_rate=0.01, + num_to_optimize=max(1, int(num_samples_to_find_max / 10)), + show_progress_bars=False, + ) + + if m < 1.0: + warnings.warn("A value of m < 1.0 will lead to systematically wrong results.") + + class ScaledProposal: + """ + Proposal for rejection sampling which is strictly larger than the potential_fn.
+ """ + + def __init__(self, proposal: Any, max_log_ratio: float, log_m: float): + self.proposal = proposal + self.max_log_ratio = max_log_ratio + self.log_m = log_m + + def sample(self, sample_shape: torch.Size, **kwargs) -> Tensor: + """ + Samples from the `ScaledProposal` are samples from the `proposal`. + """ + return self.proposal.sample((sample_shape,), **kwargs) + + def log_prob(self, theta: Tensor, **kwargs) -> Tensor: + """ + The log-prob is scaled such that the proposal is always above the potential. + """ + return ( + self.proposal.log_prob(theta, **kwargs) + + self.max_log_ratio + + self.log_m + ) + + proposal = ScaledProposal(proposal, max_log_ratio, torch.log(torch.as_tensor(m))) + + with torch.no_grad(): + # Progress bar can be skipped, e.g. when sampling after each round just for + # logging. + pbar = tqdm( + disable=not show_progress_bars, + total=num_samples, + desc=f"Drawing {num_samples} posterior samples", + ) + + num_sampled_total, num_remaining = 0, num_samples + accepted, acceptance_rate = [], float("Nan") + leakage_warning_raised = False + + # To cover cases with few samples without leakage: + sampling_batch_size = min(num_samples, max_sampling_batch_size) + while num_remaining > 0: + + # Sample and reject. + candidates = proposal.sample(sampling_batch_size).reshape( + sampling_batch_size, -1 + ) + + # `candidates` will lie on CPU if the proposal is the prior and + # possibly on the GPU if the proposal is a neural net. Everything returned + # by the `potential_fn` will lie on the CPU. + target_proposal_ratio = torch.exp( + potential_fn(candidates) - proposal.log_prob(candidates).cpu() + ) + uniform_rand = torch.rand(target_proposal_ratio.shape) + samples = candidates.cpu()[target_proposal_ratio > uniform_rand] + + accepted.append(samples) + + # Update. + num_sampled_total += sampling_batch_size + num_remaining -= samples.shape[0] + pbar.update(samples.shape[0]) + + # To avoid endless sampling when leakage is high, we raise a warning if the + # acceptance rate is too low after the first 1_000 samples. + acceptance_rate = (num_samples - num_remaining) / num_sampled_total + + # For remaining iterations (leakage or many samples) continue sampling with + # fixed batch size. + sampling_batch_size = max_sampling_batch_size + if ( + num_sampled_total > 1000 + and acceptance_rate < warn_acceptance + and not leakage_warning_raised + ): + logging.warning( + f"""Only {acceptance_rate:.0%} proposal samples were accepted. It + may take a long time to collect the remaining {num_remaining} + samples. Consider interrupting (Ctrl-C) and switching to + `sample_with='mcmc'`.""" + ) + leakage_warning_raised = True # Ensure warning is raised just once. + + pbar.close() + + # In case of leakage, the fixed batch size can produce more samples than + # requested; keep only `num_samples` of them. + samples = torch.cat(accepted)[:num_samples] + assert ( + samples.shape[0] == num_samples + ), "Number of accepted samples must match required samples." + + return samples, as_tensor(acceptance_rate) + + def handle_invalid_x( x: Tensor, exclude_invalid_x: bool = True ) -> Tuple[Tensor, int, int]: @@ -457,43 +635,39 @@ def optimize_potential_fn( interruption_note: str = "", ) -> Tuple[Tensor, Tensor]: """ - Returns the location of the maximum of a `potential_fn`. + Returns the `argmax` and `max` of a `potential_fn`. - The method can be interrupted (Ctrl-C) when the user sees that the - log-probability converges. The best estimate will be returned.
+ The method can be interrupted (Ctrl-C) when the user sees that the log-probability + converges. The best estimate will be returned. - The location of the maximum is obtained by running gradient ascent from given - number of starting positions. After the optimization is done, we select the - parameter set that has the highest `potential_fn` value after the optimization. + The maximum is obtained by running gradient ascent from given starting parameters. + After the optimization is done, we select the parameter set that has the highest + `potential_fn` value after the optimization. - Warning: The default values used by this function are not well-tested. They - might require hand-tuning for the problem at hand. + Warning: The default values used by this function are not well-tested. They might + require hand-tuning for the problem at hand. Args: potential_fn: The function on which to optimize. + inits: The initial parameters at which to start the gradient ascent steps. + dist_specifying_bounds: Distribution that specifies bounds for the optimization. + If it is an `sbi.utils.BoxUniform`, we transform the space into + unconstrained space and carry out the optimization there. num_iter: Number of optimization steps that the algorithm takes to find the MAP. + num_to_optimize: From the provided `inits`, use the `num_to_optimize` + with the highest log-probability as the initial points for the optimization. learning_rate: Learning rate of the optimizer. - init_method: How to select the starting parameters for the optimization. If - it is a string, it can be either [`posterior`, `prior`], which samples - the respective distribution `num_init_samples` times. If it is a - tensor, the tensor will be used as init locations. - num_init_samples: Draw this number of samples from the posterior and - evaluate the log-probability of all of them. - num_to_optimize: From the drawn `num_init_samples`, use the - `num_to_optimize` with highest log-probability as the initial points - for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. - Computing the best log-probability creates a significant overhead - (thus, the default is `10`.) - show_progress_bars: Whether or not to show a progressbar for the - optimization. - interrruption_note: The message printed when the user interrupts the + Computing the best log-probability creates a significant overhead (thus, + the default is `10`.) + show_progress_bars: Whether or not to show a progressbar for the optimization. + interruption_note: The message printed when the user interrupts the optimization. Returns: - The location and value of the maximum of the `potential_fn`. + The `argmax` and `max` of the `potential_fn`.
""" # If the prior is `BoxUniform`, define a transformation to optimize in diff --git a/tests/inference_on_device_test.py b/tests/inference_on_device_test.py index 804d89a31..a3c7437e3 100644 --- a/tests/inference_on_device_test.py +++ b/tests/inference_on_device_test.py @@ -60,7 +60,7 @@ def simulator(theta): ) mcmc_kwargs = ( dict( - sample_with_mcmc=True, + sample_with="mcmc", mcmc_method="slice_np", ) if method == SNPE_C @@ -70,12 +70,13 @@ def simulator(theta): kwargs = dict( density_estimator=utils.likelihood_nn(model=model), ) - mcmc_kwargs = dict(mcmc_method="slice") + mcmc_kwargs = dict(sample_with="mcmc", mcmc_method="slice") elif method in (SNRE_A, SNRE_B): kwargs = dict( classifier=utils.classifier_nn(model=model), ) mcmc_kwargs = dict( + sample_with="mcmc", mcmc_method="slice_np_vectorized", ) else: diff --git a/tests/linearGaussian_snle_test.py b/tests/linearGaussian_snle_test.py index 1e0d9e2e8..bb95255b9 100644 --- a/tests/linearGaussian_snle_test.py +++ b/tests/linearGaussian_snle_test.py @@ -238,13 +238,13 @@ def test_c2st_multi_round_snl_on_linearGaussian(num_trials: int, set_seed): check_c2st(samples, target_samples, alg="multi-round-snl") -# TODO: add test for rejection sampling. @pytest.mark.slow @pytest.mark.parametrize("prior_str", ("gaussian", "uniform")) @pytest.mark.parametrize( - "mcmc_method", ("slice_np", "slice_np_vectorized", "slice", "nuts", "hmc") + "sampling_method", + ("slice_np", "slice_np_vectorized", "slice", "nuts", "hmc", "rejection"), ) -def test_api_snl_sampling_methods(mcmc_method: str, prior_str: str, set_seed): +def test_api_snl_sampling_methods(sampling_method: str, prior_str: str, set_seed): """Runs SNL on linear Gaussian and tests sampling from posterior via mcmc. Args: @@ -257,10 +257,14 @@ def test_api_snl_sampling_methods(mcmc_method: str, prior_str: str, set_seed): num_samples = 10 num_trials = 2 # HMC with uniform prior needs good likelihood. - num_simulations = 10000 if mcmc_method == "hmc" else 1000 + num_simulations = 10000 if sampling_method == "hmc" else 1000 x_o = zeros((num_trials, num_dim)) # Test for multiple chains is cheap when vectorized. 
- num_chains = 3 if mcmc_method == "slice_np_vectorized" else 1 + num_chains = 3 if sampling_method == "slice_np_vectorized" else 1 + if sampling_method == "rejection": + sample_with = "rejection" + else: + sample_with = "mcmc" if prior_str == "gaussian": prior = MultivariateNormal(loc=zeros(num_dim), covariance_matrix=eye(num_dim)) @@ -274,7 +278,9 @@ def test_api_snl_sampling_methods(mcmc_method: str, prior_str: str, set_seed): simulator, prior, num_simulations, simulation_batch_size=50 ) _ = inference.append_simulations(theta, x).train(max_num_epochs=5) - posterior = inference.build_posterior(mcmc_method=mcmc_method).set_default_x(x_o) + posterior = inference.build_posterior( + sample_with=sample_with, mcmc_method=sampling_method + ).set_default_x(x_o) posterior.sample( sample_shape=(num_samples,), diff --git a/tests/linearGaussian_snpe_test.py b/tests/linearGaussian_snpe_test.py index 2af04c5d7..fc8c9e0a6 100644 --- a/tests/linearGaussian_snpe_test.py +++ b/tests/linearGaussian_snpe_test.py @@ -290,17 +290,17 @@ def test_c2st_multi_round_snpe_on_linearGaussian(method_str: str, set_seed): @pytest.mark.slow @pytest.mark.parametrize("snpe_method", [SNPE_A, SNPE_C]) @pytest.mark.parametrize( - "sample_with_mcmc, mcmc_method, prior_str", + "sample_with, mcmc_method, prior_str", ( - (True, "slice_np", "gaussian"), - (True, "slice", "gaussian"), + ("mcmc", "slice_np", "gaussian"), + ("mcmc", "slice", "gaussian"), # XXX (True, "slice", "uniform"), # XXX takes very long. fix when refactoring pyro sampling - (False, "rejection", "uniform"), + ("rejection", "rejection", "uniform"), ), ) def test_api_snpe_c_posterior_correction( - snpe_method: type, sample_with_mcmc, mcmc_method, prior_str, set_seed + snpe_method: type, sample_with, mcmc_method, prior_str, set_seed ): """Test that leakage correction applied to sampling works, with both MCMC and rejection. @@ -329,7 +329,7 @@ def test_api_snpe_c_posterior_correction( inference = snpe_method( prior, simulation_batch_size=50, - sample_with_mcmc=sample_with_mcmc, + sample_with=sample_with, mcmc_method=mcmc_method, show_progress_bars=False, ) @@ -338,9 +338,7 @@ def test_api_snpe_c_posterior_correction( _ = inference.append_simulations(theta, x).train(max_num_epochs=5) posterior = inference.build_posterior() - posterior = posterior.set_sample_with_mcmc(sample_with_mcmc).set_mcmc_method( - mcmc_method - ) + posterior = posterior.set_sample_with(sample_with).set_mcmc_method(mcmc_method) # Posterior should be corrected for leakage even if num_rounds just 1. samples = posterior.sample((10,), x=x_o) diff --git a/tests/linearGaussian_snre_test.py b/tests/linearGaussian_snre_test.py index b3490ac98..2494dad6c 100644 --- a/tests/linearGaussian_snre_test.py +++ b/tests/linearGaussian_snre_test.py @@ -228,16 +228,12 @@ def simulator(theta): @pytest.mark.slow +@pytest.mark.parametrize("prior_str", ("gaussian", "uniform")) @pytest.mark.parametrize( - "mcmc_method, prior_str", - ( - ("slice_np", "gaussian"), - ("slice_np", "uniform"), - ("slice", "gaussian"), - ("slice", "uniform"), - ), + "sampling_method", + ("slice_np", "slice_np_vectorized", "slice", "nuts", "hmc", "rejection"), ) -def test_api_sre_sampling_methods(mcmc_method: str, prior_str: str, set_seed): +def test_api_sre_sampling_methods(sampling_method: str, prior_str: str, set_seed): """Test leakage correction both for MCMC and rejection sampling. 
Args: @@ -245,23 +241,37 @@ def test_api_sre_sampling_methods(mcmc_method: str, prior_str: str, set_seed): prior_str: one of "gaussian" or "uniform" set_seed: fixture for manual seeding """ - num_dim = 2 - x_o = zeros(num_dim) + num_samples = 10 + num_trials = 2 + # HMC with uniform prior needs good likelihood. + num_simulations = 10000 if sampling_method == "hmc" else 1000 + x_o = zeros((num_trials, num_dim)) + # Test for multiple chains is cheap when vectorized. + num_chains = 3 if sampling_method == "slice_np_vectorized" else 1 + if sampling_method == "rejection": + sample_with = "rejection" + else: + sample_with = "mcmc" + if prior_str == "gaussian": prior = MultivariateNormal(loc=zeros(num_dim), covariance_matrix=eye(num_dim)) else: - prior = utils.BoxUniform(low=-1.0 * ones(num_dim), high=ones(num_dim)) + prior = utils.BoxUniform(-1.0 * ones(num_dim), ones(num_dim)) simulator, prior = prepare_for_sbi(diagonal_linear_gaussian, prior) - inference = SNRE_B( - prior, - classifier="resnet", - show_progress_bars=False, - ) + inference = SNRE_B(prior, classifier="resnet", show_progress_bars=False) - theta, x = simulate_for_sbi(simulator, prior, 200, simulation_batch_size=50) + theta, x = simulate_for_sbi( + simulator, prior, num_simulations, simulation_batch_size=50 + ) _ = inference.append_simulations(theta, x).train(max_num_epochs=5) - posterior = inference.build_posterior(mcmc_method=mcmc_method) + posterior = inference.build_posterior( + sample_with=sample_with, mcmc_method=sampling_method + ).set_default_x(x_o) - posterior.sample(sample_shape=(10,), x=x_o) + posterior.sample( + sample_shape=(num_samples,), + x=x_o, + mcmc_parameters={"thin": 3, "num_chains": num_chains}, + )
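Below is a minimal usage sketch of the `sample_with` API introduced in this diff, for reviewers who want to exercise the new keyword end to end. The toy simulator, the prior bounds, and all numeric settings are illustrative only and are not part of the changes above.

# Illustrative only: exercises `build_posterior(sample_with=...)` and
# `rejection_sampling_parameters` as changed in this diff. The Gaussian toy
# simulator, prior bounds, and sample counts below are made up for the example.
import torch
from sbi.inference import SNPE_C, prepare_for_sbi, simulate_for_sbi
from sbi.utils import BoxUniform

def toy_simulator(theta):
    return theta + 0.1 * torch.randn_like(theta)

prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
simulator, prior = prepare_for_sbi(toy_simulator, prior)

inference = SNPE_C(prior)
theta, x = simulate_for_sbi(simulator, prior, num_simulations=500)
_ = inference.append_simulations(theta, x).train(max_num_epochs=5)

# New keyword: `sample_with` replaces the deprecated boolean `sample_with_mcmc`.
posterior = inference.build_posterior(
    sample_with="rejection",
    rejection_sampling_parameters={"max_sampling_batch_size": 5_000},
)
samples = posterior.sample((100,), x=torch.zeros(2))

# The old keyword still works but only triggers the deprecation warning added above:
# inference.build_posterior(sample_with_mcmc=True)  # warns, behaves like sample_with="mcmc"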
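And a self-contained sketch of the acceptance rule that the new `rejection_sample()` implements: scale the proposal density by the estimated maximum of the `potential_fn / proposal` ratio times `m`, then accept each candidate with the resulting probability. This is a toy re-implementation with a Gaussian target and proposal, independent of the sbi internals; all names and constants here are chosen only for illustration.

# Toy illustration of the scaled-proposal rejection rule used by `rejection_sample()`:
# accept theta ~ proposal with probability exp(potential(theta)) / (m * M * proposal(theta)),
# where M estimates the maximum of exp(potential) / proposal. Not sbi library code.
import torch
from torch.distributions import Normal

target = Normal(1.0, 0.5)       # stands in for the distribution behind `potential_fn`.
proposal = Normal(0.0, 2.0)     # the distribution we can actually sample from.
potential_fn = target.log_prob  # in sbi this would be the learned log-potential.
m = 1.2

# Crude estimate of the maximum log-ratio from proposal samples; sbi refines this
# estimate by gradient ascent via `optimize_potential_fn`.
candidates = proposal.sample((10_000,))
max_log_ratio = (potential_fn(candidates) - proposal.log_prob(candidates)).max()
log_m = torch.log(torch.tensor(m))

accepted = []
num_accepted = 0
while num_accepted < 1_000:
    theta = proposal.sample((5_000,))
    log_accept_prob = potential_fn(theta) - (
        proposal.log_prob(theta) + max_log_ratio + log_m
    )
    keep = torch.rand(theta.shape) < log_accept_prob.exp()
    accepted.append(theta[keep])
    num_accepted += int(keep.sum())

samples = torch.cat(accepted)[:1_000]
print(samples.mean(), samples.std())  # should be close to the target's (1.0, 0.5)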