From 7135fd766e6908f345cafbdeb7a9ae798d69f355 Mon Sep 17 00:00:00 2001 From: Reuben Date: Mon, 12 Aug 2024 09:33:43 -0400 Subject: [PATCH] Simplify `run_inference_algorithm` (#714) * fix minor type errors * storing only expectation values * fixed memory efficient sampling * clean up * renaming vars * precommit fixes * fixing tests * fixing tests * fixing tests * fixing tests * fixing tests * merge main * burn in and fix tests * burn in and fix tests * minor fixes * minor fixes * minor fixes --------- Co-authored-by: jakob.robnik@gmail.com --- blackjax/adaptation/mclmc_adaptation.py | 8 +- blackjax/util.py | 156 +++++++++++++++--------- tests/adaptation/test_adaptation.py | 2 +- tests/mcmc/test_sampling.py | 57 +++++---- tests/test_benchmarks.py | 2 +- tests/test_util.py | 56 +++++---- 6 files changed, 171 insertions(+), 110 deletions(-) diff --git a/blackjax/adaptation/mclmc_adaptation.py b/blackjax/adaptation/mclmc_adaptation.py index 76a016242..7645a890b 100644 --- a/blackjax/adaptation/mclmc_adaptation.py +++ b/blackjax/adaptation/mclmc_adaptation.py @@ -20,7 +20,7 @@ from jax.flatten_util import ravel_pytree from blackjax.diagnostics import effective_sample_size -from blackjax.util import pytree_size, streaming_average_update +from blackjax.util import incremental_value_update, pytree_size class MCLMCAdaptationState(NamedTuple): @@ -199,9 +199,9 @@ def step(iteration_state, weight_and_key): x = ravel_pytree(state.position)[0] # update the running average of x, x^2 - streaming_avg = streaming_average_update( - current_value=jnp.array([x, jnp.square(x)]), - previous_weight_and_average=streaming_avg, + streaming_avg = incremental_value_update( + expectation=jnp.array([x, jnp.square(x)]), + incremental_val=streaming_avg, weight=(1 - mask) * success * params.step_size, zero_prevention=mask, ) diff --git a/blackjax/util.py b/blackjax/util.py index 9f4d6f9c7..b6c5367b5 100644 --- a/blackjax/util.py +++ b/blackjax/util.py @@ -3,12 +3,11 @@ from functools import partial from typing import Callable, Union -import jax import jax.numpy as jnp from jax import jit, lax from jax.flatten_util import ravel_pytree from jax.random import normal, split -from jax.tree_util import tree_leaves +from jax.tree_util import tree_leaves, tree_map from blackjax.base import SamplingAlgorithm, VIAlgorithm from blackjax.progress_bar import gen_scan_fn @@ -149,9 +148,7 @@ def run_inference_algorithm( initial_state: ArrayLikeTree = None, initial_position: ArrayLikeTree = None, progress_bar: bool = False, - transform: Callable = lambda x: x, - return_state_history=True, - expectation: Callable = lambda x: x, + transform: Callable = lambda state, info: (state, info), ) -> tuple: """Wrapper to run an inference algorithm. @@ -166,8 +163,7 @@ def run_inference_algorithm( initial_state The initial state of the inference algorithm. initial_position - The initial position of the inference algorithm. This is used when the initial - state is not provided. + The initial position of the inference algorithm. This is used when the initial state is not provided. inference_algorithm One of blackjax's sampling algorithms or variational inference algorithms. num_steps @@ -175,26 +171,14 @@ def run_inference_algorithm( progress_bar Whether to display a progress bar. transform - A transformation of the trace of states to be returned. This is useful for + A transformation of the trace of states (and info) to be returned. This is useful for computing determinstic variables, or returning a subset of the states. By default, the states are returned as is. - expectation - A function that computes the expectation of the state. This is done - incrementally, so doesn't require storing all the states. - return_state_history - if False, `run_inference_algorithm` will only return an expectation of the value - of transform, and return that average instead of the full set of samples. This - is useful when memory is a bottleneck. Returns ------- - If return_state_history is True: 1. The final state. - 2. The trace of the state. - 3. The trace of the info of the inference algorithm for diagnostics. - If return_state_history is False: - 1. This is the expectation of state over the chain. Otherwise the final state. - 2. The final state of the inference algorithm. + 2. The history of states. """ if initial_state is None and initial_position is None: @@ -212,58 +196,116 @@ def run_inference_algorithm( keys = split(rng_key, num_steps) - def one_step(average_and_state, xs, return_state): + def one_step(state, xs): _, rng_key = xs - average, state = average_and_state state, info = inference_algorithm.step(rng_key, state) - average = streaming_average_update(expectation(transform(state)), average) - if return_state: - return (average, state), (transform(state), info) - else: - return (average, state), None + return state, transform(state, info) - one_step = jax.jit(partial(one_step, return_state=return_state_history)) - - xs = (jnp.arange(num_steps), keys) scan_fn = gen_scan_fn(num_steps, progress_bar) - ((_, average), final_state), history = scan_fn( - one_step, - ((0, expectation(transform(initial_state))), initial_state), - xs, - ) - if not return_state_history: - return average, transform(final_state) - else: - state_history, info_history = history - return transform(final_state), state_history, info_history + xs = jnp.arange(num_steps), keys + final_state, history = scan_fn(one_step, initial_state, xs) + + return final_state, history -def streaming_average_update( - current_value, previous_weight_and_average, weight=1.0, zero_prevention=0.0 +def store_only_expectation_values( + sampling_algorithm, + state_transform=lambda x: x, + incremental_value_transform=lambda x: x, + burn_in=0, +): + """Takes a sampling algorithm and constructs from it a new sampling algorithm object. The new sampling algorithm has the same + kernel but only stores the streaming expectation values of some observables, not the full states; to save memory. + + It saves incremental_value_transform(E[state_transform(x)]) at each step i, where expectation is computed with samples up to i-th sample. + + Example: + + .. code:: + + init_key, state_key, run_key = jax.random.split(jax.random.PRNGKey(0),3) + model = StandardNormal(2) + initial_position = model.sample_init(init_key) + initial_state = blackjax.mcmc.mclmc.init( + position=initial_position, logdensity_fn=model.logdensity_fn, rng_key=state_key + ) + integrator_type = "mclachlan" + L = 1.0 + step_size = 0.1 + num_steps = 4 + + integrator = map_integrator_type_to_integrator['mclmc'][integrator_type] + state_transform = lambda state: state.position + memory_efficient_sampling_alg, transform = store_only_expectation_values( + sampling_algorithm=sampling_alg, + state_transform=state_transform) + + initial_state = memory_efficient_sampling_alg.init(initial_state) + + final_state, trace_at_every_step = run_inference_algorithm( + + rng_key=run_key, + initial_state=initial_state, + inference_algorithm=memory_efficient_sampling_alg, + num_steps=num_steps, + transform=transform, + progress_bar=True, + ) + """ + + def init_fn(state): + averaging_state = (0.0, state_transform(state)) + return (state, averaging_state) + + def update_fn(rng_key, state_and_incremental_val): + state, averaging_state = state_and_incremental_val + state, info = sampling_algorithm.step( + rng_key, state + ) # update the state with the sampling algorithm + averaging_state = incremental_value_update( + state_transform(state), + averaging_state, + weight=( + averaging_state[0] >= burn_in + ), # If we want to eliminate some number of steps as a burn-in + zero_prevention=1e-10 * (burn_in > 0), + ) + # update the expectation value with the running average + return (state, averaging_state), info + + def transform(state_and_incremental_val, info): + (state, (_, incremental_value)) = state_and_incremental_val + return incremental_value_transform(incremental_value), info + + return SamplingAlgorithm(init_fn, update_fn), transform + + +def incremental_value_update( + expectation, incremental_val, weight=1.0, zero_prevention=0.0 ): """Compute the streaming average of a function O(x) using a weight. Parameters: ---------- - current_value - the current value of the function that we want to take average of - previous_weight_and_average - tuple of (previous_weight, previous_average) where previous_weight is the - sum of weights and average is the current estimated average + expectation + the value of the expectation at the current timestep + incremental_val + tuple of (total, average) where total is the sum of weights and average is the current average weight weight of the current state zero_prevention small value to prevent division by zero Returns: ---------- - new total weight and streaming average + new streaming average """ - previous_weight, previous_average = previous_weight_and_average - current_weight = previous_weight + weight - current_average = jax.tree.map( - lambda x, avg: (previous_weight * avg + weight * x) - / (current_weight + zero_prevention), - current_value, - previous_average, + + total, average = incremental_val + average = tree_map( + lambda exp, av: (total * av + weight * exp) + / (total + weight + zero_prevention), + expectation, + average, ) - return current_weight, current_average + total += weight + return total, average diff --git a/tests/adaptation/test_adaptation.py b/tests/adaptation/test_adaptation.py index 68751bee8..4b34511be 100644 --- a/tests/adaptation/test_adaptation.py +++ b/tests/adaptation/test_adaptation.py @@ -90,7 +90,7 @@ def test_chees_adaptation(adaptation_filters): algorithm = blackjax.dynamic_hmc(logprob_fn, **parameters) chain_keys = jax.random.split(inference_key, num_chains) - _, _, infos = jax.vmap( + _, (_, infos) = jax.vmap( lambda key, state: run_inference_algorithm( rng_key=key, initial_state=state, diff --git a/tests/mcmc/test_sampling.py b/tests/mcmc/test_sampling.py index 18a07625b..c399929da 100644 --- a/tests/mcmc/test_sampling.py +++ b/tests/mcmc/test_sampling.py @@ -135,12 +135,12 @@ def run_mclmc( sqrt_diag_cov=blackjax_mclmc_sampler_params.sqrt_diag_cov, ) - _, samples, _ = run_inference_algorithm( + _, samples = run_inference_algorithm( rng_key=run_key, initial_state=blackjax_state_after_tuning, inference_algorithm=sampling_alg, num_steps=num_steps, - transform=lambda x: x.position, + transform=lambda state, info: state.position, ) return samples @@ -197,7 +197,7 @@ def check_attrs(attribute, keyset): for i, attribute in enumerate(["state", "info", "adaptation_state"]): check_attrs(attribute, keysets[i]) - _, states, _ = run_inference_algorithm( + _, (states, _) = run_inference_algorithm( rng_key=inference_key, initial_state=state, inference_algorithm=inference_algorithm, @@ -223,15 +223,16 @@ def test_mala(self): mala = blackjax.mala(logposterior_fn, 1e-5) state = mala.init({"coefs": 1.0, "log_scale": 1.0}) - _, states, _ = run_inference_algorithm( + _, states = run_inference_algorithm( rng_key=inference_key, initial_state=state, inference_algorithm=mala, + transform=lambda state, info: state.position, num_steps=10_000, ) - coefs_samples = states.position["coefs"][3000:] - scale_samples = np.exp(states.position["log_scale"][3000:]) + coefs_samples = states["coefs"][3000:] + scale_samples = np.exp(states["log_scale"][3000:]) np.testing.assert_allclose(np.mean(scale_samples), 1.0, atol=1e-1) np.testing.assert_allclose(np.mean(coefs_samples), 3.0, atol=1e-1) @@ -375,15 +376,16 @@ def test_pathfinder_adaptation( ) inference_algorithm = algorithm(logposterior_fn, **parameters) - _, states, _ = run_inference_algorithm( + _, states = run_inference_algorithm( rng_key=inference_key, initial_state=state, inference_algorithm=inference_algorithm, num_steps=num_sampling_steps, + transform=lambda state, info: state.position, ) - coefs_samples = states.position["coefs"] - scale_samples = np.exp(states.position["log_scale"]) + coefs_samples = states["coefs"] + scale_samples = np.exp(states["log_scale"]) np.testing.assert_allclose(np.mean(scale_samples), 1.0, atol=1e-1) np.testing.assert_allclose(np.mean(coefs_samples), 3.0, atol=1e-1) @@ -418,17 +420,18 @@ def test_meads(self): inference_algorithm = blackjax.ghmc(logposterior_fn, **parameters) chain_keys = jax.random.split(inference_key, num_chains) - _, states, _ = jax.vmap( + _, states = jax.vmap( lambda key, state: run_inference_algorithm( rng_key=key, initial_state=state, inference_algorithm=inference_algorithm, + transform=lambda state, info: state.position, num_steps=100, ) )(chain_keys, last_states) - coefs_samples = states.position["coefs"] - scale_samples = np.exp(states.position["log_scale"]) + coefs_samples = states["coefs"] + scale_samples = np.exp(states["log_scale"]) np.testing.assert_allclose(np.mean(scale_samples), 1.0, atol=1e-1) np.testing.assert_allclose(np.mean(coefs_samples), 3.0, atol=1e-1) @@ -465,17 +468,18 @@ def test_chees(self, jitter_generator): inference_algorithm = blackjax.dynamic_hmc(logposterior_fn, **parameters) chain_keys = jax.random.split(inference_key, num_chains) - _, states, _ = jax.vmap( + _, states = jax.vmap( lambda key, state: run_inference_algorithm( rng_key=key, initial_state=state, inference_algorithm=inference_algorithm, + transform=lambda state, info: state.position, num_steps=100, ) )(chain_keys, last_states) - coefs_samples = states.position["coefs"] - scale_samples = np.exp(states.position["log_scale"]) + coefs_samples = states["coefs"] + scale_samples = np.exp(states["log_scale"]) np.testing.assert_allclose(np.mean(scale_samples), 1.0, atol=1e-1) np.testing.assert_allclose(np.mean(coefs_samples), 3.0, atol=1e-1) @@ -494,15 +498,16 @@ def test_barker(self): barker = blackjax.barker_proposal(logposterior_fn, 1e-1) state = barker.init({"coefs": 1.0, "log_scale": 1.0}) - _, states, _ = run_inference_algorithm( + _, states = run_inference_algorithm( rng_key=inference_key, initial_state=state, inference_algorithm=barker, + transform=lambda state, info: state.position, num_steps=10_000, ) - coefs_samples = states.position["coefs"][3000:] - scale_samples = np.exp(states.position["log_scale"][3000:]) + coefs_samples = states["coefs"][3000:] + scale_samples = np.exp(states["log_scale"][3000:]) np.testing.assert_allclose(np.mean(scale_samples), 1.0, atol=1e-2) np.testing.assert_allclose(np.mean(coefs_samples), 3.0, atol=1e-2) @@ -679,19 +684,20 @@ def test_latent_gaussian(self): initial_state = inference_algorithm.init(jnp.zeros((1,))) - _, states, _ = self.variant( + _, states = self.variant( functools.partial( run_inference_algorithm, inference_algorithm=inference_algorithm, + transform=lambda state, info: state.position, num_steps=self.sampling_steps, ), )(rng_key=self.key, initial_state=initial_state) np.testing.assert_allclose( - np.var(states.position[self.burnin :]), 1 / (1 + 0.5), rtol=1e-2, atol=1e-2 + np.var(states[self.burnin :]), 1 / (1 + 0.5), rtol=1e-2, atol=1e-2 ) np.testing.assert_allclose( - np.mean(states.position[self.burnin :]), 2 / 3, rtol=1e-2, atol=1e-2 + np.mean(states[self.burnin :]), 2 / 3, rtol=1e-2, atol=1e-2 ) @@ -724,7 +730,7 @@ def univariate_normal_test_case( **kwargs, ): inference_key, orbit_key = jax.random.split(rng_key) - _, states, _ = self.variant( + _, (states, info) = self.variant( functools.partial( run_inference_algorithm, inference_algorithm=inference_algorithm, @@ -855,7 +861,7 @@ def postprocess_samples(states, key): 20_000, burnin, postprocess_samples, - transform=lambda x: (x.positions, x.weights), + transform=lambda state, info: ((state.positions, state.weights), info), ) @chex.all_variants(with_pmap=False) @@ -997,14 +1003,15 @@ def test_mcse(self, algorithm, parameters, is_mass_matrix_diagonal): functools.partial( run_inference_algorithm, inference_algorithm=inference_algorithm, + transform=lambda state, info: state.position, num_steps=2_000, ) ) - _, states, _ = inference_loop_multiple_chains( + _, states = inference_loop_multiple_chains( rng_key=multi_chain_sample_key, initial_state=initial_states ) - posterior_samples = states.position[:, -1000:] + posterior_samples = states[:, -1000:] posterior_delta = posterior_samples - true_loc posterior_variance = posterior_delta**2.0 posterior_correlation = jnp.prod(posterior_delta, axis=-1, keepdims=True) / ( diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py index c2295e7e2..2d108a48d 100644 --- a/tests/test_benchmarks.py +++ b/tests/test_benchmarks.py @@ -48,7 +48,7 @@ def run_regression(algorithm, **parameters): ) inference_algorithm = algorithm(logdensity_fn, **parameters) - _, states, _ = run_inference_algorithm( + _, (states, _) = run_inference_algorithm( rng_key=inference_key, initial_state=state, inference_algorithm=inference_algorithm, diff --git a/tests/test_util.py b/tests/test_util.py index 1f03498dd..78198f013 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -4,7 +4,7 @@ from absl.testing import absltest, parameterized import blackjax -from blackjax.util import run_inference_algorithm +from blackjax.util import run_inference_algorithm, store_only_expectation_values class RunInferenceAlgorithmTest(chex.TestCase): @@ -30,7 +30,7 @@ def check_compatible(self, initial_state, progress_bar): inference_algorithm=self.algorithm, num_steps=self.num_steps, progress_bar=progress_bar, - transform=lambda x: x.position, + transform=lambda state, info: state.position, ) def test_streaming(self): @@ -41,37 +41,49 @@ def logdensity_fn(x): 10, ) - init_key, run_key = jax.random.split(self.key, 2) - + init_key, state_key, run_key = jax.random.split(self.key, 3) initial_state = blackjax.mcmc.mclmc.init( - position=initial_position, logdensity_fn=logdensity_fn, rng_key=init_key + position=initial_position, logdensity_fn=logdensity_fn, rng_key=state_key + ) + L = 1.0 + step_size = 0.1 + num_steps = 4 + + sampling_alg = blackjax.mclmc( + logdensity_fn, + L=L, + step_size=step_size, ) - alg = blackjax.mclmc(logdensity_fn=logdensity_fn, L=0.5, step_size=0.1) + state_transform = lambda x: x.position - _, states, info = run_inference_algorithm( + _, samples = run_inference_algorithm( rng_key=run_key, initial_state=initial_state, - inference_algorithm=alg, - num_steps=50, - progress_bar=False, - expectation=lambda x: x, - transform=lambda x: x.position, - return_state_history=True, + inference_algorithm=sampling_alg, + num_steps=num_steps, + transform=lambda state, info: state_transform(state), + progress_bar=True, + ) + + print("average of steps (slow way):", samples.mean(axis=0)) + + memory_efficient_sampling_alg, transform = store_only_expectation_values( + sampling_algorithm=sampling_alg, state_transform=state_transform ) - average, _ = run_inference_algorithm( + initial_state = memory_efficient_sampling_alg.init(initial_state) + + final_state, trace_at_every_step = run_inference_algorithm( rng_key=run_key, initial_state=initial_state, - inference_algorithm=alg, - num_steps=50, - progress_bar=False, - expectation=lambda x: x, - transform=lambda x: x.position, - return_state_history=False, + inference_algorithm=memory_efficient_sampling_alg, + num_steps=num_steps, + transform=transform, + progress_bar=True, ) - assert jnp.allclose(states.mean(axis=0), average) + assert jnp.allclose(trace_at_every_step[0][-1], samples.mean(axis=0)) @parameterized.parameters([True, False]) def test_compatible_with_initial_pos(self, progress_bar): @@ -81,7 +93,7 @@ def test_compatible_with_initial_pos(self, progress_bar): inference_algorithm=self.algorithm, num_steps=self.num_steps, progress_bar=progress_bar, - transform=lambda x: x.position, + transform=lambda state, info: state.position, ) @parameterized.parameters([True, False])