From 51c3d1bacffc4d5de3b0a73fa6e09df69226dfcc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Louf?=
Date: Wed, 12 Oct 2022 15:11:42 +0200
Subject: [PATCH] Let NUTS initialize its parameters

---
 aemcmc/basic.py     | 11 +++--------
 aemcmc/nuts.py      | 24 +++++++++++++++---------
 tests/test_basic.py | 10 +++++++++-
 tests/test_nuts.py  | 20 +++++++++++++-------
 4 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/aemcmc/basic.py b/aemcmc/basic.py
index 4db7958..d0aaaae 100644
--- a/aemcmc/basic.py
+++ b/aemcmc/basic.py
@@ -1,6 +1,5 @@
 from typing import Dict, Tuple
 
-import aesara.tensor as at
 from aesara.graph.basic import Variable
 from aesara.graph.fg import FunctionGraph
 from aesara.tensor.random.utils import RandomStream
@@ -113,20 +112,16 @@ def construct_sampler(
     # apply the transforms on the probabilistic graph, in which case we would
     # only need to return the transformed graph.
     if rvs_without_samplers:
-        inverse_mass_matrix = at.vector("inverse_mass_matrix")
-        step_size = at.scalar("step_size")
-        parameters["step_size"] = step_size
-        parameters["inverse_mass_matrix"] = inverse_mass_matrix
-
         # We condition one the updated values of the other rvs
         rvs_to_values = {rv: rvs_to_init_vals[rv] for rv in rvs_without_samplers}
         rvs_to_values.update(posterior_sample_steps)
 
-        nuts_sample_steps, updates = construct_nuts_sampler(
-            srng, rvs_without_samplers, rvs_to_values, inverse_mass_matrix, step_size
+        nuts_sample_steps, updates, nuts_parameters = construct_nuts_sampler(
+            srng, rvs_without_samplers, rvs_to_values
         )
         posterior_sample_steps.update(nuts_sample_steps)
         posterior_updates.update(updates)
+        parameters.update(nuts_parameters)
 
     return (
         {
diff --git a/aemcmc/nuts.py b/aemcmc/nuts.py
index 4ea3a3f..546b1d8 100644
--- a/aemcmc/nuts.py
+++ b/aemcmc/nuts.py
@@ -1,6 +1,7 @@
 from typing import Callable, Dict, Tuple
 
 import aesara
+import aesara.tensor as at
 from aehmc import nuts as aehmc_nuts
 from aehmc.utils import RaveledParamsMap
 from aeppl import joint_logprob
@@ -9,6 +10,7 @@
     TransformValuesRewrite,
     _default_transformed_rv,
 )
+from aesara import config
 from aesara.tensor.random import RandomStream
 from aesara.tensor.random.op import RandomVariable
 from aesara.tensor.var import TensorVariable
@@ -31,9 +33,7 @@ def construct_nuts_sampler(
     srng: RandomStream,
     to_sample_rvs,  # RVs to sample
     rvs_to_values,  # All RVs to values
-    inverse_mass_matrix: TensorVariable,
-    step_size: TensorVariable,
-) -> Tuple[Dict[RandomVariable, TensorVariable], Dict]:
+) -> Tuple[Dict[RandomVariable, TensorVariable], Dict, Dict[str, TensorVariable]]:
     """Build a NUTS kernel and the initial state.
 
     This function currently assumes that we will update the value of all of the
@@ -47,11 +47,6 @@ def construct_nuts_sampler(
     rvs_to_values
         A dictionary that maps all random variables in the model (including
         those not sampled with NUTS) to their value variable.
-    step_size
-        The step size used in the symplectic integrator.
-    inverse_mass_matrix
-        One or two-dimensional array used as the inverse mass matrix that
-        defines the euclidean metric.
 
     Returns
     -------
@@ -104,6 +99,13 @@ def logprob_fn(q):
     initial_q = rp_map.ravel_params(tuple(transformed_vvs.values()))
     initial_state = aehmc_nuts.new_state(initial_q, logprob_fn)
 
+    # Initialize the parameter values
+    step_size = at.scalar("step_size", dtype=config.floatX)
+    if initial_q.ndim == 0:
+        inverse_mass_matrix = at.scalar("inverse_mass_matrix", dtype=config.floatX)
+    else:
+        inverse_mass_matrix = at.vector("inverse_mass_matrix")
+
     # TODO: Does that lead to wasteful computation? Or is it handled by Aesara?
     (new_q, *_), updates = nuts_kernel(*initial_state, step_size, inverse_mass_matrix)
     transformed_params = rp_map.unravel_params(new_q)
@@ -113,7 +115,11 @@ def logprob_fn(q):
         if rv in to_sample_rvs
     }
 
-    return params, updates
+    return (
+        params,
+        updates,
+        {"step_size": step_size, "inverse_mass_matrix": inverse_mass_matrix},
+    )
 
 
 def get_transform(rv: TensorVariable):
diff --git a/tests/test_basic.py b/tests/test_basic.py
index 3ba9e89..40e20f5 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -74,7 +74,15 @@ def test_nuts_sampler_single_variable(size):
     tau_post_step = sample_steps[tau_rv]
     assert y_vv in graph_inputs([tau_post_step])
 
-    # TODO: Test building the sample step function
+
+    inputs = [
+        initial_values[tau_rv],
+        y_vv,
+        parameters["step_size"],
+        parameters["inverse_mass_matrix"],
+    ]
+    output = tau_post_step
+    _ = aesara.function(inputs, output)
 
 
 def test_nuts_with_closed_form():
diff --git a/tests/test_nuts.py b/tests/test_nuts.py
index 175187f..d10e313 100644
--- a/tests/test_nuts.py
+++ b/tests/test_nuts.py
@@ -1,5 +1,4 @@
 import aesara
-import aesara.tensor as at
 from aesara.tensor.random import RandomStream
 
 from aemcmc.nuts import construct_nuts_sampler
@@ -20,16 +19,23 @@ def test_nuts():
     to_sample_rvs = [mu_rv, sigma_rv]
     rvs_to_values = {mu_rv: mu_vv, sigma_rv: sigma_vv, Y_rv: y_vv}
 
-    inverse_mass_matrix = at.as_tensor([1.0, 1.0])
-    step_size = at.as_tensor(0.1)
-    state_at, step_fn = construct_nuts_sampler(
-        srng, to_sample_rvs, rvs_to_values, inverse_mass_matrix, step_size
+    state_at, step_fn, parameters = construct_nuts_sampler(
+        srng, to_sample_rvs, rvs_to_values
     )
 
     # Make sure that the state is properly initialized
    sample_steps = [state_at[rv] for rv in to_sample_rvs]
-    state_fn = aesara.function((mu_vv, sigma_vv, y_vv), sample_steps)
-    new_state = state_fn(1.0, 1.0, 1.0)
+    state_fn = aesara.function(
+        (
+            mu_vv,
+            sigma_vv,
+            y_vv,
+            parameters["step_size"],
+            parameters["inverse_mass_matrix"],
+        ),
+        sample_steps,
+    )
+    new_state = state_fn(1.0, 1.0, 1.0, 0.01, [1.0, 1.0])
 
     # Make sure that the state has advanced
     assert new_state[0] != 1.0
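---

A note for reviewers: below is a minimal sketch of how the reworked interface is driven from user code, mirroring tests/test_nuts.py above. The toy model and the concrete numbers are assumptions for illustration; only the `construct_nuts_sampler` signature, the returned `parameters` dict, and its "step_size"/"inverse_mass_matrix" keys come from this patch.

    import aesara
    from aesara.tensor.random import RandomStream

    from aemcmc.nuts import construct_nuts_sampler

    srng = RandomStream(seed=0)

    # A toy model (assumed for illustration): two scalar parameters
    # sampled with NUTS, one observed variable.
    mu_rv = srng.normal(0, 1, name="mu")
    sigma_rv = srng.halfnormal(0.0, 1.0, name="sigma")
    Y_rv = srng.normal(mu_rv, sigma_rv, name="Y")

    # Value variables for every random variable in the model.
    mu_vv = mu_rv.clone()
    sigma_vv = sigma_rv.clone()
    y_vv = Y_rv.clone()

    sample_steps, updates, parameters = construct_nuts_sampler(
        srng, [mu_rv, sigma_rv], {mu_rv: mu_vv, sigma_rv: sigma_vv, Y_rv: y_vv}
    )

    # The NUTS parameters are now symbolic inputs created inside the
    # sampler, so they are fed at call time instead of being constructed
    # by the caller before building the sampler.
    step_fn = aesara.function(
        (
            mu_vv,
            sigma_vv,
            y_vv,
            parameters["step_size"],
            parameters["inverse_mass_matrix"],
        ),
        [sample_steps[mu_rv], sample_steps[sigma_rv]],
        updates=updates,
    )

    # The two scalar parameters are raveled into a single position vector,
    # hence the two-element inverse mass matrix.
    next_mu, next_sigma = step_fn(1.0, 1.0, 0.5, 0.01, [1.0, 1.0])

Moving the creation of `step_size` and `inverse_mass_matrix` inside `construct_nuts_sampler` also lets the sampler pick the right variable kind: only after raveling does it know whether the position `initial_q` is a scalar or a vector, so the inverse mass matrix is created with a matching number of dimensions and with `config.floatX` dtype, which the caller could not know up front.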