Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Stan 2.32] Laplace method and other changes #669

Merged
merged 14 commits into from
Jul 26, 2023
51 changes: 43 additions & 8 deletions cmdstanpy/cmdstan_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class Method(Enum):
OPTIMIZE = auto()
GENERATE_QUANTITIES = auto()
VARIATIONAL = auto()
LAPLACE = auto()

def __repr__(self) -> str:
return '<%s.%s>' % (self.__class__.__name__, self.name)
Expand Down Expand Up @@ -398,8 +399,8 @@ def __init__(
tol_rel_grad: Optional[float] = None,
tol_param: Optional[float] = None,
history_size: Optional[int] = None,
jacobian: bool = False,
) -> None:

self.algorithm = algorithm or ""
self.init_alpha = init_alpha
self.iter = iter
Expand All @@ -410,11 +411,10 @@ def __init__(
self.tol_rel_grad = tol_rel_grad
self.tol_param = tol_param
self.history_size = history_size
self.jacobian = jacobian
self.thin = None

def validate(
self, chains: Optional[int] = None # pylint: disable=unused-argument
) -> None:
def validate(self, _chains: Optional[int] = None) -> None:
"""
Check arguments correctness and consistency.
"""
Expand Down Expand Up @@ -511,8 +511,7 @@ def validate(
else:
raise ValueError('history_size must be type of int')

# pylint: disable=unused-argument
def compose(self, idx: int, cmd: List[str]) -> List[str]:
def compose(self, _idx: int, cmd: List[str]) -> List[str]:
"""compose command string for CmdStan for non-default arg values."""
cmd.append('method=optimize')
if self.algorithm:
Expand All @@ -535,7 +534,37 @@ def compose(self, idx: int, cmd: List[str]) -> List[str]:
cmd.append('iter={}'.format(self.iter))
if self.save_iterations:
cmd.append('save_iterations=1')
if self.jacobian:
cmd.append("jacobian=1")
return cmd


class LaplaceArgs:
"""Arguments needed for laplace method."""

def __init__(
self, mode: str, draws: Optional[int] = None, jacobian: bool = True
) -> None:
self.mode = mode
self.jacobian = jacobian
self.draws = draws

def validate(self, _chains: Optional[int] = None) -> None:
"""Check arguments correctness and consistency."""
if not os.path.exists(self.mode):
raise ValueError(f'Invalid path for mode file: {self.mode}')
if self.draws is not None:
if not isinstance(self.draws, (int, np.integer)) or self.draws <= 0:
raise ValueError('draws must be a positive integer')

def compose(self, _idx: int, cmd: List[str]) -> List[str]:
"""compose command string for CmdStan for non-default arg values."""
cmd.append('method=laplace')
cmd.append(f'mode={self.mode}')
if self.draws:
cmd.append(f'draws={self.draws}')
if not self.jacobian:
cmd.append("jacobian=0")
return cmd


Expand Down Expand Up @@ -721,7 +750,11 @@ def __init__(
model_exe: OptionalPath,
chain_ids: Optional[List[int]],
method_args: Union[
SamplerArgs, OptimizeArgs, GenerateQuantitiesArgs, VariationalArgs
SamplerArgs,
OptimizeArgs,
GenerateQuantitiesArgs,
VariationalArgs,
LaplaceArgs,
],
data: Union[Mapping[str, Any], str, None] = None,
seed: Union[int, List[int], None] = None,
Expand Down Expand Up @@ -753,6 +786,8 @@ def __init__(
self.method = Method.GENERATE_QUANTITIES
elif isinstance(method_args, VariationalArgs):
self.method = Method.VARIATIONAL
elif isinstance(method_args, LaplaceArgs):
self.method = Method.LAPLACE
self.method_args.validate(len(chain_ids) if chain_ids else None)
self.validate()

Expand Down Expand Up @@ -913,7 +948,7 @@ def compose_command(
*,
diagnostic_file: Optional[str] = None,
profile_file: Optional[str] = None,
num_chains: Optional[int] = None
num_chains: Optional[int] = None,
) -> List[str]:
"""
Compose CmdStan command for non-default arguments.
Expand Down
123 changes: 122 additions & 1 deletion cmdstanpy/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,16 @@
from cmdstanpy.cmdstan_args import (
CmdStanArgs,
GenerateQuantitiesArgs,
LaplaceArgs,
Method,
OptimizeArgs,
SamplerArgs,
VariationalArgs,
)
from cmdstanpy.compiler_opts import CompilerOptions
from cmdstanpy.stanfit import (
CmdStanGQ,
CmdStanLaplace,
CmdStanMCMC,
CmdStanMLE,
CmdStanVB,
Expand Down Expand Up @@ -393,7 +396,7 @@ def format(
+ '.bak-'
+ datetime.now().strftime("%Y%m%d%H%M%S"),
)
with (open(self.stan_file, 'w')) as file_handle:
with open(self.stan_file, 'w') as file_handle:
file_handle.write(result)
else:
print(result)
Expand Down Expand Up @@ -589,6 +592,8 @@ def optimize(
refresh: Optional[int] = None,
time_fmt: str = "%Y%m%d%H%M%S",
timeout: Optional[float] = None,
jacobian: bool = False,
# would be nice to move this further up, but that's a breaking change
) -> CmdStanMLE:
"""
Run the specified CmdStan optimize algorithm to produce a
Expand Down Expand Up @@ -690,6 +695,11 @@ def optimize(

:param timeout: Duration at which optimization times out in seconds.

:param jacobian: Whether or not to use the Jacobian adjustment for
constrained variables in optimization. By default this is false,
meaning optimization yields the Maximum Likehood Estimate (MLE).
Setting it to true yields the Maximum A Posteriori Estimate (MAP).

:return: CmdStanMLE object
"""
optimize_args = OptimizeArgs(
Expand All @@ -703,8 +713,15 @@ def optimize(
history_size=history_size,
iter=iter,
save_iterations=save_iterations,
jacobian=jacobian,
)

if jacobian and cmdstan_version_before(2, 32, self.exe_info()):
raise ValueError(
"Jacobian adjustment for optimization is only supported "
"in CmdStan 2.32 and above."
)

with MaybeDictToFilePath(data, inits) as (_data, _inits):
args = CmdStanArgs(
self._name,
Expand Down Expand Up @@ -1606,6 +1623,8 @@ def log_prob(
self,
params: Union[Dict[str, Any], str, os.PathLike],
data: Union[Mapping[str, Any], str, os.PathLike, None] = None,
*,
jacobian: bool = True,
) -> pd.DataFrame:
"""
Calculate the log probability and gradient at the given parameter
Expand All @@ -1626,6 +1645,9 @@ def log_prob(
either as a dictionary with entries matching the data variables,
or as the path of a data file in JSON or Rdump format.

:param jacobian: Whether or not to enable the Jacobian adjustment
for constrained parameters. Defaults to ``True``.

:return: A pandas.DataFrame containing columns "lp__" and additional
columns for the gradient values. These gradients will be for the
unconstrained parameters of the model.
Expand All @@ -1641,6 +1663,7 @@ def log_prob(
str(self.exe_file),
"log_prob",
f"constrained_params={_params}",
f"jacobian={int(jacobian)}",
]
if _data is not None:
cmd += ["data", f"file={_data}"]
Expand Down Expand Up @@ -1669,6 +1692,104 @@ def log_prob(
result = pd.read_csv(output, comment="#")
return result

def laplace_sample(
self,
data: Union[Mapping[str, Any], str, os.PathLike, None] = None,
mode: Union[CmdStanMLE, str, os.PathLike, None] = None,
draws: Optional[int] = None,
*,
jacobian: bool = True, # NB: Different than optimize!
seed: Optional[int] = None,
output_dir: OptionalPath = None,
sig_figs: Optional[int] = None,
save_profile: bool = False,
show_console: bool = False,
refresh: Optional[int] = None,
time_fmt: str = "%Y%m%d%H%M%S",
timeout: Optional[float] = None,
opt_args: Optional[Dict[str, Any]] = None,
) -> CmdStanLaplace:
if cmdstan_version_before(2, 32, self.exe_info()):
raise ValueError(
"Method 'laplace_sample' not available for CmdStan versions "
"before 2.32"
)
if opt_args is not None and mode is not None:
raise ValueError(
"Cannot specify both 'opt_args' and 'mode' arguments"
)
if mode is None:
optimize_args = {
"seed": seed,
"sig_figs": sig_figs,
"jacobian": jacobian,
"save_profile": save_profile,
"show_console": show_console,
"refresh": refresh,
"time_fmt": time_fmt,
"timeout": timeout,
"output_dir": output_dir,
}
optimize_args.update(opt_args or {})
optimize_args['time_fmt'] = 'opt-' + time_fmt
try:
cmdstan_mode: CmdStanMLE = self.optimize(
data=data,
**optimize_args, # type: ignore
)
except Exception as e:
raise RuntimeError(
"Failed to run optimizer on model. "
"Consider supplying a mode or additional optimizer args"
) from e
elif not isinstance(mode, CmdStanMLE):
cmdstan_mode = from_csv(mode) # type: ignore # we check below
else:
cmdstan_mode = mode

if cmdstan_mode.runset.method != Method.OPTIMIZE:
raise ValueError(
"Mode must be a CmdStanMLE or a path to an optimize CSV"
)

mode_jacobian = (
cmdstan_mode.runset._args.method_args.jacobian # type: ignore
)
if mode_jacobian != jacobian:
raise ValueError(
"Jacobian argument to optimize and laplace must match!\n"
f"Laplace was run with jacobian={jacobian},\n"
f"but optimize was run with jacobian={mode_jacobian}"
)

laplace_args = LaplaceArgs(
cmdstan_mode.runset.csv_files[0], draws, jacobian
)

with MaybeDictToFilePath(data) as (_data,):
args = CmdStanArgs(
self._name,
self._exe_file,
chain_ids=None,
data=_data,
seed=seed,
output_dir=output_dir,
sig_figs=sig_figs,
save_profile=save_profile,
method_args=laplace_args,
refresh=refresh,
)
dummy_chain_id = 0
runset = RunSet(args=args, chains=1, time_fmt=time_fmt)
self._run_cmdstan(
runset,
dummy_chain_id,
show_console=show_console,
timeout=timeout,
)
runset.raise_for_timeouts()
return CmdStanLaplace(runset, cmdstan_mode)

def _run_cmdstan(
self,
runset: RunSet,
Expand Down
5 changes: 4 additions & 1 deletion cmdstanpy/stanfit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from cmdstanpy.utils import check_sampler_csv, get_logger, scan_config

from .gq import CmdStanGQ
from .laplace import CmdStanLaplace
from .mcmc import CmdStanMCMC
from .metadata import InferenceMetadata
from .mle import CmdStanMLE
Expand All @@ -26,6 +27,7 @@
"CmdStanMLE",
"CmdStanVB",
"CmdStanGQ",
"CmdStanLaplace",
]


Expand Down Expand Up @@ -143,7 +145,7 @@ def from_csv(
save_warmup=config_dict['save_warmup'],
fixed_param=True,
)
except (ValueError) as e:
except ValueError as e:
raise ValueError(
'Invalid or corrupt Stan CSV output file, '
) from e
Expand All @@ -170,6 +172,7 @@ def from_csv(
optimize_args = OptimizeArgs(
algorithm=config_dict['algorithm'],
save_iterations=config_dict['save_iterations'],
jacobian=config_dict.get('jacobian', 0),
)
cmdstan_args = CmdStanArgs(
model_name=config_dict['model'],
Expand Down
22 changes: 9 additions & 13 deletions cmdstanpy/stanfit/gq.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,12 @@

from cmdstanpy.cmdstan_args import Method
from cmdstanpy.utils import (
BaseType,
build_xarray_data,
flatten_chains,
get_logger,
scan_generated_quantities_csv,
)
from cmdstanpy.utils.data_munging import extract_reshape

from .mcmc import CmdStanMCMC
from .metadata import InferenceMetadata
Expand Down Expand Up @@ -586,21 +586,17 @@ def stan_variable(

# is gq variable
self._assemble_generated_quantities()

draw1, num_draws = self._draws_start(inc_warmup)
dims = [num_draws * self.chains]
dims = (num_draws * self.chains,)
col_idxs = self._metadata.stan_vars_cols[var]
if len(col_idxs) > 0:
dims.extend(self._metadata.stan_vars_dims[var])
draws = self._draws[draw1:, :, col_idxs]

if self._metadata.stan_vars_types[var] == BaseType.COMPLEX:
draws = draws[..., ::2] + 1j * draws[..., 1::2]
dims = dims[:-1]

draws = draws.reshape(dims, order='F')

return draws
return extract_reshape(
dims=dims + self._metadata.stan_vars_dims[var],
col_idxs=col_idxs,
var_type=self._metadata.stan_vars_types[var],
start_row=draw1,
draws_in=self._draws,
)

def stan_variables(self, inc_warmup: bool = False) -> Dict[str, np.ndarray]:
"""
Expand Down
Loading