-
Notifications
You must be signed in to change notification settings - Fork 241
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #579 from ryankelly-uiuc/lastminutetweaks
Last minute tweaks
- Loading branch information
Showing
8 changed files
with
208 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
##' Clean up a failed PDA run
##'
##' Wraps up an interrupted \code{pda.mcmc()} call using the temporary files it
##' left in \code{settings$outdir}: the \code{diag.pda*} diagnostic directory
##' (used to recover the ensemble id) and \code{pda.mcmc.txt} (the parameter
##' samples written so far). Outputs are written for whatever iterations were
##' completed.
##'
##' @title Clean up a failed PDA run
##' @param settings a PEcAn settings list, e.g. as returned by \code{read.settings}
##' @param params.id,param.names,prior.id,chain,iter,adapt,adj.min,ar.target,jvar,n.knot optional overrides; these are the identically named variables in pda.mcmc / pda.emulator and are forwarded unchanged to \code{pda.settings}
##'
##' @return The updated \code{settings$assim.batch} list
##'
##' @author Ryan Kelly
##' @export

# This is a quick stop-gap implementation that relies on temporary files to
# recover a failed pda.mcmc() call. It writes all outputs based on whatever runs
# were done, and returns the same updated settings list that would have been
# returned if the run had completed. So, recover like this:
#
#   settings <- read.settings(path/to/original/settings/file)
#   settings$assim.batch <- pda.mcmc.recover(settings)  # wrap up unfinished run
#   settings$assim.batch <- pda.mcmc(settings)          # start new pda
pda.mcmc.recover <- function(settings, params.id=NULL, param.names=NULL, prior.id=NULL, chain=NULL,
                     iter=NULL, adapt=NULL, adj.min=NULL, ar.target=NULL, jvar=NULL, n.knot=NULL) {

  # library() errors if coda is missing; require() would only return FALSE and
  # let the failure surface later in a less obvious place.
  library(coda)

  ## Handle settings
  settings <- pda.settings(
                settings=settings, params.id=params.id, param.names=param.names,
                prior.id=prior.id, chain=chain, iter=iter, adapt=adapt,
                adj.min=adj.min, ar.target=ar.target, jvar=jvar, n.knot=n.knot)

  ## Open database connection (best effort: fall back to no connection on failure)
  con <- NULL
  if (settings$database$bety$write) {
    con <- try(db.open(settings$database$bety), silent=TRUE)
    if (inherits(con, "try-error")) {
      con <- NULL
    }
  }
  # Guarantee the connection is released even if a later step errors
  if (!is.null(con)) {
    on.exit(db.close(con), add=TRUE)
  }

  ## Load priors
  prior       <- pda.load.priors(settings, con)$prior
  pname       <- rownames(prior)
  n.param.all <- nrow(prior)

  ## Get the planned final iteration of the interrupted run
  params.dummy <- pda.init.params(settings, con, pname, n.param.all)
  finish       <- params.dummy$finish

  ## Select parameters to constrain
  prior.ind <- which(rownames(prior) %in% settings$assim.batch$param.names)

  ## Get ensemble id from diagnostic plot dir
  diag.dirs <- dir(settings$outdir, "diag.pda")
  ens.ids   <- suppressWarnings(as.numeric(sub("diag.pda", "", diag.dirs)))
  ens.ids   <- ens.ids[!is.na(ens.ids)]
  if (length(ens.ids) == 0) {
    # max() of an empty vector would be -Inf and end up as the ensemble id
    stop("No 'diag.pda*' directory with a numeric ensemble id found in '",
         settings$outdir, "'; cannot recover the PDA run", call.=FALSE)
  }
  # format() rather than as.character(): the latter may switch to scientific
  # notation (e.g. "1e+05") for round ids
  settings$assim.batch$ensemble.id <- format(max(ens.ids), scientific=FALSE, trim=TRUE)

  ## Load up temp file to recreate params
  params.file <- file.path(settings$outdir, "pda.mcmc.txt")
  if (!file.exists(params.file)) {
    stop("Temporary MCMC file '", params.file, "' not found; cannot recover the PDA run",
         call.=FALSE)
  }
  params <- as.matrix(read.table(params.file))
  colnames(params) <- pname

  ## Update iters: planned final iteration minus the rows already on disk
  ## NOTE(review): presumably the number of iterations still outstanding -- confirm
  settings$assim.batch$iter <- finish - nrow(params)

  ## Save outputs to plots, files, and db
  settings <- pda.postprocess(settings, con, params, pname, prior, prior.ind)

  ## Output an updated settings list (db connection is closed by on.exit above)
  settings$assim.batch

} ## end pda.mcmc.recover
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
--- | ||
title: "PEcAn.assim.batch Vignette" | ||
author: "Ryan Kelly" | ||
date: "July 12, 2015" | ||
output: html_document | ||
--- | ||
|
||
## Install package from GitHub | ||
|
||
Only needs to be done the first time | ||
```{r, eval=FALSE} | ||
library(devtools) | ||
install_github("PecanProject/pecan",subdir="all") | ||
``` | ||
|
||
|
||
## Add <assim.batch> tags to pecan.xml | ||
|
||
The easiest way to run PEcAn's parameter data assimilation is to add an `<assim.batch>` block to pecan.xml, load the file with `read.settings`, and pass the resulting settings object to `pda.mcmc()`. Here is an example `<assim.batch>` block: | ||
|
||
``` | ||
<assim.batch> | ||
<iter>100</iter> | ||
<prior> | ||
<path>/path/to/(prior/post).distns.Rdata</path> | ||
</prior> | ||
<param.names> | ||
<param>Amax</param> | ||
</param.names> | ||
<inputs> | ||
<file> | ||
<id>1000000358</id> | ||
<format>Ameriflux.L4</format> | ||
<data.model> | ||
<likelihood>Laplace</likelihood> | ||
<variable.id>297</variable.id> | ||
</data.model> | ||
</file> | ||
</inputs> | ||
<jump> | ||
<ar.target>0.5</ar.target> | ||
<adapt>25</adapt> | ||
<jvar> | ||
<jvar>0.1</jvar> | ||
</jvar> | ||
<adj.min>0.1</adj.min> | ||
</jump> | ||
<diag.plot.iter>25</diag.plot.iter> | ||
</assim.batch> | ||
``` | ||
|
||
|
||
Here are details about the settings: | ||
|
||
|
||
* `<iter>` Specifies the number of MCMC iterations to run. If continuing a previous MCMC, this is the number of additional iterations, which will be added to the previous total. Defaults to 100 if missing. Ignored by pda.emulator(). | ||
* `<prior>` Identifies the prior to be used for PDA. Can be one of either: | ||
+ `<posterior.id>` A posterior ID in BETY specifying the posterior from a previous PEcAn analysis (e.g., meta-analysis or previous PDA) to be used as the prior for PDA. Defaults to the most recent relevant posterior in the database if omitted (and no `<path>` specified instead; see below). | ||
+ `<path>` As an alternative to using a posterior ID, can specify a file path to either a `prior.distns.Rdata` or `post.distns.Rdata` file generated from an earlier analysis. Conceptually, using a posterior distribution as the prior for PDA is preferred, as this allows the multiple analyses to work together to iteratively constrain parameters. In practice, previous analyses may have over-constrained parameters to ranges that do not actually optimize model outputs, so using a less informative prior for PDA might yield better results. | ||
* `<param.names>` The names of parameters to be constrained by assimilation, listed in individual `<param>` tags. These must be the standard names given by the trait dictionary, i.e.: | ||
|
||
``` | ||
data(trait.dictionary, package = "PEcAn.utils") | ||
trait.dictionary[,c("id", "figid")] | ||
``` | ||
|
||
* `<inputs>` Observation data to be compared to the model. In principle, can be one or more datasets, specified in a variety of ways. In practice, the code is only tested for a single dataset currently, and assumes the input is Ameriflux NEE. | ||
+ `<file>` Denotes a set of tags for a single input. Would be repeated for multiple datasets once that functionality is supported. | ||
+ `<id>` BETY input ID for looking up the input. Will be used preferentially over `<path>` or `<source>` (below). | ||
+ `<path>` File path to the input. Only used if no `<id>` is given. | ||
+ `<source>` A standardized source of input data (e.g., Ameriflux). Not implemented yet, but the idea would be similar to the met workflow: PEcAn would be able to use standard data sources automatically where available. Only used if no `<id>` or `<path>` is given. | ||
+ `<format>` An identifier to tell PEcAn how to handle the input. Currently allows either `Ameriflux.L2` or `Ameriflux.L4`. | ||
+ `<data.model>` Block for specifying the data model to be used with this dataset. Currently ignored since all data are assumed to be Ameriflux NEE and treated identically. | ||
+ `<likelihood>` Identifier for the likelihood to use. E.g., the Ameriflux data use a Laplacian likelihood. | ||
+ `<variable.id>` The BETY variable ID associated with this dataset. The idea is that specific preprocessing steps (e.g., estimating heteroskedastic error for tower NEE) would be associated with particular IDs. Could automate further by assigning default `<likelihood>` to variable.id values (allowing `<likelihood>` to be omitted from pecan.xml). And/or could add a separate tag `<preprocess>` that could specify a function to override any default associated with the variable ID. | ||
* `<jump>` | ||
+ `<ar.target>` Target acceptance rate for the adaptive jump algorithm. Defaults to 0.5 if missing. | ||
+ `<adapt>` Number of iterations between jump variance adaptations. Defaults to `floor(iter/10)` if missing. | ||
+ `<jvar>` Initial jump variances for proposing parameter values, listed in individual `<jvar>` tags (one for each `<param>` specified in `<param.names>`). Will be adjusted adaptively. Defaults to 1/10 the prior variance if missing. | ||
+ `<adj.min>` Minimum factor by which to reduce jump variance when adapting. Prevents jump variances from degenerating to 0. Defaults to 0.1 if missing. | ||
* `<diag.plot.iter>` Interval between saving diagnostic plots. Omit or set to NULL to skip them. | ||
* `<params.id>` (Not shown.) A BETY dbfile ID for an MCMC output from previous PDA. If specified, that file is loaded, the new MCMC starts from the last parameter values of the previous, and when finished the extended chain is saved as a new output. If missing, then MCMC starts fresh from prior median parameter values. Regardless, the MCMC parameter values of the PDA are saved to file and inserted in BETY, and the new dbfile ID is inserted into `<params.id>`. The `pda.mcmc()` function returns the `<assim.batch>` settings, which can then be saved. Then, calling a new round of PDA using these returned settings will automatically continue the previous MCMC. | ||
|
||
|
||
|
||
## Run PDA | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Recovery script for a failed PDA MCMC: finalizes the partial run with
# pda.mcmc.recover(), then optionally continues with a new pda.mcmc() call.
library(PEcAn.all)

# Insert settings file path here. Use the same one supplied to the original failed workflow
settings.file <- ""

# Fail early with an actionable message if the placeholder above was left empty
if (!nzchar(settings.file)) {
  stop("Set 'settings.file' to the settings file of the failed workflow before running this script",
       call. = FALSE)
}

# Read in settings
settings <- read.settings(settings.file)

# Clean and wrap up failed PDA MCMC
settings$assim.batch <- pda.mcmc.recover(settings)

# If desired, now proceed to complete the run
status.start("PDA")
settings$assim.batch <- pda.mcmc(settings)
status.end()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters