-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathBiomass_validationKNN.Rmd
806 lines (632 loc) · 35.8 KB
/
Biomass_validationKNN.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
---
title: "LandR _Biomass_validationKNN_ Manual"
date: "Last updated: `r Sys.Date()`"
output:
bookdown::html_document2:
toc: true
toc_float: true
toc_depth: 4
theme: sandstone
number_sections: false
df_print: paged
keep_md: yes
editor_options:
chunk_output_type: console
markdown:
wrap: 80
bibliography: citations/references_Biomass_validationKNN.bib
citation-style: citations/ecology-letters.csl
link-citations: true
always_allow_html: true
---
<!-- the following are text references used in captions for LaTeX compatibility -->
(ref:Biomass-validationKNN) *Biomass_validationKNN*
(ref:percent) %
```{r setup-Biomass-validationKNN, include = FALSE}
## set cache.rebuild = TRUE whenever there are changes to the module code/metadata
knitr::opts_chunk$set(echo = TRUE, eval = FALSE, warning = FALSE,
cache = TRUE, cache.rebuild = FALSE, results = "hold", dpi = 300)
## get citation style
if (!file.exists("citations/ecology-letters.csl")) {
dir.create("citations", showWarnings = FALSE)
download.file("https://www.zotero.org/styles/ecology-letters?source=1", destfile = "citations/ecology-letters.csl")
}
library(Require)
Require(c("SpaDES.core", "git2r", "dplyr", "data.table", "kableExtra",
"pander", "PredictiveEcology/SpaDES.docs"),
upgrade = FALSE, install = FALSE)
```
```{r badgeFigs-Biomass-validationKNN, include = FALSE, eval = TRUE, cache = FALSE}
dir.create("figures", showWarnings = FALSE)
if (!file.exists("figures/markdownBadge.png")) {
download.file(url = "https://img.shields.io/badge/Made%20with-Markdown-1f425f.png",
destfile = "figures/markdownBadge.png",
mode = 'wb')
}
if (!file.exists("figures/issuesBadge.png")) {
download.file(url = "https://img.shields.io/badge/Get%20help-Report%20issues-%3CCOLOR%3E.png",
destfile = "figures/issuesBadge.png",
mode = 'wb')
}
modversion <- paste(unlist(moduleMetadata(module = 'Biomass_validationKNN', path = '..')$version), collapse = ".")
download.file(url = paste0("https://img.shields.io/badge/Biomass_validationKNN-", paste0("v.%20", modversion),
"-%3CCOLOR%3E.png"),
destfile = "figures/moduleVersionBadge.png",
mode = 'wb')
```
``` {r moduleBadge-Biomass-validationKNN, echo = FALSE, eval = TRUE, cache = FALSE, results = "asis"}
## try to automatically get the commit URL and the path to the badge image
modulePath <- if (grepl("Biomass_validationKNN$", normPath("."))) {
normPath(".")
} else {
modulePath <- grep("Biomass_validationKNN$",
list.files(pattern = "Biomass_validationKNN", recursive = TRUE, include.dirs = TRUE),
value = TRUE)
modulePath <- grep("docs/", modulePath, value = TRUE, invert = TRUE) ## exclude "copied" modules dirs for bookdown
normPath(modulePath)
}
badgeURL <- if (!is_detached(modulePath)) {
commitSHA <- sha(revparse_single(modulePath, "HEAD"))
repo <- sub("[.]git$", "/commit/",
branch_remote_url(branch_get_upstream(repository_head(modulePath))))
paste0(repo, commitSHA)
} else {
## if detached point to the first remote
remote_url(modulePath)[1]
}
badgeURL <- sub(".*github[.]com:", "https://github.com/", badgeURL)
badgePath <- normPath("figures/moduleVersionBadge.png")
## make string of markdown code to be executed as-is
cat(paste0("[![module-version-Badge](", badgePath, ")](", badgeURL, ")"))
```
``` {r issuesBadge-Biomass-validationKNN, echo = FALSE, eval = TRUE, cache = FALSE, results = "asis"}
badgeURL <- "https://github.com/PredictiveEcology/Biomass_validationKNN/issues"
badgePath <- normPath("figures/issuesBadge.png")
## make string of markdown code to be executed as-is
cat(paste0("[![Issues-badge](", badgePath, ")](", badgeURL,")"))
```
<!-- if knitting to pdf remember to add the pandoc_args: ["--extract-media", "."] option to yml in order to get the badge images -->
#### Authors:
`r paste(as.character(moduleMetadata(module = 'Biomass_validationKNN', path = '..')$authors), sep = ', ')`
<!-- ideally separate authors with new lines, '\n' not working -->
**This documentation is work in progress. Potential discrepancies and omissions
may exist for the time being. If you find any, contact us using the "Get help"
link above.**
## Module Overview
### Quick links
- [General functioning](#bvalid-general-functioning)
- [List of input objects](#bvalid-inputs-list)
- [List of parameters](#bvalid-params-list)
- [List of outputs](#bvalid-outputs-list)
- [Simulation flow and module events](#bvalid-sim-flow)
### Summary
LandR *Biomass_validationKNN* (hereafter *Biomass_validationKNN*) provides an
approach to validate outputs from LandR Biomass (i.e., *Biomass_core* linked
with other modules or not) simulations, using publicly available data for
Canadian forests. It produces both a visual and statistical validation of
*Biomass_core* outputs related to species abundance and presence/absence in the
landscape. To do so, it downloads and prepares all necessary data (observed and
simulated), calculates validation statistics and produces/saves validation
plots.
### Links to other modules {#bvalid-links-modules}
*Biomass_validationKNN* is intended to be used with *Biomass_core* and any other
modules that link to it and affect cohort biomass (e.g., disturbance modules and
calibration modules may both affect resulting biomass). See
[here](https://rpubs.com/PredictiveEcology/LandR_Module_Ecosystem) for all
available modules in the LandR ecosystem and select *Biomass_validationKNN* from
the drop-down menu to see potential linkages. By default, disturbed pixels are
excluded from the validation, but the user can bypass this option. The
following is a list of the modules commonly validated with
*Biomass_validationKNN*.
- [*Biomass_core*](https://github.com/PredictiveEcology/Biomass_core): core
forest dynamics simulation module. Used downstream from
*Biomass_borealDataPrep*;
**Data and calibration modules:**
- [*Biomass_speciesData*](https://github.com/PredictiveEcology/Biomass_speciesData):
grabs and merges several sources of species cover data, making species
percent cover ((ref:percent) cover) layers used by other LandR Biomass
modules. Default source data spans the entire Canadian territory;
- [*Biomass_borealDataPrep*](https://github.com/PredictiveEcology/Biomass_borealDataPrep):
prepares all parameters and inputs (including initial landscape conditions)
that *Biomass_core* needs to run a realistic simulation. Default
values/inputs produced are relevant for boreal forests of Western Canada;
- [*Biomass_speciesParameters*](https://github.com/PredictiveEcology/Biomass_speciesParameters):
calibrates four-species level traits using permanent sample plot data (i.e.,
repeated tree biomass measurements) across Western Canada.
**Disturbance-related modules:**
- [*Biomass_regeneration*](https://github.com/PredictiveEcology/Biomass_regeneration):
simulates cohort biomass responses to stand-replacing fires (as in the LANDIS-II
Biomass Succession Extension v.3.2.1),
including cohort mortality and regeneration through resprouting and/or
serotiny;
- [*Biomass_regenerationPM*](https://github.com/PredictiveEcology/Biomass_regenerationPM):
like *Biomass_regeneration*, but allowing partial mortality. Based on the
LANDIS-II Dynamic Fuels & Fire System extension [@SturtevantEtAl2018];
- *fireSense*: climate- and land-cover-sensitive fire model simulating fire
ignition, escape and spread processes as a function of climate and
land-cover. Includes built-in parameterisation of these processes using
climate, land-cover, fire occurrence and fire perimeter data. Requires using
*Biomass_regeneration* or *Biomass_regenerationPM*. See modules prefixed
"*fireSense\_*" at <https://github.com/PredictiveEcology/>;
- [*LandMine*](https://github.com/PredictiveEcology/LandMine): wildfire
ignition and cover-sensitive wildfire spread model based on a fire return
interval input. Requires using *Biomass_regeneration* or
*Biomass_regenerationPM*;
- [*scfm*](https://github.com/PredictiveEcology/scfm): spatially explicit fire
spread module parameterised and modelled as a stochastic three-part process
of ignition, escape, and spread. Requires using *Biomass_regeneration* or
*Biomass_regenerationPM*.
## Module manual
### General functioning {#bvalid-general-functioning}
*Biomass_validationKNN* compares simulated outputs of two years (across
replicates), with corresponding years of observed data. It was designed to
compare the observed data for years 2001 (start point for the simulation) and
2011 (i.e., after 10 years of simulation) of the kNN forest layers of the
Canadian National Forest Inventory -- these are currently the only available
FAIR datasets [*sensu* @StallEtAl2019] on stand biomass and species
(ref:percent) cover changes across Canada. However, the user can supply other
sources of observed data, as long as they have an identical format.
The validation is done both visually (using barplots and boxplots) and using two
statistics: mean absolute deviation of simulated biomass (per species) and the
sum of negative log-likelihoods (SNLL) of predictions with respect to observed
data for species biomass, species presences/absences and changes in biomass
($\Delta$B) -- the later is still under development.
This module assumes that the simulation data preparation was carried out by
*Biomass_borealDataPrep*, and so, to ensure that the comparison and the
simulated datasets are built with the same assumptions, the data treatment steps
in *Biomass_borealDataPrep* are repeated here.
The module may also exclude disturbed pixels identified in `rstLCCChange` raster
layer and in the fire perimeter data (`firePerimeters` layer). If this is not
intended, the user can provide a `rstLCCChange` with `NA`'s only and/or an empty
`firePerimeters` `sf` object.
*Biomass_validationKNN* then compares simulated species biomass, presences,
dominance, and changes in biomass against observed data available for the
starting conditions (2011 by default) and for second time point (e.g. 2011, or
after 10 years of simulation). To do so, for each year and replicate, and for
both the simulated and observed data, the module calculates:
- species relative abundances at the pixel- and landscape-level (across all
pixels);
- species presences and dominance at the landscape level;
- changes in species biomass ($\Delta$B) at the pixel- and landscape-level for
both the simulated and observed data. Biomass units respect those used in
*Biomass_core* ($g/m^2$).
Pixel-level relative abundances are calculated as the species biomass (summed
across cohorts) divided by the total pixel biomass (summed across cohorts and
species), while landscape-wide relative abundances are calculated as the sum of
a species biomass across all pixels divided by the sum of total biomass across
all pixels.
Species presences are calculated as the number of pixels where a given species
is present, and species dominance is calculated as the number of pixels where a
species has the highest relative biomass in a given pixel. Pixels where two or
more species share the highest biomass value are classified as 'mixed forest',
and pixels without any biomass are classified as 'no veg.'.
Finally, $\Delta$B is calculated per species as the final biomass (e.g., year
2011) minus the initial biomass (e.g., year 2001), either at the pixel- or
landscape-level.
### Validation approaches {#bvalid-valid}
#### Visual validation {#bvalid-valid-visual}
The module plots the above metrics as barplots showing landscape-level values
(averaged across replicates for the simulated data) or boxplots showing
pixel-level values. Plotting can be live and/or in the form of exported images
(or both turned off completely).
#### Mean absolute deviation {#bvalid-valid-MAD}
Mean absolute deviance (MAD) values are calculated on landscape- and pixel-level
species relative abundances and $\Delta$B, and landscape-level species presences
and dominance. MAD values are calculated per replicate and year, except $\Delta$B
MAD values, which integrate across years. Output tables with MAD values are exported as `landscapeMAD` and
`pixelMAD`, and the module also produces visual inspection of these values as
dot-and-whisker plots.
#### Sum of negative log-likelihood (SNLL) {#bvalid-valid-SNLL}
To provide a measure of overall goodness of fit of the model set-up that
gave rise to the outputs, this is the set of starting conditions, parameters and
simulation mechanisms that generated predictions (which includes the LandR
modules used), *Biomass_validationKNN* estimates sum of negative log-likelihoods
(SNLL) of species presences (at the landscape-level), simulated species biomasses,
and $\Delta$B (the latter two at the landscape and pixel levels),
with respect to their observed counterparts.
More precisely, let $\ell$ be the log-likelihood function denoting the
probability of observing $x$ of $X$ (a random variable following a continuous
probability distribution $f(x)$), given a parameter $\theta$:
```{=tex}
\begin{equation}
\ell(\theta \mid x) = f(x)
(\#eq:loglik)
\end{equation}
```
In our case, $\theta$ is equivalent to the model's starting conditions and
structure, $X$ is the observed data with $x$ being the simulated values, and
$f(x)$ the continuous probability distribution of $X$. For each variable that we
wanted to evaluate and for each simulation replicate, Equation \@ref(eq:loglik)
is applied to calculate the SNLL estimated for each value of $x$ at the pixel or
landscape-level, $i$:
```{=tex}
\begin{equation}
-\sum_{i = 1}^{N} \ell(\theta \mid x_{i})
(\#eq:negsumloglik)
\end{equation}
```
where $N$ is equal to total number of pixels. At the landscape scale $N = 1$.
For species presences and species biomass, we draw the probability of observing
$x_{i}$ (a vector of species presences/biomasses in pixel/landscape $i$) from a
multinomial density distribution
($f(x_{i}) = {\sf Multi}(n_{i}, \mathrm{p}_{i})$), where
$n_{i} = \sum_{j = 1}^{K} X_{i,j}$ ($X$ being the observed values of biomass of
$j = 1, ..., K$ species in a pixel/landscape $i$) and $\mathrm{p_{i}}$ is the
vector of simulated values $x_{i,j}$.
**The computation of SNLL for $\Delta$B is still under development**. The
following approach is currently implemented, but presents issues:
For $\Delta$B, we draw the probability of observing $x_{i,j}$ (the simulated
$\Delta$B of $j = 1, ..., K$ species in a pixel/landscape $i$) from a
multivariate Gaussian distribution,
$f(x_{i}) = \mathcal {N}(\mu_{i}, \mathrm{M}_{i})$, where $\mu_{i}$ is the
vector of observed mean $\Delta$B for each species $j = 1, ..., K$, and
$\mathrm{M}$ is the observed $K * K$ variance-covariance matrix of species
$\Delta$B. Unfortunately this is presenting problems, due to $\mathrm{M}$ not
being strictly positive definite.
After calculating SNLL across pixels (or for the entire landscape), values are
averaged across replicates for an overall model estimate and exported in the
`logLikelihood` table.
We refer to the Wikipedia pages on the [multinomial
distribution](https://en.wikipedia.org/wiki/Multinomial_distribution) and on the
[multivariate Gaussian
distribution](https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Density_function)
for a good summary of these two distributions and their use in SNLL estimation.
### List of input objects {#bvalid-inputs-list}
The full list of input objects *Biomass_validationKNN* requires is presented
below (Table \@ref(tab:moduleInputs2-Biomass-validationKNN)). All have defaults
except `studyArea`, which **must** be provided by the user, or another module.
Of these, the input spatial layers land-cover change (change type and year),
fire perimeters, (ref:percent) species cover, stand age and stand biomass are
obtained from National Forest Inventory kNN layers for years 2001 and 2011.
We strongly recommend that for the "starting point layers" (those suffixed with
`*Start`, which by default correspond to 2001) the user supplies the same
objects used as the starting input layers to initialise the simulation to make
sure that they match.
Note that objects suffixed with `*Start` correspond to the same objects in the
main simulation without this suffix (e.g. `rawBiomassMapStart` is
`rawBiomassMap` in *Biomass_borealDataPrep*), whereas other objects like
`studyArea` and `rasterToMatch` have the same names in the simulation and should
be **exactly** the same object.
Of the inputs in Table \@ref(tab:moduleInputs2-Biomass-validationKNN), the
following deserve special attention:
**Spatial layers**
- `biomassMap` -- a map of simulated stand biomass (in $g/m^2$ ) filtered for
the pixels where cohort dynamics were simulated. This corresponds to the
`sim$biomassMap` object produced by *Biomass_borealDataPrep* or to the
`sim$simulatedBiomassMap` produced by *Biomass_core*.
- `firePerimeters` -- a fire perimeters polygon map that should be used to
exclude recently burned pixels from the analysis. If this is not desired the
user needs to provide an empty `sf` object (e.g., `sf::st_polygon()`) .
- `rawBiomassMapStart` -- raw biomass data used to initialise and parametrise
`Biomass_core`. By default, the module uses the stand biomass map from kNN
for the year 2001. The user must make sure this appropriate for their use
case, or else supply the correct raster layer.
- `rawBiomassMapEnd` -- raw biomass data used to validate the model after
several simulation years. By default, the module uses the kNN stand biomass map
from 2011, which is compared with the 10th year of a simulation initialised
using the kNN 2001 data. The user must make sure this
appropriate for their use case, or else supply the correct raster layer.
- `rstLCChange` -- a binary raster layer with disturbed pixels that should be
removed from the analyses. Can be combined with `rstLCChangeYr` to filter
pixels disturbed in a given time period defined by `P(sim)$LCChangeYr`.
Defaults to [Canada's forest change national map between 1985-2011
(CFS)](https://opendata.nfis.org/downloads/forest_change/C2C_change_type_1985_2011.zip).
- `rstLCChangeYr` -- a raster layer with year of disturbance. This is an
optional layer that can be combined with `rstLCChange` and
`P(sim)$LCChangeYr` to filter disturbed pixels by year of disturbance. Not
used by default. Defaults to [Canada's forest change year national map
between 1985-2011
(CFS)](https://opendata.nfis.org/downloads/forest_change/C2C_change_year_1985_2011.zip).
- `speciesLayersStart` -- same as `rawBiomassMapStart`, but with respect to
species (ref:percent) cover data.
- `speciesLayersEnd` -- same as `rawBiomassMapEnd`, but with respect to
species (ref:percent) cover data.
- `studyArea` -- shapefile. A `SpatialPolygonsDataFrame` with a single polygon
determining the where the simulation will take place. This is the only input
object that **must be supplied by the user**.
**Simulation-related objects**
- `allCohortData` -- OPTIONAL. A `data.table` containing all `cohortData`
objects relevant for the validation (e.g., as many `cohortData` objects as
simulation replicates times 2, for the beginning and end year). If not
supplied, *Biomass_validationKNN* attempts to produce this object using the
`cohortData` object file listed in `simulationOutputs` . Hence, the user
must either supply *both* `allCohortData` and `pixelGroupMapStk` *or*
`simulationOutputs.`
- `pixelGroupMapStk` -- OPTIONAL. As `allCohortData`, but with respect to
`pixelGroupMap` objects.
- `simulationOutputs` -- OPTIONAL. A `data.frame` that has the same structure
as the `data.frame`'s specifying outputs to be saved in
`spades(..., outputs = data.frame(...))`. We advise passing the same
`data.frame` that was supplied to `spades` during the simulation call, but
filtered by the relevant `cohortData` and `pixelGroupMap` objects and,
potentially, with file paths corrected to match the current working
directory (see [Usage example](#bvalid-example)). Only used if
`allCohortData` and `pixelGroupMapStk` are not supplied.
- `pixelGroupMap` -- a raster layer with *pixelGroup* IDs per pixel. Pixels
are grouped based on identical *ecoregionGroup*, *speciesCode*, *age*
and *B* composition, even if the user supplies other initial groupings
(e.g., this is possible in the *Biomass_borealDataPrep* data module).
\newpage
\blandscape
```{r moduleInputs2-Biomass-validationKNN, echo = FALSE, eval = TRUE, message = FALSE, results = 'asis'}
df_inputs <- moduleInputs("Biomass_validationKNN", "..")
caption <- "List of (ref:Biomass-validationKNN) input objects and their description."
## pander's hyphenation doesn't work with URLs and big/strange words (like obj names). split manually
if (knitr::is_latex_output()) {
df_inputs$objectName <- wrapStrFun(df_inputs$objectName, size = 10)
df_inputs$objectClass <- wrapStrFun(df_inputs$objectClass, size = 10)
df_inputs$desc <- wrapStrFun(df_inputs$desc, size = 40)
df_inputs$sourceURL <- wrapStrFun(df_inputs$sourceURL, size = 10)
}
panble(df_inputs, caption, landscape = TRUE,
panderArgs = list("justify" = "left", "split.tables" = Inf,
"keep.line.breaks" = TRUE),
kable_stylingArgs = list(full_width = TRUE))
```
\elandscape
### List of parameters {#bvalid-params-list}
Table \@ref(tab:moduleParams2-Biomass-validationKNN) lists all parameters used
in *Biomass_validationKNN* and their detailed information. All have default
values specified in the module's metadata
Of the parameters listed in Table
\@ref(tab:moduleParams2-Biomass-validationKNN), the following are particularly
important:
- `LCChangeYr` -- integer. Optional parameter defining the years of
disturbance that should be filtered out of the analysis using the
`rstLCChangeYr` layer. This parameter is set to `NULL` by default, meaning
that `rstLCChangeYr` will not be used.
- `sppEquivCol` -- character. the column name in `speciesEquivalency`
data.table that defines the naming convention to use throughout the
simulation.
- `validationReps` -- integer. which simulation replicates should be used for
the validation.
- `validationYears` -- integer. What simulation years should be used for the
validation - the year number needs to match the observed data year. For
instance, if the first observed data year is 2001, that must be the first
simulation year.
\newpage
\blandscape
```{r moduleParams2-Biomass-validationKNN, echo = FALSE, eval = TRUE, message = FALSE, results = 'asis'}
df_params <- moduleParams("Biomass_validationKNN", "..")
caption <- "List of (ref:Biomass-validationKNN) parameters and their description."
panble(df_params, caption, landscape = TRUE,
panderArgs = list("justify" = "left", "digits" = 3, "split.cells" = c(15,15, 5, 5, 5, 40), "split.tables" = Inf),
kable_stylingArgs = list(full_width = TRUE))
```
### List of outputs {#bvalid-outputs-list}
The module produces the following outputs (Table
\@ref(tab:moduleOutputs-Biomass-validationKNN)):
```{r moduleOutputs-Biomass-validationKNN, echo = FALSE, eval = TRUE, message = FALSE, results = 'asis'}
df_outputs <- moduleOutputs("Biomass_validationKNN", "..")
caption <- "List of (ref:Biomass-validationKNN) output objects and their description."
panble(df_outputs, caption,
panderArgs = list("justify" = "left", "digits" = 3, "split.cells" = c(15, 15, 40), "split.tables" = Inf),
kable_stylingArgs = list(full_width = TRUE))
```
\elandscape
### Simulation flow and module events {#bvalid-sim-flow}
*Biomass_validationKNN* initialises itself and prepares all inputs provided that
it has access to outputs of simulations from *Biomass_core*, and internet access
to retrieve the observed kNN datasets used for
validation[^biomass_validationknn-1].
[^biomass_validationknn-1]: Raw data layers downloaded by the module are saved
in \`dataPath(sim)\`, which can be controlled via
\`options(reproducible.destinationPath = ...)\`.
The module then compiles all simulation output data provided that the user
supplies the object names and their file paths via the `simulationOutputs` input
object. Alternatively, the user may pass the pre-compiled outputs (namely the
`cohortData` and `pixelGroupMap` objects) via the `allCohortData` and
`pixelGroupMapStk` input objects. See [list of input
objects](#bvalid-inputs-list) for more detail.
Future users should run *Biomass_validationKNN* with defaults and inspect what
the objects are like before supplying their own data, or alternative data URLs.
Alternatively, users may develop their own validation modules using
*Biomass_validationKNN* as a template. We expect the number of validation
modules to increase as other validation approaches are developed based on
project needs.
The general flow of *Biomass_validationKNN* processes is (note that this module
only runs once, i.e. in one "time step"):
1. Preparation of all necessary objects, namely obtaining the observed data
layers from on-line repositories (or if available stored local copies) and
the compiling simulated data if the user has not done so previously (see
[list of input objects](#bvalid-inputs-list)) -- (`init` event).
2. Calculation of summary variables for validation (`calculateValidVars`
event), namely :
- relative biomass per species per pixel and across the landscape (per year
and per replicate)
- changes in species biomass per pixel and across the landscape (per
replicate), with respect to the first year.
- species dominance across the landscape
- species presences across the landscape
3. Calculation of validation statistics (`validationStats` event), namely mean
absolute deviations (MAD) and sum of negative log-likelihoods (SNLL).
4. Assessment of the relationship between observed $\Delta$B and observed
$\Delta$Age (`obsDeltaMaps` event) -- this is an optional visual diagnostic
of the observed data that produces scatterplots of $\Delta$B \~ $\Delta$Age
of three types:
- With raw observed values of $\Delta$B and $\Delta$Age
- With $\Delta$B and $\Delta$Age calculated on observed data *after*
pre-processing (i.e., the data clean-steps done in `Biomass_borealDataPrep`,
which are also done to the observed data before validation)
- With the data shown in 2) above, but filtered by pixels where there was only
a stand age increment corresponding to the number of years of between the
two validation time points. This is not necessarily a *correct* filter, as
stands may have suffered an age reduction due to the loss of old cohorts
from background mortality (i.e., not coming from disturbances. However, if
using the default input datasets, it is unlikely that this is a widespread
phenomenon in only 10 years. We remind the user that disturbed pixels should
be removed from the analyses when validating succession dynamics in the
absence of disturbance - the default option.
5. Plots (`landscapeWidePlots`, `pixelLevelPlots` and `deltaBComparisons`
events):
- Barplots of landscape-wide and pixel-level comparisons between observed and
simulated data, with respect to relative biomass, dominance and presences.
- Boxplots of biomass changes ($\Delta$B) in observed and simulated data, with
respect to the first year.
- Maps of biomass and age changes ($\Delta$B, $\Delta$Age) with respect to the
first year, in observed and simulated data.
All module default outputs are in the form of plots, but the user can chose to
save any objects (see Table \@ref(tab:moduleOutputs-Biomass-validationKNN)).
## Usage example {#bvalid-example}
### Set up R libraries {#bvalid-example-libs}
```{r load-Rlibs-Biomass-validationKNN, eval=FALSE}
options(repos = c(CRAN = "https://cloud.r-project.org"))
tempDir <- tempdir()
pkgPath <- file.path(tempDir, "packages", version$platform,
paste0(version$major, ".", strsplit(version$minor, "[.]")[[1]][1]))
dir.create(pkgPath, recursive = TRUE)
.libPaths(pkgPath, include.site = FALSE)
if (!require(Require, lib.loc = pkgPath)) {
remotes::install_github(
paste0("PredictiveEcology/",
"Require@5c44205bf407f613f53546be652a438ef1248147"),
upgrade = FALSE, force = TRUE)
library(Require, lib.loc = pkgPath)
}
setLinuxBinaryRepo()
```
### Get the module and module dependencies {#b-valid-example-pkg-mods}
Because *Biomass_validationKNN* is meant to validate simulation outputs against
observed data, we need to first run a simulation of forest dynamics with
*Biomass_core*. To do that we get both modules' code from the PredictiveEcology
GitHub repository (all install all necessary packages). Notice that we are
placing all packages, module code, inputs and outputs in temporary directories.
```{r getModule-Biomass-validationKNN, eval=FALSE}
Require(paste0("PredictiveEcology/",
"SpaDES.project@6d7de6ee12fc967c7c60de44f1aa3b04e6eeb5db"),
require = FALSE, upgrade = FALSE, standAlone = TRUE)
paths <- list(inputPath = normPath(file.path(tempDir, "inputs")),
cachePath = normPath(file.path(tempDir, "cache")),
modulePath = normPath(file.path(tempDir, "modules")),
outputPath = normPath(file.path(tempDir, "outputs")))
SpaDES.project::getModule(modulePath = paths$modulePath,
c("PredictiveEcology/Biomass_core@master",
"PredictiveEcology/Biomass_validationKNN@master"),
overwrite = TRUE)
## make sure all necessary packages are installed:
outs <- SpaDES.project::packagesInModules(modulePath = paths$modulePath)
Require(c(unname(unlist(outs)),
"PredictiveEcology/SpaDES.experiment@development",
"future"),
require = FALSE, standAlone = TRUE)
## load necessary packages
Require(c("SpaDES.core", "LandR", "reproducible", "pemisc",
"SpaDES.experiment", "future"), upgrade = FALSE, install = FALSE)
```
### Setup simulation
```{r module usage example setup2 -Biomass-validationKNN, eval = FALSE}
times <- list(start = 2001, end = 2011)
studyArea <- Cache(randomStudyArea, size = 1e7) # cache this so it creates a random one only once on a machine
# Pick the species you want to work with -- using the naming convention in "Boreal" column of LandR::sppEquivalencies_CA
speciesNameConvention <- "Boreal"
speciesToUse <- c("Pice_Gla", "Popu_Tre", "Pinu_Con")
sppEquiv <- sppEquivalencies_CA[get(speciesNameConvention) %in% speciesToUse]
# Assign a colour convention for graphics for each species
sppColorVect <- sppColors(sppEquiv, speciesNameConvention,
newVals = "Mixed", palette = "Set1")
## Usage example
modules <- as.list("Biomass_core")
objects <- list(studyArea = studyArea, sppEquiv = sppEquiv, sppColorVect = sppColorVect)
successionTimestep <- 20L
## keep default values for most parameters
## (omitted from this list)
parameters <- list(
Biomass_core = list(
"sppEquivCol" = speciesNameConvention
, "successionTimestep" = successionTimestep
, ".plotInitialTime" = times$start
, ".plotInterval" = 1L
, ".plots" = "png"
, ".saveInitialTime" = times$start
, ".useCache" = "init"
, ".useParallel" = FALSE
)
)
outputs <- data.frame(expand.grid(objectName = "cohortData",
saveTime = unique(seq(times$start, times$end, by = 1)),
eventPriority = 1,
stringsAsFactors = FALSE))
outputs <- rbind(outputs, data.frame(objectName = "pixelGroupMap",
saveTime = unique(seq(times$start, times$end, by = 1)),
eventPriority = 1))
```
### Run simulation
Here we run a simulation with three replicates using the `experiment2` function
of the `SpaDES.experiment` R package [@McIntireChubaty2021], which builds a
folder structure where simulation outputs are conveniently organised.
```{r module usage example2 -Biomass-validationKNN, eval=FALSE}
opts <- options(reproducible.useCache = TRUE,
reproducible.destinationPath = paths$inputPath,
spades.useRequire = FALSE)
graphics.off()
mySimInit <- simInit(times = times,
params = parameters,
modules = modules,
objects = objects,
paths = paths,
outputs = outputs)
plan(sequential)
mySimExperiment <- experiment2(
sim1 = mySimInit,
clearSimEnv = FALSE,
replicates = 3)
```
### Validate simulation outputs with *Biomass_validationKNN*
Note that because we ran *Biomass_core* by itself using theoretical input data,
we can expect the validation to reveal that the module didn't do a great job at
reproducing observed patterns.
*Note that a Google Account is needed to download some of the input files.*
```{r module usage example validation, eval=FALSE}
simulationOutputs <- lapply(mySimExperiment, FUN = function(x, localSimPaths) {
oldPath <- dirname(outputPath(x)) ## exclude sim*_rep* folder
DT <- as.data.table(outputs(x))
DT[, file := sub(oldPath, localSimPaths$outputPath, file)]
DT
}, localSimPaths = as.list(normPath(paths)))
simulationOutputs <- rbindlist(simulationOutputs)
validationPaths <- as.list(normPath(paths))
validationPaths$outputPath <- file.path(validationPaths$outputPath, "validation")
validationTimes <- list(start = 1, end = 1)
validationParams <- list(
Biomass_validationKNN = list(
"sppEquivCol" = params(mySimInit)$Biomass_core$sppEquivCol
, "validationReps" = as.integer(1:3) ## or length of simLists
, "validationYears" = as.integer(c(2001, 2011))
, ".plots" = c("png")
)
)
## make an empty fire polygon object to bypass removing fire-disturbed pixels
noFires <- sf::st_polygon()
validationObjects <- list(
"biomassMap" = mySimExperiment$sim1_rep1$biomassMap
, "firePerimeters" = noFires
, "rasterToMatch" = mySimExperiment$sim1_rep1$rasterToMatch
, "rawBiomassMapStart" = mySimExperiment$sim1_rep1$biomassMap
, "simulationOutputs" = simulationOutputs
, "speciesLayersStart" = mySimExperiment$sim1_rep1$speciesLayers
, "sppColorVect" = mySimExperiment$sim1_rep1$sppColorVect
, "sppEquiv" = mySimExperiment$sim1_rep1$sppEquiv
, "studyArea" = mySimExperiment$sim1_rep1$studyArea
)
mySimValidation <- simInitAndSpades(times = validationTimes
, params = validationParams
, modules = "Biomass_validationKNN"
, objects = validationObjects
, paths = validationPaths
, .studyAreaName = SAname)
```
Here are some of the output figures automatically produced by
*Biomass_validationKNN*
```{r fig-Biomass-validationKNNOutPlots, echo = FALSE, eval = TRUE, fig.show='hold', out.width = ifelse(knitr::is_latex_output(), "100%", "70%"), fig.cap = "(ref:Biomass-validationKNN) automatically generates plots showing a visual comparison between simulated and observed species presences (right) across the landscape, and relative species biomass per pixel (left)."}
## make sure all include_graphics have wd attached to figure path
## this is necessary to knit to pdf using child Rmds - see https://stackoverflow.com/questions/61065560/cant-compile-rmarkdown-pdf-with-image-in-child-rmd-latex-error-image-file
knitr::include_graphics(normPath(c("figures/LandscapeComparisons_PresAbs.png",
"figures/PixelComparisons_relB.png")))
```
```{r fig-Biomass-validationKNNOutPlots2, echo = FALSE, eval = TRUE, fig.align = "center", fig.show = "hold", out.width = ifelse(knitr::is_latex_output(), "100%", "70%"), fig.cap = "A plot of landscape-wide mean absolute deviations (MAD) from (top to bottom) observed mean relative abundance, no. of presences, no. of pixels where the species is dominant and $\\Delta$B."}
knitr::include_graphics(normPath(c("figures/landscapeMAD.png")))
```
```{r fig-Biomass-validationKNNOutPlots3, echo = FALSE, eval = TRUE, fig.align = "center", fig.show = "hold", out.width = ifelse(knitr::is_latex_output(), "100%", "70%"), fig.cap = "Diagnostic plot of observed changes in biomass and age $\\Delta$B and $\\Delta$Age, respectively)."}
knitr::include_graphics(normPath(c("figures/observedDeltaBDeltaAge.png")))
```
## References {#bvalid-refs}