-
Notifications
You must be signed in to change notification settings - Fork 5
/
write_dwc.R
108 lines (103 loc) · 4.4 KB
/
write_dwc.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#' Transform ETN data to Darwin Core
#'
#' Transforms and downloads data from a European Tracking Network
#' **animal project** to [Darwin Core](https://dwc.tdwg.org/).
#' The resulting CSV file(s) can be uploaded to an [IPT](
#' https://www.gbif.org/ipt) for publication to OBIS and/or GBIF.
#' A `meta.xml` or `eml.xml` file are not created.
#'
#' @param connection Connection to the ETN database.
#' @param animal_project_code Animal project code.
#' @param directory Path to local directory to write file(s) to.
#' If `NULL`, then a list of data frames is returned instead, which can be
#' useful for extending/adapting the Darwin Core mapping before writing with
#' [readr::write_csv()].
#' @param rights_holder Acronym of the organization owning or managing the
#' rights over the data.
#' @param license Identifier of the license under which the data will be
#' published.
#' - [`CC-BY`](https://creativecommons.org/licenses/by/4.0/legalcode) (default).
#' - [`CC0`](https://creativecommons.org/publicdomain/zero/1.0/legalcode).
#' @return CSV file(s) written to disk or list of data frames when
#' `directory = NULL`.
#' @export
#' @section Transformation details:
#' Data are transformed into an
#' [Occurrence core](https://rs.gbif.org/core/dwc_occurrence_2022-02-02.xml).
#' This **follows recommendations** discussed and created by Peter Desmet,
#' Jonas Mortelmans, Jonathan Pye, John Wieczorek and others.
#' See the [SQL file(s)](https://github.com/inbo/etn/tree/main/inst/sql)
#' used by this function for details.
#'
#' Key features of the Darwin Core transformation:
#' - Deployments (animal+tag associations) are parent events, with capture,
#' surgery, release, recapture (human observations) and acoustic detections
#' (machine observations) as child events.
#' No information about the parent event is provided other than its ID,
#' meaning that data can be expressed in an Occurrence Core with one row per
#' observation and `parentEventID` shared by all occurrences in a deployment.
#' - The release event often contains metadata about the animal (sex,
#' lifestage, comments) and deployment as a whole.
#' - Acoustic detections are downsampled to the **first detection per hour**,
#' to reduce the size of high-frequency data.
#' Duplicate detections (same animal, tag and timestamp) are excluded.
#' It is possible for a deployment to contain no detections, e.g. if the
#' tag malfunctioned right after deployment.
write_dwc <- function(connection = con,
animal_project_code,
directory = ".",
rights_holder = NULL,
license = "CC-BY") {
# Check connection
check_connection(connection)
# Check animal_project_code
assertthat::assert_that(
length(animal_project_code) == 1,
msg = "`animal_project_code` must be a single value."
)
# Check license
licenses <- c("CC-BY", "CC0")
assertthat::assert_that(
license %in% licenses,
msg = glue::glue(
"`license` must be `{licenses}`.",
licenses = glue::glue_collapse(licenses, sep = "`, `", last = "` or `")
)
)
license <- switch(
license,
"CC-BY" = "https://creativecommons.org/licenses/by/4.0/legalcode",
"CC0" = "https://creativecommons.org/publicdomain/zero/1.0/legalcode"
)
# Get imis dataset id and title
project <- get_animal_projects(connection, animal_project_code)
imis_dataset_id <- project$imis_dataset_id
imis_url <- "https://www.vliz.be/en/imis?module=dataset&dasid="
imis_json <- jsonlite::read_json(paste0(imis_url, imis_dataset_id, "&show=json"))
dataset_id <- paste0(imis_url, imis_dataset_id)
dataset_name <- imis_json$datasetrec$StandardTitle
# Query database
message("Reading data and transforming to Darwin Core.")
dwc_occurrence_sql <- glue::glue_sql(
readr::read_file(system.file("sql/dwc_occurrence.sql", package = "etn")),
.con = connection
)
dwc_occurrence <- DBI::dbGetQuery(connection, dwc_occurrence_sql)
# Return object or write files
if (is.null(directory)) {
list(
dwc_occurrence = dplyr::as_tibble(dwc_occurrence)
)
} else {
dwc_occurrence_path <- file.path(directory, "dwc_occurrence.csv")
message(glue::glue(
"Writing data to:",
dwc_occurrence_path,
.sep = "\n"
))
if (!dir.exists(directory)) {
dir.create(directory, recursive = TRUE)
}
readr::write_csv(dwc_occurrence, dwc_occurrence_path, na = "")
}
}