diff --git a/src/download_griis_checklist.R b/src/download_griis_checklist.R index f0563c5..58af7d6 100644 --- a/src/download_griis_checklist.R +++ b/src/download_griis_checklist.R @@ -1,4 +1,5 @@ library(knitr) +library(magrittr) # create temporary R file #### tempR <- tempfile(fileext = ".R") @@ -14,7 +15,7 @@ file.copy(from = "./data/interim/data_input_checklist_indicators.tsv", file.remove("./data/interim/data_input_checklist_indicators.tsv") # add vernicular names -checklist_raw <- read_delim("data/output/UAT_processing/data_input_checklist_indicators.tsv", +checklist_raw <- readr::read_delim("data/output/UAT_processing/data_input_checklist_indicators.tsv", delim = "\t", escape_double = FALSE, trim_ws = TRUE) @@ -29,19 +30,19 @@ all_vernicular_names <- data.frame( ) for(t in taxon_keys){ - temp_name_usage <- name_usage(key = t, data = "vernacularNames") + temp_name_usage <- rgbif::name_usage(key = t, data = "vernacularNames") spec_vernicular_names <- temp_name_usage$data if(nrow(spec_vernicular_names) > 0){ spec_vernicular_names <- spec_vernicular_names %>% - filter(language %in% c("eng", "fra", "nld")) %>% - mutate(vernacularName = str_to_sentence(vernacularName), - language = str_sub(language, 0, 2)) %>% - distinct(taxonKey, language, vernacularName) %>% - group_by(language, taxonKey) %>% - summarise(vernacular_name = paste(vernacularName, collapse = ", ")) %>% - ungroup() %>% - pivot_wider(id_cols = taxonKey, + dplyr::filter(language %in% c("eng", "fra", "nld")) %>% + dplyr::mutate(vernacularName = stringr::str_to_sentence(vernacularName), + language = stringr::str_sub(language, 0, 2)) %>% + dplyr::distinct(taxonKey, language, vernacularName) %>% + dplyr::group_by(language, taxonKey) %>% + dplyr::summarise(vernacular_name = paste(vernacularName, collapse = ", ")) %>% + dplyr::ungroup() %>% + tidyr::pivot_wider(id_cols = taxonKey, names_from = language, names_prefix = "vernacular_name_", values_from = vernacular_name) @@ -49,15 +50,31 @@ for(t in taxon_keys){ if(nrow(all_vernicular_names) == 0){ all_vernicular_names <- spec_vernicular_names }else{ - all_vernicular_names <- bind_rows(all_vernicular_names, spec_vernicular_names) + all_vernicular_names <- dplyr::bind_rows(all_vernicular_names, spec_vernicular_names) } }else{ warning(paste0("No vernicular names for ", t)) } } +missing_scientific_names <- checklist_raw %>% + dplyr::filter(is.na(species)) + +if(nrow(missing_scientific_names) > 0){ + checklist_raw <- checklist_raw %>% + dplyr::mutate(species = dplyr::case_when(is.na(species) ~ canonicalName, + TRUE ~ species)) + + missing_scientific_names_recheck <- checklist_raw %>% + dplyr::filter(is.na(species)) + + if(nrow(missing_scientific_names_recheck) > 0){ + stop(paste0("After the fix these taxa are still missing their scientific names: ", paste(missing_scientific_names_recheck$nubKey, collapse = ", "))) + } +} + checklist <- checklist_raw %>% - left_join(all_vernicular_names, by = c("nubKey" = "taxonKey")) %>% - write_delim("data/output/UAT_processing/data_input_checklist_indicators.tsv", + dplyr::left_join(all_vernicular_names, by = c("nubKey" = "taxonKey")) %>% + readr::write_delim("data/output/UAT_processing/data_input_checklist_indicators.tsv", delim = "\t")