Restyle the R sources and the index.Rmd chunks: one pipeline step per line,
spaces around `=` / `==` and after commas, `<-` instead of `=` for assignment.

Functional changes carried by this patch:
  * get_inputfile(): filter on the `.file` argument; the old code passed
    `file`, which is not a parameter of the function.
  * create_event_tables(): call group_split(), group_keys() and write_csv()
    through their package namespaces (dplyr::, readr::).
  * create_data(): pass "totals_by_system" to get_inputfile() by name.

NOTE(review): leading indentation and the position of blank context lines
were re-derived from the hunk line counts -- confirm against the target tree
(or apply with --ignore-whitespace) before relying on this patch.

diff --git a/R/functions.R b/R/functions.R
index e317063c..8267cbd0 100755
--- a/R/functions.R
+++ b/R/functions.R
@@ -3,9 +3,9 @@ get_data <- function(url, elements) {
 }
 
 get_inputfile <- function(.file) {
-  path <- fs::dir_info("inputfiles", type="file") %>%
+  path <- fs::dir_info("inputfiles", type = "file") %>%
     dplyr::select(path, change_time, birth_time) %>%
-    dplyr::filter(stringr::str_detect(path, file)) %>%
+    dplyr::filter(stringr::str_detect(path, .file)) %>%
     dplyr::filter(birth_time == max(birth_time)) %>%
     dplyr::pull(path)
 
@@ -17,13 +17,15 @@ get_inputfile <- function(.file) {
 }
 
 extract_total <- function(indsn, x) {
-  total <- indsn[[x]] %>% rvest::html_text2() %>%
+  total <- indsn[[x]] %>%
+    rvest::html_text2() %>%
     stringr::str_extract("\\d+(?= \\b)") %>%
     readr::parse_double()
 }
 
 extract_origin <- function(indsn, x) {
-  indsn[[x]] %>% rvest::html_element("img") %>%
+  indsn[[x]] %>%
+    rvest::html_element("img") %>%
     rvest::html_attr("src") %>%
     stringr::str_extract("(Flag_of_the_|Flag_of_)([a-zA-Z_]+|[a-zA-Z]+)") %>%
     stringr::str_remove("(Flag_of_the_|Flag_of_)") %>%
@@ -31,7 +33,8 @@ extract_origin <- function(indsn, x) {
 }
 
 extract_counts <- function(indsn, x, condition) {
-  counts <- indsn[[x]] %>% rvest::html_text2() %>%
+  counts <- indsn[[x]] %>%
+    rvest::html_text2() %>%
     stringr::str_remove_all(".*(?=:)") %>%
     stringr::str_remove_all(": ") %>%
     stringr::str_remove_all("\\(") %>%
@@ -53,7 +56,8 @@ extract_counts <- function(indsn, x, condition) {
 }
 
 extract_system <- function(indsn, x) {
-  indsn[[x]] %>% rvest::html_text2() %>%
+  indsn[[x]] %>%
+    rvest::html_text2() %>%
     stringr::str_remove_all("^\\d+ ") %>%
     stringr::str_extract(".*(?=:)")
 }
@@ -76,8 +80,9 @@ extract_url <- function(indsn, x) {
 
 
 trim_all <- function(indsn) {
-  indsn %>% dplyr::ungroup() %>%
-    dplyr::mutate(dplyr::across(tidyr::everything(), ~ stringr::str_trim(.,)))
+  indsn %>%
+    dplyr::ungroup() %>%
+    dplyr::mutate(dplyr::across(tidyr::everything(), ~ stringr::str_trim(., )))
 }
 
 #' create_keys
@@ -97,21 +102,21 @@ create_keys <- function(indsn) {
     dplyr::mutate(sysID = dplyr::row_number())
 
   indsn <- indsn %>%
-    dplyr::left_join(sysID, by="system")
+    dplyr::left_join(sysID, by = "system")
 
   imageID <- indsn %>%
     dplyr::distinct(url) %>%
     dplyr::mutate(imageID = dplyr::row_number())
 
   indsn <- indsn %>%
-    dplyr::left_join(imageID, by="url")
+    dplyr::left_join(imageID, by = "url")
 
   statusID <- indsn %>%
     dplyr::distinct(status) %>%
     dplyr::mutate(statusID = dplyr::row_number())
 
   indsn <- indsn %>%
-    dplyr::left_join(statusID, by="status")
+    dplyr::left_join(statusID, by = "status")
 
   matID <- indsn %>%
     dplyr::distinct(country, sysID, imageID, statusID) %>%
@@ -121,7 +126,7 @@ create_keys <- function(indsn) {
     ))
 
   indsn <- indsn %>%
-    dplyr::left_join(matID, by=c("country", "sysID", "imageID", "statusID"))
+    dplyr::left_join(matID, by = c("country", "sysID", "imageID", "statusID"))
 
   return(indsn)
 }
diff --git a/R/per_event.R b/R/per_event.R
index a7cfff3f..15e25264 100755
--- a/R/per_event.R
+++ b/R/per_event.R
@@ -9,11 +9,15 @@ create_event_tables <- function(indsn, ...) {
   idnsn <- indsn %>%
     dplyr::ungroup()
 
-  x <- indsn %>% dplyr::group_by(...) %>%
+  x <- indsn %>%
+    dplyr::group_by(...) %>%
     {
-      setNames(group_split(.), group_keys(.)[[1]])
+      setNames(dplyr::group_split(.), dplyr::group_keys(.)[[1]])
     }
 
-  x %>% names(.) %>%
-    purrr::map( ~ write_csv(x[[.]], glue::glue("outputfiles/event_{.}.csv")))
+  x %>%
+    names(.) %>%
+    purrr::map(
+      ~ readr::write_csv(x[[.]], glue::glue("outputfiles/event_{.}.csv"))
+    )
 }
diff --git a/R/scrape_data.R b/R/scrape_data.R
index 7bd1890e..033ac65e 100755
--- a/R/scrape_data.R
+++ b/R/scrape_data.R
@@ -13,8 +13,10 @@ scrape_data <- function(country) {
   }
 
   materiel <-
-    get_data(url,
-             "article") %>%
+    get_data(
+      url,
+      "article"
+    ) %>%
     rvest::html_elements("li")
 
   data <-
@@ -26,11 +28,11 @@ scrape_data <- function(country) {
       url = character()
     )
 
-  counter = 0
+  counter <- 0
   for (a in seq_along(materiel)) {
     status <- materiel[[a]] %>% rvest::html_elements("a")
     for (b in seq_along(status)) {
-      counter = counter + 1
+      counter <- counter + 1
       data[counter, 1] <- country
       data[counter, 2] <- extract_origin(materiel, a)
       data[counter, 3] <- extract_system(materiel, a)
@@ -55,10 +57,10 @@ create_data <- function() {
     dplyr::select(country, origin, system, status, url, date_recorded) %>%
     dplyr::distinct()
 
-  previous <- get_inputfile("totals_by_system") %>%
+  previous <- get_inputfile(.file = "totals_by_system") %>%
     trim_all() %>%
     dplyr::mutate(date_recorded = as.Date(date_recorded)) %>%
-    dplyr::select(country,origin,system,status,url,date_recorded) %>%
+    dplyr::select(country, origin, system, status, url, date_recorded) %>%
     dplyr::distinct()
 
   check <- data %>%
@@ -66,15 +68,14 @@ create_data <- function() {
     dplyr::mutate(date_recorded = as.Date(date_recorded))
 
   if (nrow(check) > 0) {
-    data <- check %>% dplyr::bind_rows(previous, .id = NULL) %>%
+    data <- check %>%
+      dplyr::bind_rows(previous, .id = NULL) %>%
       dplyr::arrange(country, system, date_recorded)
 
     previous %>% readr::write_csv("inputfiles/totals_by_system.csv.bak")
 
     data %>% readr::write_csv(glue::glue(
-      "inputfiles/totals_by_system{lubridate::today()+1}.csv"
-    ))
-
+      "inputfiles/totals_by_system{lubridate::today()+1}.csv"))
   } else {
     data <- previous
   }
@@ -86,15 +87,16 @@ create_data <- function() {
     dplyr::ungroup()
 
   return(data)
-
 }
 
 total_by_system_wide <- function(indsn) {
-  indsn %>% dplyr::select(country, system, status) %>%
+  indsn %>%
+    dplyr::select(country, system, status) %>%
     dplyr::group_by(country, system, status) %>%
     dplyr::summarise(count = n()) %>%
     tidyr::pivot_wider(names_from = status, values_from = count) %>%
     dplyr::ungroup() %>%
     dplyr::mutate(dplyr::across(where(is.numeric), ~ tidyr::replace_na(.x, 0)),
-                  total = destroyed + captured + damaged + abandoned)
+      total = destroyed + captured + damaged + abandoned
+    )
 }
diff --git a/R/totals_by_type.R b/R/totals_by_type.R
index 0f98f476..910a06e3 100755
--- a/R/totals_by_type.R
+++ b/R/totals_by_type.R
@@ -23,7 +23,9 @@ create_by_type <- function(country) {
   heads <- heads[nchar(heads) > 0]
 
   # Get the positons of the Russia and Ukraine headers
-  pos <- heads %>% stringr::str_which(country) %>% as.double()
+  pos <- heads %>%
+    stringr::str_which(country) %>%
+    as.double()
 
   totals <- tibble(
     country = character(),
@@ -38,16 +40,20 @@ create_by_type <- function(country) {
     totals[l, "equipment"] <-
       heads[l] %>% stringr::str_remove_all(" \\(.*\\)")
     totals[l, "destroyed"] <-
-      heads[l] %>% stringr::str_extract("destroyed: \\d+") %>%
+      heads[l] %>%
+      stringr::str_extract("destroyed: \\d+") %>%
       stringr::str_remove_all("[:alpha:]|[:punct:]")
     totals[l, "abandoned"] <-
-      heads[l] %>% stringr::str_extract("(abandoned|aboned): \\d+") %>%
+      heads[l] %>%
+      stringr::str_extract("(abandoned|aboned): \\d+") %>%
       stringr::str_remove_all("[:alpha:]|[:punct:]")
     totals[l, "captured"] <-
-      heads[l] %>% stringr::str_extract("captured: \\d+") %>%
+      heads[l] %>%
+      stringr::str_extract("captured: \\d+") %>%
       stringr::str_remove_all("[:alpha:]|[:punct:]")
     totals[l, "damaged"] <-
-      heads[l] %>% stringr::str_extract("damaged: \\d+") %>%
+      heads[l] %>%
+      stringr::str_extract("damaged: \\d+") %>%
       stringr::str_remove_all("[:alpha:]|[:punct:]")
   }
 
@@ -73,10 +79,7 @@ totals_by_type <- function() {
   ukraine <- create_by_type("Ukraine")
 
   totals_df <- russia %>%
-    dplyr::bind_rows(ukraine, .id=NULL)
+    dplyr::bind_rows(ukraine, .id = NULL)
 
   return(totals_df)
 }
-
-
-
diff --git a/index.Rmd b/index.Rmd
index 9d68ecae..81c3cfe9 100755
--- a/index.Rmd
+++ b/index.Rmd
@@ -39,24 +39,38 @@ graph_counts(daily_count, "All Types", "type_total")
 
 ### Russia
 ```{r russia-total-system-type, echo=FALSE, warning=FALSE, message=FALSE}
-totals_by_type() %>% dplyr::filter(country=="Russia") %>% kableExtra::kbl(caption="Russia: Totals by System Type and Status") %>% kableExtra::kable_classic(font_size=16)
+totals_by_type() %>%
+  dplyr::filter(country == "Russia") %>%
+  kableExtra::kbl(caption = "Russia: Totals by System Type and Status") %>%
+  kableExtra::kable_classic(font_size = 16)
 ```
 
 ### Ukraine
 ```{r total-system-type, echo=FALSE, warning=FALSE, message=FALSE}
-totals_by_type() %>% dplyr::filter(country=="Ukraine") %>% kableExtra::kbl(caption="Ukraine: Totals by System Type and Status") %>% kableExtra::kable_classic(font_size=16)
+totals_by_type() %>%
+  dplyr::filter(country == "Ukraine") %>%
+  kableExtra::kbl(caption = "Ukraine: Totals by System Type and Status") %>%
+  kableExtra::kable_classic(font_size = 16)
 ```
 
 ## By System^["Due to how the data is recorded, weapons systems may be recorded in here twice—*e.g.*, if it is listed as abandoned and captured, a system will be counted in both the abandoned and captured tallies."] {.tabset}
 
 ### Russia
 ```{r russia-total-system, echo=FALSE, warning=FALSE, message=FALSE}
-total_by_system_wide(totals_by_system) %>% dplyr::filter(country=="Russia") %>% dplyr::arrange(desc(total)) %>% kableExtra::kbl(caption="Russia: Totals by System and Status") %>% kableExtra::kable_classic(font_size=16)
+total_by_system_wide(totals_by_system) %>%
+  dplyr::filter(country == "Russia") %>%
+  dplyr::arrange(desc(total)) %>%
+  kableExtra::kbl(caption = "Russia: Totals by System and Status") %>%
+  kableExtra::kable_classic(font_size = 16)
 ```
 
 ### Ukraine
 ```{r total-system, warning=FALSE, echo=FALSE, message=FALSE}
-total_by_system_wide(totals_by_system) %>% dplyr::filter(country=="Ukraine") %>% dplyr::arrange(desc(total)) %>% kableExtra::kbl(caption="Ukraine: Totals by System and Status") %>% kableExtra::kable_classic(font_size=16)
+total_by_system_wide(totals_by_system) %>%
+  dplyr::filter(country == "Ukraine") %>%
+  dplyr::arrange(desc(total)) %>%
+  kableExtra::kbl(caption = "Ukraine: Totals by System and Status") %>%
+  kableExtra::kable_classic(font_size = 16)
 ```
 
 