From 34c5d4d40d226f44a0d99bef03f977ce256d012f Mon Sep 17 00:00:00 2001 From: Johannes Koch Date: Thu, 10 Oct 2024 17:30:23 +0200 Subject: [PATCH 1/2] Fixes #30 and fixes #36 Fix implementation of "with_USA" and change use_USA_def_for_all argument to use_USA_cf_for_all. --- R/adapt_source.R | 75 +++++++++++++++---- R/check_user_input.R | 13 ++-- R/convertGDP.R | 13 ++-- R/transform_user_input.R | 12 ++- man/convertGDP.Rd | 11 ++- tests/testthat/test-01_check_user_input.R | 42 +++-------- tests/testthat/test-02_transform_user_input.R | 8 +- tests/testthat/test-05_convertGDP.R | 2 +- tests/testthat/test-06_replace_NAs.R | 39 +++++++++- 9 files changed, 135 insertions(+), 80 deletions(-) diff --git a/R/adapt_source.R b/R/adapt_source.R index 52fbf46..f928772 100644 --- a/R/adapt_source.R +++ b/R/adapt_source.R @@ -1,18 +1,10 @@ -adapt_source_USA <- function(gdp, source, replace_NAs) { - source_USA <- source %>% +adapt_source_USA <- function(gdp, source) { + # Filter for USA, and then complete source for all countries with the unique pairs of all columns in source, + # excluding iso3c. 
+ source %>% dplyr::filter(.data$iso3c == "USA") %>% - dplyr::select("year", "USA_deflator" = "GDP deflator") - - source_adapted <- source %>% - dplyr::left_join(source_USA, by = dplyr::join_by("year")) %>% - dplyr::mutate("GDP deflator" = .data$USA_deflator) - - if (!is.null(replace_NAs) && replace_NAs[1] == "with_USA") { - source_USA <- source %>% dplyr::filter(.data$iso3c == "USA") - source_adapted <- purrr::map(unique(gdp$iso3c), ~dplyr::mutate(source_USA, "iso3c" = .x)) %>% - purrr::list_rbind() - } - source_adapted + tidyr::complete(iso3c = unique(gdp$iso3c), + tidyr::nesting(!!!(rlang::syms(colnames(source)[-1])))) } # @@ -99,6 +91,61 @@ adapt_source <- function(gdp, source, with_regions, replace_NAs, require_year_co dplyr::ungroup() } + if ("with_USA" %in% replace_NAs) { + USA_def_growth <- source %>% + dplyr::filter(.data$iso3c == "USA") %>% + dplyr::select("year", "gd" = "GDP deflator") %>% + dplyr::mutate("gd" = .data$gd / dplyr::lag(.data$gd)) + + source_adapted <- source_adapted %>% + dplyr::filter(.data$iso3c %in% unique(gdp$iso3c)) %>% + # Fill in the MER and PPPs with the growth rates from the USA (= 1) + dplyr::group_by(.data$iso3c) %>% + tidyr::fill(c("MER (LCU per US$)", + "PPP conversion factor, GDP (LCU per international $)"), + .direction = "downup") %>% + dplyr::ungroup() %>% + # For the deflator, we need to multiply the bordering values with the actual USA growth + dplyr::left_join(USA_def_growth, by = dplyr::join_by("year")) %>% + # Forward + dplyr::mutate( + `GDP deflator` = purrr::accumulate( + dplyr::row_number(), + ~ dplyr::coalesce(.data$`GDP deflator`[.y], .x * .data$gd[.y]), + .init = NA + )[-1], + .by = c("iso3c") + ) %>% + # Backward + dplyr::arrange(-.data$year) %>% + dplyr::mutate( + `GDP deflator` = purrr::accumulate( + dplyr::row_number(), + ~ dplyr::coalesce(.data$`GDP deflator`[.y], .x / .data$gd[.y]), + .init = NA + )[-1], + .by = c("iso3c") + ) %>% + dplyr::arrange(.data$iso3c, .data$year) %>% + dplyr::select(-"gd") + + # 
If there is no data whatsoever for the country, use US values + ec <- dplyr::group_by(source_adapted, .data$iso3c) %>% + dplyr::filter(all(is.na(.data$`GDP deflator`))) %>% + dplyr::pull("iso3c") %>% + unique() + + source_ec <- source %>% + dplyr::filter(.data$iso3c == "USA") %>% + tidyr::complete(iso3c = ec, + tidyr::nesting(!!!(rlang::syms(colnames(source)[-1])))) %>% + dplyr::filter(.data$iso3c %in% ec) + + source_adapted <- source_adapted %>% + dplyr::filter(!.data$iso3c %in% ec) %>% + dplyr::bind_rows(source_ec) + } + source_adapted } diff --git a/R/check_user_input.R b/R/check_user_input.R index 9186dfc..3aa950c 100644 --- a/R/check_user_input.R +++ b/R/check_user_input.R @@ -6,7 +6,7 @@ check_user_input <- function(gdp, unit_in, unit_out, source, - use_USA_deflator_for_all, + use_USA_cf_for_all, with_regions, replace_NAs, verbose, @@ -15,7 +15,7 @@ check_user_input <- function(gdp, check_gdp(gdp) check_unit_in_out(unit_in, unit_out) source <- check_source(source) - check_use_USA_deflator_for_all(use_USA_deflator_for_all, unit_in, unit_out) + check_use_USA_cf_for_all(use_USA_cf_for_all, unit_in, unit_out) check_with_regions(unit_in, unit_out, source, with_regions) check_replace_NAs(with_regions, replace_NAs) check_verbose(verbose) @@ -97,12 +97,9 @@ check_source <- function(source) { } # Check input parameter 'verbose' -check_use_USA_deflator_for_all <- function(use_USA_deflator_for_all, unit_in, unit_out) { - if (!is.logical(use_USA_deflator_for_all)) { - abort("Invalid 'use_USA_deflator_for_all' argument. Has to be either TRUE or FALSE.") - } - if (use_USA_deflator_for_all && any(grepl("current", c(unit_in, unit_out)))) { - abort("Setting 'use_USA_deflator_for_all' to TRUE should only be applied between conversion of constant units.") +check_use_USA_cf_for_all <- function(use_USA_cf_for_all, unit_in, unit_out) { + if (!is.logical(use_USA_cf_for_all)) { + abort("Invalid 'use_USA_cf_for_all' argument. 
Has to be either TRUE or FALSE.") } } diff --git a/R/convertGDP.R b/R/convertGDP.R index 1e45edc..80574ac 100644 --- a/R/convertGDP.R +++ b/R/convertGDP.R @@ -57,9 +57,8 @@ #' a data-frame that exists in the calling environment. #' Use [print_source_info()](https://pik-piam.github.io/GDPuc/reference/print_source_info.html) #' to learn about the available sources. -#' @param use_USA_deflator_for_all TRUE or FALSE (default). If TRUE, then only the USA deflator is used to adjust for -#' inflation, regardless of the country codes provided. This is a very specific deviation from the correct conversion -#' process, which nevertheless is often used in the integrated assessment community. Use carefully! +#' @param use_USA_cf_for_all TRUE or FALSE (default). If TRUE, then the USA conversion factors are used for all +#' countries. #' @param with_regions NULL or a data-frame. The data-frame should be "country to region #' mapping": one column named "iso3c" with iso3c country codes, and one column named #' "region" with region codes to which the countries belong. Any regions in the gdp @@ -76,8 +75,8 @@ #' \item "regional_average": missing conversion factors in the source object are replaced with #' the regional average of the region to which the country belongs. This requires a region-mapping to #' be passed to the function, see the with_regions argument. -#' \item "with_USA": missing conversion factors in the source object are replaced with -#' the conversion factors of the USA. +#' \item "with_USA": missing conversion factors in the source object are extended using US growth rates, or +#' if missing entirely, replaced with the conversion factors of the USA. #' } #' Can also be a vector with "linear" as first element, e.g. c("linear", 0) or c("linear", "no_conversion"), #' in which case, the operations are done in sequence. 
@@ -104,7 +103,7 @@ convertGDP <- function(gdp, unit_in, unit_out, source = "wb_wdi", - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, with_regions = NULL, replace_NAs = NULL, verbose = getOption("GDPuc.verbose", default = FALSE), @@ -125,7 +124,7 @@ convertGDP <- function(gdp, } # Transform user input for internal use, while performing some last consistency checks - internal <- transform_user_input(gdp, unit_in, unit_out, source, use_USA_deflator_for_all, with_regions, replace_NAs) + internal <- transform_user_input(gdp, unit_in, unit_out, source, use_USA_cf_for_all, with_regions, replace_NAs) # Avoid NOTE in package check for CRAN . <- NULL diff --git a/R/transform_user_input.R b/R/transform_user_input.R index 5c0d222..e4556f1 100644 --- a/R/transform_user_input.R +++ b/R/transform_user_input.R @@ -1,5 +1,5 @@ # Transform user input for package internal use -transform_user_input <- function(gdp, unit_in, unit_out, source, use_USA_deflator_for_all, with_regions, replace_NAs) { +transform_user_input <- function(gdp, unit_in, unit_out, source, use_USA_cf_for_all, with_regions, replace_NAs) { . <- NULL # Convert to tibble, if necessary @@ -75,13 +75,11 @@ transform_user_input <- function(gdp, unit_in, unit_out, source, use_USA_deflato abort("Incompatible 'gdp' and 'source'. 
No information in source {crayon::bold(source_name)} for years in 'gdp'.") } - # Use different source if required by the replace_NAs argument - if (use_USA_deflator_for_all || + # Use different source if required by the use_USA_cf_for_all and replace_NAs arguments + if (use_USA_cf_for_all) source <- adapt_source_USA(gdp, source) + if (!use_USA_cf_for_all && (!is.null(replace_NAs) && !any(sapply(c(NA, 0, "no_conversion"), setequal, replace_NAs))) ) { - if (use_USA_deflator_for_all || replace_NAs[1] == "with_USA") source <- adapt_source_USA(gdp, source, replace_NAs) - if (!is.null(replace_NAs) && !any(sapply(c(NA, 0, "no_conversion", "with_USA"), setequal, replace_NAs))){ - source <- adapt_source(gdp, source, with_regions, replace_NAs, require_year_column) - } + source <- adapt_source(gdp, source, with_regions, replace_NAs, require_year_column) source_name <- paste0(source_name, "_adapted") } diff --git a/man/convertGDP.Rd b/man/convertGDP.Rd index 6f23a22..c7ff676 100644 --- a/man/convertGDP.Rd +++ b/man/convertGDP.Rd @@ -11,7 +11,7 @@ convertGDP( unit_in, unit_out, source = "wb_wdi", - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, with_regions = NULL, replace_NAs = NULL, verbose = getOption("GDPuc.verbose", default = FALSE), @@ -62,9 +62,8 @@ a data-frame that exists in the calling environment. Use \href{https://pik-piam.github.io/GDPuc/reference/print_source_info.html}{print_source_info()} to learn about the available sources.} -\item{use_USA_deflator_for_all}{TRUE or FALSE (default). If TRUE, then only the USA deflator is used to adjust for -inflation, regardless of the country codes provided. This is a very specific deviation from the correct conversion -process, which nevertheless is often used in the integrated assessment community. Use carefully!} +\item{use_USA_cf_for_all}{TRUE or FALSE (default). If TRUE, then the USA conversion factors are used for all +countries.} \item{with_regions}{NULL or a data-frame. 
The data-frame should be "country to region mapping": one column named "iso3c" with iso3c country codes, and one column named @@ -83,8 +82,8 @@ For the extrapolation, the closest 5 data points are used. \item "regional_average": missing conversion factors in the source object are replaced with the regional average of the region to which the country belongs. This requires a region-mapping to be passed to the function, see the with_regions argument. -\item "with_USA": missing conversion factors in the source object are replaced with -the conversion factors of the USA. +\item "with_USA": missing conversion factors in the source object are extended using US growth rates, or +if missing entirely, replaced with the conversion factors of the USA. } Can also be a vector with "linear" as first element, e.g. c("linear", 0) or c("linear", "no_conversion"), in which case, the operations are done in sequence.} diff --git a/tests/testthat/test-01_check_user_input.R b/tests/testthat/test-01_check_user_input.R index dbf8dc2..3d9a159 100644 --- a/tests/testthat/test-01_check_user_input.R +++ b/tests/testthat/test-01_check_user_input.R @@ -50,7 +50,7 @@ test_that("with_regions argument", { unit_in, unit_out, source = s, - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, with_regions = "blabla")) with_regions <- tibble::tibble("blabla" = "FRA", "region" = "USA") @@ -58,7 +58,7 @@ test_that("with_regions argument", { unit_in, unit_out, source = s, - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, with_regions = with_regions)) with_regions <- tibble::tibble("iso3c" = "FRA", "region" = "USA") @@ -66,7 +66,7 @@ test_that("with_regions argument", { unit_in, "current LCU", source = s, - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, with_regions = with_regions)) my_bad_source <- wb_wdi %>% dplyr::select( @@ -81,7 +81,7 @@ test_that("with_regions argument", { unit_in, unit_out, source = s, - use_USA_deflator_for_all = FALSE, + 
use_USA_cf_for_all = FALSE, with_regions = with_regions)) }) @@ -96,7 +96,7 @@ test_that("replace_NAs argument", { unit_in, unit_out, source = s, - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, replace_NAs = 2, with_regions = NULL), glue::glue("Invalid 'replace_NAs' argument. Has to be either NULL, NA, 0, 1, no_conversion, linear, \\ @@ -105,7 +105,7 @@ test_that("replace_NAs argument", { unit_in, unit_out, source = s, - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, replace_NAs = c(0, 1), with_regions = NULL), glue::glue("Invalid 'replace_NAs' argument. The only accepted combinations of arguments start with \\ @@ -115,7 +115,7 @@ test_that("replace_NAs argument", { unit_in, unit_out, source = s, - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, replace_NAs = "linear_regional_average", with_regions = NULL) ) @@ -125,7 +125,7 @@ test_that("replace_NAs argument", { unit_in, unit_out, source = s, - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, replace_NAs = "regional_average", with_regions = NULL), glue::glue("Using 'regional_average' requires a region mapping. 
The 'with_regions' argument can't be NULL.") @@ -143,7 +143,7 @@ test_that("boolean arguments", { unit_in, unit_out, source = s, - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, with_regions = NULL, replace_NAs = NULL, verbose = "blabla")) @@ -153,7 +153,7 @@ test_that("boolean arguments", { unit_in, unit_out, source = s, - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, with_regions = NULL, replace_NAs = NULL, verbose = TRUE, @@ -164,27 +164,7 @@ test_that("boolean arguments", { unit_in, unit_out, source = s, - use_USA_deflator_for_all = "blabla", - with_regions = NULL, - replace_NAs = NULL, - verbose = TRUE, - return_cfs = TRUE)) -}) - - -test_that("boolean arguments", { - - gdp <- tibble::tibble("iso3c" = "EUR", "year" = 2010, "value" = 100) - unit_in <- "current Int$PPP" - unit_out <- "constant 2010 US$MER" - s <- wb_wdi - - expect_error(check_user_input( - gdp, - unit_in, - unit_out, - source = s, - use_USA_deflator_for_all = TRUE, + use_USA_cf_for_all = "blabla", with_regions = NULL, replace_NAs = NULL, verbose = TRUE, diff --git a/tests/testthat/test-02_transform_user_input.R b/tests/testthat/test-02_transform_user_input.R index d8d8ae1..bb2d814 100644 --- a/tests/testthat/test-02_transform_user_input.R +++ b/tests/testthat/test-02_transform_user_input.R @@ -5,14 +5,14 @@ test_that("source and unit year compatibility", { unit_in = "constant 2010 LCU", unit_out = "constant 2100 LCU", source = "wb_wdi", - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, with_regions = NULL, replace_NAs = NULL)) expect_error(transform_user_input(gdp, unit_in = "constant 2100 LCU", unit_out = "constant 2010 LCU", source = "wb_wdi", - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, with_regions = NULL, replace_NAs = NULL)) @@ -26,7 +26,7 @@ test_that("unit and year availability compatibility", { unit_in = "current LCU", unit_out = "constant 2010 LCU", source = "wb_wdi", - use_USA_deflator_for_all = FALSE, + 
use_USA_cf_for_all = FALSE, with_regions = NULL, replace_NAs = NULL), glue::glue("Invalid 'gdp' argument. 'gdp' does not have a 'year' column, required when \\ @@ -35,7 +35,7 @@ test_that("unit and year availability compatibility", { unit_in = "current LCU", unit_out = "constant 2010 LCU", source = "wb_wdi", - use_USA_deflator_for_all = FALSE, + use_USA_cf_for_all = FALSE, with_regions = NULL, replace_NAs = NULL), glue::glue("Invalid 'gdp' argument. 'gdp' does not have a 'year' column, required when \\ diff --git a/tests/testthat/test-05_convertGDP.R b/tests/testthat/test-05_convertGDP.R index 73633a5..2e3a351 100644 --- a/tests/testthat/test-05_convertGDP.R +++ b/tests/testthat/test-05_convertGDP.R @@ -172,7 +172,7 @@ test_that("convertGDP using US conversion factors", { gdp2_conv <- convertGDP(gdp_2, unit_in = "constant 2015 LCU", unit_out = "constant 2017 LCU", - use_USA_deflator_for_all = TRUE) + use_USA_cf_for_all = TRUE) gdp3_conv <- convertGDP(gdp_3, unit_in = "constant 2015 LCU", unit_out = "constant 2017 LCU", diff --git a/tests/testthat/test-06_replace_NAs.R b/tests/testthat/test-06_replace_NAs.R index 80c1d0b..86b53be 100644 --- a/tests/testthat/test-06_replace_NAs.R +++ b/tests/testthat/test-06_replace_NAs.R @@ -81,7 +81,6 @@ test_that("convertGDP replace_NAs = NA", { expect_equal(gdp_1, gdp_2) }) - test_that("convertGDP replace_NAs = 'no_conversion'", { # wb_wi does not have info for AFG in 2022 gdp <- tidyr::expand_grid("iso3c" = c("AFG", "DEU", "USA"), @@ -118,12 +117,48 @@ test_that("convertGDP replace_NAs = linear", { expect_true(!any(is.na(gdp_conv$value))) }) +test_that("convertGDP replace_NAs = with_USA", { + # wb_wdi does not have info for AIA at all, nor for AFG in 2022 + gdp <- tidyr::expand_grid("iso3c" = c("AIA", "AFG", "DEU", "USA"), + "year" = c(2010, 2015, 2025), + "SSP" = c("SSP1", "SSP2"), "value" = 100) + + expect_warning(convertGDP(gdp, + unit_in = "constant 2005 Int$PPP", + unit_out = "constant 2022 US$MER")) + + gdp_conv <- 
convertGDP(gdp, + unit_in = "constant 2005 Int$PPP", + unit_out = "constant 2022 US$MER", + replace_NAs = "with_USA", + return_cfs = TRUE) + + expect_true(!any(is.na(gdp_conv$result$value))) + expect_true(!any(is.na(gdp_conv$cfs))) + expect_identical(dplyr::filter(gdp_conv$cfs, .data$iso3c == "AIA") %>% dplyr::select(-"iso3c"), + dplyr::filter(gdp_conv$cfs, .data$iso3c == "USA") %>% dplyr::select(-"iso3c")) + + gdp_conv2 <- convertGDP(gdp, + unit_in = "constant 2005 Int$PPP", + unit_out = "constant 2022 US$MER", + replace_NAs = c("linear"), + return_cfs = TRUE) + + gdp_conv3 <- convertGDP(gdp, + unit_in = "constant 2005 Int$PPP", + unit_out = "constant 2022 US$MER", + replace_NAs = c("linear", "with_USA"), + return_cfs = TRUE) + + expect_true(any(is.na(gdp_conv2$result$value))) + expect_true(!any(is.na(gdp_conv3$result$value))) +}) + test_that("lin_int_ext", { x <- c(NA,NA,NA,NA,NA,NA,2,3,4,5,NA,7,8,NA,NA,NA,NA,NA,NA) expect_equal(lin_int_ext(x), -4:14) }) - test_that("convertGDP replace_NAs = c('linear', 'no_conversion')", { # wb_wi does not have info for ABW in 2019 gdp <- tidyr::expand_grid("iso3c" = c("ABW", "DEU", "USA", "JJJ"), From fdf660097e71df7cef71d80d7f1344fcd1373458 Mon Sep 17 00:00:00 2001 From: Johannes Koch Date: Thu, 10 Oct 2024 17:47:10 +0200 Subject: [PATCH 2/2] Bump version --- .buildlibrary | 2 +- CITATION.cff | 4 ++-- DESCRIPTION | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.buildlibrary b/.buildlibrary index 1d20f87..fe26a30 100644 --- a/.buildlibrary +++ b/.buildlibrary @@ -1,4 +1,4 @@ -ValidationKey: '2198460' +ValidationKey: '2400720' AutocreateReadme: no AcceptedWarnings: - 'Warning: package ''.*'' was built under R version' diff --git a/CITATION.cff b/CITATION.cff index 9d3b989..05fa6de 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -2,8 +2,8 @@ cff-version: 1.2.0 message: If you use this software, please cite it using the metadata from this file. 
type: software title: 'GDPuc: Easily Convert GDP Data' -version: 1.1.0 -date-released: '2024-09-20' +version: 1.2.0 +date-released: '2024-10-10' abstract: Convert GDP time series data from one unit to another. All common GDP units are included, i.e. current and constant local currency units, US$ via market exchange rates and international dollars via purchasing power parities. diff --git a/DESCRIPTION b/DESCRIPTION index 904d26a..4c1b67f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: GDPuc Title: Easily Convert GDP Data -Version: 1.1.0 -Date: 2024-09-20 +Version: 1.2.0 +Date: 2024-10-10 Authors@R: person("Johannes", "Koch", , "jokoch@pik-potsdam.de", role = c("aut", "cre")) Description: Convert GDP time series data from one unit to