Skip to content

Commit

Permalink
Merge pull request #634 from pepfar-datim/Release-6.2.0
Browse files Browse the repository at this point in the history
Release 6.2.0
  • Loading branch information
JordanBalesBAO authored Mar 8, 2023
2 parents 6f25347 + 755bfe5 commit 19e77b5
Show file tree
Hide file tree
Showing 63 changed files with 1,404 additions and 235 deletions.
6 changes: 3 additions & 3 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ description: Datapackr Test Suite
jobs:
build:
docker:
- image: rocker/verse:4.1.1
- image: rocker/verse:4.2.1
steps:
- checkout
- restore_cache:
Expand All @@ -12,7 +12,7 @@ jobs:
name: Install curl system deps
command: |
sudo apt-get update
sudo apt-get -qq -y install libcurl4-openssl-dev libxml2-dev libsodium-dev libgit2-dev libreoffice-calc-nogui
sudo apt-get -qq -y install libcurl4-openssl-dev libxml2-dev libsodium-dev libgit2-dev libreoffice-calc-nogui libicu-dev
- run:
name: Install package dependencies
command: R -e "install.packages(c('renv','rlang'))"
Expand All @@ -30,7 +30,7 @@ jobs:
name: Test package
no_output_timeout: 30m
command: |
R -e "devtools::check(error_on='error')"
R -e "devtools::test(stop_on_failure = TRUE, error_on='error')"
- run:
name: Lint packages
command: |
Expand Down
7 changes: 4 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: datapackr
Type: Package
Title: A Package that Packs and Unpacks all Data Packs and Target Setting Tools
Version: 6.1.1
Date: 2023-02-14
Version: 6.2.0
Date: 2023-03-08
Authors@R: c(
person("Scott", "Jackson", email = "sjackson@baosystems.com",
role = c("aut", "cre")),
Expand Down Expand Up @@ -55,7 +55,8 @@ Suggests:
gdtools,
flextable,
officer,
lintr (>= 3.0.0)
lintr (>= 3.0.0),
stringi
Remotes:
pepfar-datim/datimutils,
pepfar-datim/datim-validation
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ export(unPackSNUxIM)
export(unPackSchema)
export(unPackSheets)
export(unPackTool)
export(unpackYear2Sheet)
export(updateExistingPrioritization)
export(validationSummary)
export(writeHomeTab)
Expand Down
25 changes: 25 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,28 @@
# datapackr 6.2.0

## Breaking changes
* Removed support for COP21

## New features
* Added initial support for parsing COP23 Datapacks
* Added functionality to export COP23 data to export formats
* Added function writeSpectrumData which can be used to populate the DataPack Spectrum tab during testing.
* Added testing helper functions to deal with peculiarities of the CI testing environment.
* Added various tests and testing files for COP23 tools.
* Added initial parsing methods for COP23 Year2 tabs


## Minor improvements and fixes
* Upgraded CI testing environment to R 4.2.1
* Fixed bug in unPackSchema related to detecting invalid column and value types.
* Fixed create schema unit test.
* Removed superfluous warning related to missing PSNUs.
* Fixed issue in unPackingChecks related to the lack of the SNU1 column in COP23 tools.
* Fixed a testing issue related to choosing the correct template to use for testing.
* Updated several unit tests to favor COP23 over COP21.
* Disabled two unit tests for COP21.
* Altered test method from devtools::check to devtools::test, which skips CRAN package checks.

# datapackr 6.1.1

## Breaking changes
Expand Down
4 changes: 3 additions & 1 deletion R/adorn_import_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,9 @@ adorn_import_file <- function(psnu_import_file,
unique()

if (length(unknown_psnu) > 0) {
psnus <- getPSNUInfo(unknown_psnu, d2_session = d2_session) %>%
psnus <- getPSNUInfo(snu_uids = unknown_psnu,
cop_year = cop_year,
d2_session = d2_session) %>%
dplyr::select(-name)

psnu_import_file %<>%
Expand Down
93 changes: 86 additions & 7 deletions R/checkAnalytics.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ HTS_POS_Modalities <- function(cop_year) {
# a reference to the cop year. Since the modalities
# differ from year to year though, this list needs
# to be determined based on the year we are dealing with.
# TODO:

datapackr::getMapDataPack_DATIM_DEs_COCs(cop_year) %>%
dplyr::select(indicator_code, hts_modality, resultstatus) %>%
dplyr::filter(!is.na(hts_modality)) %>%
tidyr::drop_na() %>%
dplyr::filter(resultstatus %in% c("Newly Tested Positives", "Positive")) %>%
dplyr::distinct() %>%
dplyr::pull(indicator_code)
Expand All @@ -29,8 +30,19 @@ HTS_POS_Modalities <- function(cop_year) {
#' @return a
#'
analyze_eid_2mo <- function(data) {

a <- NULL

required_names <- c("PMTCT_EID.N.12.T",
"PMTCT_EID.N.2.T")

if (any(!(required_names %in% names(data)))) {
a$test_results <- data.frame(msg = "Missing data.")
attr(a$test_results, "test_name") <- "PMTCT_EID coverage by 2 months issues"
a$msg <- "Could not analyze PMTCT EID due to missing data."
return(a)
}

analysis <- data %>%
dplyr::mutate(
PMTCT_EID.T = PMTCT_EID.N.12.T + PMTCT_EID.N.2.T,
Expand Down Expand Up @@ -100,7 +112,18 @@ analyze_eid_2mo <- function(data) {
#' @return a
#'
analyze_vmmc_indeterminate <- function(data) {

a <- NULL
required_names <- c("VMMC_CIRC.Pos.T",
"VMMC_CIRC.Neg.T",
"VMMC_CIRC.Unk.T")

if (any(!(required_names %in% names(data)))) {
a$test_results <- data.frame(msg = "Missing data.")
attr(a$test_results, "test_name") <- "VMMC Indeterminate rate issues"
a$msg <- "Could not analyze VMMC_CIRC Indeterminate Rate due to missing data."
return(a)
}

issues <- data %>%
dplyr::mutate(
Expand Down Expand Up @@ -185,6 +208,17 @@ analyze_vmmc_indeterminate <- function(data) {
analyze_pmtctknownpos <- function(data) {
a <- NULL

required_names <- c("PMTCT_STAT.N.New.Pos.T",
"PMTCT_STAT.N.KnownPos.T",
"PMTCT_STAT.N.New.Neg.T")

if (any(!(required_names %in% names(data)))) {
a$test_results <- data.frame(msg = "Missing data.")
attr(a$test_results, "test_name") <- "PMTCT Known Pos issues"
a$msg <- "Could not analyze PMTCT Known Pos issues due to missing data."
return(a)
}

issues <- data %>%
dplyr::filter(is.na(key_population)) %>%
dplyr::mutate(
Expand Down Expand Up @@ -243,6 +277,17 @@ analyze_pmtctknownpos <- function(data) {
analyze_tbknownpos <- function(data) {
a <- NULL

required_names <- c("TB_STAT.N.New.Pos.T",
"TB_STAT.N.KnownPos.T",
"TB_STAT.N.New.Neg.T")

if (any(!(required_names %in% names(data)))) {
a$test_results <- data.frame(msg = "Missing data.")
attr(a$test_results, "test_name") <- "TB Known Pos issues"
a$msg <- "Could not analyze TB Known Pos issues due to missing data."
return(a)
}

issues <- data %>%
dplyr::mutate(
TB_STAT.N.Total =
Expand Down Expand Up @@ -297,6 +342,18 @@ analyze_tbknownpos <- function(data) {
analyze_retention <- function(data) {
a <- NULL


required_names <- c("TX_CURR.T",
"TX_CURR.T_1",
"TX_NEW.T")

if (any(!(required_names %in% names(data)))) {
a$test_results <- data.frame(msg = "Missing data.")
attr(a$test_results, "test_name") <- "Retention rate issues"
a$msg <- "Could not analyze Retention rate issues due to missing data."
return(a)
}

analysis <- data %>%
#For COP22, we need to collapse the finer 50+ age bands back to 50+
# since TX_NEW is not allocated at these finer age bands
Expand Down Expand Up @@ -377,8 +434,20 @@ analyze_retention <- function(data) {
analyze_linkage <- function(data) {
a <- NULL


hts_modalities <- HTS_POS_Modalities(data$cop_year[1])

required_names <- c("TX_NEW.T", "TX_NEW.KP.T")

if (any(!(required_names %in% names(data)))) {
a$test_results <- data.frame(msg = "Missing data.")
attr(a$test_results, "test_name") <- "Linkage rate issues"
a$msg <- "Could not analyze Linkage rate issues due to missing data."
return(a)
}



analysis <- data %>%
dplyr::mutate(age = dplyr::case_when(age %in% c("50-54", "55-59", "60-64", "65+") ~ "50+",
TRUE ~ age)) %>%
Expand Down Expand Up @@ -473,16 +542,27 @@ analyze_linkage <- function(data) {
#' @return a
#'
analyze_indexpos_ratio <- function(data) {

a <- NULL
required_names <- c("HTS_INDEX_COM.New.Pos.T",
"HTS_INDEX_FAC.New.Pos.T",
"PLHIV.T_1",
"TX_CURR_SUBNAT.T_1")

if (any(!(required_names %in% names(data)))) {
a$test_results <- data.frame(msg = "Missing data.")
attr(a$test_results, "test_name") <- "HTS_INDEX_POS Rate Issues"
a$msg <- "Could not analyze HTS_INDEX_POS Rate Issues due to missing data."
return(a)
}

hts_modalities <- HTS_POS_Modalities(data$cop_year[1])

analysis <- data %>%
dplyr::filter(is.na(key_population)) %>%
dplyr::select(-age, -sex, -key_population) %>%
dplyr::group_by(psnu, psnu_uid) %>%
dplyr::summarise(dplyr::across(dplyr::everything(), sum)) %>%
dplyr::ungroup() %>%
dplyr::summarise(dplyr::across(dplyr::everything(), sum), .groups = "drop") %>%
dplyr::mutate(
HTS_TST_POS.T = rowSums(dplyr::select(., tidyselect::any_of(hts_modalities))),
HTS_INDEX.total =
Expand Down Expand Up @@ -541,7 +621,7 @@ analyze_indexpos_ratio <- function(data) {
"\n")
}

return(a)
a

}

Expand Down Expand Up @@ -629,14 +709,13 @@ checkAnalytics <- function(d,
addcols((d$info$schema %>%
dplyr::filter(col_type %in% c("target", "past"),
sheet_name != "PSNUxIM") %>%
dplyr::pull(indicator_code)),
dplyr::pull(indicator_code) %>%
unique(.)),
type = "numeric") %>%
dplyr::mutate(dplyr::across(c(-psnu, -psnu_uid, -age, -sex, -key_population),
~tidyr::replace_na(.x, 0))) %>%
dplyr::mutate(cop_year = d$info$cop_year)



#Apply the list of analytics checks functions
funs <- list(
retention = analyze_retention,
Expand Down
4 changes: 2 additions & 2 deletions R/getCOPDataFromDATIM.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ getCOPDataFromDATIM <- function(country_uids,
inherits = TRUE)) {


if (!cop_year %in% c(2020:2022)) {
if (!cop_year %in% c(2020:2023)) {

stop("The COP year provided is not supported by the internal function getCOPDataFromDATIM")
### NOTE for COP23 some special handling of SUBNAT data for FY23 like the code below may be
Expand Down Expand Up @@ -61,7 +61,7 @@ getCOPDataFromDATIM <- function(country_uids,
return(NULL)
})

if (is.null(datim_data)) {
if (is.null(datim_data) || NROW(datim_data) == 0) {
return(NULL)
} else {
datim_data %>%
Expand Down
1 change: 1 addition & 0 deletions R/getMemoIndicators.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ getMemoIndicators <- function(cop_year,
ind_group <- switch(as.character(cop_year),
"2021" = "TslxbFe3VUZ",
"2022" = "zRApVEi7qjo",
"2023" = "ZTGhB3qIPsi",
NULL)
#Bail out early if don't have a group
if (is.null(ind_group)) {
Expand Down
4 changes: 2 additions & 2 deletions R/memoStructure.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
memoStructure <- function(d, d2_session = dynGet("d2_default_session",
inherits = TRUE)) {

if (!(d$info$cop_year %in% c("2021", "2022"))) {
if (!(d$info$cop_year %in% supportedCOPYears())) {
warning("COP Memo structure unknown for given COP year")
return(d)
}
Expand Down Expand Up @@ -77,7 +77,7 @@ memoStructure <- function(d, d2_session = dynGet("d2_default_session",
"AGYW_PREV", "Total", NA)
}

if (d$info$cop_year == "2022") {
if (d$info$cop_year %in% c("2022", "2023")) {
row_order <- tibble::tribble(
~ind, ~options, ~partner_chunk,
"TX_NEW", "<15", 1,
Expand Down
7 changes: 7 additions & 0 deletions R/packDataPack.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#'
packDataPack <- function(d,
model_data = NULL,
spectrum_data = NULL,
d2_session = dynGet("d2_default_session",
inherits = TRUE)) {

Expand Down Expand Up @@ -53,6 +54,12 @@ packDataPack <- function(d,
sheets = NULL,
cop_year = d$info$cop_year)

if (!is.null(spectrum_data)) {
interactive_print("Writing Spectrum data...")
d$tool$wb <- writeSpectrumData(wb = d$tool$wb,
spectrum_data = spectrum_data)
}

# Hide unneeded sheets ####
sheets_to_hide <- which(stringr::str_detect(names(d$tool$wb), "PSNUxIM"))
openxlsx::sheetVisibility(d$tool$wb)[sheets_to_hide] <- "hidden"
Expand Down
2 changes: 2 additions & 0 deletions R/packTool.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ packTool <- function(model_data = NULL,
output_folder,
results_archive = TRUE,
expand_formulas = FALSE,
spectrum_data = NULL,
d2_session = dynGet("d2_default_session",
inherits = TRUE)) {

Expand Down Expand Up @@ -68,6 +69,7 @@ packTool <- function(model_data = NULL,
if (d$info$tool == "Data Pack") {
d <- packDataPack(d,
model_data = model_data,
spectrum_data = spectrum_data,
d2_session = d2_session)
} else if (d$info$tool == "OPU Data Pack") {

Expand Down
4 changes: 1 addition & 3 deletions R/unPackCountryUIDs.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,9 @@ unPackCountryUIDs <- function(submission_path,
~ .x %in% country_uids))) {
warning("Deduced or provided Country UIDs do no match Country UIDs observed in submission.")
}
} else {
warning("No PSNUs were detected.")
}

return(country_uids)
country_uids

}

Expand Down
6 changes: 4 additions & 2 deletions R/unPackDataPack.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@ unPackDataPack <- function(d,
d <- unPackSheets(d)

# Unpack the SNU x IM sheet ####
interactive_print("Unpacking the PSNUxIM tab...")
d <- unPackSNUxIM(d)
if (d$info$cop_year < 2023) {
interactive_print("Unpacking the PSNUxIM tab...")
d <- unPackSNUxIM(d)
}

# Prepare undistributed import file for use in analytics if necessary ####
# TODO: Allow packForDATIM to auto-detect what is present and what should be packed.
Expand Down
Loading

0 comments on commit 19e77b5

Please sign in to comment.